diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 518148887f939..e3e820dc422ef 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -261,7 +261,7 @@ jobs: echo LD_LIBRARY_PATH=$PWD/toolchain/lib/:$LD_LIBRARY_PATH >> $GITHUB_ENV - run: which clang++ sycl-ls - run: sycl-ls --verbose - - run: SYCL_PI_TRACE=-1 sycl-ls + - run: SYCL_UR_TRACE=1 sycl-ls - run: | if [ -f /usr/local/lib/igc/IGCTAG.txt ]; then cat /usr/local/lib/igc/IGCTAG.txt diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index ef13946a5b802..1d5859cca5016 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -22,10 +22,27 @@ if (NOT DEFINED SYCL_ENABLE_PLUGINS) set(SYCL_ENABLE_PLUGINS "opencl;level_zero") endif() +# Option to enable online kernel fusion via a JIT compiler +option(SYCL_ENABLE_KERNEL_FUSION "Enable kernel fusion via JIT compiler" ON) +if(SYCL_ENABLE_KERNEL_FUSION AND WIN32) + message(WARNING "Kernel fusion not yet supported on Windows") + set(SYCL_ENABLE_KERNEL_FUSION OFF CACHE + BOOL "Kernel fusion not yet supported on Windows" FORCE) +endif() + +if (NOT XPTI_INCLUDES) + set(XPTI_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../xpti/include) +endif() + +if (NOT XPTI_PROXY_SRC) + set(XPTI_PROXY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../xpti/src/xpti_proxy.cpp) +endif() + list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") include(AddSYCLExecutable) include(AddSYCL) include(SYCLUtils) +include(FetchUnifiedRuntime) # The change in SYCL_MAJOR_VERSION must be accompanied with the same update in # llvm/clang/lib/Driver/CMakeLists.txt. @@ -144,14 +161,6 @@ install(DIRECTORY ${OpenCL_INCLUDE_DIR}/CL DESTINATION ${SYCL_INCLUDE_DIR}/sycl COMPONENT OpenCL-Headers) -# Option to enable online kernel fusion via a JIT compiler -option(SYCL_ENABLE_KERNEL_FUSION "Enable kernel fusion via JIT compiler" ON) -if(SYCL_ENABLE_KERNEL_FUSION AND WIN32) - message(WARNING "Kernel fusion not yet supported on Windows") - set(SYCL_ENABLE_KERNEL_FUSION OFF CACHE - BOOL "Kernel fusion not yet supported on Windows" FORCE) -endif() - # Option for enabling building the SYCL major release preview library. option(SYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB "Enable build of the SYCL major release preview library" ON) @@ -231,6 +240,8 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E copy_directory ${sycl_inc_dir}/std ${SYCL_INCLUDE_BUILD_DIR}/std COMMAND ${CMAKE_COMMAND} -E copy_directory ${sycl_inc_dir}/syclcompat ${SYCL_INCLUDE_BUILD_DIR}/syclcompat COMMAND ${CMAKE_COMMAND} -E copy ${sycl_inc_dir}/syclcompat.hpp ${SYCL_INCLUDE_BUILD_DIR}/syclcompat.hpp + COMMAND ${CMAKE_COMMAND} -E copy ${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_api.h ${SYCL_INCLUDE_BUILD_DIR}/sycl + COMMAND ${CMAKE_COMMAND} -E copy ${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_print.hpp ${SYCL_INCLUDE_BUILD_DIR}/sycl COMMENT "Copying SYCL headers ...") # Copy SYCL headers from source to install directory @@ -240,6 +251,10 @@ install(DIRECTORY "${sycl_inc_dir}/std" DESTINATION ${SYCL_INCLUDE_DIR} COMPONEN install(DIRECTORY ${BOOST_MP11_DESTINATION_DIR} DESTINATION ${SYCL_INCLUDE_DIR}/sycl/detail COMPONENT boost_mp11-headers) install(DIRECTORY "${sycl_inc_dir}/syclcompat" DESTINATION ${SYCL_INCLUDE_DIR} COMPONENT sycl-headers) install(FILES "${sycl_inc_dir}/syclcompat.hpp" DESTINATION ${SYCL_INCLUDE_DIR} COMPONENT sycl-headers) +install(FILES "${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_api.h" DESTINATION ${SYCL_INCLUDE_DIR}/sycl + COMPONENT sycl-headers) +install(FILES "${UNIFIED_RUNTIME_INCLUDE_DIR}/ur_print.hpp" DESTINATION ${SYCL_INCLUDE_DIR}/sycl + COMPONENT sycl-headers) if (WIN32) set(SYCL_RT_LIBS sycl${SYCL_MAJOR_VERSION}) @@ -354,7 +369,7 @@ add_custom_target( sycl-toolchain ALL ) if (WIN32) - add_dependencies(sycl-toolchain pi_win_proxy_loader) + add_dependencies(sycl-toolchain ur_win_proxy_loader) endif() # Enable new IN_LIST operator. @@ -384,13 +399,10 @@ if(NOT "${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "") string(TOUPPER ${SYCL_BUILD_PI_HIP_PLATFORM} SYCL_BUILD_PI_HIP_PLATFORM) endif() -# Plugin Library -add_subdirectory( plugins ) - add_subdirectory(tools) if (WIN32) - add_subdirectory(pi_win_proxy_loader) + add_subdirectory(ur_win_proxy_loader) endif() if(SYCL_INCLUDE_TESTS) @@ -441,7 +453,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS ) if (WIN32) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS pi_win_proxy_loader) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS ur_win_proxy_loader) endif() if (TARGET sycl-prof) @@ -477,8 +489,8 @@ if("cuda" IN_LIST SYCL_ENABLE_PLUGINS) "CUDA support requires adding \"libclc\" to the CMake argument \"LLVM_ENABLE_PROJECTS\"") endif() - add_dependencies(sycl-toolchain pi_cuda) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS pi_cuda) + add_dependencies(sycl-toolchain ur_adapter_cuda) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS ur_adapter_cuda) endif() if("hip" IN_LIST SYCL_ENABLE_PLUGINS) @@ -494,8 +506,8 @@ if("hip" IN_LIST SYCL_ENABLE_PLUGINS) "HIP support requires adding \"lld\" to the CMake argument \"LLVM_ENABLE_PROJECTS\"") endif() - add_dependencies(sycl-toolchain pi_hip) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS pi_hip) + add_dependencies(sycl-toolchain ur_adapter_hip) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS ur_adapter_hip) endif() # Use it as fake dependency in order to force another command(s) to execute. diff --git a/sycl/cmake/modules/AddSYCL.cmake b/sycl/cmake/modules/AddSYCL.cmake index 61addd9d7dc10..16338b432f80e 100644 --- a/sycl/cmake/modules/AddSYCL.cmake +++ b/sycl/cmake/modules/AddSYCL.cmake @@ -34,6 +34,8 @@ function(add_sycl_library LIB_NAME TYPE) add_common_options(${LIB_NAME}) endfunction() +# current ur adapter dependency managing is a bit hacky, we should try to copy +# this as closely as possible instead function(add_sycl_plugin PLUGIN_NAME) cmake_parse_arguments("ARG" "" diff --git a/sycl/cmake/modules/AddSYCLUnitTest.cmake b/sycl/cmake/modules/AddSYCLUnitTest.cmake index 9571d43cc07c9..7843e4b54da1a 100644 --- a/sycl/cmake/modules/AddSYCLUnitTest.cmake +++ b/sycl/cmake/modules/AddSYCLUnitTest.cmake @@ -72,9 +72,12 @@ macro(add_sycl_unittest test_dirname link_variant) PRIVATE LLVMTestingSupport OpenCL-Headers + unified-runtime::mock ${SYCL_LINK_LIBS} ) + add_dependencies(${test_dirname} ur_adapter_mock) + if(SYCL_ENABLE_KERNEL_FUSION) target_link_libraries(${test_dirname} PRIVATE sycl-fusion) endif(SYCL_ENABLE_KERNEL_FUSION) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/cmake/modules/FetchUnifiedRuntime.cmake similarity index 77% rename from sycl/plugins/unified_runtime/CMakeLists.txt rename to sycl/cmake/modules/FetchUnifiedRuntime.cmake index de138107ce41a..e69d6512d5789 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -1,5 +1,13 @@ -# PI Unified Runtime plugin library. -# +# Either fetches UR from the appropriate repo or sets up variables based on user +# preference. + +# TODO: taken from sycl/plugins/CMakeLists.txt - maybe we should handle this +# within UR (although it is an obscure warning that the build system here +# seems to specifically enable) +if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang|IntelLLVM" ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-covered-switch-default") +endif() + # Options to override the default behaviour of the FetchContent to include UR # source code. @@ -15,8 +23,11 @@ option(SYCL_PI_UR_USE_FETCH_CONTENT set(SYCL_PI_UR_SOURCE_DIR "" CACHE PATH "Path to root of Unified Runtime repository") -# Override default to enable building tests from unified-runtime -set(UR_BUILD_TESTS OFF CACHE BOOL "Build unit tests.") +# Here we override the defaults to disable building tests from unified-runtime +set(UR_BUILD_EXAMPLES OFF CACHE BOOL "Build example applications." FORCE) +set(UR_BUILD_TESTS OFF CACHE BOOL "Build unit tests." FORCE) +set(UR_BUILD_XPTI_LIBS OFF) +set(UR_ENABLE_TRACING ON) if("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) set(UR_BUILD_ADAPTER_L0 ON) @@ -26,6 +37,9 @@ if("cuda" IN_LIST SYCL_ENABLE_PLUGINS) endif() if("hip" IN_LIST SYCL_ENABLE_PLUGINS) set(UR_BUILD_ADAPTER_HIP ON) + if (SYCL_ENABLE_KERNEL_FUSION) + set(UR_ENABLE_COMGR ON) + endif() endif() if("opencl" IN_LIST SYCL_ENABLE_PLUGINS) set(UR_BUILD_ADAPTER_OPENCL ON) @@ -99,13 +113,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 2baf095188b235bb2b0a0140f0187d2041aef4b0 - # Merge: 3d8fe8d2 58f85278 + # commit b7b0c8b3d17aa7d511c67ec219d58091d07cfa60 + # Merge: 2baf0951 5b8936da # Author: Piotr Balcer - # Date: Fri Jul 26 12:06:22 2024 +0200 - # Merge pull request #1900 from kswiecicki/umf-version-bump - # Bump UMF version - set(UNIFIED_RUNTIME_TAG 58f85278a4ebf37742dd10afb3350580b0b1d9d7) + # Date: Fri Jul 26 15:48:04 2024 +0200 + # Merge pull request #1903 from kswiecicki/umf-version-bump + # Bump UMF version again + set(UNIFIED_RUNTIME_TAG b7b0c8b3d17aa7d511c67ec219d58091d07cfa60) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need @@ -210,45 +224,12 @@ target_include_directories(UnifiedRuntime-Headers find_package(Threads REQUIRED) -set(UNIFIED_RUNTIME_PLUGIN_ARGS - SOURCES - # These are short-term shared with Unified Runtime - # The two plugins define a few things differently so must - # be built separately. This difference is spelled in - # their "ur_bindings.hpp" files. - "ur_bindings.hpp" - "pi2ur.hpp" - # These below belong to Unified Runtime PI Plugin only - "pi_unified_runtime.hpp" - "pi_unified_runtime.cpp" - LIBRARIES - Threads::Threads - UnifiedRuntimeLoader - UnifiedRuntime-Headers - UnifiedRuntimeCommon - INCLUDE_DIRS - "${UNIFIED_RUNTIME_SRC_INCLUDE_DIR}" - "${UNIFIED_RUNTIME_COMMON_INCLUDE_DIR}" - "${SYCL_SOURCE_DIR}/source" # for compiler.hpp -) - -# We need for #include in common.h -if("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) - list(APPEND UNIFIED_RUNTIME_PLUGIN_ARGS LevelZeroLoader-Headers) -endif() - -if("opencl" IN_LIST SYCL_ENABLE_PLUGINS) - list(APPEND UNIFIED_RUNTIME_PLUGIN_ARGS OpenCL-ICD) -endif() - -add_sycl_plugin(unified_runtime ${UNIFIED_RUNTIME_PLUGIN_ARGS}) - if(TARGET UnifiedRuntimeLoader) - set_target_properties(hello_world PROPERTIES EXCLUDE_FROM_ALL 1 EXCLUDE_FROM_DEFAULT_BUILD 1) # Install the UR loader. # TODO: this is piggy-backing on the existing target component level-zero-sycl-dev # When UR is moved to its separate repo perhaps we should introduce new component, # e.g. unified-runtime-sycl-dev. + # See github issue #14598 install(TARGETS ur_loader LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev @@ -256,40 +237,56 @@ if(TARGET UnifiedRuntimeLoader) ) endif() -# Install the UR adapters too -if("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) - add_dependencies(sycl-runtime-libraries ur_adapter_level_zero) +add_custom_target(UnifiedRuntimeAdapters) - # Install the UR adapters too - install(TARGETS ur_adapter_level_zero - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev +function(add_sycl_ur_adapter NAME) + add_dependencies(UnifiedRuntimeAdapters ur_adapter_${NAME}) + + install(TARGETS ur_adapter_${NAME} + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT ur_adapter_${NAME} + RUNTIME DESTINATION "bin" COMPONENT ur_adapter_${NAME}) + + set(manifest_file + ${CMAKE_CURRENT_BINARY_DIR}/install_manifest_ur_adapter_${NAME}.txt) + add_custom_command(OUTPUT ${manifest_file} + COMMAND "${CMAKE_COMMAND}" + "-DCMAKE_INSTALL_COMPONENT=ur_adapter_${NAME}" + -P "${CMAKE_BINARY_DIR}/cmake_install.cmake" + COMMENT "Deploying component ur_adapter_${NAME}" + USES_TERMINAL ) + add_custom_target(install-sycl-ur-adapter-${NAME} + DEPENDS ${manifest_file} ur_adapter_${NAME} + ) + + set_property(GLOBAL APPEND PROPERTY + SYCL_TOOLCHAIN_INSTALL_COMPONENTS ur_adapter_${NAME}) +endfunction() + +if("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + add_sycl_ur_adapter(level_zero) + + # TODO: L0 adapter does other... things in its cmake - make sure they get + # added to the new build system endif() if("cuda" IN_LIST SYCL_ENABLE_PLUGINS) - add_dependencies(sycl-runtime-libraries ur_adapter_cuda) + add_sycl_ur_adapter(cuda) endif() if("hip" IN_LIST SYCL_ENABLE_PLUGINS) - add_dependencies(sycl-runtime-libraries ur_adapter_hip) + add_sycl_ur_adapter(hip) endif() -if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) - add_dependencies(sycl-runtime-libraries ur_adapter_opencl) - - # Install the UR adapters too - install(TARGETS ur_adapter_opencl - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev - ) +if("opencl" IN_LIST SYCL_ENABLE_PLUGINS) + add_sycl_ur_adapter(opencl) endif() -if ("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) - add_dependencies(sycl-runtime-libraries ur_adapter_native_cpu) +if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) + add_sycl_ur_adapter(native_cpu) + # Deal with OCK option + option(NATIVECPU_USE_OCK "Use the oneAPI Construction Kit for Native CPU" ON) if(NATIVECPU_USE_OCK) message(STATUS "Compiling Native CPU adapter with OCK support.") @@ -300,8 +297,6 @@ if ("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) endif() endif() -add_dependencies(sycl-runtime-libraries ur_umf) - # TODO: this is piggy-backing on the existing target component level-zero-sycl-dev install(TARGETS umf LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index e3d68eed3528b..6ea07a4b7fd4c 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -196,7 +196,7 @@ variables in production code. | Environment variable | Values | Description | | -------------------- | ------ | ----------- | | `SYCL_PREFER_UR` | Integer | If non-0 then run through Unified Runtime if desired backend is supported there. Default is 0. | -| `SYCL_PI_TRACE` | Described [below](#sycl_pi_trace-options) | Enable specified level of tracing for PI. | +| `SYCL_UR_TRACE` | Integer | If non-0 then enable Unified Runtime tracing. Default is 0. | | `SYCL_QUEUE_THREAD_POOL_SIZE` | Positive integer | Number of threads in thread pool of queue. | | `SYCL_DEVICELIB_NO_FALLBACK` | Any(\*) | Disable loading and linking of device library images | | `SYCL_PRINT_EXECUTION_GRAPH` | Described [below](#sycl_print_execution_graph-options) | Print execution graph to DOT text file. | @@ -231,16 +231,6 @@ variables in production code. | after_addHostAcc | print graph after addHostAccessor method | | always | print graph before and after each of the above methods | -### `SYCL_PI_TRACE` Options - -`SYCL_PI_TRACE` accepts a bit-mask. Supported tracing levels are in the table below - -| Option | Description | -| ------ | ----------- | -| 1 | Enable basic tracing, which is tracing of PI plugins/devices discovery | -| 2 | Enable tracing of the PI calls | -| -1 | Enable all levels of tracing | - ## Debugging variables for Level Zero Plugin :warning: **Warning:** the environment variables diff --git a/sycl/doc/design/PluginInterface.md b/sycl/doc/design/PluginInterface.md index 2b9c4e4bf3d2a..7d0d4a262d3a6 100644 --- a/sycl/doc/design/PluginInterface.md +++ b/sycl/doc/design/PluginInterface.md @@ -71,14 +71,12 @@ with arguments and returned values. SYCL_PI_TRACE=-1 lists all PI Traces above and more debug messages. #### Plugin binary interface -Plugins should implement all the Interface APIs required for the PI Version -it supports. There is [pi.def](../../include/sycl/detail/pi.def)/ -[pi.h](../../include/sycl/detail/pi.h) file listing all PI API names that -can be called by the specific version of Plugin Interface. -It exports a function - "piPluginInit" that returns the plugin details and -function pointer table containing the list of pointers to implemented Interface -Functions defined in pi.h. -In the future, this document will list the minimum set of Interface APIs +Plugins should implement all the Interface APIs required for the PI Version it +supports. There is pi.def/ pi.h file listing all PI API names that can be +called by the specific version of Plugin Interface. It exports a function - +"piPluginInit" that returns the plugin details and function pointer table +containing the list of pointers to implemented Interface Functions defined in +pi.h. In the future, this document will list the minimum set of Interface APIs to be supported by Plugins. This will also require adding functionality to SYCL Runtime to work with such limited functionality plugins. @@ -149,8 +147,7 @@ into - **Interoperability API** which allows interoperability with underlying runtimes such as OpenCL. -See [pi.h](../../include/sycl/detail/pi.h) header for the full list and -descriptions of PI APIs. +See pi.h header for the full list and descriptions of PI APIs. ### The Core OpenCL-based PI APIs diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index aeb5884c6ce6e..72f54f150ecfe 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -37,6 +37,7 @@ #include // for property_list #include // for range #include // for addressing_mode +#include // for UR_RESULT_ERRO... #include // for size_t #include // for hash diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index 7fab7ed12c8c3..3facf40a2efe8 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -1,4 +1,4 @@ -//==---------------- backend.hpp - SYCL PI backends ------------------------==// +//==---------------- backend.hpp - SYCL UR backends ------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -19,7 +19,6 @@ #include // for __SYCL_DEPRECATED #include // for __SYCL_EXPORT #include // for createSyclObjFr... -#include // for pi_native_handle #include // for device, get_native #include // for event, get_native #include // for make_error_code @@ -32,6 +31,7 @@ #include // for platform, get_n... #include // for property_list #include // for queue, get_native +#include // for ur_native_handle_t #if SYCL_BACKEND_OPENCL #include // for interop @@ -50,6 +50,8 @@ #include // for _ze_command_lis... #endif +#include + #include // for shared_ptr #include // for int32_t #include // for enable_if_t @@ -59,8 +61,12 @@ namespace sycl { inline namespace _V1 { namespace detail { -// Convert from PI backend to SYCL backend enum -backend convertBackend(pi_platform_backend PiBackend); +// TODO each backend can have its own custom errc enumeration +// but the details for this are not fully specified yet +enum class backend_errc : unsigned int {}; + +// Convert from UR backend to SYCL backend enum +backend convertUrBackend(ur_platform_backend_t UrBackend); } // namespace detail template class backend_traits { @@ -86,10 +92,11 @@ struct BufferInterop { using ReturnType = backend_return_t>; - static ReturnType GetNativeObjs(const std::vector &Handle) { + static ReturnType + GetNativeObjs(const std::vector &Handle) { ReturnType ReturnValue = 0; if (Handle.size()) { - ReturnValue = detail::pi::cast(Handle[0]); + ReturnValue = detail::ur::cast(Handle[0]); } return ReturnValue; } @@ -100,11 +107,12 @@ struct BufferInterop { using ReturnType = backend_return_t>; - static ReturnType GetNativeObjs(const std::vector &Handle) { + static ReturnType + GetNativeObjs(const std::vector &Handle) { ReturnType ReturnValue{}; for (auto &Obj : Handle) { ReturnValue.push_back( - detail::pi::cast(Obj)); + detail::ur::cast(Obj)); } return ReturnValue; } @@ -144,7 +152,7 @@ auto get_native(const queue &Obj) -> backend_return_t { "Backends mismatch"); } int32_t IsImmCmdList; - pi_native_handle Handle = Obj.getNative(IsImmCmdList); + ur_native_handle_t Handle = Obj.getNative(IsImmCmdList); backend_return_t RetVal; if constexpr (BackendName == backend::ext_oneapi_level_zero) RetVal = IsImmCmdList @@ -257,39 +265,41 @@ namespace detail { // Forward declaration class kernel_bundle_impl; -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle, +__SYCL_EXPORT platform make_platform(ur_native_handle_t NativeHandle, backend Backend); -__SYCL_EXPORT device make_device(pi_native_handle NativeHandle, +__SYCL_EXPORT device make_device(ur_native_handle_t NativeHandle, backend Backend); -__SYCL_EXPORT context make_context(pi_native_handle NativeHandle, +__SYCL_EXPORT context make_context(ur_native_handle_t NativeHandle, const async_handler &Handler, backend Backend, bool KeepOwnership, const std::vector &DeviceList = {}); -__SYCL_EXPORT queue make_queue(pi_native_handle NativeHandle, +__SYCL_EXPORT queue make_queue(ur_native_handle_t NativeHandle, int32_t nativeHandleDesc, const context &TargetContext, const device *TargetDevice, bool KeepOwnership, const property_list &PropList, const async_handler &Handler, backend Backend); -__SYCL_EXPORT event make_event(pi_native_handle NativeHandle, +__SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, const context &TargetContext, backend Backend); -__SYCL_EXPORT event make_event(pi_native_handle NativeHandle, +__SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, const context &TargetContext, bool KeepOwnership, backend Backend); // TODO: Unused. Remove when allowed. -__SYCL_EXPORT kernel make_kernel(pi_native_handle NativeHandle, +__SYCL_EXPORT kernel make_kernel(ur_native_handle_t NativeHandle, const context &TargetContext, backend Backend); __SYCL_EXPORT kernel make_kernel( const context &TargetContext, const kernel_bundle &KernelBundle, - pi_native_handle NativeKernelHandle, bool KeepOwnership, backend Backend); + ur_native_handle_t NativeKernelHandle, bool KeepOwnership, backend Backend); // TODO: Unused. Remove when allowed. __SYCL_EXPORT std::shared_ptr -make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, - bundle_state State, backend Backend); +make_kernel_bundle(ur_native_handle_t NativeHandle, + const context &TargetContext, bundle_state State, + backend Backend); __SYCL_EXPORT std::shared_ptr -make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, - bool KeepOwnership, bundle_state State, backend Backend); +make_kernel_bundle(ur_native_handle_t NativeHandle, + const context &TargetContext, bool KeepOwnership, + bundle_state State, backend Backend); } // namespace detail template @@ -299,7 +309,7 @@ make_platform( const typename backend_traits::template input_type &BackendObject) { return detail::make_platform( - detail::pi::cast(BackendObject), Backend); + detail::ur::cast(BackendObject), Backend); } template @@ -317,8 +327,8 @@ make_device(const typename backend_traits::template input_type } } - return detail::make_device(detail::pi::cast(BackendObject), - Backend); + return detail::make_device( + detail::ur::cast(BackendObject), Backend); } template @@ -328,8 +338,9 @@ make_context( const typename backend_traits::template input_type &BackendObject, const async_handler &Handler = {}) { - return detail::make_context(detail::pi::cast(BackendObject), - Handler, Backend, false /* KeepOwnership */); + return detail::make_context( + detail::ur::cast(BackendObject), Handler, Backend, + false /* KeepOwnership */); } template @@ -340,7 +351,7 @@ make_queue(const typename backend_traits::template input_type const context &TargetContext, const async_handler Handler = {}) { auto KeepOwnership = Backend == backend::ext_oneapi_cuda || Backend == backend::ext_oneapi_hip; - return detail::make_queue(detail::pi::cast(BackendObject), + return detail::make_queue(detail::ur::cast(BackendObject), false, TargetContext, nullptr, KeepOwnership, {}, Handler, Backend); } @@ -351,7 +362,7 @@ std::enable_if_t::MakeEvent == true, make_event(const typename backend_traits::template input_type &BackendObject, const context &TargetContext) { - return detail::make_event(detail::pi::cast(BackendObject), + return detail::make_event(detail::ur::cast(BackendObject), TargetContext, Backend); } @@ -363,7 +374,7 @@ std::enable_if_t::MakeEvent == true, &BackendObject, const context &TargetContext, bool KeepOwnership) { - return detail::make_event(detail::pi::cast(BackendObject), + return detail::make_event(detail::ur::cast(BackendObject), TargetContext, KeepOwnership, Backend); } @@ -377,7 +388,7 @@ make_buffer(const typename backend_traits::template input_type< buffer> &BackendObject, const context &TargetContext, event AvailableEvent = {}) { return detail::make_buffer_helper( - detail::pi::cast(BackendObject), TargetContext, + detail::ur::cast(BackendObject), TargetContext, AvailableEvent); } @@ -390,7 +401,7 @@ make_image(const typename backend_traits::template input_type< image> &BackendObject, const context &TargetContext, event AvailableEvent = {}) { return image( - detail::pi::cast(BackendObject), TargetContext, + detail::ur::cast(BackendObject), TargetContext, AvailableEvent); } @@ -399,8 +410,9 @@ kernel make_kernel(const typename backend_traits::template input_type &BackendObject, const context &TargetContext) { - return detail::make_kernel(detail::pi::cast(BackendObject), - TargetContext, Backend); + return detail::make_kernel( + detail::ur::cast(BackendObject), TargetContext, + Backend); } template @@ -412,7 +424,7 @@ make_kernel_bundle(const typename backend_traits::template input_type< const context &TargetContext) { std::shared_ptr KBImpl = detail::make_kernel_bundle( - detail::pi::cast(BackendObject), TargetContext, + detail::ur::cast(BackendObject), TargetContext, false, State, Backend); return detail::createSyclObjFromImpl>(KBImpl); } diff --git a/sycl/include/sycl/backend/opencl.hpp b/sycl/include/sycl/backend/opencl.hpp index 7c5a4d70f7e33..d7b2861363d87 100644 --- a/sycl/include/sycl/backend/opencl.hpp +++ b/sycl/include/sycl/backend/opencl.hpp @@ -8,9 +8,10 @@ #pragma once -#include // for __SYCL_EXPORT -#include // for device -#include // for platform +#include // for __SYCL_EXPORT +#include // for cast +#include // for device +#include // for platform #include // for string #include // for enable_if_t diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 752ac70878282..8b3a14af607f2 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -18,7 +18,6 @@ #include #include #include -#include // for pi_native_handle and PI_ERROR_INVAL #include #include #include @@ -26,6 +25,7 @@ #include #include #include +#include // for ur_native_handle_t #include // for size_t, nullptr_t #include // for function @@ -67,7 +67,7 @@ class buffer_impl; template buffer -make_buffer_helper(pi_native_handle Handle, const context &Ctx, +make_buffer_helper(ur_native_handle_t Handle, const context &Ctx, const event &Evt, bool OwnNativeHandle = true) { return buffer(Handle, Ctx, OwnNativeHandle, Evt); @@ -111,7 +111,7 @@ class __SYCL_EXPORT buffer_plain { std::unique_ptr Allocator, bool IsConstPtr); - buffer_plain(pi_native_handle MemObject, const context &SyclContext, + buffer_plain(ur_native_handle_t MemObject, const context &SyclContext, std::unique_ptr Allocator, bool OwnNativeHandle, const event &AvailableEvent); @@ -138,7 +138,7 @@ class __SYCL_EXPORT buffer_plain { return getPropList().template get_property(); } - std::vector getNativeVector(backend BackendName) const; + std::vector getNativeVector(backend BackendName) const; const std::unique_ptr &get_allocator_internal() const; @@ -732,7 +732,7 @@ class buffer : public detail::buffer_plain, friend class accessor; template friend buffer - detail::make_buffer_helper(pi_native_handle, const context &, const event &, + detail::make_buffer_helper(ur_native_handle_t, const context &, const event &, bool); template friend class ext::oneapi::weak_object; @@ -747,7 +747,7 @@ class buffer : public detail::buffer_plain, // Interop constructor template > - buffer(pi_native_handle MemObject, const context &SyclContext, + buffer(ur_native_handle_t MemObject, const context &SyclContext, bool OwnNativeHandle, const event &AvailableEvent, const detail::code_location CodeLoc = detail::code_location::current()) : buffer_plain(MemObject, SyclContext, diff --git a/sycl/include/sycl/context.hpp b/sycl/include/sycl/context.hpp index 5e387d5f94cc7..e517f6c432478 100644 --- a/sycl/include/sycl/context.hpp +++ b/sycl/include/sycl/context.hpp @@ -15,9 +15,9 @@ #include // for context_impl #include // for is_context_info_desc #include // for OwnerLessBase -#include // for pi_native_handle #include // for platform #include // for property_list +#include // for ur_native_handle_t #ifdef __SYCL_INTERNAL_API #include @@ -238,7 +238,7 @@ class __SYCL_EXPORT context : public detail::OwnerLessBase { /// Constructs a SYCL context object from a valid context_impl instance. context(std::shared_ptr Impl); - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; std::shared_ptr impl; diff --git a/sycl/include/sycl/detail/array.hpp b/sycl/include/sycl/detail/array.hpp index 30dc6311b2105..f0602c52885d8 100644 --- a/sycl/include/sycl/detail/array.hpp +++ b/sycl/include/sycl/detail/array.hpp @@ -10,6 +10,7 @@ #include // for __SYCL_ALWAYS_INLINE #include +#include // for UR_RESULT_ERROR_INVALID_VALUE #include // for size_t #include // for enable_if_t diff --git a/sycl/include/sycl/detail/backend_traits_opencl.hpp b/sycl/include/sycl/detail/backend_traits_opencl.hpp index 142caeff540d4..4281dc161574f 100644 --- a/sycl/include/sycl/detail/backend_traits_opencl.hpp +++ b/sycl/include/sycl/detail/backend_traits_opencl.hpp @@ -19,7 +19,7 @@ #include // for context #include // for BackendInput, BackendReturn #include // for _cl_event, cl_event, cl_de... -#include // for assertion, PiDevice, PiPro... +#include // for assertion and ur handles #include // for device #include // for event #include // for buffer @@ -139,7 +139,7 @@ template <> struct InteropFeatureSupportMap { static constexpr bool MakeImage = false; }; -namespace pi { +namespace ur { // Cast for std::vector, according to the spec, make_event // should create one(?) event from a vector of cl_event template inline To cast(std::vector value) { @@ -149,15 +149,15 @@ template inline To cast(std::vector value) { return cast(value[0]); } -// These conversions should use PI interop API. +// These conversions should use UR interop API. template <> -inline PiProgram - cast(cl_program) = delete; // Use piextCreateProgramWithNativeHandle +inline ur_program_handle_t + cast(cl_program) = delete; // Use urProgramCreateWithNativeHandle template <> -inline PiDevice - cast(cl_device_id) = delete; // Use piextCreateDeviceWithNativeHandle -} // namespace pi +inline ur_device_handle_t + cast(cl_device_id) = delete; // Use urDeviceCreateWithNativeHandle +} // namespace ur } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/detail/cg_types.hpp b/sycl/include/sycl/detail/cg_types.hpp index 8075201bdf700..7db3171ba28fc 100644 --- a/sycl/include/sycl/detail/cg_types.hpp +++ b/sycl/include/sycl/detail/cg_types.hpp @@ -23,6 +23,7 @@ #include // for nd_item #include // for nd_range #include // for range, operator* +#include // for UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE #include // for function #include // for size_t diff --git a/sycl/include/sycl/detail/common.hpp b/sycl/include/sycl/detail/common.hpp index 43a5228c287cb..c5110eed711ee 100644 --- a/sycl/include/sycl/detail/common.hpp +++ b/sycl/include/sycl/detail/common.hpp @@ -10,7 +10,7 @@ #include // for __SYCL_ALWAYS_INLINE #include // for __SYCL_EXPORT -#include // for pi_int32 +#include // for ur_code_location_t #include // for array #include // for assert @@ -96,6 +96,8 @@ struct code_location { unsigned long MColumnNo; }; +ur_code_location_t codeLocationCallback(void *); + /// @brief Data type that manages the code_location information in TLS /// @details As new SYCL features are added, they all enable the propagation of /// the code location information where the SYCL API was called by the @@ -159,7 +161,7 @@ class __SYCL_EXPORT tls_code_loc_t { #define __SYCL_ASSERT(x) assert(x) #endif // #ifdef __SYCL_DEVICE_ONLY__ -#define __SYCL_PI_ERROR_REPORT \ +#define __SYCL_UR_ERROR_REPORT \ "Native API failed. " /*__FILE__*/ \ /* TODO: replace __FILE__ to report only relative path*/ \ /* ":" __SYCL_STRINGIFY(__LINE__) ": " */ \ diff --git a/sycl/include/sycl/detail/cuda_definitions.hpp b/sycl/include/sycl/detail/cuda_definitions.hpp deleted file mode 100644 index cda5198e6160f..0000000000000 --- a/sycl/include/sycl/detail/cuda_definitions.hpp +++ /dev/null @@ -1,20 +0,0 @@ -//==------------ cuda_definitions.hpp - SYCL CUDA backend ------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -// CUDA backend specific options -// TODO: Use values that won't overlap with others - -// Mem Object info: Retrieve the raw CUDA pointer from a cl_mem -#define __SYCL_PI_CUDA_RAW_POINTER (0xFF01) - -// PI Command Queue using Default stream -#define __SYCL_PI_CUDA_USE_DEFAULT_STREAM (0xFF03) -// PI Command queue will sync with default stream -#define __SYCL_PI_CUDA_SYNC_WITH_DEFAULT (0xFF04) diff --git a/sycl/include/sycl/detail/helpers.hpp b/sycl/include/sycl/detail/helpers.hpp index 4d632ada7c767..37e438655e1fe 100644 --- a/sycl/include/sycl/detail/helpers.hpp +++ b/sycl/include/sycl/detail/helpers.hpp @@ -11,7 +11,6 @@ #include // for MemorySemanticsMask #include // for fence_space #include // for __SYCL_EXPORT -#include // for PiProgram #include // for memory_order #ifdef __SYCL_DEVICE_ONLY__ @@ -253,7 +252,7 @@ template void loop(F &&f) { } inline constexpr bool is_power_of_two(int x) { return (x & (x - 1)) == 0; } -std::tuple +std::tuple retrieveKernelBinary(const QueueImplPtr &, const char *KernelName, CGExecKernel *CGKernel = nullptr); } // namespace detail diff --git a/sycl/include/sycl/detail/hip_definitions.hpp b/sycl/include/sycl/detail/hip_definitions.hpp deleted file mode 100644 index f5a07c9e2a2e4..0000000000000 --- a/sycl/include/sycl/detail/hip_definitions.hpp +++ /dev/null @@ -1,24 +0,0 @@ -//==------------- hip_definitions.hpp - SYCL HIP backend -------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -// HIP backend specific options -// TODO: Use values that won't overlap with others - -// Mem Object info: Retrieve the raw HIP pointer from a cl_mem -#define __SYCL_PI_HIP_RAW_POINTER (0xFF01) -// Context creation: Use a primary HIP context instead of a custom one by -// providing a property value of PI_TRUE for the following -// property ID. -#define __SYCL_PI_CONTEXT_PROPERTIES_HIP_PRIMARY (0xFF02) - -// PI Command Queue using Default stream -#define __SYCL_PI_HIP_USE_DEFAULT_STREAM (0xFF03) -// PI Command queue will sync with default stream -#define __SYCL_PI_HIP_SYNC_WITH_DEFAULT (0xFF04) diff --git a/sycl/include/sycl/detail/image_accessor_util.hpp b/sycl/include/sycl/detail/image_accessor_util.hpp index 03b5faab9d3ad..49dacd704a748 100644 --- a/sycl/include/sycl/detail/image_accessor_util.hpp +++ b/sycl/include/sycl/detail/image_accessor_util.hpp @@ -761,9 +761,8 @@ void imageWriteHostImpl(const CoordT &Coords, const WriteDataT &Color, ImgChannelType); break; case image_channel_type::fp16: - writePixel( - convertWriteData(Color, ImgChannelType), - reinterpret_cast(Ptr), ImgChannelOrder, ImgChannelType); + writePixel(convertWriteData(Color, ImgChannelType), + reinterpret_cast(Ptr), ImgChannelOrder, ImgChannelType); break; case image_channel_type::fp32: writePixel(convertWriteData(Color, ImgChannelType), diff --git a/sycl/include/sycl/detail/info_desc_helpers.hpp b/sycl/include/sycl/detail/info_desc_helpers.hpp index 9a52fe8021e2b..e8bc8f76c83db 100644 --- a/sycl/include/sycl/detail/info_desc_helpers.hpp +++ b/sycl/include/sycl/detail/info_desc_helpers.hpp @@ -8,7 +8,7 @@ #pragma once -#include // for pi_device_info +#include #include // for true_type @@ -23,6 +23,7 @@ namespace sycl { inline namespace _V1 { namespace detail { template struct PiInfoCode; +template struct UrInfoCode; template struct is_platform_info_desc : std::false_type {}; template struct is_context_info_desc : std::false_type {}; template struct is_device_info_desc : std::false_type {}; @@ -43,9 +44,10 @@ template struct is_event_profiling_info_desc : std::false_type {}; template struct is_backend_info_desc : std::false_type {}; // Similar approach to limit valid get_backend_info template argument -#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template <> struct PiInfoCode { \ - static constexpr pi_##DescType##_info value = PiCode; \ +#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, UrCode) \ + template <> struct UrInfoCode { \ + static constexpr ur_##DescType##_info_t value = \ + static_cast(UrCode); \ }; \ template <> \ struct is_##DescType##_info_desc : std::true_type { \ @@ -57,9 +59,10 @@ template struct is_backend_info_desc : std::false_type {}; #include #include #undef __SYCL_PARAM_TRAITS_SPEC -#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template <> struct PiInfoCode { \ - static constexpr pi_profiling_info value = PiCode; \ + +#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, UrCode) \ + template <> struct UrInfoCode { \ + static constexpr ur_profiling_info_t value = UrCode; \ }; \ template <> \ struct is_##DescType##_info_desc : std::true_type { \ @@ -81,13 +84,19 @@ struct IsSubGroupInfo template <> struct IsSubGroupInfo : std::true_type {}; +template struct IsKernelInfo : std::false_type {}; +template <> +struct IsKernelInfo + : std::true_type {}; -#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template <> struct PiInfoCode { \ - static constexpr \ - typename std::conditional::value, \ - pi_kernel_sub_group_info, \ - pi_kernel_group_info>::type value = PiCode; \ +#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, UrCode) \ + template <> struct UrInfoCode { \ + static constexpr typename std::conditional< \ + IsSubGroupInfo::value, \ + ur_kernel_sub_group_info_t, \ + std::conditional::value, \ + ur_kernel_info_t, \ + ur_kernel_group_info_t>::type>::type value = UrCode; \ }; \ template <> \ struct is_##DescType##_info_desc : std::true_type { \ @@ -95,12 +104,10 @@ struct IsSubGroupInfo }; #include #undef __SYCL_PARAM_TRAITS_SPEC -// Need a static_cast here since piDeviceGetInfo can also accept -// pi_usm_capability_query values. -#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template <> struct PiInfoCode { \ - static constexpr pi_device_info value = \ - static_cast(PiCode); \ +#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, UrCode) \ + template <> struct UrInfoCode { \ + static constexpr ur_device_info_t value = \ + static_cast(UrCode); \ }; \ template <> \ struct is_##DescType##_info_desc : std::true_type { \ @@ -113,11 +120,10 @@ struct IsSubGroupInfo #undef __SYCL_PARAM_TRAITS_SPEC #undef __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED - -#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \ - template <> struct PiInfoCode { \ - static constexpr pi_device_info value = \ - static_cast(PiCode); \ +#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, UrCode) \ + template <> struct UrInfoCode { \ + static constexpr ur_device_info_t value = \ + static_cast(UrCode); \ }; \ template <> \ struct is_##DescType##_info_desc \ diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def deleted file mode 100644 index 847675e5c95a8..0000000000000 --- a/sycl/include/sycl/detail/pi.def +++ /dev/null @@ -1,234 +0,0 @@ -//==------------ pi.def Plugin Interface list of API -----------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _PI_API -#error Undefined _PI_API macro expansion -#endif - -// The list of all PI interfaces wrapped with _PI_API macro. -// This is for convinience of doing same thing for all interfaces, e.g. -// declare, define, initialize. -// -// This list is used to define PiApiKind enum, which is part of external -// interface. To avoid ABI breakage, please, add new entries to the end of the -// list. -// -// Platform -_PI_API(piPlatformsGet) -_PI_API(piPlatformGetInfo) -_PI_API(piextPlatformGetNativeHandle) -_PI_API(piextPlatformCreateWithNativeHandle) -// Device -_PI_API(piDevicesGet) -_PI_API(piDeviceGetInfo) -_PI_API(piDevicePartition) -_PI_API(piDeviceRetain) -_PI_API(piDeviceRelease) -_PI_API(piextDeviceSelectBinary) -_PI_API(piextGetDeviceFunctionPointer) -_PI_API(piextGetGlobalVariablePointer) -_PI_API(piextDeviceGetNativeHandle) -_PI_API(piextDeviceCreateWithNativeHandle) -// Context -_PI_API(piContextCreate) -_PI_API(piContextGetInfo) -_PI_API(piContextRetain) -_PI_API(piContextRelease) -_PI_API(piextContextSetExtendedDeleter) -_PI_API(piextContextGetNativeHandle) -_PI_API(piextContextCreateWithNativeHandle) -// Queue -_PI_API(piQueueCreate) -_PI_API(piextQueueCreate) -_PI_API(piQueueGetInfo) -_PI_API(piQueueFinish) -_PI_API(piQueueFlush) -_PI_API(piQueueRetain) -_PI_API(piQueueRelease) -_PI_API(piextQueueGetNativeHandle) -_PI_API(piextQueueCreateWithNativeHandle) -// Memory -_PI_API(piMemBufferCreate) -_PI_API(piMemImageCreate) -_PI_API(piMemGetInfo) -_PI_API(piMemImageGetInfo) -_PI_API(piMemRetain) -_PI_API(piMemRelease) -_PI_API(piMemBufferPartition) -_PI_API(piextMemGetNativeHandle) -_PI_API(piextMemCreateWithNativeHandle) -_PI_API(piextMemImageCreateWithNativeHandle) -// Program -_PI_API(piProgramCreate) -_PI_API(piProgramCreateWithBinary) -_PI_API(piProgramGetInfo) -_PI_API(piProgramCompile) -_PI_API(piProgramBuild) -_PI_API(piProgramLink) -_PI_API(piProgramGetBuildInfo) -_PI_API(piProgramRetain) -_PI_API(piProgramRelease) -_PI_API(piextProgramSetSpecializationConstant) -_PI_API(piextProgramGetNativeHandle) -_PI_API(piextProgramCreateWithNativeHandle) -// Kernel -_PI_API(piKernelCreate) -_PI_API(piKernelSetArg) -_PI_API(piKernelGetInfo) -_PI_API(piKernelGetGroupInfo) -_PI_API(piKernelGetSubGroupInfo) -_PI_API(piKernelRetain) -_PI_API(piKernelRelease) -_PI_API(piextKernelSetArgPointer) -_PI_API(piKernelSetExecInfo) -_PI_API(piextKernelCreateWithNativeHandle) -_PI_API(piextKernelGetNativeHandle) -_PI_API(piextKernelSuggestMaxCooperativeGroupCount) -// Event -_PI_API(piEventCreate) -_PI_API(piEventGetInfo) -_PI_API(piEventGetProfilingInfo) -_PI_API(piEventsWait) -_PI_API(piEventSetCallback) -_PI_API(piEventSetStatus) -_PI_API(piEventRetain) -_PI_API(piEventRelease) -_PI_API(piextEventGetNativeHandle) -_PI_API(piextEventCreateWithNativeHandle) -_PI_API(piEnqueueTimestampRecordingExp) -// Sampler -_PI_API(piSamplerCreate) -_PI_API(piSamplerGetInfo) -_PI_API(piSamplerRetain) -_PI_API(piSamplerRelease) -// Queue commands -_PI_API(piEnqueueKernelLaunch) -_PI_API(piextEnqueueCooperativeKernelLaunch) -_PI_API(piEnqueueEventsWait) -_PI_API(piEnqueueEventsWaitWithBarrier) -_PI_API(piEnqueueMemBufferRead) -_PI_API(piEnqueueMemBufferReadRect) -_PI_API(piEnqueueMemBufferWrite) -_PI_API(piEnqueueMemBufferWriteRect) -_PI_API(piEnqueueMemBufferCopy) -_PI_API(piEnqueueMemBufferCopyRect) -_PI_API(piEnqueueMemBufferFill) -_PI_API(piEnqueueMemImageRead) -_PI_API(piEnqueueMemImageWrite) -_PI_API(piEnqueueMemImageCopy) -_PI_API(piEnqueueMemImageFill) -_PI_API(piEnqueueMemBufferMap) -_PI_API(piEnqueueMemUnmap) -// USM -_PI_API(piextUSMHostAlloc) -_PI_API(piextUSMDeviceAlloc) -_PI_API(piextUSMSharedAlloc) -_PI_API(piextUSMFree) -_PI_API(piextUSMEnqueueFill) -_PI_API(piextUSMEnqueueMemcpy) -_PI_API(piextUSMEnqueuePrefetch) -_PI_API(piextUSMEnqueueMemAdvise) -_PI_API(piextUSMGetMemAllocInfo) -// Host pipes -_PI_API(piextEnqueueReadHostPipe) -_PI_API(piextEnqueueWriteHostPipe) - -_PI_API(piextKernelSetArgMemObj) -_PI_API(piextKernelSetArgSampler) - -_PI_API(piextPluginGetOpaqueData) - -_PI_API(piPluginGetLastError) - -_PI_API(piTearDown) - -_PI_API(piextUSMEnqueueFill2D) -_PI_API(piextUSMEnqueueMemset2D) -_PI_API(piextUSMEnqueueMemcpy2D) - -_PI_API(piGetDeviceAndHostTimer) - -// Device global variable -_PI_API(piextEnqueueDeviceGlobalVariableWrite) -_PI_API(piextEnqueueDeviceGlobalVariableRead) - -_PI_API(piPluginGetBackendOption) - -_PI_API(piextEnablePeerAccess) -_PI_API(piextDisablePeerAccess) -_PI_API(piextPeerAccessGetInfo) - -// USM import/release APIs -_PI_API(piextUSMImport) -_PI_API(piextUSMRelease) - -// command-buffer Extension -_PI_API(piextCommandBufferCreate) -_PI_API(piextCommandBufferRetain) -_PI_API(piextCommandBufferRelease) -_PI_API(piextCommandBufferFinalize) -_PI_API(piextCommandBufferNDRangeKernel) -_PI_API(piextCommandBufferMemcpyUSM) -_PI_API(piextCommandBufferMemBufferCopy) -_PI_API(piextCommandBufferMemBufferCopyRect) -_PI_API(piextCommandBufferMemBufferWrite) -_PI_API(piextCommandBufferMemBufferWriteRect) -_PI_API(piextCommandBufferMemBufferRead) -_PI_API(piextCommandBufferMemBufferReadRect) -_PI_API(piextCommandBufferMemBufferFill) -_PI_API(piextCommandBufferFillUSM) -_PI_API(piextCommandBufferPrefetchUSM) -_PI_API(piextCommandBufferAdviseUSM) -_PI_API(piextEnqueueCommandBuffer) -_PI_API(piextCommandBufferUpdateKernelLaunch) -_PI_API(piextCommandBufferRetainCommand) -_PI_API(piextCommandBufferReleaseCommand) - -_PI_API(piextUSMPitchedAlloc) - -// Bindless Images -_PI_API(piextMemUnsampledImageHandleDestroy) -_PI_API(piextMemSampledImageHandleDestroy) -_PI_API(piextBindlessImageSamplerCreate) -_PI_API(piextMemImageAllocate) -_PI_API(piextMemImageFree) -_PI_API(piextMemUnsampledImageCreate) -_PI_API(piextMemSampledImageCreate) -_PI_API(piextMemImageCopy) -_PI_API(piextMemImageGetInfo) -_PI_API(piextMemMipmapGetLevel) -_PI_API(piextMemMipmapFree) - -// Interop -_PI_API(piextImportExternalMemory) -_PI_API(piextMemReleaseInterop) -_PI_API(piextMemMapExternalArray) -_PI_API(piextImportExternalSemaphore) -_PI_API(piextReleaseExternalSemaphore) -_PI_API(piextWaitExternalSemaphore) -_PI_API(piextSignalExternalSemaphore) - -// Virtual memory -_PI_API(piextVirtualMemGranularityGetInfo) -_PI_API(piextPhysicalMemCreate) -_PI_API(piextPhysicalMemRetain) -_PI_API(piextPhysicalMemRelease) -_PI_API(piextVirtualMemReserve) -_PI_API(piextVirtualMemFree) -_PI_API(piextVirtualMemMap) -_PI_API(piextVirtualMemUnmap) -_PI_API(piextVirtualMemSetAccess) -_PI_API(piextVirtualMemGetInfo) - -// Enqueue native command -_PI_API(piextEnqueueNativeCommand) - -// Kernel Launch Properties -_PI_API(piextEnqueueKernelLaunchCustom) - -#undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h deleted file mode 100644 index eb27b4416927d..0000000000000 --- a/sycl/include/sycl/detail/pi.h +++ /dev/null @@ -1,3096 +0,0 @@ -//==---------- pi.h - Plugin Interface -------------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \defgroup sycl_pi The Plugin Interface -// TODO: link to sphinx page - -/// \file Main Plugin Interface header file. -/// -/// This is the definition of a generic offload Plugin Interface (PI), which is -/// used by the SYCL implementation to connect to multiple device back-ends, -/// e.g. to OpenCL. The interface is intentionally kept C-only for the -/// purpose of having full flexibility and interoperability with different -/// environments. -/// -/// \ingroup sycl_pi - -#ifndef _PI_H_ -#define _PI_H_ - -// Every single change in PI API should be accompanied with the minor -// version increase (+1). In the cases where backward compatibility is not -// maintained there should be a (+1) change to the major version in -// addition to the increase of the minor. -// -// PI version changes log: -// -- Version 1.2: -// 1. (Binary backward compatibility breaks) Two fields added to the -// pi_device_binary_struct structure: -// pi_device_binary_property_set PropertySetsBegin; -// pi_device_binary_property_set PropertySetsEnd; -// 2. A number of types needed to define pi_device_binary_property_set added. -// 3. Added new ownership argument to piextContextCreateWithNativeHandle. -// 4. Add interoperability interfaces for kernel. -// 4.6 Added new ownership argument to piextQueueCreateWithNativeHandle which -// changes the API version from 3.5 to 4.6. -// 5.7 Added new context and ownership arguments to -// piextEventCreateWithNativeHandle -// 6.8 Added new ownership argument to piextProgramCreateWithNativeHandle. Added -// piQueueFlush function. -// 7.9 Added new context and ownership arguments to -// piextMemCreateWithNativeHandle. -// 8.10 Added new optional device argument to piextQueueCreateWithNativeHandle -// 9.11 Use values of OpenCL enums directly, rather than including ``; -// NOTE that this results in a changed API for `piProgramGetBuildInfo`. -// 10.12 Change enum value PI_MEM_ADVICE_UNKNOWN from 0 to 999, and set enum -// PI_MEM_ADVISE_RESET to 0. -// 10.13 Added new PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS queue property. -// 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for -// piDeviceGetInfo. -// 11.15 piEventCreate creates even in the signalled state now. -// 11.16 Add PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE and -// PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH as an extension for -// piDeviceGetInfo. -// 11.17 Added new PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW and -// PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH queue properties. -// 11.18 Add new parameter name PI_EXT_ONEAPI_QUEUE_INFO_EMPTY to -// _pi_queue_info. -// 12.19 Add new PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE piDevicePartition -// scheme. Sub-sub-devices (representing compute slice) creation via -// partitioning by affinity domain is disabled by default and can be temporarily -// restored via SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING -// environment variable. -// 12.20 Added piextQueueCreate API to be used instead of piQueueCreate, also -// added PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES for piDeviceGetInfo. -// Both are needed to support sycl_ext_intel_queue_index extension. -// 12.21 Added new piextUSMEnqueueFill2D, piextUSMEnqueueMemset2D, and -// piextUSMEnqueueMemcpy2D functions. Added new -// PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT, -// PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT, and -// PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT context info query -// descriptors. -// 12.22 Add piGetDeviceAndHostTimer to query device wall-clock timestamp -// 12.23 Added new piextEnqueueDeviceGlobalVariableWrite and -// piextEnqueueDeviceGlobalVariableRead functions. -// 12.24 Added new PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG property to the -// _pi_kernel_exec_info. Defined _pi_kernel_cache_config enum with values of -// the new PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG property. -// 12.25 Added PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES and -// PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES for piDeviceGetInfo. -// 12.26 Added piextEnqueueReadHostPipe and piextEnqueueWriteHostPipe functions. -// 12.27 Added properties parameter to piextQueueCreateWithNativeHandle and -// changed native handle type of piextQueueCreateWithNativeHandle and -// piextQueueGetNativeHandle -// 12.28 Added piextMemImageCreateWithNativeHandle for creating images from -// native handles. -// 12.29 Support PI_EXT_PLATFORM_INFO_BACKEND query in piPlatformGetInfo -// 12.30 Added PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT device info query. -// 12.31 Added PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP device -// info query. -// 13.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and -// piextQueueGetNativeHandle -// 14.33 Added new parameter (memory object properties) to -// piextKernelSetArgMemObj -// 14.34 Added command-buffer extension methods -// 14.35 Added piextEnablePeerAccess, piextDisablePeerAccess, -// piextPeerAccessGetInfo, and pi_peer_attr enum. -// 14.36 Adding support for experimental bindless images. This includes: -// - Added device info queries -// - Device queries for bindless image support -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT -// - Device queries for pitched USM allocations -// - PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_PITCH_ALIGN -// - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH -// - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT -// - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH -// - Device queries for mipmap image support -// - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY -// - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT -// - Device queries for interop memory support -// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT -// - Added PI_IMAGE_INFO_DEPTH to _pi_image_info -// - Added _pi_image_copy_flags enum to determine direction of copy -// - Added new extension functions -// - piextBindlessImageSamplerCreate -// - piextUSMPitchedAlloc -// - piextMemUnsampledImageHandleDestroy -// - piextMemSampledImageHandleDestroy -// - piextMemImageAllocate -// - piextMemImageFree -// - piextMemUnsampledImageCreate -// - piextMemSampledImageCreate -// - piextMemImageCopy -// - piextMemImageGetInfo -// - piextMemMipmapGetLevel -// - piextMemMipmapFree -// - piextMemImportOpaqueFD -// - piextMemMapExternalArray -// - piextMemReleaseInterop -// - piextImportExternalSemaphoreOpaqueFD -// - piextDestroyExternalSemaphore -// - piextWaitExternalSemaphore -// - piextSignalExternalSemaphore -// 14.37 Added piextUSMImportExternalPointer and piextUSMReleaseImportedPointer. -// 14.38 Change PI_MEM_ADVICE_* values to flags for use in bitwise operations. -// 14.39 Added PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT device info query. -// 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones. -// 14.41 Added piextCommandBufferMemBufferFill & piextCommandBufferFillUSM -// 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM -// 15.43 Changed the signature of piextMemGetNativeHandle to also take a -// pi_device -// 15.44 Add coarse-grain memory advice flag for HIP. -// 15.45 Added piextKernelSuggestMaxCooperativeGroupCount and -// piextEnqueueCooperativeKernelLaunch. -// 15.46 Add piextGetGlobalVariablePointer -// 15.47 Added PI_ERROR_FEATURE_UNSUPPORTED. -// 15.48 Add CommandBuffer update definitions -// 15.49 Added cubemap support: -// - Added cubemap image type, PI_MEM_TYPE_IMAGE_CUBEMAP, to _pi_mem_type -// - Added cubemap sampling capabilities -// - _pi_sampler_cubemap_filter_mode -// - PI_SAMPLER_PROPERTIES_CUBEMAP_FILTER_MODE -// - Added device queries for cubemap support -// - PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT -// 15.50 Added device queries for sampled image fetch support -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D -// 15.51 Removed ret_mem argument from piextMemUnsampledImageCreate and -// piextMemSampledImageCreate -// 15.52 Added piEnqueueTimestampRecordingExp and -// PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT. -// 15.53 Added new extension functions that enable importing various external -// handle types: -// - piextImportExternalMemory -// - piextImportExternalSemaphore -// Deprecated no longer necessary functions: -// - piextImportExternalSemaphoreOpaqueFD -// - piextMemImportOpaqueFD -// The following interop semaphore related functions now take extra -// `bool` and `pi_uint64` values: -// - `piextWaitExternalSemaphore` -// - `piextSignalExternalSemaphore` -// The `pi_external_mem_handle_type` enum now has a new -// `win32_nt_dx12_resource` value. -// the `pi_external_semaphore_handle_type` enum now has a new -// `win32_nt_dx12_fence` value. -// 15.54 Added piextVirtualMem* functions, and piextPhysicalMem* functions, -// PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM device info descriptor, -// _pi_virtual_mem_granularity_info enum, _pi_virtual_mem_info enum and -// pi_virtual_access_flags bit flags. -// 15.55 Added piextEnqueueNativeCommand as well as associated types and enums -// 16.56 Replaced piextUSMEnqueueMemset with piextUSMEnqueueFill -// 16.57 Added mappings to UR launch properties extension -// (piextEnqueueKernelLaunchCustom) -// 17.58 Added context parameter to piextMemImageGetInfo -// 17.59 Added const-qualifier to src_ptr in piextMemImageCopy. -// 18.60 Remove deprecated functions piextMemImportOpaqueFD and -// piextImportExternalSemaphoreOpaqueFD -// 19.61 Rename piextDestroyExternalSemaphore to piextReleaseExternalSemaphore -// 20.62 Changed the signature of piextMemImageCopy to take 2 image and format -// descriptors. -// 20.63 Added device queries -// - PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_ARRAY_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLE_1D_USM -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLE_2D_USM -// Removed device queries -// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT -// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM - -#define _PI_H_VERSION_MAJOR 20 -#define _PI_H_VERSION_MINOR 63 - -#define _PI_STRING_HELPER(a) #a -#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) -#define _PI_TRIPLE_CONCAT(a, b, c) _PI_STRING_HELPER(a.b.c) - -// This is the macro that plugins should all use to define their version. -// _PI_PLUGIN_VERSION_STRING will be printed when environment variable -// SYCL_PI_TRACE is set to 1. PluginVersion should be defined for each plugin -// in plugins/*/pi_*.hpp. PluginVersion should be incremented with each change -// to the plugin. -#define _PI_PLUGIN_VERSION_STRING(PluginVersion) \ - _PI_TRIPLE_CONCAT(_PI_H_VERSION_MAJOR, _PI_H_VERSION_MINOR, PluginVersion) - -#define _PI_H_VERSION_STRING \ - _PI_CONCAT(_PI_H_VERSION_MAJOR, _PI_H_VERSION_MINOR) - -// This will be used to check the major versions of plugins versus the major -// versions of PI. -#define _PI_STRING_SUBSTITUTE(X) _PI_STRING_HELPER(X) -#define _PI_PLUGIN_VERSION_CHECK(PI_API_VERSION, PI_PLUGIN_VERSION) \ - if (strncmp(PI_API_VERSION, PI_PLUGIN_VERSION, \ - sizeof(_PI_STRING_SUBSTITUTE(_PI_H_VERSION_MAJOR))) < 0) { \ - return PI_ERROR_INVALID_OPERATION; \ - } - -// NOTE: This file presents a maping of OpenCL to PI enums, constants and -// typedefs. The general approach taken was to replace `CL_` prefix with `PI_`. -// Please consider this when adding or modifying values, as the strict value -// match is required. -// TODO: We should consider re-implementing PI enums and constants and only -// perform a mapping of PI to OpenCL in the pi_opencl backend. -#include - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -using pi_int32 = int32_t; -using pi_uint32 = uint32_t; -using pi_uint64 = uint64_t; -using pi_bool = pi_uint32; -using pi_bitfield = pi_uint64; -using pi_native_handle = uintptr_t; - -// -// NOTE: prefer to map 1:1 to OpenCL so that no translation is needed -// for PI <-> OpenCL ways. The PI <-> to other BE translation is almost -// always needed anyway. -// -typedef enum { -#define _PI_ERRC(NAME, VAL) NAME = VAL, -#define _PI_ERRC_WITH_MSG(NAME, VAL, MSG) NAME = VAL, -#include -#undef _PI_ERRC -#undef _PI_ERRC_WITH_MSG -} _pi_result; - -typedef enum { - PI_EVENT_COMPLETE = 0x0, - PI_EVENT_RUNNING = 0x1, - PI_EVENT_SUBMITTED = 0x2, - PI_EVENT_QUEUED = 0x3 -} _pi_event_status; - -typedef enum { - PI_PLATFORM_INFO_EXTENSIONS = 0x0904, - PI_PLATFORM_INFO_NAME = 0x0902, - PI_PLATFORM_INFO_PROFILE = 0x0900, - PI_PLATFORM_INFO_VENDOR = 0x0903, - PI_PLATFORM_INFO_VERSION = 0x0901, - PI_EXT_PLATFORM_INFO_BACKEND = 0x21000 // returns pi_platform_backend -} _pi_platform_info; - -typedef enum { - PI_PROGRAM_BUILD_INFO_STATUS = 0x1181, - PI_PROGRAM_BUILD_INFO_OPTIONS = 0x1182, - PI_PROGRAM_BUILD_INFO_LOG = 0x1183, - PI_PROGRAM_BUILD_INFO_BINARY_TYPE = 0x1184 -} _pi_program_build_info; - -typedef enum { - PI_PROGRAM_BUILD_STATUS_NONE = -1, - PI_PROGRAM_BUILD_STATUS_ERROR = -2, - PI_PROGRAM_BUILD_STATUS_SUCCESS = 0, - PI_PROGRAM_BUILD_STATUS_IN_PROGRESS = -3 -} _pi_program_build_status; - -typedef enum { - PI_PROGRAM_BINARY_TYPE_NONE = 0x0, - PI_PROGRAM_BINARY_TYPE_COMPILED_OBJECT = 0x1, - PI_PROGRAM_BINARY_TYPE_LIBRARY = 0x2, - PI_PROGRAM_BINARY_TYPE_EXECUTABLE = 0x4 -} _pi_program_binary_type; - -// NOTE: this is made 64-bit to match the size of cl_device_type to -// make the translation to OpenCL transparent. -// -typedef enum : pi_uint64 { - PI_DEVICE_TYPE_DEFAULT = - (1 << 0), ///< The default device available in the PI plugin. - PI_DEVICE_TYPE_ALL = 0xFFFFFFFF, ///< All devices available in the PI plugin. - PI_DEVICE_TYPE_CPU = (1 << 1), ///< A PI device that is the host processor. - PI_DEVICE_TYPE_GPU = (1 << 2), ///< A PI device that is a GPU. - PI_DEVICE_TYPE_ACC = (1 << 3), ///< A PI device that is a - ///< dedicated accelerator. - PI_DEVICE_TYPE_CUSTOM = (1 << 4) ///< A PI device that is a custom device. -} _pi_device_type; - -typedef enum { - PI_EXT_PLATFORM_BACKEND_UNKNOWN = 0, ///< The backend is not a recognized one - PI_EXT_PLATFORM_BACKEND_LEVEL_ZERO = 1, ///< The backend is Level Zero - PI_EXT_PLATFORM_BACKEND_OPENCL = 2, ///< The backend is OpenCL - PI_EXT_PLATFORM_BACKEND_CUDA = 3, ///< The backend is CUDA - PI_EXT_PLATFORM_BACKEND_HIP = 4, ///< The backend is HIP - // Not supported anymore: - // PI_EXT_PLATFORM_BACKEND_ESIMD = 5, - PI_EXT_PLATFORM_BACKEND_NATIVE_CPU = 6, ///< The backend is NATIVE_CPU -} _pi_platform_backend; - -typedef enum { - PI_DEVICE_MEM_CACHE_TYPE_NONE = 0x0, - PI_DEVICE_MEM_CACHE_TYPE_READ_ONLY_CACHE = 0x1, - PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE = 0x2 -} _pi_device_mem_cache_type; - -typedef enum { - PI_DEVICE_LOCAL_MEM_TYPE_LOCAL = 0x1, - PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL = 0x2 -} _pi_device_local_mem_type; - -typedef enum { - PI_DEVICE_INFO_TYPE = 0x1000, - PI_DEVICE_INFO_VENDOR_ID = 0x1001, - PI_DEVICE_INFO_MAX_COMPUTE_UNITS = 0x1002, - PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = 0x1003, - PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = 0x1005, - PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = 0x1004, - PI_DEVICE_INFO_SINGLE_FP_CONFIG = 0x101B, - PI_DEVICE_INFO_HALF_FP_CONFIG = 0x1033, - PI_DEVICE_INFO_DOUBLE_FP_CONFIG = 0x1032, - PI_DEVICE_INFO_QUEUE_PROPERTIES = 0x102A, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = 0x1006, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = 0x1007, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = 0x1008, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = 0x1009, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = 0x100A, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = 0x100B, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = 0x1034, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = 0x1036, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = 0x1037, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = 0x1038, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = 0x1039, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = 0x103A, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = 0x103B, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = 0x103C, - PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = 0x100C, - PI_DEVICE_INFO_ADDRESS_BITS = 0x100D, - PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = 0x1010, - PI_DEVICE_INFO_IMAGE_SUPPORT = 0x1016, - PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = 0x100E, - PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = 0x100F, - PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = 0x1011, - PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = 0x1012, - PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH = 0x1013, - PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = 0x1014, - PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = 0x1015, - PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = 0x1040, - PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = 0x1041, - PI_DEVICE_INFO_MAX_SAMPLERS = 0x1018, - PI_DEVICE_INFO_MAX_PARAMETER_SIZE = 0x1017, - PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = 0x1019, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = 0x101C, - PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = 0x101D, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = 0x101E, - PI_DEVICE_INFO_GLOBAL_MEM_SIZE = 0x101F, - PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = 0x1020, - PI_DEVICE_INFO_MAX_CONSTANT_ARGS = 0x1021, - PI_DEVICE_INFO_LOCAL_MEM_TYPE = 0x1022, - PI_DEVICE_INFO_LOCAL_MEM_SIZE = 0x1023, - PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = 0x1024, - PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = 0x1035, - PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = 0x1025, - PI_DEVICE_INFO_ENDIAN_LITTLE = 0x1026, - PI_DEVICE_INFO_AVAILABLE = 0x1027, - PI_DEVICE_INFO_COMPILER_AVAILABLE = 0x1028, - PI_DEVICE_INFO_LINKER_AVAILABLE = 0x103E, - PI_DEVICE_INFO_EXECUTION_CAPABILITIES = 0x1029, - PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = 0x104E, - PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = 0x102A, - PI_DEVICE_INFO_BUILT_IN_KERNELS = 0x103F, - PI_DEVICE_INFO_PLATFORM = 0x1031, - PI_DEVICE_INFO_REFERENCE_COUNT = 0x1047, - PI_DEVICE_INFO_IL_VERSION = 0x105B, - PI_DEVICE_INFO_NAME = 0x102B, - PI_DEVICE_INFO_VENDOR = 0x102C, - PI_DEVICE_INFO_DRIVER_VERSION = 0x102D, - PI_DEVICE_INFO_PROFILE = 0x102E, - PI_DEVICE_INFO_VERSION = 0x102F, - PI_DEVICE_INFO_OPENCL_C_VERSION = 0x103D, - PI_DEVICE_INFO_EXTENSIONS = 0x1030, - PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = 0x1049, - PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = 0x1048, - PI_DEVICE_INFO_PARENT_DEVICE = 0x1042, - PI_DEVICE_INFO_PARTITION_PROPERTIES = 0x1044, - PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = 0x1043, - PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = 0x1045, - PI_DEVICE_INFO_PARTITION_TYPE = 0x1046, - PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 0x105C, - PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 0x105D, - PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 0x4108, - PI_DEVICE_INFO_USM_HOST_SUPPORT = 0x4190, - PI_DEVICE_INFO_USM_DEVICE_SUPPORT = 0x4191, - PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = 0x4192, - PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT = 0x4193, - PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = 0x4194, - // Intel UUID extension. - PI_DEVICE_INFO_UUID = 0x106A, - // These are Intel-specific extensions. - PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION = 0x4250, - PI_DEVICE_INFO_DEVICE_ID = 0x4251, - PI_DEVICE_INFO_PCI_ADDRESS = 0x10020, - PI_DEVICE_INFO_GPU_EU_COUNT = 0x10021, - PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH = 0x10022, - PI_DEVICE_INFO_GPU_SLICES = 0x10023, - PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE = 0x10024, - PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 0x10025, - PI_DEVICE_INFO_MAX_MEM_BANDWIDTH = 0x10026, - PI_DEVICE_INFO_IMAGE_SRGB = 0x10027, - // Return true if sub-device should do its own program build - PI_DEVICE_INFO_BUILD_ON_SUBDEVICE = 0x10028, - PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY = 0x10029, - // Return 0 if device doesn't have any memory modules. Return the minimum of - // the clock rate values if there are several memory modules on the device. - PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE = 0x10030, - // Return 0 if device doesn't have any memory modules. Return the minimum of - // the bus width values if there are several memory modules on the device. - PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH = 0x10031, - // Return 1 if the device doesn't have a notion of a "queue index". Otherwise, - // return the number of queue indices that are available for this device. - PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 0x10032, - PI_DEVICE_INFO_ATOMIC_64 = 0x10110, - PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, - PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, - PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, - PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, - // Return whether bfloat16 math functions are supported by device - PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS = 0x1FFFF, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003, - PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER = 0x20004, - PI_EXT_CODEPLAY_DEVICE_INFO_SUPPORTS_FUSION = 0x20005, - PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x20006, - PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x20007, - PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT = 0x20008, - // The number of max registers per block (device specific) - PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP = 0x20009, - PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT = 0x2000A, - PI_EXT_ONEAPI_DEVICE_INFO_WORK_GROUP_PROGRESS_AT_ROOT_GROUP_LEVEL = 0x2000B, - PI_EXT_ONEAPI_DEVICE_INFO_SUB_GROUP_PROGRESS_AT_ROOT_GROUP_LEVEL = 0x2000C, - PI_EXT_ONEAPI_DEVICE_INFO_SUB_GROUP_PROGRESS_AT_WORK_GROUP_LEVEL = 0x2000D, - PI_EXT_ONEAPI_DEVICE_INFO_WORK_ITEM_PROGRESS_AT_ROOT_GROUP_LEVEL = 0x2000E, - PI_EXT_ONEAPI_DEVICE_INFO_WORK_ITEM_PROGRESS_AT_WORK_GROUP_LEVEL = 0x2000F, - PI_EXT_ONEAPI_DEVICE_INFO_WORK_ITEM_PROGRESS_AT_SUB_GROUP_LEVEL = 0x20010, - // Bindless images, mipmaps, interop - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT = 0x20100, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT = 0x20101, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT = 0x20102, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT = 0x20103, - PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_PITCH_ALIGN = 0x20104, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH = 0x20105, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT = 0x20106, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH = 0x20107, - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_SUPPORT = 0x20108, - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT = 0x20109, - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY = 0x2010A, - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT = 0x2010B, - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT = 0x2010C, - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT = 0x2010E, - - PI_EXT_ONEAPI_DEVICE_INFO_MATRIX_COMBINATIONS = 0x20110, - - // Composite device - PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES = 0x20111, - PI_EXT_ONEAPI_DEVICE_INFO_COMPOSITE_DEVICE = 0x20112, - - // Command Buffers - PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_SUPPORT = 0x20113, - PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT = 0x20114, - - // Bindless images cubemaps - PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT = 0x20115, - PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT = 0x20116, - - // Bindless images sampled image fetch - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM = 0x20117, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D = 0x20118, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM = 0x20119, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D = 0x2011A, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D = 0x2011C, - - // Timestamp enqueue - PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x2011D, - - // Virtual memory support - PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM = 0x2011E, - - // Native enqueue - PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT = 0x2011F, - - // Return whether cluster launch is supported by device - PI_EXT_ONEAPI_DEVICE_INFO_CLUSTER_LAUNCH = 0x2021, - - // Bindless image arrays, unique addressing and USM sampling - PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_ARRAY_SUPPORT = 0x20122, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM = 0x20123, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLE_1D_USM = 0x20124, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLE_2D_USM = 0x20125, -} _pi_device_info; - -typedef enum { - PI_PROGRAM_INFO_REFERENCE_COUNT = 0x1160, - PI_PROGRAM_INFO_CONTEXT = 0x1161, - PI_PROGRAM_INFO_NUM_DEVICES = 0x1162, - PI_PROGRAM_INFO_DEVICES = 0x1163, - PI_PROGRAM_INFO_SOURCE = 0x1164, - PI_PROGRAM_INFO_BINARY_SIZES = 0x1165, - PI_PROGRAM_INFO_BINARIES = 0x1166, - PI_PROGRAM_INFO_NUM_KERNELS = 0x1167, - PI_PROGRAM_INFO_KERNEL_NAMES = 0x1168 -} _pi_program_info; - -typedef enum { - PI_CONTEXT_INFO_DEVICES = 0x1081, - PI_CONTEXT_INFO_PLATFORM = 0x1084, - PI_CONTEXT_INFO_NUM_DEVICES = 0x1083, - PI_CONTEXT_INFO_PROPERTIES = 0x1082, - PI_CONTEXT_INFO_REFERENCE_COUNT = 0x1080, - // Atomics capabilities extensions - PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010, - PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011, - PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10012, - PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10013, - // Native 2D USM memory operation support - PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT = 0x30000, - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT = 0x30001, - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT = 0x30002 -} _pi_context_info; - -typedef enum { - PI_QUEUE_INFO_CONTEXT = 0x1090, - PI_QUEUE_INFO_DEVICE = 0x1091, - PI_QUEUE_INFO_DEVICE_DEFAULT = 0x1095, - PI_QUEUE_INFO_PROPERTIES = 0x1093, - PI_QUEUE_INFO_REFERENCE_COUNT = 0x1092, - PI_QUEUE_INFO_SIZE = 0x1094, - // Return 'true' if all commands previously submitted to the queue have - // completed, otherwise return 'false'. - PI_EXT_ONEAPI_QUEUE_INFO_EMPTY = 0x2096 -} _pi_queue_info; - -typedef enum { - PI_KERNEL_INFO_FUNCTION_NAME = 0x1190, - PI_KERNEL_INFO_NUM_ARGS = 0x1191, - PI_KERNEL_INFO_REFERENCE_COUNT = 0x1192, - PI_KERNEL_INFO_CONTEXT = 0x1193, - PI_KERNEL_INFO_PROGRAM = 0x1194, - PI_KERNEL_INFO_ATTRIBUTES = 0x1195 -} _pi_kernel_info; - -typedef enum { - PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = 0x11B5, - PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = 0x11B0, - PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = 0x11B1, - PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = 0x11B2, - PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 0x11B3, - PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = 0x11B4, - // The number of registers used by the compiled kernel (device specific) - PI_KERNEL_GROUP_INFO_NUM_REGS = 0x10112 -} _pi_kernel_group_info; - -typedef enum { - PI_IMAGE_INFO_FORMAT = 0x1110, - PI_IMAGE_INFO_ELEMENT_SIZE = 0x1111, - PI_IMAGE_INFO_ROW_PITCH = 0x1112, - PI_IMAGE_INFO_SLICE_PITCH = 0x1113, - PI_IMAGE_INFO_WIDTH = 0x1114, - PI_IMAGE_INFO_HEIGHT = 0x1115, - PI_IMAGE_INFO_DEPTH = 0x1116 -} _pi_image_info; - -typedef enum { - PI_KERNEL_MAX_SUB_GROUP_SIZE = 0x2033, - PI_KERNEL_MAX_NUM_SUB_GROUPS = 0x11B9, - PI_KERNEL_COMPILE_NUM_SUB_GROUPS = 0x11BA, - PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL = 0x410A -} _pi_kernel_sub_group_info; - -typedef enum { - PI_EVENT_INFO_COMMAND_QUEUE = 0x11D0, - PI_EVENT_INFO_CONTEXT = 0x11D4, - PI_EVENT_INFO_COMMAND_TYPE = 0x11D1, - PI_EVENT_INFO_COMMAND_EXECUTION_STATUS = 0x11D3, - PI_EVENT_INFO_REFERENCE_COUNT = 0x11D2 -} _pi_event_info; - -typedef enum { - PI_COMMAND_TYPE_NDRANGE_KERNEL = 0x11F0, - PI_COMMAND_TYPE_MEM_BUFFER_READ = 0x11F3, - PI_COMMAND_TYPE_MEM_BUFFER_WRITE = 0x11F4, - PI_COMMAND_TYPE_MEM_BUFFER_COPY = 0x11F5, - PI_COMMAND_TYPE_MEM_BUFFER_MAP = 0x11FB, - PI_COMMAND_TYPE_MEM_BUFFER_UNMAP = 0x11FD, - PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT = 0x1201, - PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT = 0x1202, - PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT = 0x1203, - PI_COMMAND_TYPE_USER = 0x1204, - PI_COMMAND_TYPE_MEM_BUFFER_FILL = 0x1207, - PI_COMMAND_TYPE_IMAGE_READ = 0x11F6, - PI_COMMAND_TYPE_IMAGE_WRITE = 0x11F7, - PI_COMMAND_TYPE_IMAGE_COPY = 0x11F8, - PI_COMMAND_TYPE_NATIVE_KERNEL = 0x11F2, - PI_COMMAND_TYPE_COPY_BUFFER_TO_IMAGE = 0x11FA, - PI_COMMAND_TYPE_COPY_IMAGE_TO_BUFFER = 0x11F9, - PI_COMMAND_TYPE_MAP_IMAGE = 0x11FC, - PI_COMMAND_TYPE_MARKER = 0x11FE, - PI_COMMAND_TYPE_ACQUIRE_GL_OBJECTS = 0x11FF, - PI_COMMAND_TYPE_RELEASE_GL_OBJECTS = 0x1200, - PI_COMMAND_TYPE_BARRIER = 0x1205, - PI_COMMAND_TYPE_MIGRATE_MEM_OBJECTS = 0x1206, - PI_COMMAND_TYPE_FILL_IMAGE = 0x1208, - PI_COMMAND_TYPE_SVM_FREE = 0x1209, - PI_COMMAND_TYPE_SVM_MEMCPY = 0x120A, - PI_COMMAND_TYPE_SVM_MEMFILL = 0x120B, - PI_COMMAND_TYPE_SVM_MAP = 0x120C, - PI_COMMAND_TYPE_SVM_UNMAP = 0x120D, - PI_COMMAND_TYPE_EXT_COMMAND_BUFFER = 0x12A8, - PI_COMMAND_TYPE_DEVICE_GLOBAL_VARIABLE_READ = 0x418E, - PI_COMMAND_TYPE_DEVICE_GLOBAL_VARIABLE_WRITE = 0x418F -} _pi_command_type; - -typedef enum { - PI_MEM_TYPE_BUFFER = 0x10F0, - PI_MEM_TYPE_IMAGE2D = 0x10F1, - PI_MEM_TYPE_IMAGE3D = 0x10F2, - PI_MEM_TYPE_IMAGE2D_ARRAY = 0x10F3, - PI_MEM_TYPE_IMAGE1D = 0x10F4, - PI_MEM_TYPE_IMAGE1D_ARRAY = 0x10F5, - PI_MEM_TYPE_IMAGE1D_BUFFER = 0x10F6, - PI_MEM_TYPE_IMAGE_CUBEMAP = 0x10F7, -} _pi_mem_type; - -typedef enum { - // Device-specific value opaque in PI API. - PI_MEM_ADVICE_RESET = 0, - PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY = 1 << 0, - PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY = 1 << 1, - PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION = 1 << 2, - PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION = 1 << 3, - PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY = 1 << 4, - PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY = 1 << 5, - PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION_HOST = 1 << 6, - PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST = 1 << 7, - PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST = 1 << 8, - PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST = 1 << 9, - PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED = 1 << 10, - PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED = 1 << 11, - PI_MEM_ADVICE_UNKNOWN = 0x7FFFFFFF, -} _pi_mem_advice; - -// HIP _pi_mem_advice aliases -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_READ_MOSTLY = - PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY; -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_UNSET_READ_MOSTLY = - PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY; -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_PREFERRED_LOCATION = - PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION; -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION = - PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION; -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_ACCESSED_BY = - PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY; -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY = - PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY; -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_PREFERRED_LOCATION_HOST = - PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION_HOST; -static constexpr _pi_mem_advice - PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION_HOST = - PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST; -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_SET_ACCESSED_BY_HOST = - PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST; -static constexpr _pi_mem_advice PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY_HOST = - PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST; - -typedef enum { - PI_IMAGE_CHANNEL_ORDER_A = 0x10B1, - PI_IMAGE_CHANNEL_ORDER_R = 0x10B0, - PI_IMAGE_CHANNEL_ORDER_RG = 0x10B2, - PI_IMAGE_CHANNEL_ORDER_RA = 0x10B3, - PI_IMAGE_CHANNEL_ORDER_RGB = 0x10B4, - PI_IMAGE_CHANNEL_ORDER_RGBA = 0x10B5, - PI_IMAGE_CHANNEL_ORDER_BGRA = 0x10B6, - PI_IMAGE_CHANNEL_ORDER_ARGB = 0x10B7, - PI_IMAGE_CHANNEL_ORDER_ABGR = 0x10C3, - PI_IMAGE_CHANNEL_ORDER_INTENSITY = 0x10B8, - PI_IMAGE_CHANNEL_ORDER_LUMINANCE = 0x10B9, - PI_IMAGE_CHANNEL_ORDER_Rx = 0x10BA, - PI_IMAGE_CHANNEL_ORDER_RGx = 0x10BB, - PI_IMAGE_CHANNEL_ORDER_RGBx = 0x10BC, - PI_IMAGE_CHANNEL_ORDER_sRGBA = 0x10C1 -} _pi_image_channel_order; - -typedef enum { - PI_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0x10D0, - PI_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 0x10D1, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 0x10D2, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 0x10D3, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 0x10D4, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 0x10D5, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010 = 0x10D6, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 0x10D7, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 0x10D8, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 0x10D9, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 0x10DA, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 0x10DB, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 0x10DC, - PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 0x10DD, - PI_IMAGE_CHANNEL_TYPE_FLOAT = 0x10DE -} _pi_image_channel_type; - -typedef enum { - PI_IMAGE_COPY_HOST_TO_DEVICE = 0, - PI_IMAGE_COPY_DEVICE_TO_HOST = 1, - PI_IMAGE_COPY_DEVICE_TO_DEVICE = 2 -} _pi_image_copy_flags; - -typedef enum { PI_BUFFER_CREATE_TYPE_REGION = 0x1220 } _pi_buffer_create_type; - -const pi_bool PI_TRUE = 1; -const pi_bool PI_FALSE = 0; - -typedef enum { - PI_SAMPLER_INFO_REFERENCE_COUNT = 0x1150, - PI_SAMPLER_INFO_CONTEXT = 0x1151, - PI_SAMPLER_INFO_NORMALIZED_COORDS = 0x1152, - PI_SAMPLER_INFO_ADDRESSING_MODE = 0x1153, - PI_SAMPLER_INFO_FILTER_MODE = 0x1154, - PI_SAMPLER_INFO_MIP_FILTER_MODE = 0x1155, - PI_SAMPLER_INFO_LOD_MIN = 0x1156, - PI_SAMPLER_INFO_LOD_MAX = 0x1157 -} _pi_sampler_info; - -typedef enum { - PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 0x1134, - PI_SAMPLER_ADDRESSING_MODE_REPEAT = 0x1133, - PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 0x1131, - PI_SAMPLER_ADDRESSING_MODE_CLAMP = 0x1132, - PI_SAMPLER_ADDRESSING_MODE_NONE = 0x1130 -} _pi_sampler_addressing_mode; - -typedef enum { - PI_SAMPLER_FILTER_MODE_NEAREST = 0x1140, - PI_SAMPLER_FILTER_MODE_LINEAR = 0x1141, -} _pi_sampler_filter_mode; - -typedef enum { - PI_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED = 0x1142, - PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS = 0x1143, -} _pi_sampler_cubemap_filter_mode; - -typedef enum { - PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM = 0x30100, - PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED = 0x30101, -} _pi_virtual_mem_granularity_info; - -typedef enum { - PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE = 0x30200, -} _pi_virtual_mem_info; - -using pi_context_properties = intptr_t; - -using pi_device_exec_capabilities = pi_bitfield; -constexpr pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL = - (1 << 0); -constexpr pi_device_exec_capabilities - PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL = (1 << 1); - -using pi_sampler_properties = pi_bitfield; -constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = - 0x1152; -constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = 0x1153; -constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = 0x1154; -constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_MIP_FILTER_MODE = 0x1155; -constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_CUBEMAP_FILTER_MODE = - 0x1156; - -using pi_memory_order_capabilities = pi_bitfield; -constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELAXED = 0x01; -constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQUIRE = 0x02; -constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE = 0x04; -constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL = 0x08; -constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST = 0x10; - -using pi_memory_scope_capabilities = pi_bitfield; -constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM = 0x01; -constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP = 0x02; -constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04; -constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08; -constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10; - -typedef enum { - PI_PROFILING_INFO_COMMAND_QUEUED = 0x1280, - PI_PROFILING_INFO_COMMAND_SUBMIT = 0x1281, - PI_PROFILING_INFO_COMMAND_START = 0x1282, - PI_PROFILING_INFO_COMMAND_END = 0x1283 -} _pi_profiling_info; - -// NOTE: this is made 64-bit to match the size of cl_mem_flags to -// make the translation to OpenCL transparent. -// TODO: populate -// -using pi_mem_flags = pi_bitfield; -// Access -constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW = (1 << 0); -constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY = (1 << 2); -// Host pointer -constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = (1 << 3); -constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY = (1 << 5); -constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC = (1 << 4); - -// flags passed to Map operations -using pi_map_flags = pi_bitfield; -constexpr pi_map_flags PI_MAP_READ = (1 << 0); -constexpr pi_map_flags PI_MAP_WRITE = (1 << 1); -constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION = (1 << 2); -// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to -// make the translation to OpenCL transparent. -using pi_mem_properties = pi_bitfield; -constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL = 0x4213; -constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION = 0x419E; - -// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to -// make the translation to OpenCL transparent. -using pi_usm_mem_properties = pi_bitfield; -constexpr pi_usm_mem_properties PI_MEM_ALLOC_FLAGS = 0x4195; -constexpr pi_usm_mem_properties PI_MEM_ALLOC_WRTITE_COMBINED = (1 << 0); -constexpr pi_usm_mem_properties PI_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE = - (1 << 1); -constexpr pi_usm_mem_properties PI_MEM_ALLOC_INITIAL_PLACEMENT_HOST = (1 << 2); -// Hints that the device/shared allocation will not be written on device. -constexpr pi_usm_mem_properties PI_MEM_ALLOC_DEVICE_READ_ONLY = (1 << 3); - -constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION = 0x419E; - -// NOTE: queue properties are implemented this way to better support bit -// manipulations -using pi_queue_properties = pi_bitfield; -constexpr pi_queue_properties PI_QUEUE_FLAGS = -1; -constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX = -2; -// clang-format off -constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0); -constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1); -constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2); -constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); -constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE = (1 << 7); -constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE = (1 << 8); -// clang-format on - -using pi_virtual_access_flags = pi_bitfield; -constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_RW = (1 << 0); -constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_READ_ONLY = (1 << 1); - -typedef enum { - // No preference for SLM or data cache. - PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT = 0x0, - // Large SLM size. - PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_SLM = 0x1, - // Large General Data size. - PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_DATA = 0x2 -} _pi_kernel_cache_config; - -using pi_result = _pi_result; -using pi_platform_info = _pi_platform_info; -using pi_platform_backend = _pi_platform_backend; -using pi_device_type = _pi_device_type; -using pi_device_mem_cache_type = _pi_device_mem_cache_type; -using pi_device_local_mem_type = _pi_device_local_mem_type; -using pi_device_info = _pi_device_info; -using pi_program_info = _pi_program_info; -using pi_context_info = _pi_context_info; -using pi_queue_info = _pi_queue_info; -using pi_image_info = _pi_image_info; -using pi_kernel_info = _pi_kernel_info; -using pi_kernel_group_info = _pi_kernel_group_info; -using pi_kernel_sub_group_info = _pi_kernel_sub_group_info; -using pi_event_info = _pi_event_info; -using pi_command_type = _pi_command_type; -using pi_mem_type = _pi_mem_type; -using pi_mem_advice = _pi_mem_advice; -using pi_image_channel_order = _pi_image_channel_order; -using pi_image_channel_type = _pi_image_channel_type; -using pi_buffer_create_type = _pi_buffer_create_type; -using pi_sampler_addressing_mode = _pi_sampler_addressing_mode; -using pi_sampler_filter_mode = _pi_sampler_filter_mode; -using pi_sampler_cubemap_filter_mode = _pi_sampler_cubemap_filter_mode; -using pi_sampler_info = _pi_sampler_info; -using pi_event_status = _pi_event_status; -using pi_program_build_info = _pi_program_build_info; -using pi_program_build_status = _pi_program_build_status; -using pi_program_binary_type = _pi_program_binary_type; -using pi_kernel_info = _pi_kernel_info; -using pi_profiling_info = _pi_profiling_info; -using pi_kernel_cache_config = _pi_kernel_cache_config; -using pi_virtual_mem_granularity_info = _pi_virtual_mem_granularity_info; -using pi_virtual_mem_info = _pi_virtual_mem_info; - -using pi_image_copy_flags = _pi_image_copy_flags; - -// For compatibility with OpenCL define this not as enum. -using pi_device_partition_property = intptr_t; -static constexpr pi_device_partition_property PI_DEVICE_PARTITION_EQUALLY = - 0x1086; -static constexpr pi_device_partition_property PI_DEVICE_PARTITION_BY_COUNTS = - 0x1087; -static constexpr pi_device_partition_property - PI_DEVICE_PARTITION_BY_COUNTS_LIST_END = 0x0; -static constexpr pi_device_partition_property - PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = 0x1088; -static constexpr pi_device_partition_property - PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE = 0x1089; - -// For compatibility with OpenCL define this not as enum. -using pi_device_affinity_domain = pi_bitfield; -static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NUMA = - (1 << 0); -static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE = - (1 << 1); -static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE = - (1 << 2); -static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE = - (1 << 3); -static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE = - (1 << 4); -static constexpr pi_device_affinity_domain - PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE = (1 << 5); - -// For compatibility with OpenCL define this not as enum. -using pi_device_fp_config = pi_bitfield; -static constexpr pi_device_fp_config PI_FP_DENORM = (1 << 0); -static constexpr pi_device_fp_config PI_FP_INF_NAN = (1 << 1); -static constexpr pi_device_fp_config PI_FP_ROUND_TO_NEAREST = (1 << 2); -static constexpr pi_device_fp_config PI_FP_ROUND_TO_ZERO = (1 << 3); -static constexpr pi_device_fp_config PI_FP_ROUND_TO_INF = (1 << 4); -static constexpr pi_device_fp_config PI_FP_FMA = (1 << 5); -static constexpr pi_device_fp_config PI_FP_SOFT_FLOAT = (1 << 6); -static constexpr pi_device_fp_config PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = - (1 << 7); - -// For compatibility with OpenCL define this not as enum. -using pi_device_exec_capabilities = pi_bitfield; -static constexpr pi_device_exec_capabilities PI_EXEC_KERNEL = (1 << 0); -static constexpr pi_device_exec_capabilities PI_EXEC_NATIVE_KERNEL = (1 << 1); - -/// Extension to denote native support of assert feature by an arbitrary device -/// piDeviceGetInfo call should return this extension when the device supports -/// native asserts if supported extensions' names are requested -#define PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT "cl_intel_devicelib_assert" - -// pi_buffer_region structure repeats cl_buffer_region, used for sub buffers. -struct pi_buffer_region_struct { - size_t origin; - size_t size; -}; -using pi_buffer_region = pi_buffer_region_struct *; - -// pi_buff_rect_offset structure is 3D offset argument passed to buffer rect -// operations (piEnqueueMemBufferCopyRect, etc). -struct pi_buff_rect_offset_struct { - size_t x_bytes; - size_t y_scalar; - size_t z_scalar; -}; -using pi_buff_rect_offset = pi_buff_rect_offset_struct *; - -// pi_buff_rect_region structure represents size of 3D region passed to buffer -// rect operations (piEnqueueMemBufferCopyRect, etc). -struct pi_buff_rect_region_struct { - size_t width_bytes; - size_t height_scalar; - size_t depth_scalar; -}; -using pi_buff_rect_region = pi_buff_rect_region_struct *; - -// pi_image_offset structure is 3D offset argument passed to image operations -// (piEnqueueMemImageRead, etc). -struct pi_image_offset_struct { - size_t x; - size_t y; - size_t z; -}; -using pi_image_offset = pi_image_offset_struct *; - -// pi_image_region structure represents size of 3D region passed to image -// operations (piEnqueueMemImageRead, etc). -struct pi_image_region_struct { - size_t width; - size_t height; - size_t depth; -}; -using pi_image_region = pi_image_region_struct *; - -// This union encapsulates the two external handles we currently support. -// When choosing the correct field from the union we need to look at the value -// of the enum `pi_external_mem_handle_type` or -// `pi_external_semaphore_handle_type`. -union pi_external_handle { - // Used universally for all Linux based interoperability functionality. - // The associated enum `pi_external_mem_handle_type` in - // `pi_external_mem_descriptor` should always be set to - // `pi_external_mem_handle_type::opaque_fd`. Likewise for semaphore handles. - int file_descriptor; - - // Could be Win32 NT, KMT, or various DX12 handle types. - // The `void *` type is used for all of these. - // The exact handle type depends on the enum `pi_external_mem_handle_type`. - // This enum is found in `pi_external_mem_descriptor`. - // It could be a regular NT handle type (`win32_nt_handle`) or a DX12 specific - // resource handle type (`win32_nt_dx12_resource`), etc. - void *win32_handle; -}; - -// This enum enumerates the specific external memory handles types that we want -// to import. -enum class pi_external_mem_handle_type { - opaque_fd = 0, - win32_nt_handle = 1, - win32_nt_dx12_resource = 2, -}; - -// This struct holds all the information required to import external memory. -struct pi_external_mem_descriptor { - // The type of the external memory handle. - pi_external_mem_handle_type handleType; - // Union encapsulates both Opaque FD (linux) and Win32 handles (Windows). - pi_external_handle handle; - // Size of the external memory in bytes. - size_t memorySizeBytes; -}; - -// This enum enumerates the specific external semaphore handles types that we -// want to import. -enum class pi_external_semaphore_handle_type { - opaque_fd = 0, - win32_nt_handle = 1, - win32_nt_dx12_fence = 2, -}; - -// This struct holds all the information required to import external semaphores. -struct pi_external_semaphore_descriptor { - // The type of the external semaphore handle. - pi_external_semaphore_handle_type handleType; - // Union encapsulates both Opaque FD (linux) and Win32 handles (Windows). - pi_external_handle handle; -}; - -// Opaque types that make reading build log errors easier. -struct _pi_platform; -struct _pi_device; -struct _pi_context; -struct _pi_queue; -struct _pi_mem; -struct _pi_program; -struct _pi_kernel; -struct _pi_event; -struct _pi_sampler; -struct _pi_physical_mem; - -using pi_platform = _pi_platform *; -using pi_device = _pi_device *; -using pi_context = _pi_context *; -using pi_queue = _pi_queue *; -using pi_mem = _pi_mem *; -using pi_program = _pi_program *; -using pi_kernel = _pi_kernel *; -using pi_event = _pi_event *; -using pi_sampler = _pi_sampler *; -using pi_image_handle = pi_uint64; -using pi_image_mem_handle = void *; -using pi_interop_mem_handle = pi_uint64; -using pi_interop_semaphore_handle = pi_uint64; -using pi_physical_mem = _pi_physical_mem *; -using pi_enqueue_native_command_function = void (*)(pi_queue, void *); - -typedef struct { - pi_image_channel_order image_channel_order; - pi_image_channel_type image_channel_data_type; -} _pi_image_format; - -typedef struct { - pi_mem_type image_type; - size_t image_width; - size_t image_height; - size_t image_depth; - size_t image_array_size; - size_t image_row_pitch; - size_t image_slice_pitch; - pi_uint32 num_mip_levels; - pi_uint32 num_samples; - pi_mem buffer; -} _pi_image_desc; - -using pi_image_format = _pi_image_format; -using pi_image_desc = _pi_image_desc; - -typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info; - -typedef enum { - PI_PEER_ACCESS_SUPPORTED = - 0x0, ///< returns a uint32_t: 1 if P2P Access is supported - ///< otherwise P2P Access is not supported. - PI_PEER_ATOMICS_SUPPORTED = - 0x1 ///< returns a uint32_t: 1 if Atomic operations are supported over the - ///< P2P link, otherwise such operations are not supported. -} _pi_peer_attr; - -typedef enum { - PI_LAUNCH_PROPERTY_IGNORE = 0x0, - PI_LAUNCH_PROPERTY_COOPERATIVE = 0x1, - PI_LAUNCH_PROPERTY_CLUSTER_DIMENSION = 0x2, -} _pi_launch_property_id; - -typedef union { - int cooperative; - int32_t cluster_dims[3]; -} _pi_launch_property_value; - -using pi_mem_info = _pi_mem_info; -using pi_peer_attr = _pi_peer_attr; -using pi_launch_property_id = _pi_launch_property_id; -using pi_launch_property_value = _pi_launch_property_value; - -typedef struct { - pi_launch_property_id id; - pi_launch_property_value value; -} _pi_launch_property; - -using pi_launch_property = _pi_launch_property; - -struct sycl_device_binaries_struct; -typedef sycl_device_binaries_struct *pi_device_binary; -struct _sycl_device_binary_property_struct; -typedef _sycl_device_binary_property_struct *pi_device_binary_property; - -// -// Following section contains SYCL RT Plugin Interface (PI) functions. -// They are 3 distinct categories: -// -// 1) Ones having direct analogy in OpenCL and needed for the core SYCL -// functionality are started with just "pi" prefix in their names. -// 2) Those having direct analogy in OpenCL but only needed for SYCL -// interoperability with OpenCL are started with "picl" prefix. -// 3) Functions having no direct analogy in OpenCL, started with "piext". -// -// TODO: describe interfaces in Doxygen format -// - -struct _pi_plugin; -using pi_plugin = _pi_plugin; - -// PI Plugin Initialise. -// Plugin will check the PI version of Plugin Interface, -// populate the PI Version it supports, update targets field and populate -// PiFunctionTable with Supported APIs. The pointers are in a predetermined -// order in pi.def file. -__SYCL_EXPORT pi_result piPluginInit(pi_plugin *plugin_info); - -// -// Platform -// -__SYCL_EXPORT pi_result piPlatformsGet(pi_uint32 num_entries, - pi_platform *platforms, - pi_uint32 *num_platforms); - -__SYCL_EXPORT pi_result piPlatformGetInfo(pi_platform platform, - pi_platform_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -/// Gets the native handle of a PI platform object. -/// -/// \param platform is the PI platform to get the native handle of. -/// \param nativeHandle is the native handle of platform. -__SYCL_EXPORT pi_result piextPlatformGetNativeHandle( - pi_platform platform, pi_native_handle *nativeHandle); - -/// Creates PI platform object from a native handle. -/// NOTE: The created PI object takes ownership of the native handle. -/// -/// \param nativeHandle is the native handle to create PI device from. -/// \param platform is the PI platform created from the native handle. -__SYCL_EXPORT pi_result piextPlatformCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_platform *platform); - -__SYCL_EXPORT pi_result piDevicesGet(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices); - -__SYCL_EXPORT pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device); -__SYCL_EXPORT pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device); -__SYCL_EXPORT pi_result piextPeerAccessGetInfo( - pi_device command_device, pi_device peer_device, pi_peer_attr attr, - size_t param_value_size, void *param_value, size_t *param_value_size_ret); - -/// Returns requested info for provided native device -/// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for -/// PI_DEVICE_INFO_EXTENSIONS query when the device supports native asserts -__SYCL_EXPORT pi_result piDeviceGetInfo(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piDeviceRetain(pi_device device); - -__SYCL_EXPORT pi_result piDeviceRelease(pi_device device); - -__SYCL_EXPORT pi_result piDevicePartition( - pi_device device, const pi_device_partition_property *properties, - pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices); - -/// Gets the native handle of a PI device object. -/// -/// \param device is the PI device to get the native handle of. -/// \param nativeHandle is the native handle of device. -__SYCL_EXPORT pi_result -piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle); - -/// Creates PI device object from a native handle. -/// NOTE: The created PI object takes ownership of the native handle. -/// -/// \param nativeHandle is the native handle to create PI device from. -/// \param platform is the platform of the device (optional). -/// \param device is the PI device created from the native handle. -__SYCL_EXPORT pi_result piextDeviceCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_platform platform, pi_device *device); - -/// Selects the most appropriate device binary based on runtime information -/// and the IR characteristics. -/// -__SYCL_EXPORT pi_result piextDeviceSelectBinary(pi_device device, - pi_device_binary *binaries, - pi_uint32 num_binaries, - pi_uint32 *selected_binary_ind); - -/// Retrieves a device function pointer to a user-defined function -/// \arg \c function_name. \arg \c function_pointer_ret is set to 0 if query -/// failed. -/// -/// \arg \c program must be built before calling this API. \arg \c device -/// must present in the list of devices returned by \c get_device method for -/// \arg \c program. -/// -/// If a fallback method determines the function exists but the address is -/// not available PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE is returned. If the -/// address does not exist PI_ERROR_INVALID_KERNEL_NAME is returned. -__SYCL_EXPORT pi_result piextGetDeviceFunctionPointer( - pi_device device, pi_program program, const char *function_name, - pi_uint64 *function_pointer_ret); - -__SYCL_EXPORT pi_result piextGetGlobalVariablePointer( - pi_device Device, pi_program Program, const char *GlobalVariableName, - size_t *GlobalVariableSize, void **GlobalVariablePointerRet); - -// -// Context -// -__SYCL_EXPORT pi_result piContextCreate( - const pi_context_properties *properties, pi_uint32 num_devices, - const pi_device *devices, - void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, - void *user_data), - void *user_data, pi_context *ret_context); - -__SYCL_EXPORT pi_result piContextGetInfo(pi_context context, - pi_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piContextRetain(pi_context context); - -__SYCL_EXPORT pi_result piContextRelease(pi_context context); - -typedef void (*pi_context_extended_deleter)(void *user_data); - -__SYCL_EXPORT pi_result piextContextSetExtendedDeleter( - pi_context context, pi_context_extended_deleter func, void *user_data); - -/// Gets the native handle of a PI context object. -/// -/// \param context is the PI context to get the native handle of. -/// \param nativeHandle is the native handle of context. -__SYCL_EXPORT pi_result -piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle); - -/// Creates PI context object from a native handle. -/// NOTE: The created PI object takes ownership of the native handle. -/// NOTE: The number of devices and the list of devices is needed for Level Zero -/// backend because there is no possilibity to query this information from -/// context handle for Level Zero. If backend has API to query a list of devices -/// from the context native handle then these parameters are ignored. -/// -/// \param nativeHandle is the native handle to create PI context from. -/// \param numDevices is the number of devices in the context. Parameter is -/// ignored if number of devices can be queried from the context native -/// handle for a backend. -/// \param devices is the list of devices in the context. Parameter is ignored -/// if devices can be queried from the context native handle for a -/// backend. -/// \param pluginOwnsNativeHandle Indicates whether the created PI object -/// should take ownership of the native handle. -/// \param context is the PI context created from the native handle. -/// \return PI_SUCCESS if successfully created pi_context from the handle. -/// PI_ERROR_OUT_OF_HOST_MEMORY if can't allocate memory for the -/// pi_context object. PI_ERROR_INVALID_VALUE if numDevices == 0 or -/// devices is NULL but backend doesn't have API to query a list of -/// devices from the context native handle. PI_UNKNOWN_ERROR in case of -/// another error. -__SYCL_EXPORT pi_result piextContextCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_uint32 numDevices, - const pi_device *devices, bool pluginOwnsNativeHandle, pi_context *context); - -// -// Queue -// - -// TODO: Remove during next ABI break and rename piextQueueCreate to -// piQueueCreate. -__SYCL_EXPORT pi_result piQueueCreate(pi_context context, pi_device device, - pi_queue_properties properties, - pi_queue *queue); -/// \param properties points to a zero-terminated array of extra data describing -/// desired queue properties. Format is -/// {[PROPERTY[, property-specific elements of data]*,]* 0} -__SYCL_EXPORT pi_result piextQueueCreate(pi_context context, pi_device device, - pi_queue_properties *properties, - pi_queue *queue); - -__SYCL_EXPORT pi_result piQueueGetInfo(pi_queue command_queue, - pi_queue_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piQueueRetain(pi_queue command_queue); - -__SYCL_EXPORT pi_result piQueueRelease(pi_queue command_queue); - -__SYCL_EXPORT pi_result piQueueFinish(pi_queue command_queue); - -__SYCL_EXPORT pi_result piQueueFlush(pi_queue command_queue); - -/// Gets the native handle of a PI queue object. -/// -/// \param queue is the PI queue to get the native handle of. -/// \param nativeHandle is the native handle of queue or commandlist. -/// \param nativeHandleDesc provides additional properties of the native handle. -__SYCL_EXPORT pi_result piextQueueGetNativeHandle( - pi_queue queue, pi_native_handle *nativeHandle, int32_t *nativeHandleDesc); - -/// Creates PI queue object from a native handle. -/// NOTE: The created PI object takes ownership of the native handle. -/// -/// \param nativeHandle is the native handle to create PI queue from. -/// \param nativeHandleDesc provides additional properties of the native handle. -/// \param context is the PI context of the queue. -/// \param device is the PI device associated with the native device used when -/// creating the native queue. This parameter is optional but some backends -/// may fail to create the right PI queue if omitted. -/// \param pluginOwnsNativeHandle Indicates whether the created PI object -/// should take ownership of the native handle. -/// \param Properties holds queue properties. -/// \param queue is the PI queue created from the native handle. -__SYCL_EXPORT pi_result piextQueueCreateWithNativeHandle( - pi_native_handle nativeHandle, int32_t nativeHandleDesc, pi_context context, - pi_device device, bool pluginOwnsNativeHandle, - pi_queue_properties *Properties, pi_queue *queue); - -// -// Memory -// -__SYCL_EXPORT pi_result piMemBufferCreate( - pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, - pi_mem *ret_mem, const pi_mem_properties *properties = nullptr); - -__SYCL_EXPORT pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, - const pi_image_format *image_format, - const pi_image_desc *image_desc, - void *host_ptr, pi_mem *ret_mem); - -__SYCL_EXPORT pi_result piMemGetInfo(pi_mem mem, pi_mem_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piMemImageGetInfo(pi_mem image, - pi_image_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piMemRetain(pi_mem mem); - -__SYCL_EXPORT pi_result piMemRelease(pi_mem mem); - -__SYCL_EXPORT pi_result piMemBufferPartition( - pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, - void *buffer_create_info, pi_mem *ret_mem); - -/// Gets the native handle of a PI mem object. -/// -/// \param mem is the PI mem to get the native handle of. -/// \param dev is the PI device that the native allocation will be resident on -/// \param nativeHandle is the native handle of mem. -__SYCL_EXPORT pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev, - pi_native_handle *nativeHandle); - -/// Creates PI mem object from a native handle. -/// NOTE: The created PI object takes ownership of the native handle. -/// -/// \param nativeHandle is the native handle to create PI mem from. -/// \param context The PI context of the memory allocation. -/// \param ownNativeHandle Indicates if we own the native memory handle or it -/// came from interop that asked to not transfer the ownership to SYCL RT. -/// \param mem is the PI mem created from the native handle. -__SYCL_EXPORT pi_result piextMemCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, - pi_mem *mem); - -/// Creates PI image object from a native handle. -/// -/// \param nativeHandle is the native handle to create PI image from. -/// \param context The PI context of the memory allocation. -/// \param ownNativeHandle Indicates if we own the native memory handle or it -/// came from interop that asked to not transfer the ownership to SYCL RT. -/// \param ImageFormat is the pi_image_format struct that -/// specifies the image channnel order and channel data type that -/// match what the nativeHandle uses -/// \param ImageDesc is the pi_image_desc struct that specifies -/// the image dimension, pitch, slice and other information about -/// the nativeHandle -/// \param img is the PI img created from the native handle. -__SYCL_EXPORT pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *img); - -// -// Program -// - -__SYCL_EXPORT pi_result piProgramCreate(pi_context context, const void *il, - size_t length, pi_program *res_program); - -/// Creates a PI program for a context and loads the given binary into it. -/// -/// \param context is the PI context to associate the program with. -/// \param num_devices is the number of devices in device_list. -/// \param device_list is a pointer to a list of devices. These devices must all -/// be in context. -/// \param lengths is an array of sizes in bytes of the binary in binaries. -/// \param binaries is a pointer to a list of program binaries. -/// \param num_metadata_entries is the number of metadata entries in metadata. -/// \param metadata is a pointer to a list of program metadata entries. The -/// use of metadata entries is backend-defined. -/// \param binary_status returns whether the program binary was loaded -/// succesfully or not, for each device in device_list. -/// binary_status is ignored if it is null and otherwise -/// it must be an array of num_devices elements. -/// \param ret_program is the PI program created from the program binaries. -__SYCL_EXPORT pi_result piProgramCreateWithBinary( - pi_context context, pi_uint32 num_devices, const pi_device *device_list, - const size_t *lengths, const unsigned char **binaries, - size_t num_metadata_entries, const pi_device_binary_property *metadata, - pi_int32 *binary_status, pi_program *ret_program); - -__SYCL_EXPORT pi_result piProgramGetInfo(pi_program program, - pi_program_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result -piProgramLink(pi_context context, pi_uint32 num_devices, - const pi_device *device_list, const char *options, - pi_uint32 num_input_programs, const pi_program *input_programs, - void (*pfn_notify)(pi_program program, void *user_data), - void *user_data, pi_program *ret_program); - -__SYCL_EXPORT pi_result piProgramCompile( - pi_program program, pi_uint32 num_devices, const pi_device *device_list, - const char *options, pi_uint32 num_input_headers, - const pi_program *input_headers, const char **header_include_names, - void (*pfn_notify)(pi_program program, void *user_data), void *user_data); - -__SYCL_EXPORT pi_result piProgramBuild( - pi_program program, pi_uint32 num_devices, const pi_device *device_list, - const char *options, - void (*pfn_notify)(pi_program program, void *user_data), void *user_data); - -__SYCL_EXPORT pi_result piProgramGetBuildInfo( - pi_program program, pi_device device, _pi_program_build_info param_name, - size_t param_value_size, void *param_value, size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piProgramRetain(pi_program program); - -__SYCL_EXPORT pi_result piProgramRelease(pi_program program); - -/// Sets a specialization constant to a specific value. -/// -/// Note: Only used when specialization constants are natively supported (SPIR-V -/// binaries), and not when they are emulated (AOT binaries). -/// -/// \param prog the program object which will use the value -/// \param spec_id integer ID of the constant -/// \param spec_size size of the value -/// \param spec_value bytes of the value -__SYCL_EXPORT pi_result -piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, - size_t spec_size, const void *spec_value); - -/// Gets the native handle of a PI program object. -/// -/// \param program is the PI program to get the native handle of. -/// \param nativeHandle is the native handle of program. -__SYCL_EXPORT pi_result -piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle); - -/// Creates PI program object from a native handle. -/// NOTE: The created PI object takes ownership of the native handle. -/// -/// \param nativeHandle is the native handle to create PI program from. -/// \param context is the PI context of the program. -/// \param pluginOwnsNativeHandle Indicates whether the created PI object -/// should take ownership of the native handle. -/// \param program is the PI program created from the native handle. -__SYCL_EXPORT pi_result piextProgramCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, - bool pluginOwnsNativeHandle, pi_program *program); - -// -// Kernel -// - -typedef enum { - /// indicates that the kernel might access data through USM ptrs - PI_USM_INDIRECT_ACCESS, - /// provides an explicit list of pointers that the kernel will access - PI_USM_PTRS = 0x4203, - /// provides the preferred cache configuration (large slm or large data) - PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG = 0x4204 -} _pi_kernel_exec_info; - -using pi_kernel_exec_info = _pi_kernel_exec_info; - -__SYCL_EXPORT pi_result piKernelCreate(pi_program program, - const char *kernel_name, - pi_kernel *ret_kernel); - -__SYCL_EXPORT pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, - size_t arg_size, const void *arg_value); - -__SYCL_EXPORT pi_result piKernelGetInfo(pi_kernel kernel, - pi_kernel_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -/// API to query information from the sub-group from a kernel -/// -/// \param kernel is the pi_kernel to query -/// \param device is the device the kernel is executed on -/// \param param_name is a pi_kernel_sub_group_info enum value that -/// specifies the informtation queried for. -/// \param input_value_size is the size of input value passed in -/// ptr input_value param -/// \param input_value is the ptr to the input value passed. -/// \param param_value_size is the size of the value in bytes. -/// \param param_value is a pointer to the value to set. -/// \param param_value_size_ret is a pointer to return the size of data in -/// param_value ptr. -/// -/// All queries expect a return of 4 bytes in param_value_size, -/// param_value_size_ret, and a uint32_t value should to be written in -/// param_value ptr. -/// Note: This behaviour differs from OpenCL. OpenCL returns size_t. -__SYCL_EXPORT pi_result piKernelGetSubGroupInfo( - pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, - size_t input_value_size, const void *input_value, size_t param_value_size, - void *param_value, size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piKernelRetain(pi_kernel kernel); - -__SYCL_EXPORT pi_result piKernelRelease(pi_kernel kernel); - -/// Sets up pointer arguments for CL kernels. An extra indirection -/// is required due to CL argument conventions. -/// -/// \param kernel is the kernel to be launched -/// \param arg_index is the index of the kernel argument -/// \param arg_size is the size in bytes of the argument (ignored in CL) -/// \param arg_value is the pointer argument -__SYCL_EXPORT pi_result piextKernelSetArgPointer(pi_kernel kernel, - pi_uint32 arg_index, - size_t arg_size, - const void *arg_value); - -/// API to set attributes controlling kernel execution -/// -/// \param kernel is the pi kernel to execute -/// \param param_name is a pi_kernel_exec_info value that specifies the info -/// passed to the kernel -/// \param param_value_size is the size of the value in bytes -/// \param param_value is a pointer to the value to set for the kernel -/// -/// If param_name is PI_USM_INDIRECT_ACCESS, the value will be a ptr to -/// the pi_bool value PI_TRUE -/// If param_name is PI_USM_PTRS, the value will be an array of ptrs -__SYCL_EXPORT pi_result piKernelSetExecInfo(pi_kernel kernel, - pi_kernel_exec_info value_name, - size_t param_value_size, - const void *param_value); - -/// Creates PI kernel object from a native handle. -/// NOTE: The created PI object takes ownership of the native handle. -/// -/// \param nativeHandle is the native handle to create PI kernel from. -/// \param context is the PI context of the kernel. -/// \param program is the PI program of the kernel. -/// \param pluginOwnsNativeHandle Indicates whether the created PI object -/// should take ownership of the native handle. -/// \param kernel is the PI kernel created from the native handle. -__SYCL_EXPORT pi_result piextKernelCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, pi_program program, - bool pluginOwnsNativeHandle, pi_kernel *kernel); - -/// Gets the native handle of a PI kernel object. -/// -/// \param kernel is the PI kernel to get the native handle of. -/// \param nativeHandle is the native handle of kernel. -__SYCL_EXPORT pi_result -piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle); - -/// Gets the max work group count for a cooperative kernel. -/// -/// \param kernel is the PI kernel being queried. -/// \param local_work_size is the number of work items in a work group that will -/// be used when the kernel is launched. \param dynamic_shared_memory_size is -/// the size of dynamic shared memory, for each work group, in bytes, that will -/// be used when the kernel is launched." \param group_count_ret is a pointer to -/// where the query result will be stored. -__SYCL_EXPORT pi_result piextKernelSuggestMaxCooperativeGroupCount( - pi_kernel kernel, size_t local_work_size, size_t dynamic_shared_memory_size, - pi_uint32 *group_count_ret); - -// -// Events -// - -/// Create PI event object in a signalled/completed state. -/// -/// \param context is the PI context of the event. -/// \param ret_event is the PI even created. -__SYCL_EXPORT pi_result piEventCreate(pi_context context, pi_event *ret_event); - -__SYCL_EXPORT pi_result piEventGetInfo(pi_event event, pi_event_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piEventGetProfilingInfo(pi_event event, - pi_profiling_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piEventsWait(pi_uint32 num_events, - const pi_event *event_list); - -__SYCL_EXPORT pi_result piEventSetCallback( - pi_event event, pi_int32 command_exec_callback_type, - void (*pfn_notify)(pi_event event, pi_int32 event_command_status, - void *user_data), - void *user_data); - -__SYCL_EXPORT pi_result piEventSetStatus(pi_event event, - pi_int32 execution_status); - -__SYCL_EXPORT pi_result piEventRetain(pi_event event); - -__SYCL_EXPORT pi_result piEventRelease(pi_event event); - -__SYCL_EXPORT pi_result piEnqueueTimestampRecordingExp( - pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -/// Gets the native handle of a PI event object. -/// -/// \param event is the PI event to get the native handle of. -/// \param nativeHandle is the native handle of event. -__SYCL_EXPORT pi_result -piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle); - -/// Creates PI event object from a native handle. -/// NOTE: The created PI object takes ownership of the native handle. -/// -/// \param nativeHandle is the native handle to create PI event from. -/// \param context is the corresponding PI context -/// \param pluginOwnsNativeHandle Indicates whether the created PI object -/// should take ownership of the native handle. -/// \param event is the PI event created from the native handle. -__SYCL_EXPORT pi_result piextEventCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, - pi_event *event); - -// -// Sampler -// -__SYCL_EXPORT pi_result piSamplerCreate( - pi_context context, const pi_sampler_properties *sampler_properties, - pi_sampler *result_sampler); - -__SYCL_EXPORT pi_result piSamplerGetInfo(pi_sampler sampler, - pi_sampler_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -__SYCL_EXPORT pi_result piSamplerRetain(pi_sampler sampler); - -__SYCL_EXPORT pi_result piSamplerRelease(pi_sampler sampler); - -// -// Queue Commands -// -__SYCL_EXPORT pi_result piEnqueueKernelLaunch( - pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piextEnqueueCooperativeKernelLaunch( - pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piextEnqueueKernelLaunchCustom( - pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_size, const size_t *local_work_size, - pi_uint32 num_props_in_launch_prop_list, - const pi_launch_property *launch_prop_list, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueEventsWait(pi_queue command_queue, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueEventsWaitWithBarrier( - pi_queue command_queue, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueMemBufferRead( - pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, - size_t size, void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueMemBufferReadRect( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result -piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_write, size_t offset, size_t size, - const void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueMemBufferWriteRect( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result -piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, - pi_mem dst_buffer, size_t src_offset, size_t dst_offset, - size_t size, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect( - pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, - pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, - pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, - size_t dst_row_pitch, size_t dst_slice_pitch, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); - -__SYCL_EXPORT pi_result -piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, - const void *pattern, size_t pattern_size, size_t offset, - size_t size, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueMemImageRead( - pi_queue command_queue, pi_mem image, pi_bool blocking_read, - pi_image_offset origin, pi_image_region region, size_t row_pitch, - size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueMemImageWrite( - pi_queue command_queue, pi_mem image, pi_bool blocking_write, - pi_image_offset origin, pi_image_region region, size_t input_row_pitch, - size_t input_slice_pitch, const void *ptr, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueMemImageCopy( - pi_queue command_queue, pi_mem src_image, pi_mem dst_image, - pi_image_offset src_origin, pi_image_offset dst_origin, - pi_image_region region, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result -piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, - const void *fill_color, const size_t *origin, - const size_t *region, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -__SYCL_EXPORT pi_result piEnqueueMemBufferMap( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, - pi_map_flags map_flags, size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event, void **ret_map); - -__SYCL_EXPORT pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, - void *mapped_ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event); - -#ifndef PI_BIT -#define PI_BIT(_i) (1 << _i) -#endif // PI_BIT - -typedef enum { - PI_ACCESS_READ_WRITE = PI_BIT(0), - PI_ACCESS_WRITE_ONLY = PI_BIT(1), - PI_ACCESS_READ_ONLY = PI_BIT(2) -} _pi_mem_obj_access; -using pi_mem_obj_access = _pi_mem_obj_access; -typedef uint32_t pi_mem_access_flag; - -typedef enum { - PI_KERNEL_ARG_MEM_OBJ_ACCESS = 27, - PI_ENUM_FORCE_UINT32 = 0x7fffffff -} _pi_mem_obj_property_type; -using pi_mem_obj_property_type = _pi_mem_obj_property_type; - -typedef struct { - pi_mem_obj_property_type type; - void *pNext; - pi_mem_access_flag mem_access; -} _pi_mem_obj_property; -using pi_mem_obj_property = _pi_mem_obj_property; - -// Extension to allow backends to process a PI memory object before adding it -// as an argument for a kernel. -// Note: This is needed by the CUDA backend to extract the device pointer to -// the memory as the kernels uses it rather than the PI object itself. -__SYCL_EXPORT pi_result piextKernelSetArgMemObj( - pi_kernel kernel, pi_uint32 arg_index, - const pi_mem_obj_property *arg_properties, const pi_mem *arg_value); - -// Extension to allow backends to process a PI sampler object before adding it -// as an argument for a kernel. -// Note: This is needed by the CUDA backend to extract the properties of the -// sampler as the kernels uses it rather than the PI object itself. -__SYCL_EXPORT pi_result piextKernelSetArgSampler(pi_kernel kernel, - pi_uint32 arg_index, - const pi_sampler *arg_value); - -/// -// USM -/// -typedef enum { - PI_USM_HOST_SUPPORT = 0x4190, - PI_USM_DEVICE_SUPPORT = 0x4191, - PI_USM_SINGLE_SHARED_SUPPORT = 0x4192, - PI_USM_CROSS_SHARED_SUPPORT = 0x4193, - PI_USM_SYSTEM_SHARED_SUPPORT = 0x4194 -} _pi_usm_capability_query; - -typedef enum : pi_bitfield { - PI_USM_ACCESS = (1 << 0), - PI_USM_ATOMIC_ACCESS = (1 << 1), - PI_USM_CONCURRENT_ACCESS = (1 << 2), - PI_USM_CONCURRENT_ATOMIC_ACCESS = (1 << 3) -} _pi_usm_capabilities; - -typedef enum { - PI_MEM_ALLOC_TYPE = 0x419A, - PI_MEM_ALLOC_BASE_PTR = 0x419B, - PI_MEM_ALLOC_SIZE = 0x419C, - PI_MEM_ALLOC_DEVICE = 0x419D, -} _pi_mem_alloc_info; - -typedef enum { - PI_MEM_TYPE_UNKNOWN = 0x4196, - PI_MEM_TYPE_HOST = 0x4197, - PI_MEM_TYPE_DEVICE = 0x4198, - PI_MEM_TYPE_SHARED = 0x4199 -} _pi_usm_type; - -// Flag is used for piProgramUSMEnqueuePrefetch. PI_USM_MIGRATION_TBD0 is a -// placeholder for future developments and should not change the behaviour of -// piProgramUSMEnqueuePrefetch -typedef enum : pi_bitfield { - PI_USM_MIGRATION_TBD0 = (1 << 0) -} _pi_usm_migration_flags; - -using pi_usm_capability_query = _pi_usm_capability_query; -using pi_usm_capabilities = _pi_usm_capabilities; -using pi_mem_alloc_info = _pi_mem_alloc_info; -using pi_usm_type = _pi_usm_type; -using pi_usm_migration_flags = _pi_usm_migration_flags; - -/// Allocates host memory accessible by the device. -/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param properties are optional allocation properties -/// \param size is the size of the allocation -/// \param alignment is the desired alignment of the allocation -__SYCL_EXPORT pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, - pi_usm_mem_properties *properties, - size_t size, pi_uint32 alignment); - -/// Allocates device memory -/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param device is the device the memory will be allocated on -/// \param properties are optional allocation properties -/// \param size is the size of the allocation -/// \param alignment is the desired alignment of the allocation -__SYCL_EXPORT pi_result piextUSMDeviceAlloc(void **result_ptr, - pi_context context, - pi_device device, - pi_usm_mem_properties *properties, - size_t size, pi_uint32 alignment); - -/// Allocates memory accessible on both host and device -/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param device is the device the memory will be allocated on -/// \param properties are optional allocation properties -/// \param size is the size of the allocation -/// \param alignment is the desired alignment of the allocation -__SYCL_EXPORT pi_result piextUSMSharedAlloc(void **result_ptr, - pi_context context, - pi_device device, - pi_usm_mem_properties *properties, - size_t size, pi_uint32 alignment); - -/// Allocates memory accessible on device -/// -/// \param result_ptr contains the allocated memory -/// \param result_pitch contains the returned memory pitch -/// \param context is the pi_context -/// \param device is the device the memory will be allocated on -/// \param properties are optional allocation properties -/// \param width_in_bytes is the width of the allocation in bytes -/// \param height is the height of the allocation in rows -/// \param element_size_bytes is the size in bytes of an element in the -/// allocation -__SYCL_EXPORT pi_result piextUSMPitchedAlloc( - void **result_ptr, size_t *result_pitch, pi_context context, - pi_device device, pi_usm_mem_properties *properties, size_t width_in_bytes, - size_t height, unsigned int element_size_bytes); - -/// Indicates that the allocated USM memory is no longer needed on the runtime -/// side. The actual freeing of the memory may be done in a blocking or deferred -/// manner, e.g. to avoid issues with indirect memory access from kernels. -/// -/// \param context is the pi_context of the allocation -/// \param ptr is the memory to be freed -__SYCL_EXPORT pi_result piextUSMFree(pi_context context, void *ptr); - -/// USM Fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pattern is the ptr with the bytes of the pattern to set -/// \param patternSize is the size in bytes of the pattern to set -/// \param count is the size in bytes to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue queue, void *ptr, - const void *pattern, - size_t patternSize, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event); - -/// USM Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param src_ptr is the data to be copied -/// \param dst_ptr is the location the data will be copied -/// \param size is number of bytes to copy -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, - void *dst_ptr, - const void *src_ptr, size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event); - -/// Hint to migrate memory to the device -/// -/// \param queue is the queue to submit to -/// \param ptr points to the memory to migrate -/// \param size is the number of bytes to migrate -/// \param flags is a bitfield used to specify memory migration options -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueuePrefetch( - pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, - pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, - pi_event *event); - -/// USM Memadvise API -/// -/// \param queue is the queue to submit to -/// \param ptr is the data to be advised -/// \param length is the size in bytes of the memory to advise -/// \param advice is device specific advice -/// \param event is the event that represents this operation -// USM memadvise API to govern behavior of automatic migration mechanisms -__SYCL_EXPORT pi_result piextUSMEnqueueMemAdvise(pi_queue queue, - const void *ptr, size_t length, - pi_mem_advice advice, - pi_event *event); - -/// API to query information about USM allocated pointers -/// Valid Queries: -/// PI_MEM_ALLOC_TYPE returns host/device/shared pi_host_usm value -/// PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if -/// the queried pointer fell inside an allocation. -/// Result must fit in void * -/// PI_MEM_ALLOC_SIZE returns how big the queried pointer's -/// allocation is in bytes. Result is a size_t. -/// PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against -/// -/// \param context is the pi_context -/// \param ptr is the pointer to query -/// \param param_name is the type of query to perform -/// \param param_value_size is the size of the result in bytes -/// \param param_value is the result -/// \param param_value_size_ret is how many bytes were written -__SYCL_EXPORT pi_result piextUSMGetMemAllocInfo( - pi_context context, const void *ptr, pi_mem_alloc_info param_name, - size_t param_value_size, void *param_value, size_t *param_value_size_ret); - -/// USM 2D fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, - size_t pitch, size_t pattern_size, - const void *pattern, size_t width, - size_t height, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event); - -/// USM 2D Memset API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param value the value to fill into the region in \param ptr -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D( - pi_queue queue, void *ptr, size_t pitch, int value, size_t width, - size_t height, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event); - -/// USM 2D Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param dst_ptr is the location the data will be copied -/// \param dst_pitch is the total width of the destination memory including -/// padding -/// \param src_ptr is the data to be copied -/// \param src_pitch is the total width of the source memory including padding -/// \param width is width in bytes of each row to be copied -/// \param height is height the columns to be copied -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( - pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, - const void *src_ptr, size_t src_pitch, size_t width, size_t height, - pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, - pi_event *event); - -/// Import host system memory into USM. -/// -/// \param ptr start address of memory range to import -/// \param size is the number of bytes to import -/// \param context is the pi_context -__SYCL_EXPORT pi_result piextUSMImport(const void *ptr, size_t size, - pi_context context); - -/// Release host system memory from USM. -/// -/// \param ptr start address of imported memory range -/// \param context is the pi_context -__SYCL_EXPORT pi_result piextUSMRelease(const void *ptr, pi_context context); - -/// -/// Device global variable -/// - -/// API for writing data from host to a device global variable. -/// -/// \param queue is the queue -/// \param program is the program containing the device global variable -/// \param blocking_write is true if the write should block -/// \param name is the unique identifier for the device global variable -/// \param count is the number of bytes to copy -/// \param offset is the byte offset into the device global variable to start -/// copying -/// \param src is a pointer to where the data must be copied from -/// \param num_events_in_wait_list is a number of events in the wait list -/// \param event_wait_list is the wait list -/// \param event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue queue, pi_program program, const char *name, - pi_bool blocking_write, size_t count, size_t offset, const void *src, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); - -/// API reading data from a device global variable to host. -/// -/// \param queue is the queue -/// \param program is the program containing the device global variable -/// \param blocking_read is true if the read should block -/// \param name is the unique identifier for the device global variable -/// \param count is the number of bytes to copy -/// \param offset is the byte offset into the device global variable to start -/// copying -/// \param dst is a pointer to where the data must be copied to -/// \param num_events_in_wait_list is a number of events in the wait list -/// \param event_wait_list is the wait list -/// \param event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, - size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -/// -/// Virtual memory -/// - -/// API for getting information about the minimum and recommended granularity -/// of physical and virtual memory. -/// -/// \param context is the context to get the granularity from. -/// \param device is the device to get the granularity from. -/// \param param_name is the type of query to perform. -/// \param param_value_size is the size of the result in bytes. -/// \param param_value is the result. -/// \param param_value_size_ret is how many bytes were written. -__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( - pi_context context, pi_device device, - pi_virtual_mem_granularity_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret); - -/// API for creating a physical memory handle that virtual memory can be mapped -/// to. -/// -/// \param context is the context within which the physical memory is allocated. -/// \param device is the device the physical memory is on. -/// \param mem_size is the size of physical memory to allocate. This must be a -/// multiple of the minimum virtual memory granularity. -/// \param ret_physical_mem is the handle for the resulting physical memory. -__SYCL_EXPORT pi_result -piextPhysicalMemCreate(pi_context context, pi_device device, size_t mem_size, - pi_physical_mem *ret_physical_mem); - -/// API for retaining a physical memory handle. -/// -/// \param physical_mem is the handle for the physical memory to retain. -__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem); - -/// API for releasing a physical memory handle. -/// -/// \param physical_mem is the handle for the physical memory to free. -__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem); - -/// API for reserving a virtual memory range. -/// -/// \param context is the context within which the virtual memory range is -/// reserved. -/// \param start is a pointer to the start of the region to reserve. If nullptr -/// the implementation selects a start address. -/// \param range_size is the size of the virtual address range to reserve in -/// bytes. -/// \param ret_ptr is the pointer to the start of the resulting virtual memory -/// range. -__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context context, - const void *start, - size_t range_size, - void **ret_ptr); - -/// API for freeing a virtual memory range. -/// -/// \param context is the context within which the virtual memory range is -/// reserved. -/// \param ptr is the pointer to the start of the virtual memory range. -/// \param range_size is the size of the virtual address range. -__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context context, const void *ptr, - size_t range_size); - -/// API for mapping a virtual memory range to a a physical memory allocation at -/// a given offset. -/// -/// \param context is the context within which both the virtual memory range is -/// reserved and the physical memory is allocated. -/// \param ptr is the pointer to the start of the virtual memory range. -/// \param range_size is the size of the virtual address range. -/// \param physical_mem is the handle for the physical memory to map ptr to. -/// \param offset is the offset into physical_mem in bytes to map ptr to. -/// \param flags is the access flags to set for the mapping. -__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context context, const void *ptr, - size_t range_size, - pi_physical_mem physical_mem, - size_t offset, - pi_virtual_access_flags flags); - -/// API for unmapping a virtual memory range previously mapped in a context. -/// After a call to this function, the virtual memory range is left in a state -/// ready to be remapped. -/// -/// \param context is the context within which the virtual memory range is -/// currently mapped. -/// \param ptr is the pointer to the start of the virtual memory range. -/// \param range_size is the size of the virtual address range in bytes. -__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context context, - const void *ptr, - size_t range_size); - -/// API for setting the access mode of a mapped virtual memory range. -/// -/// \param context is the context within which the virtual memory range is -/// currently mapped. -/// \param ptr is the pointer to the start of the virtual memory range. -/// \param range_size is the size of the virtual address range in bytes. -/// \param flags is the access flags to set for the mapped virtual access range. -__SYCL_EXPORT pi_result piextVirtualMemSetAccess(pi_context context, - const void *ptr, - size_t range_size, - pi_virtual_access_flags flags); - -/// API for getting info about a mapped virtual memory range. -/// -/// \param context is the context within which the virtual memory range is -/// currently mapped. -/// \param ptr is the pointer to the start of the virtual memory range. -/// \param range_size is the size of the virtual address range in bytes. -/// \param param_name is the type of query to perform. -/// \param param_value_size is the size of the result in bytes. -/// \param param_value is the result. -/// \param param_value_size_ret is how many bytes were written. -__SYCL_EXPORT pi_result -piextVirtualMemGetInfo(pi_context context, const void *ptr, size_t range_size, - pi_virtual_mem_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret); - -/// -/// Plugin -/// -/// -// Host Pipes -/// - -/// Read from pipe of a given name -/// -/// @param queue a valid host command-queue in which the read / write command -/// will be queued. command_queue and program must be created with the same -/// OpenCL context. -/// @param program a program object with a successfully built executable. -/// @param pipe_symbol the name of the program scope pipe global variable. -/// @param blocking indicate if the read and write operations are blocking or -/// non-blocking -/// @param ptr a pointer to buffer in host memory that will hold resulting data -/// from pipe -/// @param size size of the memory region to read or write, in bytes. -/// @param num_events_in_waitlist number of events in the wait list. -/// @param events_waitlist specify events that need to complete before this -/// particular command can be executed. -/// @param event returns an event object that identifies this read / write -/// command and can be used to query or queue a wait for this command to -/// complete. -__SYCL_EXPORT pi_result piextEnqueueReadHostPipe( - pi_queue queue, pi_program program, const char *pipe_symbol, - pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event); - -/// Write to pipe of a given name -/// -/// @param queue a valid host command-queue in which the read / write command -/// will be queued. command_queue and program must be created with the same -/// OpenCL context. -/// @param program a program object with a successfully built executable. -/// @param pipe_symbol the name of the program scope pipe global variable. -/// @param blocking indicate if the read and write operations are blocking or -/// non-blocking -/// @param ptr a pointer to buffer in host memory that holds data to be written -/// to host pipe. -/// @param size size of the memory region to read or write, in bytes. -/// @param num_events_in_waitlist number of events in the wait list. -/// @param events_waitlist specify events that need to complete before this -/// particular command can be executed. -/// @param event returns an event object that identifies this read / write -/// command and can be used to query or queue a wait for this command to -/// complete. -__SYCL_EXPORT pi_result piextEnqueueWriteHostPipe( - pi_queue queue, pi_program program, const char *pipe_symbol, - pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event); - -/// API to get Plugin internal data, opaque to SYCL RT. Some devices whose -/// device code is compiled by the host compiler (e.g. CPU emulators) may use it -/// to access some device code functionality implemented in/behind the plugin. -/// \param opaque_data_param - unspecified argument, interpretation is specific -/// to a plugin \param opaque_data_return - placeholder for the returned opaque -/// data. -__SYCL_EXPORT pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return); - -/// API to notify that the plugin should clean up its resources. -/// No PI calls should be made until the next piPluginInit call. -/// \param PluginParameter placeholder for future use, currenly not used. -__SYCL_EXPORT pi_result piTearDown(void *PluginParameter); - -/// API to get Plugin specific warning and error messages. -/// \param message is a returned address to the first element in the message the -/// plugin owns the error message string. The string is thread-local. As a -/// result, different threads may return different errors. A message is -/// overwritten by the following error or warning that is produced within the -/// given thread. The memory is cleaned up at the end of the thread's lifetime. -/// -/// \return PI_SUCCESS if plugin is indicating non-fatal warning. Any other -/// error code indicates that plugin considers this to be a fatal error and the -/// Returns the global timestamp from \param device , and syncronized host -/// timestamp -__SYCL_EXPORT pi_result piPluginGetLastError(char **message); - -/// API to get backend specific option. -/// \param frontend_option is a string that contains frontend option. -/// \param backend_option is used to return the backend option corresponding to -/// frontend option. -/// -/// \return PI_SUCCESS is returned for valid frontend_option. If a valid backend -/// option is not available, an empty string is returned. -__SYCL_EXPORT pi_result piPluginGetBackendOption(pi_platform platform, - const char *frontend_option, - const char **backend_option); - -/// Queries device for it's global timestamp in nanoseconds, and updates -/// HostTime with the value of the host timer at the closest possible point in -/// time to that at which DeviceTime was returned. -/// -/// \param Device device to query for timestamp -/// \param DeviceTime pointer to store device timestamp in nanoseconds. Optional -/// argument, can be nullptr -/// \param HostTime pointer to store host timestamp in -/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will -/// not be written -__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, - uint64_t *DeviceTime, - uint64_t *HostTime); - -/// Command buffer extension -struct _pi_ext_command_buffer; -struct _pi_ext_sync_point; -struct _pi_ext_command_buffer_command; - -using pi_ext_command_buffer = _pi_ext_command_buffer *; -using pi_ext_command_buffer_command = _pi_ext_command_buffer_command *; -using pi_ext_sync_point = pi_uint32; - -typedef enum { - PI_EXT_STRUCTURE_TYPE_COMMAND_BUFFER_DESC = 0 -} pi_ext_structure_type; - -struct pi_ext_command_buffer_desc final { - pi_ext_structure_type stype; - const void *pNext; - pi_bool is_in_order; - pi_bool enable_profiling; - pi_bool is_updatable; -}; - -// Command Buffer Update types -struct pi_ext_command_buffer_update_memobj_arg_desc_t final { - uint32_t arg_index; - const pi_mem_obj_property *properties; - pi_mem new_mem_obj; -}; - -struct pi_ext_command_buffer_update_pointer_arg_desc_t final { - uint32_t arg_index; - void *new_ptr; -}; - -struct pi_ext_command_buffer_update_value_arg_desc_t final { - uint32_t arg_index; - uint32_t arg_size; - void *new_value; -}; - -struct pi_ext_command_buffer_update_kernel_launch_desc final { - uint32_t num_mem_obj_args; - uint32_t num_ptr_args; - uint32_t num_value_args; - uint32_t num_work_dim; - - pi_ext_command_buffer_update_memobj_arg_desc_t *mem_obj_arg_list; - pi_ext_command_buffer_update_pointer_arg_desc_t *ptr_arg_list; - pi_ext_command_buffer_update_value_arg_desc_t *value_arg_list; - - size_t *global_work_offset; - size_t *global_work_size; - size_t *local_work_size; -}; - -/// API to create a command-buffer. -/// \param context The context to associate the command-buffer with. -/// \param device The device to associate the command-buffer with. -/// \param desc Descriptor for the new command-buffer. -/// \param ret_command_buffer Pointer to fill with the address of the new -/// command-buffer. -__SYCL_EXPORT pi_result -piextCommandBufferCreate(pi_context context, pi_device device, - const pi_ext_command_buffer_desc *desc, - pi_ext_command_buffer *ret_command_buffer); - -/// API to increment the reference count of the command-buffer -/// \param command_buffer The command_buffer to retain. -__SYCL_EXPORT pi_result -piextCommandBufferRetain(pi_ext_command_buffer command_buffer); - -/// API to decrement the reference count of the command-buffer. After the -/// command_buffer reference count becomes zero and has finished execution, the -/// command-buffer is deleted. -/// \param command_buffer The command_buffer to release. -__SYCL_EXPORT pi_result -piextCommandBufferRelease(pi_ext_command_buffer command_buffer); - -/// API to stop command-buffer recording such that no more commands can be -/// appended, and makes the command-buffer ready to enqueue on a command-queue. -/// \param command_buffer The command_buffer to finalize. -__SYCL_EXPORT pi_result -piextCommandBufferFinalize(pi_ext_command_buffer command_buffer); - -/// API to append a kernel execution command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param kernel The kernel to append. -/// \param work_dim Dimension of the kernel execution. -/// \param global_work_offset Offset to use when executing kernel. -/// \param global_work_size Global work size to use when executing kernel. -/// \param local_work_size Local work size to use when executing kernel. -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this kernel execution. -/// \param command Return pointer to the command representing this kernel -/// execution. -__SYCL_EXPORT pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point, pi_ext_command_buffer_command *command); - -/// API to append a USM memcpy command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param dst_ptr is the location the data will be copied -/// \param src_ptr is the data to be copied -/// \param size is number of bytes to copy -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, - size_t size, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a mem buffer copy command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param src_buffer is the data to be copied -/// \param dst_buffer is the location the data will be copied -/// \param src_offset offset into \p src_buffer -/// \param dst_offset offset into \p dst_buffer -/// \param size is number of bytes to copy -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, - size_t src_offset, size_t dst_offset, size_t size, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a rectangular mem buffer copy command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param src_buffer is the data to be copied -/// \param dst_buffer is the location the data will be copied -/// \param src_origin offset for the start of the region to copy in src_buffer -/// \param dst_origin offset for the start of the region to copy in dst_buffer -/// \param region The size of the region to be copied -/// \param src_row_pitch Row pitch for the src data -/// \param src_slice_pitch Slice pitch for the src data -/// \param dst_row_pitch Row pitch for the dst data -/// \param dst_slice_pitch Slice pitch for the dst data -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, - pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, - pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, - size_t dst_row_pitch, size_t dst_slice_pitch, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a mem buffer read command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param buffer is the data to be read -/// \param offset offset into \p buffer -/// \param size is number of bytes to read -/// \param dst is the pointer to the destination -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a rectangular mem buffer read command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param buffer is the data to be read -/// \param buffer_offset offset for the start of the region to read in buffer -/// \param host_offset offset for the start of the region to be written from ptr -/// \param region The size of the region to read -/// \param buffer_row_pitch Row pitch for the source buffer data -/// \param buffer_slice_pitch Slice pitch for the source buffer data -/// \param host_row_pitch Row pitch for the destination data ptr -/// \param host_slice_pitch Slice pitch for the destination data ptr -/// \param ptr is the location the data will be written -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer command_buffer, pi_mem buffer, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a mem buffer write command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param buffer is the location to write the data -/// \param offset offset into \p buffer -/// \param size is number of bytes to write -/// \param ptr is the pointer to the source -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a rectangular mem buffer write command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param buffer is the location to write the data -/// \param buffer_offset offset for the start of the region to write in buffer -/// \param host_offset offset for the start of the region to be read from ptr -/// \param region The size of the region to write -/// \param buffer_row_pitch Row pitch for the buffer data -/// \param buffer_slice_pitch Slice pitch for the buffer data -/// \param host_row_pitch Row pitch for the source data ptr -/// \param host_slice_pitch Slice pitch for the source data ptr -/// \param ptr is the pointer to the source -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer command_buffer, pi_mem buffer, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a mem buffer fill command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param buffer is the location to fill the data. -/// \param pattern pointer to the pattern to fill the buffer with. -/// \param pattern_size size of the pattern in bytes. -/// \param offset Offset into the buffer to fill from. -/// \param size fill size in bytes. -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferMemBufferFill( - pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, - size_t pattern_size, size_t offset, size_t size, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a USM fill command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param ptr pointer to the USM allocation to fill. -/// \param pattern pointer to the pattern to fill ptr with. -/// \param pattern_size size of the pattern in bytes. -/// \param size fill size in bytes. -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferFillUSM( - pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, - size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a USM Prefetch command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param ptr points to the memory to migrate. -/// \param size is the number of bytes to migrate. -/// \param flags is a bitfield used to specify memory migration options. -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferPrefetchUSM( - pi_ext_command_buffer command_buffer, const void *ptr, size_t size, - pi_usm_migration_flags flags, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to append a USM Advise command to the command-buffer. -/// \param command_buffer The command-buffer to append onto. -/// \param ptr is the data to be advised. -/// \param length is the size in bytes of the memory to advise. -/// \param advice is device specific advice. -/// \param num_sync_points_in_wait_list The number of sync points in the -/// provided wait list. -/// \param sync_point_wait_list A list of sync points that this command must -/// wait on. -/// \param sync_point The sync_point associated with this memory operation. -__SYCL_EXPORT pi_result piextCommandBufferAdviseUSM( - pi_ext_command_buffer command_buffer, const void *ptr, size_t length, - pi_mem_advice advice, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point); - -/// API to submit the command-buffer to queue for execution, returns an error if -/// the command-buffer is not finalized or another instance of the same -/// command-buffer is currently executing. -/// \param command_buffer The command-buffer to be submitted. -/// \param queue The PI queue to submit on. -/// \param num_events_in_wait_list The number of events that this execution -/// depends on. -/// \param event_wait_list List of pi_events to wait on. -/// \param event The pi_event associated with this enqueue. -__SYCL_EXPORT pi_result -piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, pi_queue queue, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -/// API to update a kernel launch command inside of a command-buffer. -/// @param command The command to be updated. -/// @param desc Descriptor which describes the updated parameters of the kernel -/// launch. -__SYCL_EXPORT pi_result piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command command, - pi_ext_command_buffer_update_kernel_launch_desc *desc); - -/// API to increment the reference count of a command-buffer command. -/// \param command The command to release. -__SYCL_EXPORT pi_result -piextCommandBufferRetainCommand(pi_ext_command_buffer_command command); - -/// API to decrement the reference count of a command-buffer command. After the -/// command reference count becomes zero, the command is deleted. -/// \param command The command to release. -__SYCL_EXPORT pi_result -piextCommandBufferReleaseCommand(pi_ext_command_buffer_command command); - -/// API to destroy bindless unsampled image handles. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param handle is the image handle -__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( - pi_context context, pi_device device, pi_image_handle handle); - -/// API to destroy bindless sampled image handles. -/// -/// \param context is the pi_context -/// \param handle is the image handle -__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( - pi_context context, pi_device device, pi_image_handle handle); - -/// API to allocate memory for bindless images. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param flags are extra flags to pass (currently unused) -/// \param image_format format of the image (channel order and data type) -/// \param image_desc image descriptor -/// \param ret_mem is the returning memory handle to newly allocated memory -__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context context, - pi_device device, - pi_image_format *image_format, - pi_image_desc *image_desc, - pi_image_mem_handle *ret_mem); - -/// API to retrieve individual image from mipmap. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param mip_mem is the memory handle to the mipmap -/// \param level is the requested level of the mipmap -/// \param ret_mem is the returning memory handle to the individual image -__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context context, - pi_device device, - pi_image_mem_handle mip_mem, - unsigned int level, - pi_image_mem_handle *ret_mem); - -/// API to free memory for bindless images. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param memory_handle is the handle to image memory to be freed -__SYCL_EXPORT pi_result piextMemImageFree(pi_context context, pi_device device, - pi_image_mem_handle memory_handle); - -/// API to free mipmap memory for bindless images. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param memory_handle is the handle to image memory to be freed -__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context context, pi_device device, - pi_image_mem_handle memory_handle); - -/// API to create bindless image handles. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param img_mem is the handle to memory from which to create the image -/// \param image_format format of the image (channel order and data type) -/// \param image_desc image descriptor -/// \param ret_mem is the returning pi_mem image object -/// \param ret_handle is the returning memory handle to newly allocated memory -__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( - pi_context context, pi_device device, pi_image_mem_handle img_mem, - pi_image_format *image_format, pi_image_desc *image_desc, - pi_image_handle *ret_handle); - -/// API to create sampled bindless image handles. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param img_mem is the handle to memory from which to create the image -/// \param image_format format of the image (channel order and data type) -/// \param image_desc image descriptor -/// \param sampler is the pi_sampler -/// \param ret_mem is the returning pi_mem image object -/// \param ret_handle is the returning memory handle to newly allocated memory -__SYCL_EXPORT pi_result piextMemSampledImageCreate( - pi_context context, pi_device device, pi_image_mem_handle img_mem, - pi_image_format *image_format, pi_image_desc *image_desc, - pi_sampler sampler, pi_image_handle *ret_handle); - -/// API to create samplers for bindless images. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param sampler_properties is the pointer to the sampler properties bitfield -/// \param min_mipmap_level_clamp is the minimum mipmap level to sample from -/// \param max_mipmap_level_clamp is the maximum mipmap level to sample from -/// \param max_anisotropy is the maximum anisotropic ratio -/// \param result_sampler is the returned sampler -__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( - pi_context context, const pi_sampler_properties *sampler_properties, - float min_mipmap_level_clamp, float max_mipmap_level_clamp, - float max_anisotropy, pi_sampler *result_sampler); - -/// API to copy image data Host to Device or Device to Host. -/// -/// \param queue is the queue to submit to -/// \param src_ptr is the data to be copied -/// \param dst_ptr is the location the data will be copied to -/// \param src_image_desc source image descriptor -/// \param dst_image_desc destination image descriptor -/// \param src_image_format format of the image (channel order and data type) -/// \param dst_image_format format of the image (channel order and data type) -/// \param flags flags describing copy direction (H2D or D2H) -/// \param src_offset is the offset into the source image/memory -/// \param dst_offset is the offset into the destination image/memory -/// \param copy_extent is the extent (region) of the image/memory to copy -/// \param num_events_in_wait_list is the number of events in the wait list -/// \param event_wait_list is the list of events to wait on before copying -/// \param event is the returned event representing this operation -__SYCL_EXPORT pi_result piextMemImageCopy( - pi_queue queue, void *dst_ptr, const void *src_ptr, - const pi_image_desc *src_image_desc, const pi_image_desc *dst_image_desc, - const pi_image_format *src_image_format, - const pi_image_format *dst_image_format, const pi_image_copy_flags flags, - pi_image_offset src_offset, pi_image_offset dst_offset, - pi_image_region copy_extent, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); - -/// API to query an image memory handle for specific properties. -/// -/// \param context is the handle to the context -/// \param mem_handle is the handle to the image memory -/// \param param_name is the queried info name -/// \param param_value is the returned query value -/// \param param_value_size_ret is the returned query value size -__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_context context, - pi_image_mem_handle mem_handle, - pi_image_info param_name, - void *param_value, - size_t *param_value_size_ret); - -/// API to import external memory -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param mem_descriptor is the interop memory descriptor -/// \param ret_handle is the returned interop memory handle to the external -/// memory -__SYCL_EXPORT pi_result -piextImportExternalMemory(pi_context context, pi_device device, - pi_external_mem_descriptor *mem_descriptor, - pi_interop_mem_handle *ret_handle); - -/// API to map an interop memory handle to an image memory handle. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param image_format format of the image (channel order and data type) -/// \param image_desc image descriptor -/// \param mem_handle is the interop memory handle to the external memory -/// \param ret_mem is the returned image memory handle to the externally -/// allocated memory -__SYCL_EXPORT pi_result piextMemMapExternalArray( - pi_context context, pi_device device, pi_image_format *image_format, - pi_image_desc *image_desc, pi_interop_mem_handle mem_handle, - pi_image_mem_handle *ret_mem); - -/// API to destroy interop memory. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param memory_handle is the handle to interop memory to be freed -__SYCL_EXPORT pi_result piextMemReleaseInterop( - pi_context context, pi_device device, pi_interop_mem_handle memory_handle); - -/// API to import an external semaphore -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param sem_descriptor is the interop semaphore descriptor -/// \param ret_handle is the returned interop semaphore handle to the external -/// semaphore -__SYCL_EXPORT pi_result -piextImportExternalSemaphore(pi_context context, pi_device device, - pi_external_semaphore_descriptor *sem_descriptor, - pi_interop_semaphore_handle *ret_handle); - -/// API to release the external semaphore. -/// -/// \param context is the pi_context -/// \param device is the pi_device -/// \param sem_handle is the interop semaphore handle to the external semaphore -/// to be destroyed -__SYCL_EXPORT pi_result -piextReleaseExternalSemaphore(pi_context context, pi_device device, - pi_interop_semaphore_handle sem_handle); - -/// API to instruct the queue with a non-blocking wait on an external semaphore. -/// -/// \param command_queue is the queue instructed to wait -/// \param sem_handle is the interop semaphore handle -/// \param has_wait_value indicates whether the semaphore is capable of setting -/// user defined state passed through `wait_value`. -/// Otherwise `wait_value` is ignored. -/// \param wait_value is the user defined value of the semaphore state for -/// which this operation will wait upon, provided the -/// semaphore type has this capability, and -/// `has_wait_value` is `true`. -/// \param num_events_in_wait_list is the number of events in the wait list -/// \param event_wait_list is the list of events to wait on before this -/// operation -/// \param event is the returned event representing this operation -__SYCL_EXPORT pi_result piextWaitExternalSemaphore( - pi_queue command_queue, pi_interop_semaphore_handle sem_handle, - bool has_wait_value, pi_uint64 wait_value, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); - -/// API to instruct the queue to signal the external semaphore handle once all -/// previous commands have completed execution. -/// -/// \param command_queue is the queue instructed to signal -/// \param sem_handle is the interop semaphore handle to signal -/// \param has_signal_value indicates whether the semaphore is capable of -/// setting user defined state passed through -/// `signal_value`. Otherwise `signal_value` is ignored. -/// \param signal_value is the user defined value to which the state of the -/// semaphore will be set, provided the semaphore type has -/// this capability, and `has_signal_value` is `true`. -/// \param num_events_in_wait_list is the number of events in the wait list -/// \param event_wait_list is the list of events to wait on before this -/// operation -/// \param event is the returned event representing this operation -__SYCL_EXPORT pi_result piextSignalExternalSemaphore( - pi_queue command_queue, pi_interop_semaphore_handle sem_handle, - bool has_signal_value, pi_uint64 signal_value, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); - -/// API to enqueue work through a backend API such that the plugin can schedule -/// the backend API calls within its own DAG. -/// -/// \param command_queue is the queue instructed to signal -/// \param fn is the user submitted native function enqueueing work to a -/// backend API -/// \param data is the data that will be used in fn -/// \param num_mems is the number of mems in mem_list -/// \param mem_list is the list of mems that are used in fn -/// \param num_events_in_wait_list is the number of events in the wait list -/// \param event_wait_list is the list of events to wait on before this -/// operation -/// \param event is the returned event representing this operation -__SYCL_EXPORT pi_result piextEnqueueNativeCommand( - pi_queue command_queue, pi_enqueue_native_command_function fn, void *data, - pi_uint32 num_mems, const pi_mem *mem_list, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); - -typedef enum { - _PI_SANITIZE_TYPE_NONE = 0x0, - _PI_SANITIZE_TYPE_ADDRESS = 0x1, - _PI_SANITIZE_TYPE_MEMORY = 0x2, - _PI_SANITIZE_TYPE_THREAD = 0x3 -} _pi_sanitize_type; - -struct _pi_plugin { - // PI version supported by host passed to the plugin. The Plugin - // checks and writes the appropriate Function Pointers in - // PiFunctionTable. - // TODO: Work on version fields and their handshaking mechanism. - // Some choices are: - // - Use of integers to keep major and minor version. - // - Keeping char* Versions. - char PiVersion[20]; - // Plugin edits this. - char PluginVersion[20]; - char *Targets; - struct FunctionPointers { -#define _PI_API(api) decltype(::api) *api; -#include - } PiFunctionTable; - - _pi_sanitize_type SanitizeType; -}; - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // _PI_H_ diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp deleted file mode 100644 index 58416b2157cf9..0000000000000 --- a/sycl/include/sycl/detail/pi.hpp +++ /dev/null @@ -1,276 +0,0 @@ -//==---------- pi.hpp - Plugin Interface for SYCL RT -----------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \file pi.hpp -/// C++ wrapper of extern "C" PI interfaces -/// -/// \ingroup sycl_pi - -#pragma once - -#include // for backend -#include // for __SYCL_EXPORT -#include // for __SYCL_RT_OS_LINUX -#include // for piContextCreate, piContextGetInfo - -#include // for uint64_t, uint32_t -#include // for shared_ptr -#include // for size_t -#include // for char_traits, string -#include // for false_type, true_type -#include // for vector - -#ifdef XPTI_ENABLE_INSTRUMENTATION -// Forward declarations -namespace xpti { -struct trace_event_data_t; -} -#endif - -namespace sycl { -inline namespace _V1 { - -class context; - -namespace detail { - -enum class PiApiKind { -#define _PI_API(api) api, -#include -}; -class plugin; -using PluginPtr = std::shared_ptr; - -template -__SYCL_EXPORT void *getPluginOpaqueData(void *opaquedata_arg); - -namespace pi { - -// The SYCL_PI_TRACE sets what we will trace. -// This is a bit-mask of various things we'd want to trace. -enum TraceLevel { - PI_TRACE_BASIC = 0x1, - PI_TRACE_CALLS = 0x2, - PI_TRACE_ALL = -1 -}; - -// Return true if we want to trace PI related activities. -bool trace(TraceLevel level); - -#ifdef __SYCL_RT_OS_WINDOWS -// these same constants are used by pi_win_proxy_loader.dll -// if a plugin is added here, add it there as well. -#ifdef _MSC_VER -#define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" -#define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" -#define __SYCL_HIP_PLUGIN_NAME "pi_hip.dll" -#define __SYCL_UR_PLUGIN_NAME "pi_unified_runtime.dll" -#define __SYCL_NATIVE_CPU_PLUGIN_NAME "pi_native_cpu.dll" -#else -#define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dll" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dll" -#define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dll" -#define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" -#define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.dll" -#define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dll" -#endif -#elif defined(__SYCL_RT_OS_LINUX) -#define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.so" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" -#define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.so" -#define __SYCL_HIP_PLUGIN_NAME "libpi_hip.so" -#define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.so" -#define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.so" -#elif defined(__SYCL_RT_OS_DARWIN) -#define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dylib" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dylib" -#define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dylib" -#define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dylib" -#define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.dylib" -#define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dylib" -#else -#error "Unsupported OS" -#endif - -using PiPlugin = ::pi_plugin; -using PiResult = ::pi_result; -using PiPlatform = ::pi_platform; -using PiPlatformBackend = ::pi_platform_backend; -using PiDevice = ::pi_device; -using PiDeviceType = ::pi_device_type; -using PiDeviceInfo = ::pi_device_info; -using PiContext = ::pi_context; -using PiContextInfo = ::pi_context_info; -using PiProgram = ::pi_program; -using PiKernel = ::pi_kernel; -using PiQueue = ::pi_queue; -using PiQueueProperties = ::pi_queue_properties; -using PiMem = ::pi_mem; -using PiMemFlags = ::pi_mem_flags; -using PiEvent = ::pi_event; -using PiSampler = ::pi_sampler; -using PiSamplerInfo = ::pi_sampler_info; -using PiSamplerProperties = ::pi_sampler_properties; -using PiSamplerAddressingMode = ::pi_sampler_addressing_mode; -using PiSamplerFilterMode = ::pi_sampler_filter_mode; -using PiMemImageFormat = ::pi_image_format; -using PiMemImageDesc = ::pi_image_desc; -using PiMemImageInfo = ::pi_image_info; -using PiMemObjectType = ::pi_mem_type; -using PiMemImageChannelOrder = ::pi_image_channel_order; -using PiMemImageChannelType = ::pi_image_channel_type; -using PiKernelCacheConfig = ::pi_kernel_cache_config; -using PiExtSyncPoint = ::pi_ext_sync_point; -using PiExtCommandBuffer = ::pi_ext_command_buffer; -using PiExtCommandBufferDesc = ::pi_ext_command_buffer_desc; -using PiExtCommandBufferCommand = ::pi_ext_command_buffer_command; -using PiPeerAttr = ::pi_peer_attr; -using PiImageHandle = ::pi_image_handle; -using PiImageMemHandle = ::pi_image_mem_handle; -using PiImageCopyFlags = ::pi_image_copy_flags; -using PiInteropMemHandle = ::pi_interop_mem_handle; -using PiInteropSemaphoreHandle = ::pi_interop_semaphore_handle; -using PiExternalMemDescriptor = ::pi_external_mem_descriptor; -using PiExternalSemaphoreDescriptor = ::pi_external_semaphore_descriptor; -using PiImageOffset = ::pi_image_offset_struct; -using PiImageRegion = ::pi_image_region_struct; -using PiPhysicalMem = ::pi_physical_mem; -using PiVirtualAccessFlags = ::pi_virtual_access_flags; -using PiLaunchProperty = ::pi_launch_property; - -__SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, - pi_context_extended_deleter func, - void *user_data); - -// Function to load a shared library -// Implementation is OS dependent -void *loadOsLibrary(const std::string &Library); - -// Function to unload a shared library -// Implementation is OS dependent (see posix-pi.cpp and windows-pi.cpp) -int unloadOsLibrary(void *Library); - -// Function to load the shared plugin library -// On Windows, this will have been pre-loaded by proxy loader. -// Implementation is OS dependent. -void *loadOsPluginLibrary(const std::string &Library); - -// Function to unload the shared plugin library -// Implementation is OS dependent (see posix-pi.cpp and windows-pi.cpp) -int unloadOsPluginLibrary(void *Library); - -// OS agnostic function to unload the shared library -int unloadPlugin(void *Library); - -// Function to get Address of a symbol defined in the shared -// library, implementation is OS dependent. -void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName); - -// Get a string representing a _pi_platform_info enum -std::string platformInfoToString(pi_platform_info info); - -// Want all the needed casts be explicit, do not define conversion operators. -template To cast(From value); - -// Holds the PluginInformation for the plugin that is bound. -// Currently a global variable is used to store OpenCL plugin information to be -// used with SYCL Interoperability Constructors. -// TODO: GlobalPlugin does not seem to be needed anymore. Consider removing it! -extern std::shared_ptr GlobalPlugin; - -// Performs PI one-time initialization. -std::vector &initialize(); - -// Get the plugin serving given backend. -template const PluginPtr &getPlugin(); - -// Utility Functions to get Function Name for a PI Api. -template struct PiFuncInfo {}; - -#define _PI_API(api) \ - template <> struct PiFuncInfo { \ - using FuncPtrT = decltype(&::api); \ - inline const char *getFuncName() { return #api; } \ - inline FuncPtrT getFuncPtr(PiPlugin MPlugin) { \ - return MPlugin.PiFunctionTable.api; \ - } \ - }; -#include - -/// Emits an XPTI trace before a PI API call is made -/// \param FName The name of the PI API call -/// \return The correlation ID for the API call that is to be used by the -/// emitFunctionEndTrace() call -uint64_t emitFunctionBeginTrace(const char *FName); - -/// Emits an XPTI trace after the PI API call has been made -/// \param CorrelationID The correlation ID for the API call generated by the -/// emitFunctionBeginTrace() call. -/// \param FName The name of the PI API call -void emitFunctionEndTrace(uint64_t CorrelationID, const char *FName); - -/// Notifies XPTI subscribers about PI function calls and packs call arguments. -/// -/// \param FuncID is the API hash ID from PiApiID type trait. -/// \param FName The name of the PI API call. -/// \param ArgsData is a pointer to packed function call arguments. -/// \param Plugin is the plugin, which is used to make call. -uint64_t emitFunctionWithArgsBeginTrace(uint32_t FuncID, const char *FName, - unsigned char *ArgsData, - pi_plugin Plugin); - -/// Notifies XPTI subscribers about PI function call result. -/// -/// \param CorrelationID The correlation ID for the API call generated by the -/// emitFunctionWithArgsBeginTrace() call. -/// \param FuncID is the API hash ID from PiApiID type trait. -/// \param FName The name of the PI API call. -/// \param ArgsData is a pointer to packed function call arguments. -/// \param Result is function call result value. -/// \param Plugin is the plugin, which is used to make call. -void emitFunctionWithArgsEndTrace(uint64_t CorrelationID, uint32_t FuncID, - const char *FName, unsigned char *ArgsData, - pi_result Result, pi_plugin Plugin); -} // namespace pi - -// Workaround for build with GCC 5.x -// An explicit specialization shall be declared in the namespace block. -// Having namespace as part of template name is not supported by GCC -// older than 7.x. -// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480 -namespace pi { -// Want all the needed casts be explicit, do not define conversion -// operators. -template inline To cast(From value) { - static_assert(sizeof(From) == sizeof(To), "cast failed size check"); - return (To)(value); -} - -// Helper traits for identifying std::vector with arbitrary element type. -template struct IsStdVector : std::false_type {}; -template struct IsStdVector> : std::true_type {}; - -// Overload for vectors that applies the cast to all elements. This -// creates a new vector. -template To cast(std::vector Values) { - static_assert(IsStdVector::value, "Return type must be a vector."); - To ResultVec; - ResultVec.reserve(Values.size()); - for (FromE &Val : Values) - ResultVec.push_back(cast(Val)); - return ResultVec; -} - -} // namespace pi -} // namespace detail - -} // namespace _V1 -} // namespace sycl - -#undef _PI_API diff --git a/sycl/include/sycl/detail/pi_error.def b/sycl/include/sycl/detail/pi_error.def deleted file mode 100644 index e58cd52032b60..0000000000000 --- a/sycl/include/sycl/detail/pi_error.def +++ /dev/null @@ -1,116 +0,0 @@ -_PI_ERRC(PI_SUCCESS, 0) -_PI_ERRC(PI_ERROR_DEVICE_NOT_FOUND, -1) -_PI_ERRC(PI_ERROR_DEVICE_NOT_AVAILABLE, -2) -_PI_ERRC(PI_ERROR_COMPILER_NOT_AVAILABLE, -3) -_PI_ERRC(PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE, -4) -_PI_ERRC(PI_ERROR_OUT_OF_RESOURCES, -5) -_PI_ERRC(PI_ERROR_OUT_OF_HOST_MEMORY, -6) -_PI_ERRC(PI_ERROR_PROFILING_INFO_NOT_AVAILABLE, -7) -_PI_ERRC(PI_ERROR_MEM_COPY_OVERLAP, -8) -_PI_ERRC(PI_ERROR_IMAGE_FORMAT_MISMATCH, -9) -_PI_ERRC(PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED, -10) -_PI_ERRC(PI_ERROR_BUILD_PROGRAM_FAILURE, -11) -_PI_ERRC(PI_ERROR_MAP_FAILURE, -12) -_PI_ERRC(PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET, -13) -_PI_ERRC(PI_ERROR_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, -14) -_PI_ERRC(PI_ERROR_COMPILE_PROGRAM_FAILURE, -15) -_PI_ERRC(PI_ERROR_LINKER_NOT_AVAILABLE, -16) -_PI_ERRC(PI_ERROR_LINK_PROGRAM_FAILURE, -17) -_PI_ERRC(PI_ERROR_DEVICE_PARTITION_FAILED, -18) -_PI_ERRC(PI_ERROR_KERNEL_ARG_INFO_NOT_AVAILABLE, -19) -_PI_ERRC(PI_ERROR_INVALID_VALUE, -30) -_PI_ERRC(PI_ERROR_INVALID_DEVICE_TYPE, -31) -_PI_ERRC(PI_ERROR_INVALID_PLATFORM, -32) -_PI_ERRC(PI_ERROR_INVALID_DEVICE, -33) -_PI_ERRC(PI_ERROR_INVALID_CONTEXT, -34) -_PI_ERRC(PI_ERROR_INVALID_QUEUE_PROPERTIES, -35) -_PI_ERRC(PI_ERROR_INVALID_QUEUE, -36) -_PI_ERRC(PI_ERROR_INVALID_HOST_PTR, -37) -_PI_ERRC(PI_ERROR_INVALID_MEM_OBJECT, -38) -_PI_ERRC(PI_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR, -39) -_PI_ERRC(PI_ERROR_INVALID_IMAGE_SIZE, -40) -_PI_ERRC(PI_ERROR_INVALID_SAMPLER, -41) -_PI_ERRC(PI_ERROR_INVALID_BINARY, -42) -_PI_ERRC(PI_ERROR_INVALID_BUILD_OPTIONS, -43) -_PI_ERRC(PI_ERROR_INVALID_PROGRAM, -44) -_PI_ERRC(PI_ERROR_INVALID_PROGRAM_EXECUTABLE, -45) -_PI_ERRC(PI_ERROR_INVALID_KERNEL_NAME, -46) -_PI_ERRC(PI_ERROR_INVALID_KERNEL_DEFINITION, -47) -_PI_ERRC(PI_ERROR_INVALID_KERNEL, -48) -_PI_ERRC(PI_ERROR_INVALID_ARG_INDEX, -49) -_PI_ERRC(PI_ERROR_INVALID_ARG_VALUE, -50) -_PI_ERRC(PI_ERROR_INVALID_ARG_SIZE, -51) -_PI_ERRC(PI_ERROR_INVALID_KERNEL_ARGS, -52) -_PI_ERRC(PI_ERROR_INVALID_WORK_DIMENSION, -53) -_PI_ERRC(PI_ERROR_INVALID_WORK_GROUP_SIZE, -54) -_PI_ERRC(PI_ERROR_INVALID_WORK_ITEM_SIZE, -55) -_PI_ERRC(PI_ERROR_INVALID_GLOBAL_OFFSET, -56) -_PI_ERRC(PI_ERROR_INVALID_EVENT_WAIT_LIST, -57) -_PI_ERRC(PI_ERROR_INVALID_EVENT, -58) -_PI_ERRC(PI_ERROR_INVALID_OPERATION, -59) -_PI_ERRC(PI_ERROR_INVALID_GL_OBJECT, -60) -_PI_ERRC(PI_ERROR_INVALID_BUFFER_SIZE, -61) -_PI_ERRC(PI_ERROR_INVALID_MIP_LEVEL, -62) -_PI_ERRC(PI_ERROR_INVALID_GLOBAL_WORK_SIZE, -63) -_PI_ERRC(PI_ERROR_INVALID_PROPERTY, -64) -_PI_ERRC(PI_ERROR_INVALID_IMAGE_DESCRIPTOR, -65) -_PI_ERRC(PI_ERROR_INVALID_COMPILER_OPTIONS, -66) -_PI_ERRC(PI_ERROR_INVALID_LINKER_OPTIONS, -67) -_PI_ERRC(PI_ERROR_INVALID_DEVICE_PARTITION_COUNT, -68) -// cl_gl -_PI_ERRC(PI_ERROR_INVALID_GL_SHAREGROUP_REFERENCE_KHR, -1000) -// KHR Extension -_PI_ERRC(PI_ERROR_PLATFORM_NOT_FOUND_KHR, -1001) -// cl_d3d10 -_PI_ERRC(PI_ERROR_INVALID_D3D10_DEVICE_KHR, -1002) -_PI_ERRC(PI_ERROR_INVALID_D3D10_RESOURCE_KHR, -1003) -_PI_ERRC(PI_ERROR_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR, -1004) -_PI_ERRC(PI_ERROR_D3D10_RESOURCE_NOT_ACQUIRED_KHR, -1005) -// cl_d3d11 -_PI_ERRC(PI_ERROR_INVALID_D3D11_DEVICE_KHR, -1006) -_PI_ERRC(PI_ERROR_INVALID_D3D11_RESOURCE_KHR, -1007) -_PI_ERRC(PI_ERROR_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR, -1008) -_PI_ERRC(PI_ERROR_D3D11_RESOURCE_NOT_ACQUIRED_KHR, -1009) -// cl_dx9_media_sharing -_PI_ERRC(PI_ERROR_INVALID_DX9_DEVICE_INTEL, -1010) -_PI_ERRC(PI_ERROR_INVALID_DX9_RESOURCE_INTEL, -1011) -_PI_ERRC(PI_ERROR_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL, -1012) -_PI_ERRC(PI_ERROR_DX9_RESOURCE_NOT_ACQUIRED_INTEL, -1013) -// cl_ext -_PI_ERRC(PI_ERROR_INVALID_COMMAND_BUFFER_KHR, -1138) -_PI_ERRC(PI_ERROR_INVALID_SYNC_POINT_WAIT_LIST_KHR, -1139) -_PI_ERRC(PI_ERROR_INCOMPATIBLE_COMMAND_QUEUE_KHR, -1140) -// Generic extensions -_PI_ERRC(PI_ERROR_DEVICE_PARTITION_FAILED_EXT, -1057) -_PI_ERRC(PI_ERROR_INVALID_PARTITION_COUNT_EXT, -1058) -_PI_ERRC(PI_ERROR_INVALID_PARTITION_NAME_EXT, -1059) -// cl_egl -_PI_ERRC(PI_ERROR_EGL_RESOURCE_NOT_ACQUIRED_KHR, -1092) -_PI_ERRC(PI_ERROR_INVALID_EGL_OBJECT_KHR, -1093) -// cl_intel_accelerator -_PI_ERRC(PI_ERROR_INVALID_ACCELERATOR_INTEL, -1094) -_PI_ERRC(PI_ERROR_INVALID_ACCELERATOR_TYPE_INTEL, -1095) -_PI_ERRC(PI_ERROR_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, -1096) -_PI_ERRC(PI_ERROR_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL, -1097) -// cl_va_api_media_sharing_intel -_PI_ERRC(PI_ERROR_INVALID_VA_API_MEDIA_ADAPTER_INTEL, -1098) -_PI_ERRC(PI_ERROR_INVALID_VA_API_MEDIA_SURFACE_INTEL, -1099) -_PI_ERRC(PI_ERROR_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL, -1100) -_PI_ERRC(PI_ERROR_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL, -1101) -// backend is lost, e.g. it was already unloaded -_PI_ERRC(PI_ERROR_UNINITIALIZED, -1102) - -// PI specific error codes -// PI_ERROR_UNSUPPORTED_FEATURE indicates that the backend or the corresponding -// device does not support the feature. -_PI_ERRC_WITH_MSG(PI_ERROR_UNSUPPORTED_FEATURE, -995, "The plugin or device does not support the called function") -// PI_ERROR_PLUGIN_SPECIFIC_ERROR indicates that an backend spcific error or -// warning has been emitted by the plugin. -_PI_ERRC_WITH_MSG(PI_ERROR_PLUGIN_SPECIFIC_ERROR, -996, "The plugin has emitted a backend specific error") -// PI_ERROR_COMMAND_EXECUTION_FAILURE indicates an error occurred during command -// enqueue or execution. -_PI_ERRC_WITH_MSG(PI_ERROR_COMMAND_EXECUTION_FAILURE, -997, "Command failed to enqueue/execute") -// PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE indicates a fallback method -// determines the function exists but its address cannot be found. -_PI_ERRC_WITH_MSG(PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE, -998, "Function exists but address is not available") -_PI_ERRC_WITH_MSG(PI_ERROR_UNKNOWN, -999, "Unknown PI error") diff --git a/sycl/include/sycl/detail/ur.hpp b/sycl/include/sycl/detail/ur.hpp new file mode 100644 index 0000000000000..48e876e3adb6f --- /dev/null +++ b/sycl/include/sycl/detail/ur.hpp @@ -0,0 +1,122 @@ +//==---------- ur.hpp - Unified Runtime integration helpers ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// \file +/// +/// C++ utilities for Unified Runtime integration. +/// +/// \ingroup sycl_ur + +#pragma once + +#include +#include +#include +# +#include + +#include +#include +#include + +/// Extension to denote native support of assert feature by an arbitrary device +/// urDeviceGetInfo call should return this extension when the device supports +/// native asserts if supported extensions' names are requested +#define UR_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT "cl_intel_devicelib_assert" + +typedef void (*pi_context_extended_deleter)(void *user_data); + +struct _sycl_device_binary_property_struct; +using sycl_device_binary_property = _sycl_device_binary_property_struct*; + +#ifdef XPTI_ENABLE_INSTRUMENTATION +// Forward declarations +namespace xpti { +struct trace_event_data_t; +} +#endif + +namespace sycl { +inline namespace _V1 { + +class context; + +namespace detail { + +namespace pi { +// This function is deprecated and it should be removed in the next release +// cycle (along with the definition for pi_context_extended_deleter). +__SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, + pi_context_extended_deleter func, + void *user_data); +} + +class plugin; +using PluginPtr = std::shared_ptr; + +// TODO: To be removed as this was only introduced for esimd which was removed. +template +__SYCL_EXPORT void *getPluginOpaqueData(void *opaquedata_arg); + +namespace ur { +// Function to load a shared library +// Implementation is OS dependent +void *loadOsLibrary(const std::string &Library); + +// Function to unload a shared library +// Implementation is OS dependent (see posix-ur.cpp and windows-ur.cpp) +int unloadOsLibrary(void *Library); + +// Function to get Address of a symbol defined in the shared +// library, implementation is OS dependent. +void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName); + +// Performs UR one-time initialization. +std::vector & +initializeUr(ur_loader_config_handle_t LoaderConfig = nullptr); + +// Get the plugin serving given backend. +template const PluginPtr &getPlugin(); + +// Return true if we want to trace UR related activities. +bool trace(); + +// Want all the needed casts be explicit, do not define conversion operators. +template To cast(From value); + +// Want all the needed casts be explicit, do not define conversion +// operators. +template inline To cast(From value) { + // TODO: see if more sanity checks are possible. + static_assert(sizeof(From) == sizeof(To), "assert: cast failed size check"); + return reinterpret_cast(value); +} + +// Helper traits for identifying std::vector with arbitrary element type. +template struct IsStdVector : std::false_type {}; +template struct IsStdVector> : std::true_type {}; + +// Overload for vectors that applies the cast to all elements. This +// creates a new vector. +template To cast(std::vector Values) { + static_assert(IsStdVector::value, "Return type must be a vector."); + To ResultVec; + ResultVec.reserve(Values.size()); + for (FromE &Val : Values) { + ResultVec.push_back(cast(Val)); + } + return ResultVec; +} + +ur_program_metadata_t mapDeviceBinaryPropertyToProgramMetadata( + const sycl_device_binary_property &DeviceBinaryProperty); + +} // namespace ur +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 4f0882281e29f..7b26b5911e3b1 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -23,6 +22,7 @@ #include #include #include +#include #include #include @@ -347,7 +347,7 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { std::shared_ptr impl; device(std::shared_ptr impl) : impl(impl) {} - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; template friend const decltype(Obj::impl) & diff --git a/sycl/include/sycl/event.hpp b/sycl/include/sycl/event.hpp index ded2794c6c603..2f3974dca5e39 100644 --- a/sycl/include/sycl/event.hpp +++ b/sycl/include/sycl/event.hpp @@ -13,7 +13,7 @@ #include // for __SYCL_EXPORT #include // for is_event_info_desc, is_... #include // for OwnerLessBase -#include // for pi_native_handle +#include // for ur_native_handle_t #ifdef __SYCL_INTERNAL_API #include @@ -138,9 +138,9 @@ class __SYCL_EXPORT event : public detail::OwnerLessBase { private: event(std::shared_ptr EventImpl); - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; - std::vector getNativeVector() const; + std::vector getNativeVector() const; std::shared_ptr impl; diff --git a/sycl/include/sycl/exception.hpp b/sycl/include/sycl/exception.hpp index 15013ab18a3c2..34d58aac9a6fe 100644 --- a/sycl/include/sycl/exception.hpp +++ b/sycl/include/sycl/exception.hpp @@ -13,7 +13,6 @@ #include // for cl_int #include // for __SYCL2020_DEPRECATED #include // for __SYCL_EXPORT -#include // for pi_int32 #include #include // for exception @@ -53,11 +52,10 @@ __SYCL_EXPORT std::error_code make_error_code(sycl::errc E) noexcept; __SYCL_EXPORT const std::error_category &sycl_category() noexcept; namespace detail { -__SYCL_EXPORT const char *stringifyErrorCode(pi_int32 error); +__SYCL_EXPORT const char *stringifyErrorCode(int32_t error); -inline std::string codeToString(pi_int32 code) { - return std::string(std::to_string(code) + " (" + stringifyErrorCode(code) + - ")"); +inline std::string codeToString(int32_t code) { + return std::to_string(code) + " (" + std::string(stringifyErrorCode(code)) + ")"; } class __SYCL_EXPORT SYCLCategory : public std::error_category { @@ -67,8 +65,8 @@ class __SYCL_EXPORT SYCLCategory : public std::error_category { }; // Forward declare to declare as a friend in sycl::excepton. -pi_int32 get_pi_error(const exception &e); -exception set_pi_error(exception &&e, pi_int32 pi_err); +int32_t get_ur_error(const exception &e); +exception set_ur_error(exception &&e, int32_t ur_err); } // namespace detail // Derive from std::exception so uncaught exceptions are printed in c++ default @@ -117,18 +115,18 @@ class __SYCL_EXPORT exception : public virtual std::exception { // Exceptions must be noexcept copy constructible, so cannot use std::string // directly. std::shared_ptr MMsg; - pi_int32 MPIErr = 0; + int32_t MErr = 0; std::shared_ptr MContext; std::error_code MErrC = make_error_code(sycl::errc::invalid); protected: // base constructors used by SYCL 1.2.1 exception subclasses - exception(std::error_code Ec, const char *Msg, const pi_int32 PIErr) + exception(std::error_code Ec, const char *Msg, const int32_t PIErr) : exception(Ec, std::string(Msg), PIErr) {} - exception(std::error_code Ec, const std::string &Msg, const pi_int32 PIErr) - : exception(Ec, nullptr, Msg + " " + detail::codeToString(PIErr)) { - MPIErr = PIErr; + exception(std::error_code Ec, const std::string &Msg, const int32_t URErr) + : exception(Ec, nullptr, Msg + " " + detail::codeToString(URErr)) { + MErr = URErr; } // base constructor for all SYCL 2020 constructors @@ -139,13 +137,13 @@ class __SYCL_EXPORT exception : public virtual std::exception { exception(std::error_code Ec, std::shared_ptr SharedPtrCtx, const char *WhatArg); - friend pi_int32 detail::get_pi_error(const exception &); + friend int32_t detail::get_ur_error(const exception &); // To be used like this: - // throw/return detail::set_pi_error(exception(...), some_pi_error); - // *only* when such a error is coming from the PI/UR level. Otherwise it - // *should be left unset/default-initialized and exception should be thrown + // throw/return detail::set_ur_error(exception(...), some_ur_error); + // *only* when such a error is coming from the UR level. Otherwise it + // *should* be left unset/default-initialized and exception should be thrown // as-is using public ctors. - friend exception detail::set_pi_error(exception &&e, pi_int32 pi_err); + friend exception detail::set_ur_error(exception &&e, int32_t ur_err); }; namespace detail { @@ -155,9 +153,9 @@ namespace detail { // from an exception. // And we don't want them to be part of our library ABI, because of future // underlying changes (PI -> UR -> Offload). -inline pi_int32 get_pi_error(const exception &e) { return e.MPIErr; } -inline exception set_pi_error(exception &&e, pi_int32 pi_err) { - e.MPIErr = pi_err; +inline int32_t get_ur_error(const exception &e) { return e.MErr; } +inline exception set_ur_error(exception &&e, int32_t ur_err) { + e.MErr = ur_err; return std::move(e); } } // namespace detail diff --git a/sycl/include/sycl/ext/oneapi/accessor_property_list.hpp b/sycl/include/sycl/ext/oneapi/accessor_property_list.hpp index 3fd3648671cb4..7a1ce1119dbbd 100644 --- a/sycl/include/sycl/ext/oneapi/accessor_property_list.hpp +++ b/sycl/include/sycl/ext/oneapi/accessor_property_list.hpp @@ -14,6 +14,7 @@ #include // for PropertyListBase #include #include // for property_list +#include // for UR_RESULT_ERROR_INVALID_VALUE #include // for bitset #include // for shared_ptr diff --git a/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp b/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp index c29630da0ff2c..3230757124c7d 100644 --- a/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp +++ b/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp @@ -18,8 +18,7 @@ #include // for __SYCL_DE... #include // for __SYCL_EX... #include // for createSyc... -#include // for pi_native... -#include // for cast +#include // for cast #include // for device #include // for event #include // for buffer @@ -43,7 +42,7 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::level_zero::detail { __SYCL_EXPORT device make_device(const platform &Platform, - pi_native_handle NativeHandle); + ur_native_handle_t NativeHandle); } // namespace ext::oneapi::level_zero::detail // Specialization of sycl::make_context for Level-Zero backend. @@ -52,10 +51,9 @@ inline context make_context( const backend_input_t &BackendObject, const async_handler &Handler) { - const std::vector &DeviceList = BackendObject.DeviceList; - pi_native_handle NativeHandle = - detail::pi::cast(BackendObject.NativeHandle); + ur_native_handle_t NativeHandle = + detail::ur::cast(BackendObject.NativeHandle); bool KeepOwnership = BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep; @@ -133,13 +131,13 @@ inline queue make_queue( const device Device = device{BackendObject.Device}; bool IsImmCmdList = std::holds_alternative( BackendObject.NativeHandle); - pi_native_handle Handle = IsImmCmdList - ? reinterpret_cast( - *(std::get_if( - &BackendObject.NativeHandle))) - : reinterpret_cast( - *(std::get_if( - &BackendObject.NativeHandle))); + ur_native_handle_t Handle = + IsImmCmdList ? reinterpret_cast( + *(std::get_if( + &BackendObject.NativeHandle))) + : reinterpret_cast( + *(std::get_if( + &BackendObject.NativeHandle))); return sycl::detail::make_queue( Handle, IsImmCmdList, TargetContext, &Device, @@ -152,7 +150,7 @@ template <> inline auto get_native(const queue &Obj) -> backend_return_t { int32_t IsImmCmdList; - pi_native_handle Handle = Obj.getNative(IsImmCmdList); + ur_native_handle_t Handle = Obj.getNative(IsImmCmdList); return IsImmCmdList ? backend_return_t< backend::ext_oneapi_level_zero, @@ -167,7 +165,7 @@ inline event make_event( const backend_input_t &BackendObject, const context &TargetContext) { return sycl::detail::make_event( - detail::pi::cast(BackendObject.NativeHandle), + detail::ur::cast(BackendObject.NativeHandle), TargetContext, BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep, backend::ext_oneapi_level_zero); @@ -183,7 +181,7 @@ make_kernel_bundle( const context &TargetContext) { std::shared_ptr KBImpl = detail::make_kernel_bundle( - detail::pi::cast(BackendObject.NativeHandle), + detail::ur::cast(BackendObject.NativeHandle), TargetContext, BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep, bundle_state::executable, backend::ext_oneapi_level_zero); @@ -199,7 +197,7 @@ inline kernel make_kernel( const context &TargetContext) { return detail::make_kernel( TargetContext, BackendObject.KernelBundle, - detail::pi::cast(BackendObject.NativeHandle), + detail::ur::cast(BackendObject.NativeHandle), BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep, backend::ext_oneapi_level_zero); } @@ -214,7 +212,7 @@ make_buffer( buffer> &BackendObject, const context &TargetContext, event AvailableEvent) { return detail::make_buffer_helper( - detail::pi::cast(BackendObject.NativeHandle), + detail::ur::cast(BackendObject.NativeHandle), TargetContext, AvailableEvent, !(BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep)); } @@ -229,7 +227,7 @@ make_buffer( buffer> &BackendObject, const context &TargetContext) { return detail::make_buffer_helper( - detail::pi::cast(BackendObject.NativeHandle), + detail::ur::cast(BackendObject.NativeHandle), TargetContext, event{}, !(BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep)); } @@ -247,7 +245,7 @@ make_image(const backend_input_t> (BackendObject.Ownership == ext::oneapi::level_zero::ownership::transfer); return image( - detail::pi::cast(BackendObject.ZeImageHandle), + detail::ur::cast(BackendObject.ZeImageHandle), TargetContext, AvailableEvent, BackendObject.ChanOrder, BackendObject.ChanType, OwnNativeHandle, BackendObject.Range); } diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 9e0b80f5077de..447e66ea50e98 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -10,7 +10,6 @@ #include // for context #include // for __SYCL_EXPORT -#include // for pi_uint64 #include // for device #include // for image_desc... #include // for interop_me... @@ -34,9 +33,9 @@ namespace ext::oneapi::experimental { /// Opaque unsampled image handle type. struct unsampled_image_handle { - using raw_image_handle_type = pi_uint64; + using raw_image_handle_type = uint64_t; - unsampled_image_handle() : raw_handle(~0) {} + unsampled_image_handle() : raw_handle(0) {} unsampled_image_handle(raw_image_handle_type raw_image_handle) : raw_handle(raw_image_handle) {} @@ -46,9 +45,9 @@ struct unsampled_image_handle { /// Opaque sampled image handle type. struct sampled_image_handle { - using raw_image_handle_type = pi_uint64; + using raw_image_handle_type = uint64_t; - sampled_image_handle() : raw_handle(~0) {} + sampled_image_handle() : raw_handle(0) {} sampled_image_handle(raw_image_handle_type handle) : raw_handle(handle) {} diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp index 7c7400404f677..a9307c069e15e 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_interop.hpp @@ -8,7 +8,7 @@ #pragma once -#include // for pi_uint64 +#include #include // for size_t @@ -32,13 +32,13 @@ enum class external_semaphore_handle_type { /// Opaque interop memory handle type struct interop_mem_handle { - using raw_handle_type = pi_uint64; + using raw_handle_type = ur_exp_interop_mem_handle_t; raw_handle_type raw_handle; }; /// Opaque interop semaphore handle type struct interop_semaphore_handle { - using raw_handle_type = pi_uint64; + using raw_handle_type = ur_exp_interop_semaphore_handle_t; raw_handle_type raw_handle; external_semaphore_handle_type handle_type; }; diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_mem_handle.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_mem_handle.hpp index c243fa423a727..7c899b184fae9 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_mem_handle.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_mem_handle.hpp @@ -8,12 +8,14 @@ #pragma once +#include "ur_api.h" + namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { /// Opaque image memory handle type struct image_mem_handle { - using raw_handle_type = void *; + using raw_handle_type = ur_exp_image_mem_native_handle_t; raw_handle_type raw_handle; }; } // namespace ext::oneapi::experimental diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_memory.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_memory.hpp index cb8c44020ab21..fa5cf7d396871 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_memory.hpp @@ -45,7 +45,7 @@ class image_mem_impl { sycl::context get_context() const { return syclContext; } private: - raw_handle_type handle{nullptr}; + raw_handle_type handle{0}; image_descriptor descriptor; sycl::device syclDevice; sycl::context syclContext; diff --git a/sycl/include/sycl/ext/oneapi/bindless_images_sampler.hpp b/sycl/include/sycl/ext/oneapi/bindless_images_sampler.hpp index f29d387ed575c..e3221ce62db1e 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images_sampler.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images_sampler.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include namespace sycl { inline namespace _V1 { @@ -16,8 +17,8 @@ namespace ext::oneapi::experimental { /// cubemap filtering mode enum enum class cubemap_filtering_mode : unsigned int { - disjointed = PI_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED, - seamless = PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS, + disjointed = UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED, + seamless = UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS, }; struct bindless_image_sampler { diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp index 9d01c37691f33..4412c5c1a0c23 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp @@ -21,7 +21,7 @@ inline namespace _V1 { namespace ext::oneapi::cuda { // Implementation of ext_oneapi_cuda::make -inline __SYCL_EXPORT device make_device(pi_native_handle NativeHandle) { +inline __SYCL_EXPORT device make_device(ur_native_handle_t NativeHandle) { return sycl::detail::make_device(NativeHandle, backend::ext_oneapi_cuda); } @@ -74,7 +74,8 @@ inline device make_device( return dev; } } - pi_native_handle NativeHandle = static_cast(BackendObject); + ur_native_handle_t NativeHandle = + static_cast(BackendObject); return ext::oneapi::cuda::make_device(NativeHandle); } @@ -83,7 +84,7 @@ template <> inline event make_event( const backend_input_t &BackendObject, const context &TargetContext) { - return detail::make_event(detail::pi::cast(BackendObject), + return detail::make_event(reinterpret_cast(BackendObject), TargetContext, true, /*Backend*/ backend::ext_oneapi_cuda); } @@ -95,7 +96,7 @@ inline queue make_queue( const context &TargetContext, const async_handler Handler) { int32_t nativeHandleDesc = 0; const property_list &PropList{}; - return detail::make_queue(detail::pi::cast(BackendObject), + return detail::make_queue(reinterpret_cast(BackendObject), nativeHandleDesc, TargetContext, nullptr, true, PropList, Handler, /*Backend*/ backend::ext_oneapi_cuda); diff --git a/sycl/include/sycl/ext/oneapi/experimental/opportunistic_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/opportunistic_group.hpp index 54b08a4652d09..cbe8de03a9963 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/opportunistic_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/opportunistic_group.hpp @@ -14,10 +14,10 @@ #include #include #include // for this_sub_group -#include // for sub_group_mask -#include // for id -#include // for memory_scope -#include // for range +#include // for sub_group_mask +#include // for id +#include // for memory_scope +#include // for range #include #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 5408431a4c9db..4f6d6d51af16c 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -17,11 +17,10 @@ #include #include #include -#include -#include #include #include #include +#include #include #include #include @@ -1138,7 +1137,7 @@ class __SYCL_EXPORT handler { AccessMode == access::mode::discard_read_write; } - // PI APIs only support select fill sizes: 1, 2, 4, 8, 16, 32, 64, 128 + // UR APIs only support select fill sizes: 1, 2, 4, 8, 16, 32, 64, 128 constexpr static bool isBackendSupportedFillSize(size_t Size) { return Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; @@ -2865,7 +2864,7 @@ class __SYCL_EXPORT handler { // TODO add check:T must be an integral scalar value or a SYCL vector type static_assert(isValidTargetForExplicitOp(AccessTarget), "Invalid accessor target for the fill method."); - // CG::Fill will result in piEnqueuFillBuffer/Image which requires that mem + // CG::Fill will result in urEnqueueMemBufferFill which requires that mem // data is contiguous. Thus we check range and offset when dim > 1 // Images don't allow ranged accessors and are fine. if constexpr (isBackendSupportedFillSize(sizeof(T)) && diff --git a/sycl/include/sycl/image.hpp b/sycl/include/sycl/image.hpp index 3103231404f46..386fe93d3e41d 100644 --- a/sycl/include/sycl/image.hpp +++ b/sycl/include/sycl/image.hpp @@ -20,7 +20,6 @@ #include // for __SYCL_EXPORT #include // for getSyclObjImpl #include // for OwnerLessBase -#include // for pi_native_handle #include // for iterator_value... #include // for SYCLMemObjAllo... #include // for is_contained @@ -31,6 +30,7 @@ #include // for range, rangeTo... #include // for image_sampler #include // for vec +#include // for ur_native_hand... #include // for size_t, nullptr_t #include // for function @@ -247,7 +247,7 @@ class __SYCL_EXPORT image_plain { uint8_t Dimensions); #endif - image_plain(pi_native_handle MemObject, const context &SyclContext, + image_plain(ur_native_handle_t MemObject, const context &SyclContext, event AvailableEvent, std::unique_ptr Allocator, uint8_t Dimensions, image_channel_order Order, @@ -672,7 +672,7 @@ class image : public detail::unsampled_image_common { } private: - image(pi_native_handle MemObject, const context &SyclContext, + image(ur_native_handle_t MemObject, const context &SyclContext, event AvailableEvent, image_channel_order Order, image_channel_type Type, bool OwnNativeHandle, range Range) : common_base(MemObject, SyclContext, AvailableEvent, diff --git a/sycl/include/sycl/info/context_traits.def b/sycl/include/sycl/info/context_traits.def index 37594a1a6a985..5eb20f4b44a94 100644 --- a/sycl/include/sycl/info/context_traits.def +++ b/sycl/include/sycl/info/context_traits.def @@ -1,7 +1,7 @@ -__SYCL_PARAM_TRAITS_SPEC(context, reference_count, uint32_t, PI_CONTEXT_INFO_REFERENCE_COUNT) -__SYCL_PARAM_TRAITS_SPEC(context, platform, sycl::platform, PI_CONTEXT_INFO_PLATFORM) -__SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector, PI_CONTEXT_INFO_DEVICES) -__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector, PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) -__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector, PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) -__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_order_capabilities, std::vector, PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) -__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_scope_capabilities, std::vector, PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, reference_count, uint32_t, UR_CONTEXT_INFO_REFERENCE_COUNT) +__SYCL_PARAM_TRAITS_SPEC(context, platform, sycl::platform, __SYCL_TRAIT_HANDLED_IN_RT) +__SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector, UR_CONTEXT_INFO_DEVICES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector, UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector, UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_order_capabilities, std::vector, UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_scope_capabilities, std::vector, UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index 14126c3ee5e2c..54afe94f2256b 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -6,233 +6,236 @@ // SYCL 2020 spec __SYCL_PARAM_TRAITS_SPEC(device, device_type, info::device_type, - PI_DEVICE_INFO_TYPE) -__SYCL_PARAM_TRAITS_SPEC(device, vendor_id, pi_uint32, PI_DEVICE_INFO_VENDOR_ID) -__SYCL_PARAM_TRAITS_SPEC(device, max_compute_units, pi_uint32, - PI_DEVICE_INFO_MAX_COMPUTE_UNITS) -__SYCL_PARAM_TRAITS_SPEC(device, max_work_item_dimensions, pi_uint32, - PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS) + UR_DEVICE_INFO_TYPE) +__SYCL_PARAM_TRAITS_SPEC(device, vendor_id, uint32_t,UR_DEVICE_INFO_VENDOR_ID) +__SYCL_PARAM_TRAITS_SPEC(device, max_compute_units, uint32_t, + UR_DEVICE_INFO_MAX_COMPUTE_UNITS) +__SYCL_PARAM_TRAITS_SPEC(device, max_work_item_dimensions, uint32_t, + UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS) __SYCL_PARAM_TRAITS_TEMPLATE_SPEC(device, max_work_item_sizes<1>, range<1>, - PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES) + UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES) __SYCL_PARAM_TRAITS_TEMPLATE_SPEC(device, max_work_item_sizes<2>, range<2>, - PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES) + UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES) __SYCL_PARAM_TRAITS_TEMPLATE_SPEC(device, max_work_item_sizes<3>, range<3>, - PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES) + UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES) __SYCL_PARAM_TRAITS_SPEC(device, max_work_group_size, size_t, - PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE) -__SYCL_PARAM_TRAITS_SPEC(device, max_num_sub_groups, pi_uint32, - PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS) + UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE) +__SYCL_PARAM_TRAITS_SPEC(device, max_num_sub_groups, uint32_t, + UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS) __SYCL_PARAM_TRAITS_SPEC(device, sub_group_sizes, std::vector, - PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL) -__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_char, pi_uint32, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR) -__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_short, pi_uint32, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT) -__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_int, pi_uint32, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT) -__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_long, pi_uint32, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG) -__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_float, pi_uint32, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT) -__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_double, pi_uint32, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE) -__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_half, pi_uint32, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF) -__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_char, pi_uint32, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR) -__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_short, pi_uint32, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT) -__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_int, pi_uint32, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT) -__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_long, pi_uint32, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG) -__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_float, pi_uint32, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT) -__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_double, pi_uint32, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE) -__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_half, pi_uint32, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF) -__SYCL_PARAM_TRAITS_SPEC(device, max_clock_frequency, pi_uint32, - PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY) -__SYCL_PARAM_TRAITS_SPEC(device, address_bits, pi_uint32, - PI_DEVICE_INFO_ADDRESS_BITS) -__SYCL_PARAM_TRAITS_SPEC(device, max_mem_alloc_size, pi_uint64, - PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE) -__SYCL_PARAM_TRAITS_SPEC(device, max_read_image_args, pi_uint32, - PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS) -__SYCL_PARAM_TRAITS_SPEC(device, max_write_image_args, pi_uint32, - PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS) + UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL) +__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_char, uint32_t, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR) +__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_short, uint32_t, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT) +__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_int, uint32_t, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT) +__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_long, uint32_t, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG) +__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_float, uint32_t, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT) +__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_double, uint32_t, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE) +__SYCL_PARAM_TRAITS_SPEC(device, preferred_vector_width_half, uint32_t, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF) +__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_char, uint32_t, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR) +__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_short, uint32_t, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT) +__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_int, uint32_t, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT) +__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_long, uint32_t, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG) +__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_float, uint32_t, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT) +__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_double, uint32_t, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE) +__SYCL_PARAM_TRAITS_SPEC(device, native_vector_width_half, uint32_t, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF) +__SYCL_PARAM_TRAITS_SPEC(device, max_clock_frequency, uint32_t, + UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY) +__SYCL_PARAM_TRAITS_SPEC(device, address_bits, uint32_t, + UR_DEVICE_INFO_ADDRESS_BITS) +__SYCL_PARAM_TRAITS_SPEC(device, max_mem_alloc_size, uint64_t, + UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE) +__SYCL_PARAM_TRAITS_SPEC(device, max_read_image_args, uint32_t, + UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS) +__SYCL_PARAM_TRAITS_SPEC(device, max_write_image_args, uint32_t, + UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS) __SYCL_PARAM_TRAITS_SPEC(device, image2d_max_width, size_t, - PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH) + UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH) __SYCL_PARAM_TRAITS_SPEC(device, image2d_max_height, size_t, - PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT) + UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT) __SYCL_PARAM_TRAITS_SPEC(device, image3d_max_width, size_t, - PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH) + UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH) __SYCL_PARAM_TRAITS_SPEC(device, image3d_max_height, size_t, - PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT) + UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT) __SYCL_PARAM_TRAITS_SPEC(device, image3d_max_depth, size_t, - PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH) + UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH) __SYCL_PARAM_TRAITS_SPEC(device, image_max_buffer_size, size_t, - PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE) -__SYCL_PARAM_TRAITS_SPEC(device, max_samplers, pi_uint32, - PI_DEVICE_INFO_MAX_SAMPLERS) + UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE) +__SYCL_PARAM_TRAITS_SPEC(device, max_samplers, uint32_t, + UR_DEVICE_INFO_MAX_SAMPLERS) __SYCL_PARAM_TRAITS_SPEC(device, max_parameter_size, size_t, - PI_DEVICE_INFO_MAX_PARAMETER_SIZE) -__SYCL_PARAM_TRAITS_SPEC(device, mem_base_addr_align, pi_uint32, - PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN) + UR_DEVICE_INFO_MAX_PARAMETER_SIZE) +__SYCL_PARAM_TRAITS_SPEC(device, mem_base_addr_align, uint32_t, + UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN) __SYCL_PARAM_TRAITS_SPEC(device, half_fp_config, std::vector, - PI_DEVICE_INFO_HALF_FP_CONFIG) -__SYCL_PARAM_TRAITS_SPEC(device, single_fp_config, std::vector, - PI_DEVICE_INFO_SINGLE_FP_CONFIG) -__SYCL_PARAM_TRAITS_SPEC(device, double_fp_config, std::vector, - PI_DEVICE_INFO_DOUBLE_FP_CONFIG) + UR_DEVICE_INFO_HALF_FP_CONFIG) +__SYCL_PARAM_TRAITS_SPEC(device, single_fp_config, + std::vector, + UR_DEVICE_INFO_SINGLE_FP_CONFIG) +__SYCL_PARAM_TRAITS_SPEC(device, double_fp_config, + std::vector, + UR_DEVICE_INFO_DOUBLE_FP_CONFIG) __SYCL_PARAM_TRAITS_SPEC(device, global_mem_cache_type, info::global_mem_cache_type, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE) -__SYCL_PARAM_TRAITS_SPEC(device, global_mem_cache_line_size, pi_uint32, - PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE) -__SYCL_PARAM_TRAITS_SPEC(device, global_mem_cache_size, pi_uint64, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE) -__SYCL_PARAM_TRAITS_SPEC(device, global_mem_size, pi_uint64, - PI_DEVICE_INFO_GLOBAL_MEM_SIZE) -__SYCL_PARAM_TRAITS_SPEC(device, max_constant_buffer_size, pi_uint64, - PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE) -__SYCL_PARAM_TRAITS_SPEC(device, max_constant_args, pi_uint32, - PI_DEVICE_INFO_MAX_CONSTANT_ARGS) + UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE) +__SYCL_PARAM_TRAITS_SPEC(device, global_mem_cache_line_size, uint32_t, + UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE) +__SYCL_PARAM_TRAITS_SPEC(device, global_mem_cache_size, uint64_t, + UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE) +__SYCL_PARAM_TRAITS_SPEC(device, global_mem_size, uint64_t, + UR_DEVICE_INFO_GLOBAL_MEM_SIZE) +__SYCL_PARAM_TRAITS_SPEC(device, max_constant_buffer_size, uint64_t, + UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE) +__SYCL_PARAM_TRAITS_SPEC(device, max_constant_args, uint32_t, + UR_DEVICE_INFO_MAX_CONSTANT_ARGS) __SYCL_PARAM_TRAITS_SPEC(device, local_mem_type, info::local_mem_type, - PI_DEVICE_INFO_LOCAL_MEM_TYPE) -__SYCL_PARAM_TRAITS_SPEC(device, local_mem_size, pi_uint64, - PI_DEVICE_INFO_LOCAL_MEM_SIZE) + UR_DEVICE_INFO_LOCAL_MEM_TYPE) +__SYCL_PARAM_TRAITS_SPEC(device, local_mem_size, uint64_t, + UR_DEVICE_INFO_LOCAL_MEM_SIZE) __SYCL_PARAM_TRAITS_SPEC(device, error_correction_support, bool, - PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT) + UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT) __SYCL_PARAM_TRAITS_SPEC(device, host_unified_memory, bool, - PI_DEVICE_INFO_HOST_UNIFIED_MEMORY) + UR_DEVICE_INFO_HOST_UNIFIED_MEMORY) __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities, std::vector, - PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) + UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_order_capabilities, std::vector, - PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) + UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities, std::vector, - PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) + UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_scope_capabilities, std::vector, - PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) + UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, profiling_timer_resolution, size_t, - PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION) + UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION) __SYCL_PARAM_TRAITS_SPEC(device, is_endian_little, bool, - PI_DEVICE_INFO_ENDIAN_LITTLE) -__SYCL_PARAM_TRAITS_SPEC(device, is_available, bool, PI_DEVICE_INFO_AVAILABLE) + UR_DEVICE_INFO_ENDIAN_LITTLE) +__SYCL_PARAM_TRAITS_SPEC(device, is_available, bool,UR_DEVICE_INFO_AVAILABLE) __SYCL_PARAM_TRAITS_SPEC(device, is_compiler_available, bool, - PI_DEVICE_INFO_COMPILER_AVAILABLE) + UR_DEVICE_INFO_COMPILER_AVAILABLE) __SYCL_PARAM_TRAITS_SPEC(device, is_linker_available, bool, - PI_DEVICE_INFO_LINKER_AVAILABLE) + UR_DEVICE_INFO_LINKER_AVAILABLE) __SYCL_PARAM_TRAITS_SPEC(device, execution_capabilities, std::vector, - PI_DEVICE_INFO_EXECUTION_CAPABILITIES) + UR_DEVICE_INFO_EXECUTION_CAPABILITIES) __SYCL_PARAM_TRAITS_SPEC(device, queue_profiling, bool, - PI_DEVICE_INFO_QUEUE_PROPERTIES) + UR_DEVICE_INFO_QUEUE_PROPERTIES) __SYCL_PARAM_TRAITS_SPEC(device, built_in_kernel_ids, - std::vector, 0) + std::vector, UR_DEVICE_INFO_FORCE_UINT32) __SYCL_PARAM_TRAITS_SPEC(device, built_in_kernels, std::vector, - PI_DEVICE_INFO_BUILT_IN_KERNELS) + UR_DEVICE_INFO_BUILT_IN_KERNELS) __SYCL_PARAM_TRAITS_SPEC(device, platform, sycl::platform, - PI_DEVICE_INFO_PLATFORM) -__SYCL_PARAM_TRAITS_SPEC(device, name, std::string, PI_DEVICE_INFO_NAME) -__SYCL_PARAM_TRAITS_SPEC(device, vendor, std::string, PI_DEVICE_INFO_VENDOR) + UR_DEVICE_INFO_PLATFORM) +__SYCL_PARAM_TRAITS_SPEC(device, name, std::string,UR_DEVICE_INFO_NAME) +__SYCL_PARAM_TRAITS_SPEC(device, vendor, std::string,UR_DEVICE_INFO_VENDOR) __SYCL_PARAM_TRAITS_SPEC(device, driver_version, std::string, - PI_DEVICE_INFO_DRIVER_VERSION) -__SYCL_PARAM_TRAITS_SPEC(device, profile, std::string, PI_DEVICE_INFO_PROFILE) -__SYCL_PARAM_TRAITS_SPEC(device, version, std::string, PI_DEVICE_INFO_VERSION) + UR_DEVICE_INFO_DRIVER_VERSION) +__SYCL_PARAM_TRAITS_SPEC(device, profile, std::string,UR_DEVICE_INFO_PROFILE) +__SYCL_PARAM_TRAITS_SPEC(device, version, std::string,UR_DEVICE_INFO_VERSION) __SYCL_PARAM_TRAITS_SPEC(device, backend_version, std::string, - PI_DEVICE_INFO_BACKEND_VERSION) + UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION) __SYCL_PARAM_TRAITS_SPEC(device, extensions, std::vector, - PI_DEVICE_INFO_EXTENSIONS) + UR_DEVICE_INFO_EXTENSIONS) __SYCL_PARAM_TRAITS_SPEC(device, printf_buffer_size, size_t, - PI_DEVICE_INFO_PRINTF_BUFFER_SIZE) + UR_DEVICE_INFO_PRINTF_BUFFER_SIZE) __SYCL_PARAM_TRAITS_SPEC(device, preferred_interop_user_sync, bool, - PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC) -__SYCL_PARAM_TRAITS_SPEC(device, partition_max_sub_devices, pi_uint32, - PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES) + UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC) +__SYCL_PARAM_TRAITS_SPEC(device, partition_max_sub_devices, uint32_t, + UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES) __SYCL_PARAM_TRAITS_SPEC(device, partition_properties, std::vector, - PI_DEVICE_INFO_PARTITION_PROPERTIES) + UR_DEVICE_INFO_SUPPORTED_PARTITIONS) __SYCL_PARAM_TRAITS_SPEC(device, partition_affinity_domains, std::vector, - PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) + UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) __SYCL_PARAM_TRAITS_SPEC(device, partition_type_property, info::partition_property, - PI_DEVICE_INFO_PARTITION_TYPE) + UR_DEVICE_INFO_PARTITION_TYPE) __SYCL_PARAM_TRAITS_SPEC(device, partition_type_affinity_domain, info::partition_affinity_domain, - PI_DEVICE_INFO_PARTITION_TYPE) + UR_DEVICE_INFO_PARTITION_TYPE) // Has custom specialization. __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(device, parent_device, sycl::device, - PI_DEVICE_INFO_PARENT_DEVICE) -__SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(device, aspects, std::vector, 0) -__SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(device, image_support, bool, 0) + UR_DEVICE_INFO_PARENT_DEVICE) +__SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(device, aspects, + std::vector, + UR_DEVICE_INFO_FORCE_UINT32) +__SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(device, image_support, bool, + UR_DEVICE_INFO_FORCE_UINT32) // Extensions/deprecated -__SYCL_PARAM_TRAITS_SPEC(device, atomic64, bool, PI_DEVICE_INFO_ATOMIC_64) -__SYCL_PARAM_TRAITS_SPEC(device, kernel_kernel_pipe_support, bool, 0) -__SYCL_PARAM_TRAITS_SPEC(device, reference_count, pi_uint32, - PI_DEVICE_INFO_REFERENCE_COUNT) +__SYCL_PARAM_TRAITS_SPEC(device, atomic64, bool,UR_DEVICE_INFO_ATOMIC_64) +__SYCL_PARAM_TRAITS_SPEC(device, kernel_kernel_pipe_support, bool, + UR_DEVICE_INFO_FORCE_UINT32) +__SYCL_PARAM_TRAITS_SPEC(device, reference_count, uint32_t, + UR_DEVICE_INFO_REFERENCE_COUNT) // To be dropped (has alternatives/not needed) __SYCL_PARAM_TRAITS_SPEC(device, usm_device_allocations, bool, - PI_USM_DEVICE_SUPPORT) + UR_DEVICE_INFO_USM_DEVICE_SUPPORT) __SYCL_PARAM_TRAITS_SPEC(device, usm_host_allocations, bool, - PI_USM_HOST_SUPPORT) + UR_DEVICE_INFO_USM_HOST_SUPPORT) __SYCL_PARAM_TRAITS_SPEC(device, usm_shared_allocations, bool, - PI_USM_SINGLE_SHARED_SUPPORT) + UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT) __SYCL_PARAM_TRAITS_SPEC(device, usm_restricted_shared_allocations, bool, - PI_USM_CROSS_SHARED_SUPPORT) + UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT) __SYCL_PARAM_TRAITS_SPEC(device, usm_system_allocations, bool, - PI_USM_SYSTEM_SHARED_SUPPORT) -__SYCL_PARAM_TRAITS_SPEC(device, opencl_c_version, std::string, - PI_DEVICE_INFO_OPENCL_C_VERSION) -// To be dropped (no alternatives) + UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT) __SYCL_PARAM_TRAITS_SPEC(device, image_max_array_size, size_t, - PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE) + UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE) +// To be dropped (no alternatives) +__SYCL_PARAM_TRAITS_SPEC(device, opencl_c_version, std::string, __SYCL_TRAIT_HANDLED_IN_RT) // Extensions __SYCL_PARAM_TRAITS_SPEC(device, sub_group_independent_forward_progress, bool, - PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS) + UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS) __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_srgb, bool, - PI_DEVICE_INFO_IMAGE_SRGB) + UR_DEVICE_INFO_IMAGE_SRGB) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool, - PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT) + UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT) //Deprecated oneapi/intel extension //TODO:Remove when possible __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_pci_address, std::string, - PI_DEVICE_INFO_PCI_ADDRESS) -__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count, pi_uint32, - PI_DEVICE_INFO_GPU_EU_COUNT) -__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_simd_width, pi_uint32, - PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH) -__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_slices, pi_uint32, - PI_DEVICE_INFO_GPU_SLICES) -__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_subslices_per_slice, pi_uint32, - PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE) -__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count_per_subslice, pi_uint32, - PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE) -__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_hw_threads_per_eu, pi_uint32, - PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU) + UR_DEVICE_INFO_PCI_ADDRESS) +__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count, uint32_t, + UR_DEVICE_INFO_GPU_EU_COUNT) +__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_simd_width, uint32_t, + UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH) +__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_slices, uint32_t, + UR_DEVICE_INFO_GPU_EU_SLICES) +__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_subslices_per_slice, uint32_t, + UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE) +__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count_per_subslice, uint32_t, + UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE) +__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_hw_threads_per_eu, uint32_t, + UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_device_info_uuid, detail::uuid_type, - PI_DEVICE_INFO_UUID) -__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_max_mem_bandwidth, pi_uint64, - PI_DEVICE_INFO_MAX_MEM_BANDWIDTH) -__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_1d, id<1>, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D) -__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_2d, id<2>, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D) + UR_DEVICE_INFO_UUID) +__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_max_mem_bandwidth, uint64_t, + UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH) + +__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_1d, id<1>, __SYCL_TRAIT_HANDLED_IN_RT) +__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_2d, id<2>, __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_3d, id<3>, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D) -__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_global_work_groups, size_t, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS) + UR_DEVICE_INFO_MAX_WORK_GROUPS_3D) +__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_global_work_groups, size_t, __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_cuda_cluster_group, bool, - PI_EXT_ONEAPI_DEVICE_INFO_CLUSTER_LAUNCH) + UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP) #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC diff --git a/sycl/include/sycl/info/event_profiling_traits.def b/sycl/include/sycl/info/event_profiling_traits.def index f58354809125e..ace27dfa6a2f6 100644 --- a/sycl/include/sycl/info/event_profiling_traits.def +++ b/sycl/include/sycl/info/event_profiling_traits.def @@ -1,4 +1,4 @@ -__SYCL_PARAM_TRAITS_SPEC(event_profiling, command_submit, uint64_t, PI_PROFILING_INFO_COMMAND_SUBMIT) -__SYCL_PARAM_TRAITS_SPEC(event_profiling, command_start, uint64_t, PI_PROFILING_INFO_COMMAND_START) -__SYCL_PARAM_TRAITS_SPEC(event_profiling, command_end, uint64_t, PI_PROFILING_INFO_COMMAND_END) +__SYCL_PARAM_TRAITS_SPEC(event_profiling, command_submit, uint64_t, UR_PROFILING_INFO_COMMAND_SUBMIT) +__SYCL_PARAM_TRAITS_SPEC(event_profiling, command_start, uint64_t, UR_PROFILING_INFO_COMMAND_START) +__SYCL_PARAM_TRAITS_SPEC(event_profiling, command_end, uint64_t, UR_PROFILING_INFO_COMMAND_END) diff --git a/sycl/include/sycl/info/event_traits.def b/sycl/include/sycl/info/event_traits.def index 1d45633e8ad3b..4598cceca0619 100644 --- a/sycl/include/sycl/info/event_traits.def +++ b/sycl/include/sycl/info/event_traits.def @@ -1,2 +1,2 @@ -__SYCL_PARAM_TRAITS_SPEC(event, command_execution_status, info::event_command_status, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) -__SYCL_PARAM_TRAITS_SPEC(event, reference_count, uint32_t, PI_EVENT_INFO_REFERENCE_COUNT) +__SYCL_PARAM_TRAITS_SPEC(event, command_execution_status, info::event_command_status, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) +__SYCL_PARAM_TRAITS_SPEC(event, reference_count, uint32_t, UR_EVENT_INFO_REFERENCE_COUNT) diff --git a/sycl/include/sycl/info/ext_codeplay_device_traits.def b/sycl/include/sycl/info/ext_codeplay_device_traits.def index dda904cfbc6f0..264929a70ff44 100644 --- a/sycl/include/sycl/info/ext_codeplay_device_traits.def +++ b/sycl/include/sycl/info/ext_codeplay_device_traits.def @@ -2,10 +2,10 @@ #define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC __SYCL_PARAM_TRAITS_SPEC #endif -__SYCL_PARAM_TRAITS_SPEC(ext::codeplay::experimental,device, supports_fusion, bool, PI_EXT_CODEPLAY_DEVICE_INFO_SUPPORTS_FUSION) +__SYCL_PARAM_TRAITS_SPEC(ext::codeplay::experimental,device, supports_fusion, bool, __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_SPEC( ext::codeplay::experimental, device, max_registers_per_work_group, uint32_t, - PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP) + UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP) #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF diff --git a/sycl/include/sycl/info/ext_intel_device_traits.def b/sycl/include/sycl/info/ext_intel_device_traits.def index e58b27af69756..f828b43e2a1d7 100644 --- a/sycl/include/sycl/info/ext_intel_device_traits.def +++ b/sycl/include/sycl/info/ext_intel_device_traits.def @@ -2,20 +2,20 @@ #define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC __SYCL_PARAM_TRAITS_SPEC #endif -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, device_id, pi_uint32, PI_DEVICE_INFO_DEVICE_ID) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, pci_address, std::string, PI_DEVICE_INFO_PCI_ADDRESS) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_eu_count, pi_uint32, PI_DEVICE_INFO_GPU_EU_COUNT) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_eu_simd_width, pi_uint32, PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_slices, pi_uint32, PI_DEVICE_INFO_GPU_SLICES) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_subslices_per_slice, pi_uint32, PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_eu_count_per_subslice, pi_uint32, PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_hw_threads_per_eu, pi_uint32, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_mem_bandwidth, pi_uint64, PI_DEVICE_INFO_MAX_MEM_BANDWIDTH) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, uuid, detail::uuid_type, PI_DEVICE_INFO_UUID) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, free_memory, pi_uint64, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH) -__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, pi_int32, PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, device_id, uint32_t, UR_DEVICE_INFO_DEVICE_ID) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, pci_address, std::string, UR_DEVICE_INFO_PCI_ADDRESS) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_eu_count, uint32_t, UR_DEVICE_INFO_GPU_EU_COUNT) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_eu_simd_width, uint32_t, UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_slices, uint32_t, UR_DEVICE_INFO_GPU_EU_SLICES) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_subslices_per_slice, uint32_t, UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_eu_count_per_subslice, uint32_t, UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_hw_threads_per_eu, uint32_t, UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_mem_bandwidth, uint64_t, UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, uuid, detail::uuid_type, UR_DEVICE_INFO_UUID) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, free_memory, uint64_t, UR_DEVICE_INFO_GLOBAL_MEM_FREE) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, uint32_t, UR_DEVICE_INFO_MEMORY_CLOCK_RATE) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, uint32_t, UR_DEVICE_INFO_MEMORY_BUS_WIDTH) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, int32_t, UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES) #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF diff --git a/sycl/include/sycl/info/ext_oneapi_device_traits.def b/sycl/include/sycl/info/ext_oneapi_device_traits.def index b544ca6b49441..813ec952b20d1 100644 --- a/sycl/include/sycl/info/ext_oneapi_device_traits.def +++ b/sycl/include/sycl/info/ext_oneapi_device_traits.def @@ -2,10 +2,11 @@ #define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC __SYCL_PARAM_TRAITS_SPEC #endif -__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental,device, max_global_work_groups, size_t, PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS) -__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<1>, id<1>, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D) -__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<2>, id<2>, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D) -__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<3>, id<3>, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D) + +__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental,device, max_global_work_groups, size_t, __SYCL_TRAIT_HANDLED_IN_RT) +__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<1>, id<1>, __SYCL_TRAIT_HANDLED_IN_RT) +__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<2>, id<2>, __SYCL_TRAIT_HANDLED_IN_RT) +__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<3>, id<3>, UR_DEVICE_INFO_MAX_WORK_GROUPS_3D) // Forward progress guarantees __SYCL_PARAM_TRAITS_TEMPLATE_SPEC( @@ -13,71 +14,72 @@ __SYCL_PARAM_TRAITS_TEMPLATE_SPEC( work_group_progress_capabilities< ext::oneapi::experimental::execution_scope::root_group>, std::vector, - PI_EXT_ONEAPI_DEVICE_INFO_WORK_GROUP_PROGRESS_AT_ROOT_GROUP_LEVEL) + __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_TEMPLATE_SPEC( ext::oneapi::experimental, device, sub_group_progress_capabilities< ext::oneapi::experimental::execution_scope::root_group>, std::vector, - PI_EXT_ONEAPI_DEVICE_INFO_SUB_GROUP_PROGRESS_AT_ROOT_GROUP_LEVEL) + __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_TEMPLATE_SPEC( ext::oneapi::experimental, device, sub_group_progress_capabilities< ext::oneapi::experimental::execution_scope::work_group>, std::vector, - PI_EXT_ONEAPI_DEVICE_INFO_SUB_GROUP_PROGRESS_AT_WORK_GROUP_LEVEL) + __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_TEMPLATE_SPEC( ext::oneapi::experimental, device, work_item_progress_capabilities< ext::oneapi::experimental::execution_scope::root_group>, std::vector, - PI_EXT_ONEAPI_DEVICE_INFO_WORK_ITEM_PROGRESS_AT_ROOT_GROUP_LEVEL) + __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_TEMPLATE_SPEC( ext::oneapi::experimental, device, work_item_progress_capabilities< ext::oneapi::experimental::execution_scope::work_group>, std::vector, - PI_EXT_ONEAPI_DEVICE_INFO_WORK_ITEM_PROGRESS_AT_WORK_GROUP_LEVEL) + __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_TEMPLATE_SPEC( ext::oneapi::experimental, device, work_item_progress_capabilities< ext::oneapi::experimental::execution_scope::sub_group>, std::vector, - PI_EXT_ONEAPI_DEVICE_INFO_WORK_ITEM_PROGRESS_AT_SUB_GROUP_LEVEL) + __SYCL_TRAIT_HANDLED_IN_RT) __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, architecture, ext::oneapi::experimental::architecture, - PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION) + UR_DEVICE_INFO_IP_VERSION) + __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, matrix_combinations, std::vector, - PI_EXT_ONEAPI_DEVICE_INFO_MATRIX_COMBINATIONS) + __SYCL_TRAIT_HANDLED_IN_RT) // Bindless images pitched allocation __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, image_row_pitch_align, uint32_t, - PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_PITCH_ALIGN) + UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP) __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, max_image_linear_row_pitch, uint32_t, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH) + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP) __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, max_image_linear_width, uint32_t, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH) + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP) __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, max_image_linear_height, uint32_t, - PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT) + UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP) // Bindless images mipmaps __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, mipmap_max_anisotropy, float, - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY) + UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP) // Composite devices __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, component_devices, std::vector, - PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES) + UR_DEVICE_INFO_COMPONENT_DEVICES) __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, composite_device, sycl::device, - PI_EXT_ONEAPI_DEVICE_INFO_COMPOSITE_DEVICE) + UR_DEVICE_INFO_COMPOSITE_DEVICE) #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp index 0dd80fc23f7be..8f6a86e5f60bb 100644 --- a/sycl/include/sycl/info/info_desc.hpp +++ b/sycl/include/sycl/info/info_desc.hpp @@ -9,7 +9,7 @@ #pragma once #include // for __SYCL2020_DEPRECATED -#include // for PI_DEVICE_AFFINITY_DOMAIN_L... +#include // FIXME: .def files included to this file use all sorts of SYCL objects like // id, range, traits, etc. We have to include some headers before including .def @@ -22,6 +22,10 @@ #include +// This is used in trait .def files when there isn't a corresponding backend +// query but we still need a value to instantiate the template. +#define __SYCL_TRAIT_HANDLED_IN_RT 0 + namespace sycl { inline namespace _V1 { @@ -31,9 +35,9 @@ class kernel_id; enum class memory_scope; enum class memory_order; -// TODO: stop using OpenCL directly, use PI. +// TODO: stop using OpenCL directly, use UR. namespace info { -#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ +#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, UrCode) \ struct Desc { \ using return_type = ReturnT; \ }; @@ -51,44 +55,80 @@ namespace context { } // namespace context // A.3 Device information descriptors -enum class device_type : pi_uint64 { - cpu = PI_DEVICE_TYPE_CPU, - gpu = PI_DEVICE_TYPE_GPU, - accelerator = PI_DEVICE_TYPE_ACC, - // TODO: figure out if we need all the below in PI - custom = PI_DEVICE_TYPE_CUSTOM, +enum class device_type : uint32_t { + cpu = UR_DEVICE_TYPE_CPU, + gpu = UR_DEVICE_TYPE_GPU, + accelerator = UR_DEVICE_TYPE_FPGA, + // TODO: evaluate the need for equivalent UR enums for these types + custom, automatic, host, - all = PI_DEVICE_TYPE_ALL + all = UR_DEVICE_TYPE_ALL }; -enum class partition_property : pi_device_partition_property { +enum class partition_property : intptr_t { no_partition = 0, - partition_equally = PI_DEVICE_PARTITION_EQUALLY, - partition_by_counts = PI_DEVICE_PARTITION_BY_COUNTS, - partition_by_affinity_domain = PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - ext_intel_partition_by_cslice = PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE + partition_equally = UR_DEVICE_PARTITION_EQUALLY, + partition_by_counts = UR_DEVICE_PARTITION_BY_COUNTS, + partition_by_affinity_domain = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + ext_intel_partition_by_cslice = UR_DEVICE_PARTITION_BY_CSLICE }; -enum class partition_affinity_domain : pi_device_affinity_domain { +// FIXME: maybe this should live elsewhere, maybe it should be implemented +// differently +inline partition_property +ConvertPartitionProperty(const ur_device_partition_t &Partition) { + switch (Partition) { + case UR_DEVICE_PARTITION_EQUALLY: + return partition_property::partition_equally; + case UR_DEVICE_PARTITION_BY_COUNTS: + return partition_property::partition_by_counts; + case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: + return partition_property::partition_by_affinity_domain; + case UR_DEVICE_PARTITION_BY_CSLICE: + return partition_property::ext_intel_partition_by_cslice; + default: + return partition_property::no_partition; + } +} + +enum class partition_affinity_domain : intptr_t { not_applicable = 0, - numa = PI_DEVICE_AFFINITY_DOMAIN_NUMA, - L4_cache = PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE, - L3_cache = PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE, - L2_cache = PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE, - L1_cache = PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE, - next_partitionable = PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE + numa = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, + L4_cache = UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, + L3_cache = UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, + L2_cache = UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, + L1_cache = UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, + next_partitionable = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE }; +inline partition_affinity_domain +ConvertAffinityDomain(const ur_device_affinity_domain_flags_t Domain) { + switch (Domain) { + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA: + return partition_affinity_domain::numa; + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE: + return partition_affinity_domain::L1_cache; + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE: + return partition_affinity_domain::L2_cache; + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE: + return partition_affinity_domain::L3_cache; + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE: + return partition_affinity_domain::L4_cache; + default: + return info::partition_affinity_domain::not_applicable; + } +} + enum class local_mem_type : int { none, local, global }; -enum class fp_config : pi_device_fp_config { - denorm = PI_FP_DENORM, - inf_nan = PI_FP_INF_NAN, - round_to_nearest = PI_FP_ROUND_TO_NEAREST, - round_to_zero = PI_FP_ROUND_TO_ZERO, - round_to_inf = PI_FP_ROUND_TO_INF, - fma = PI_FP_FMA, +enum class fp_config : uint32_t { + denorm = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM, + inf_nan = UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN, + round_to_nearest = UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST, + round_to_zero = UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO, + round_to_inf = UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF, + fma = UR_DEVICE_FP_CAPABILITY_FLAG_FMA, correctly_rounded_divide_sqrt, soft_float }; @@ -114,12 +154,12 @@ struct atomic_fence_scope_capabilities; #undef __SYCL_PARAM_TRAITS_DEPRECATED template struct max_work_item_sizes; -#define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC(DescType, Desc, ReturnT, PiCode) \ +#define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC(DescType, Desc, ReturnT, UrCode) \ template <> struct Desc { \ using return_type = ReturnT; \ }; -#define __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(DescType, Desc, ReturnT, PiCode) \ - __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) +#define __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(DescType, Desc, ReturnT, UrCode) \ + __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, UrCode) #include } // namespace device @@ -141,10 +181,10 @@ namespace kernel_device_specific { } // namespace kernel_device_specific // A.6 Event information desctiptors -enum class event_command_status : pi_int32 { - submitted = PI_EVENT_SUBMITTED, - running = PI_EVENT_RUNNING, - complete = PI_EVENT_COMPLETE, +enum class event_command_status : int32_t { + submitted = UR_EVENT_STATUS_SUBMITTED, + running = UR_EVENT_STATUS_RUNNING, + complete = UR_EVENT_STATUS_COMPLETE, // Since all BE values are positive, it is safe to use a negative value If you // add other ext_oneapi values ext_oneapi_unknown = -1 @@ -171,7 +211,7 @@ template struct compatibility_param_traits {}; #undef __SYCL_PARAM_TRAITS_SPEC } // namespace info -#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \ +#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, UrCode) \ namespace Namespace { \ namespace info { \ namespace DescType { \ @@ -183,7 +223,7 @@ template struct compatibility_param_traits {}; } /*Namespace*/ #define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC(Namespace, DescType, Desc, ReturnT, \ - PiCode) \ + UrCode) \ namespace Namespace { \ namespace info { \ namespace DescType { \ diff --git a/sycl/include/sycl/info/kernel_device_specific_traits.def b/sycl/include/sycl/info/kernel_device_specific_traits.def index b76908173b91a..46c02c2f638ab 100644 --- a/sycl/include/sycl/info/kernel_device_specific_traits.def +++ b/sycl/include/sycl/info/kernel_device_specific_traits.def @@ -1,12 +1,12 @@ -__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, global_work_size, sycl::range<3>, PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) -__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, work_group_size, size_t, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, global_work_size, sycl::range<3>, UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, work_group_size, size_t, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE) __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, compile_work_group_size, - sycl::range<3>, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE) + sycl::range<3>, UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE) __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, - preferred_work_group_size_multiple, size_t, PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE) -__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, private_mem_size, size_t, PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE) -__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, max_num_sub_groups, uint32_t, PI_KERNEL_MAX_NUM_SUB_GROUPS) -__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, compile_num_sub_groups, uint32_t, PI_KERNEL_COMPILE_NUM_SUB_GROUPS) -__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, max_sub_group_size, uint32_t, PI_KERNEL_MAX_SUB_GROUP_SIZE) -__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, compile_sub_group_size, uint32_t, PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL) -__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, ext_codeplay_num_regs, uint32_t, PI_KERNEL_GROUP_INFO_NUM_REGS) + preferred_work_group_size_multiple, size_t, UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, private_mem_size, size_t, UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, max_num_sub_groups, uint32_t, UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, compile_num_sub_groups, uint32_t, UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, max_sub_group_size, uint32_t, UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, compile_sub_group_size, uint32_t,UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, ext_codeplay_num_regs, uint32_t, UR_KERNEL_INFO_NUM_REGS) diff --git a/sycl/include/sycl/info/kernel_traits.def b/sycl/include/sycl/info/kernel_traits.def index 73ea6d334be06..5c39401fab423 100644 --- a/sycl/include/sycl/info/kernel_traits.def +++ b/sycl/include/sycl/info/kernel_traits.def @@ -1,5 +1,5 @@ -__SYCL_PARAM_TRAITS_SPEC(kernel, num_args, uint32_t, PI_KERNEL_INFO_NUM_ARGS) -__SYCL_PARAM_TRAITS_SPEC(kernel, attributes, std::string, PI_KERNEL_INFO_ATTRIBUTES) -__SYCL_PARAM_TRAITS_SPEC(kernel, function_name, std::string, PI_KERNEL_INFO_FUNCTION_NAME) -__SYCL_PARAM_TRAITS_SPEC(kernel, reference_count, uint32_t, PI_KERNEL_INFO_REFERENCE_COUNT) -__SYCL_PARAM_TRAITS_SPEC(kernel, context, sycl::context, PI_KERNEL_INFO_CONTEXT) +__SYCL_PARAM_TRAITS_SPEC(kernel, num_args, uint32_t, UR_KERNEL_INFO_NUM_ARGS) +__SYCL_PARAM_TRAITS_SPEC(kernel, attributes, std::string, UR_KERNEL_INFO_ATTRIBUTES) +__SYCL_PARAM_TRAITS_SPEC(kernel, function_name, std::string, UR_KERNEL_INFO_FUNCTION_NAME) +__SYCL_PARAM_TRAITS_SPEC(kernel, reference_count, uint32_t, UR_KERNEL_INFO_REFERENCE_COUNT) +__SYCL_PARAM_TRAITS_SPEC(kernel, context, sycl::context, UR_KERNEL_INFO_CONTEXT) diff --git a/sycl/include/sycl/info/platform_traits.def b/sycl/include/sycl/info/platform_traits.def index bd5ef1acd7c2a..aa82eadf06ef0 100644 --- a/sycl/include/sycl/info/platform_traits.def +++ b/sycl/include/sycl/info/platform_traits.def @@ -1,5 +1,5 @@ -__SYCL_PARAM_TRAITS_SPEC(platform, profile, std::string, PI_PLATFORM_INFO_PROFILE) -__SYCL_PARAM_TRAITS_SPEC(platform, version, std::string, PI_PLATFORM_INFO_VERSION) -__SYCL_PARAM_TRAITS_SPEC(platform, name, std::string, PI_PLATFORM_INFO_NAME) -__SYCL_PARAM_TRAITS_SPEC(platform, vendor, std::string, PI_PLATFORM_INFO_VENDOR) -__SYCL_PARAM_TRAITS_SPEC(platform, extensions, std::vector, PI_PLATFORM_INFO_EXTENSIONS) +__SYCL_PARAM_TRAITS_SPEC(platform, profile, std::string, UR_PLATFORM_INFO_PROFILE) +__SYCL_PARAM_TRAITS_SPEC(platform, version, std::string, UR_PLATFORM_INFO_VERSION) +__SYCL_PARAM_TRAITS_SPEC(platform, name, std::string, UR_PLATFORM_INFO_NAME) +__SYCL_PARAM_TRAITS_SPEC(platform, vendor, std::string, UR_PLATFORM_INFO_VENDOR_NAME) +__SYCL_PARAM_TRAITS_SPEC(platform, extensions, std::vector, UR_PLATFORM_INFO_EXTENSIONS) diff --git a/sycl/include/sycl/info/queue_traits.def b/sycl/include/sycl/info/queue_traits.def index 3385be7e35908..7e70c7bd8101c 100644 --- a/sycl/include/sycl/info/queue_traits.def +++ b/sycl/include/sycl/info/queue_traits.def @@ -1,3 +1,3 @@ -__SYCL_PARAM_TRAITS_SPEC(queue, context, sycl::context, PI_QUEUE_INFO_CONTEXT) -__SYCL_PARAM_TRAITS_SPEC(queue, device, sycl::device, PI_QUEUE_INFO_DEVICE) -__SYCL_PARAM_TRAITS_SPEC(queue, reference_count, uint32_t, PI_QUEUE_INFO_REFERENCE_COUNT) +__SYCL_PARAM_TRAITS_SPEC(queue, context, sycl::context, UR_QUEUE_INFO_CONTEXT) +__SYCL_PARAM_TRAITS_SPEC(queue, device, sycl::device, UR_QUEUE_INFO_DEVICE) +__SYCL_PARAM_TRAITS_SPEC(queue, reference_count, uint32_t, UR_QUEUE_INFO_REFERENCE_COUNT) diff --git a/sycl/include/sycl/interop_handle.hpp b/sycl/include/sycl/interop_handle.hpp index b76e27980aa24..81e4a9d559caa 100644 --- a/sycl/include/sycl/interop_handle.hpp +++ b/sycl/include/sycl/interop_handle.hpp @@ -15,13 +15,13 @@ #include // for __SYCL_EXPORT #include // for context_impl #include // for getSyclObjImpl -#include // for _pi_mem, pi_native_... #include // for device, device_impl #include -#include // for queue_impl +#include // for queue_impl #include // for accessor_property_list #include // for image #include // for buffer +#include // for ur_mem_handle_t, ur... #include // for shared_ptr #include // for int32_t @@ -185,7 +185,7 @@ class interop_handle { private: friend class detail::ExecCGCommand; friend class detail::DispatchHostTask; - using ReqToMem = std::pair; + using ReqToMem = std::pair; interop_handle(std::vector MemObjs, const std::shared_ptr &Queue, @@ -197,7 +197,7 @@ class interop_handle { template backend_return_t> getMemImpl(detail::AccessorImplHost *Req) const { - std::vector NativeHandles{getNativeMem(Req)}; + std::vector NativeHandles{getNativeMem(Req)}; return detail::BufferInterop::GetNativeObjs( NativeHandles); } @@ -209,12 +209,12 @@ class interop_handle { return reinterpret_cast(getNativeMem(Req)); } - __SYCL_EXPORT pi_native_handle + __SYCL_EXPORT ur_native_handle_t getNativeMem(detail::AccessorImplHost *Req) const; - __SYCL_EXPORT pi_native_handle + __SYCL_EXPORT ur_native_handle_t getNativeQueue(int32_t &NativeHandleDesc) const; - __SYCL_EXPORT pi_native_handle getNativeDevice() const; - __SYCL_EXPORT pi_native_handle getNativeContext() const; + __SYCL_EXPORT ur_native_handle_t getNativeDevice() const; + __SYCL_EXPORT ur_native_handle_t getNativeContext() const; std::shared_ptr MQueue; std::shared_ptr MDevice; diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp index 85c3bcda1f706..40db1d8461dde 100644 --- a/sycl/include/sycl/kernel.hpp +++ b/sycl/include/sycl/kernel.hpp @@ -16,13 +16,13 @@ #include // for __SYCL_EXPORT #include // for is_kernel_device_specif... #include // for OwnerLessBase -#include // for pi_native_handle #include #include #include #include // for device #include // for bundle_state #include // for range +#include // for ur_native_handle_t #include // for hash namespace sycl { @@ -167,10 +167,10 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase { /// Constructs a SYCL kernel object from a valid kernel_impl instance. kernel(std::shared_ptr Impl); - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; __SYCL_DEPRECATED("Use getNative() member function") - pi_native_handle getNativeImpl() const; + ur_native_handle_t getNativeImpl() const; std::shared_ptr impl; diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index edce0b66f160b..036bb6a3afe6a 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -13,13 +13,13 @@ #include // for __SYCL_EXPORT #include // for get_spec_constant_symboli... #include // for OwnerLessBase -#include // for pi_native_handle -#include // for cast #include +#include // for cast #include // for device #include // for kernel, kernel_bundle #include // for bundle_state #include // for property_list +#include // for ur_native_handle_t #include #include // PropertyT @@ -111,7 +111,7 @@ class __SYCL_EXPORT device_image_plain { bool has_kernel(const kernel_id &KernelID, const device &Dev) const noexcept; - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; protected: detail::DeviceImageImplPtr impl; @@ -472,7 +472,7 @@ class kernel_bundle : public detail::kernel_bundle_plain, for (const device_image &DevImg : *this) { ReturnValue.push_back( - detail::pi::cast( + detail::ur::cast( DevImg.getNative())); } diff --git a/sycl/include/sycl/kernel_handler.hpp b/sycl/include/sycl/kernel_handler.hpp index 20e89abdf14dd..1c59400cdc273 100644 --- a/sycl/include/sycl/kernel_handler.hpp +++ b/sycl/include/sycl/kernel_handler.hpp @@ -11,6 +11,7 @@ #include // for __SYCL_TYPE #include // for __SYCL_ALWAYS_INLINE #include // for feature_not_supported +#include // for UR_RESULT_ERROR_INVALID_OPERATION #ifdef __SYCL_DEVICE_ONLY__ #include diff --git a/sycl/include/sycl/memory_enums.hpp b/sycl/include/sycl/memory_enums.hpp index 1f3bd18780858..9ae997896f5e8 100644 --- a/sycl/include/sycl/memory_enums.hpp +++ b/sycl/include/sycl/memory_enums.hpp @@ -8,7 +8,7 @@ #pragma once -#include // for PI_MEMORY_ORDER_ACQUIRE, PI_MEMORY_ORDER_ACQ_REL +#include // for ur_memory_order_capability_flags_t #include // for memory_order #include // for vector @@ -49,33 +49,33 @@ inline constexpr auto memory_order_seq_cst = memory_order::seq_cst; namespace detail { inline std::vector -readMemoryOrderBitfield(pi_memory_order_capabilities bits) { +readMemoryOrderBitfield(ur_memory_order_capability_flags_t bits) { std::vector result; - if (bits & PI_MEMORY_ORDER_RELAXED) + if (bits & UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED) result.push_back(memory_order::relaxed); - if (bits & PI_MEMORY_ORDER_ACQUIRE) + if (bits & UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE) result.push_back(memory_order::acquire); - if (bits & PI_MEMORY_ORDER_RELEASE) + if (bits & UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE) result.push_back(memory_order::release); - if (bits & PI_MEMORY_ORDER_ACQ_REL) + if (bits & UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL) result.push_back(memory_order::acq_rel); - if (bits & PI_MEMORY_ORDER_SEQ_CST) + if (bits & UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST) result.push_back(memory_order::seq_cst); return result; } inline std::vector -readMemoryScopeBitfield(pi_memory_scope_capabilities bits) { +readMemoryScopeBitfield(ur_memory_scope_capability_flags_t bits) { std::vector result; - if (bits & PI_MEMORY_SCOPE_WORK_ITEM) + if (bits & UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM) result.push_back(memory_scope::work_item); - if (bits & PI_MEMORY_SCOPE_SUB_GROUP) + if (bits & UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP) result.push_back(memory_scope::sub_group); - if (bits & PI_MEMORY_SCOPE_WORK_GROUP) + if (bits & UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP) result.push_back(memory_scope::work_group); - if (bits & PI_MEMORY_SCOPE_DEVICE) + if (bits & UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE) result.push_back(memory_scope::device); - if (bits & PI_MEMORY_SCOPE_SYSTEM) + if (bits & UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM) result.push_back(memory_scope::system); return result; } diff --git a/sycl/include/sycl/platform.hpp b/sycl/include/sycl/platform.hpp index f431afbbcd673..0d10080e00142 100644 --- a/sycl/include/sycl/platform.hpp +++ b/sycl/include/sycl/platform.hpp @@ -15,12 +15,12 @@ #include #include #include -#include #include #include #include #include #include +#include #ifdef __SYCL_INTERNAL_API #include @@ -231,7 +231,7 @@ class __SYCL_EXPORT platform : public detail::OwnerLessBase { std::vector ext_oneapi_get_composite_devices() const; private: - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; std::shared_ptr impl; platform(std::shared_ptr impl) : impl(impl) {} diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index f3f8c53738d43..706ca59ea854b 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -2654,7 +2654,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// completed, otherwise returns false. bool ext_oneapi_empty() const; - pi_native_handle getNative(int32_t &NativeHandleDesc) const; + ur_native_handle_t getNative(int32_t &NativeHandleDesc) const; event ext_oneapi_get_last_event() const; diff --git a/sycl/include/sycl/sampler.hpp b/sycl/include/sycl/sampler.hpp index c4d02ae5b8c39..547fc6a4cfeb7 100644 --- a/sycl/include/sycl/sampler.hpp +++ b/sycl/include/sycl/sampler.hpp @@ -12,7 +12,6 @@ #include // for __SYCL_SPECIAL_CLASS, __SYCL_TYPE #include // for __SYCL_EXPORT #include // for getSyclObjImpl -#include // for PI_SAMPLER_ADDRESSING_MODE_CLAMP #include // for property_list #include // for size_t @@ -22,16 +21,16 @@ namespace sycl { inline namespace _V1 { enum class addressing_mode : unsigned int { - mirrored_repeat = PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT, - repeat = PI_SAMPLER_ADDRESSING_MODE_REPEAT, - clamp_to_edge = PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE, - clamp = PI_SAMPLER_ADDRESSING_MODE_CLAMP, - none = PI_SAMPLER_ADDRESSING_MODE_NONE + mirrored_repeat = 0x1134, // Value of CL_ADDRESS_MIRRORED_REPEAT + repeat = 0x1133, // Value of CL_ADDRESS_REPEAT + clamp_to_edge = 0x1131, // Value of CL_ADDRESS_CLAMP_TO_EDGE + clamp = 0x1132, // Value of CL_ADDRESS_CLAMP + none = 0x1130 // Value of CL_ADDRESS_NONE }; enum class filtering_mode : unsigned int { - nearest = PI_SAMPLER_FILTER_MODE_NEAREST, - linear = PI_SAMPLER_FILTER_MODE_LINEAR + nearest = 0x1140, // Value of CL_FILTER_NEAREST + linear = 0x1141 // Value of CL_FILTER_LINEAR }; enum class coordinate_normalization_mode : unsigned int { diff --git a/sycl/plugins/CMakeLists.txt b/sycl/plugins/CMakeLists.txt deleted file mode 100644 index dbf0d23787857..0000000000000 --- a/sycl/plugins/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang|IntelLLVM" ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-covered-switch-default") -endif() - -# all plugins on unified_runtime plugin and unified_runtime plugin is not an -# independent plugin, adding it explicitly -add_subdirectory(unified_runtime) - -foreach(plugin ${SYCL_ENABLE_PLUGINS}) - add_subdirectory(${plugin}) -endforeach() diff --git a/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp b/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp deleted file mode 100644 index 3a2ca6185f775..0000000000000 --- a/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp +++ /dev/null @@ -1,44 +0,0 @@ -//==------------ common_win_pi_trace.hpp - SYCL standard header file -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// this .hpp is injected. Be sure to define __SYCL_PLUGIN_DLL_NAME before -// including -#ifdef _WIN32 -#include -BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module - DWORD fdwReason, // reason for calling function - LPVOID lpReserved) { // reserved - - bool PrintPiTrace = false; - static const char *PiTrace = std::getenv("SYCL_PI_TRACE"); - static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0; - if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces - PrintPiTrace = true; - } - - // Perform actions based on the reason for calling. - switch (fdwReason) { - case DLL_PROCESS_DETACH: - if (PrintPiTrace) - std::cout << "---> DLL_PROCESS_DETACH " << __SYCL_PLUGIN_DLL_NAME << "\n" - << std::endl; - - break; - case DLL_PROCESS_ATTACH: - if (PrintPiTrace) - std::cout << "---> DLL_PROCESS_ATTACH " << __SYCL_PLUGIN_DLL_NAME << "\n" - << std::endl; - break; - case DLL_THREAD_ATTACH: - break; - case DLL_THREAD_DETACH: - break; - } - return TRUE; -} -#endif // WIN32 diff --git a/sycl/plugins/cuda/CMakeLists.txt b/sycl/plugins/cuda/CMakeLists.txt deleted file mode 100644 index a9b37896ea71d..0000000000000 --- a/sycl/plugins/cuda/CMakeLists.txt +++ /dev/null @@ -1,72 +0,0 @@ -message(STATUS "Including the PI API CUDA backend.") - -if (SYCL_ENABLE_XPTI_TRACING) - # cannot rely on cmake support for CUDA; it assumes runtime API is being used. - # we only require the CUDA driver API to be used - # CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library. - find_package(CUDA 10.1 REQUIRED) - - # The following two if's can be removed when FindCUDA -> FindCUDAToolkit. - # CUDA_CUPTI_INCLUDE_DIR -> CUDAToolkit_CUPTI_INCLUDE_DIR - include(FindCUDACupti) - if(NOT CUDA_CUPTI_INCLUDE_DIR) - find_cuda_cupti_include_dir() - endif() - # CUDA_cupti_LIBRARY -> CUDAToolkit_cupti_LIBRARY - if(NOT CUDA_cupti_LIBRARY) - find_cuda_cupti_library() - endif() - - set(XPTI_PROXY_SRC "${CMAKE_SOURCE_DIR}/../xpti/src/xpti_proxy.cpp") - set(XPTI_INCLUDE - "${CMAKE_SOURCE_DIR}/../xpti/include" - "${CUDA_CUPTI_INCLUDE_DIR}" - ) - set(XPTI_LIBS - "${CMAKE_DL_LIBS}" - "${CUDA_cupti_LIBRARY}" - ) -endif() - -# Get the CUDA adapter sources so they can be shared with the CUDA PI plugin -get_target_property(UR_CUDA_ADAPTER_SOURCES ur_adapter_cuda SOURCES) - -add_sycl_plugin(cuda - SOURCES - ${UR_CUDA_ADAPTER_SOURCES} - # Some code is shared with the UR adapter - "../unified_runtime/pi2ur.hpp" - "${sycl_inc_dir}/sycl/detail/pi.h" - "${sycl_inc_dir}/sycl/detail/pi.hpp" - "pi_cuda.hpp" - "pi_cuda.cpp" - ${XPTI_PROXY_SRC} - INCLUDE_DIRS - ${sycl_inc_dir} - ${XPTI_INCLUDE} - ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime # for Unified Runtime - ${UNIFIED_RUNTIME_SOURCE_DIR}/source/ # for adapters/cuda - "${SYCL_SOURCE_DIR}/source" # for compiler.hpp - LIBRARIES - cudadrv - ${XPTI_LIBS} - UnifiedRuntime-Headers - UnifiedRuntimeCommon - UnifiedMemoryFramework - HEADER "${CMAKE_CURRENT_SOURCE_DIR}/include/features.hpp" -) - -if (SYCL_ENABLE_XPTI_TRACING) - target_compile_definitions(pi_cuda PRIVATE - XPTI_ENABLE_INSTRUMENTATION - XPTI_STATIC_LIBRARY - ) -endif() - -if(CUDA_cupti_LIBRARY) - target_compile_definitions(pi_cuda PRIVATE - "-DCUPTI_LIB_PATH=\"${CUDA_cupti_LIBRARY}\"") -endif() - -set_target_properties(pi_cuda PROPERTIES LINKER_LANGUAGE CXX) - diff --git a/sycl/plugins/cuda/include/features.hpp b/sycl/plugins/cuda/include/features.hpp deleted file mode 100644 index 63cfb0751290d..0000000000000 --- a/sycl/plugins/cuda/include/features.hpp +++ /dev/null @@ -1,11 +0,0 @@ -//===-- features.hpp - CUDA Plugin feature macros -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#define SYCL_EXT_ONEAPI_BACKEND_CUDA 1 diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp deleted file mode 100644 index 9a20ddf984df0..0000000000000 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ /dev/null @@ -1,1405 +0,0 @@ -//==---------- pi_cuda.cpp - CUDA Plugin -----------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \file pi_cuda.cpp -/// Implementation of CUDA Plugin. -/// -/// \ingroup sycl_pi_cuda - -#include -#include -#include -#include - -// Forward declarations -struct cuda_tracing_context_t_; - -void enableCUDATracing(cuda_tracing_context_t_ *ctx); -void disableCUDATracing(cuda_tracing_context_t_ *ctx); -cuda_tracing_context_t_ *createCUDATracingContext(); -void freeCUDATracingContext(cuda_tracing_context_t_ *Ctx); - -//-- PI API implementation -extern "C" { - -pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, - pi_uint32 *NumPlatforms) { - return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms); -} - -pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piPlatformGetInfo(Platform, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextPlatformGetNativeHandle(pi_platform Platform, - pi_native_handle *NativeHandle) { - return pi2ur::piextPlatformGetNativeHandle(Platform, NativeHandle); -} - -pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform *Platform) { - return pi2ur::piextPlatformCreateWithNativeHandle(NativeHandle, Platform); -} - -pi_result piPluginGetLastError(char **message) { - return pi2ur::piPluginGetLastError(message); -} - -pi_result piPluginGetBackendOption(pi_platform platform, - const char *frontend_option, - const char **backend_option) { - return pi2ur::piPluginGetBackendOption(platform, frontend_option, - backend_option); -} - -pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, - pi_uint32 NumEntries, pi_device *Devices, - pi_uint32 *NumDevices) { - return pi2ur::piDevicesGet(Platform, DeviceType, NumEntries, Devices, - NumDevices); -} - -pi_result piDeviceRetain(pi_device Device) { - return pi2ur::piDeviceRetain(Device); -} - -pi_result piDeviceRelease(pi_device Device) { - return pi2ur::piDeviceRelease(Device); -} - -pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piDeviceGetInfo(Device, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piDevicePartition(pi_device Device, - const pi_device_partition_property *Properties, - pi_uint32 NumDevices, pi_device *OutDevices, - pi_uint32 *OutNumDevices) { - return pi2ur::piDevicePartition(Device, Properties, NumDevices, OutDevices, - OutNumDevices); -} - -pi_result piextDeviceSelectBinary(pi_device Device, pi_device_binary *Binaries, - pi_uint32 NumBinaries, - pi_uint32 *SelectedBinaryInd) { - return pi2ur::piextDeviceSelectBinary(Device, Binaries, NumBinaries, - SelectedBinaryInd); -} - -pi_result piextDeviceGetNativeHandle(pi_device Device, - pi_native_handle *NativeHandle) { - - return pi2ur::piextDeviceGetNativeHandle(Device, NativeHandle); -} - -pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform Platform, - pi_device *Device) { - - return pi2ur::piextDeviceCreateWithNativeHandle(NativeHandle, Platform, - Device); -} - -pi_result piContextCreate(const pi_context_properties *Properties, - pi_uint32 NumDevices, const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, - const void *PrivateInfo, size_t CB, - void *UserData), - void *UserData, pi_context *RetContext) { - return pi2ur::piContextCreate(Properties, NumDevices, Devices, PFnNotify, - UserData, RetContext); -} - -pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piContextGetInfo(Context, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piextContextSetExtendedDeleter(pi_context Context, - pi_context_extended_deleter Function, - void *UserData) { - return pi2ur::piextContextSetExtendedDeleter(Context, Function, UserData); -} - -pi_result piextContextGetNativeHandle(pi_context Context, - pi_native_handle *NativeHandle) { - return pi2ur::piextContextGetNativeHandle(Context, NativeHandle); -} - -pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_uint32 NumDevices, - const pi_device *Devices, - bool OwnNativeHandle, - pi_context *RetContext) { - return pi2ur::piextContextCreateWithNativeHandle( - NativeHandle, NumDevices, Devices, OwnNativeHandle, RetContext); -} - -pi_result piContextRetain(pi_context Context) { - - return pi2ur::piContextRetain(Context); -} - -pi_result piContextRelease(pi_context Context) { - return pi2ur::piContextRelease(Context); -} - -pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Flags, pi_queue *Queue) { - pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; - return piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, pi_queue *Queue) { - return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piQueueGetInfo(Queue, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piQueueRetain(pi_queue Queue) { return pi2ur::piQueueRetain(Queue); } - -pi_result piQueueRelease(pi_queue Queue) { - return pi2ur::piQueueRelease(Queue); -} - -pi_result piQueueFinish(pi_queue Queue) { return pi2ur::piQueueFinish(Queue); } - -pi_result piQueueFlush(pi_queue Queue) { return pi2ur::piQueueFlush(Queue); } - -pi_result piextQueueGetNativeHandle(pi_queue Queue, - pi_native_handle *NativeHandle, - int32_t *NativeHandleDesc) { - - return pi2ur::piextQueueGetNativeHandle(Queue, NativeHandle, - NativeHandleDesc); -} - -pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, - int32_t NativeHandleDesc, - pi_context Context, pi_device Device, - bool OwnNativeHandle, - pi_queue_properties *Properties, - pi_queue *Queue) { - - return pi2ur::piextQueueCreateWithNativeHandle( - NativeHandle, NativeHandleDesc, Context, Device, OwnNativeHandle, - Properties, Queue); -} - -pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, - void *HostPtr, pi_mem *RetMem, - const pi_mem_properties *properties) { - return pi2ur::piMemBufferCreate(Context, Flags, Size, HostPtr, RetMem, - properties); -} - -pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piMemGetInfo(Mem, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piMemRetain(pi_mem Mem) { return pi2ur::piMemRetain(Mem); } - -pi_result piMemRelease(pi_mem Mem) { return pi2ur::piMemRelease(Mem); } - -pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, - const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, void *HostPtr, - pi_mem *RetImage) { - - return pi2ur::piMemImageCreate(Context, Flags, ImageFormat, ImageDesc, - HostPtr, RetImage); -} - -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, - pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); -} - -pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool ownNativeHandle, pi_mem *Mem) { - return pi2ur::piextMemCreateWithNativeHandle(NativeHandle, Context, - ownNativeHandle, Mem); -} - -pi_result piProgramCreate(pi_context Context, const void *ILBytes, - size_t Length, pi_program *Program) { - return pi2ur::piProgramCreate(Context, ILBytes, Length, Program); -} - -pi_result piProgramCreateWithBinary( - pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, - const size_t *Lengths, const unsigned char **Binaries, - size_t NumMetadataEntries, const pi_device_binary_property *Metadata, - pi_int32 *BinaryStatus, pi_program *Program) { - - return pi2ur::piProgramCreateWithBinary(Context, NumDevices, DeviceList, - Lengths, Binaries, NumMetadataEntries, - Metadata, BinaryStatus, Program); -} - -pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *Img) { - return pi2ur::piextMemImageCreateWithNativeHandle( - NativeHandle, Context, OwnNativeHandle, ImageFormat, ImageDesc, Img); -} - -pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetInfo(Program, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - pi_uint32 NumInputPrograms, - const pi_program *InputPrograms, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData, pi_program *RetProgram) { - return pi2ur::piProgramLink(Context, NumDevices, DeviceList, Options, - NumInputPrograms, InputPrograms, PFnNotify, - UserData, RetProgram); -} - -pi_result piProgramCompile( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, pi_uint32 NumInputHeaders, - const pi_program *InputHeaders, const char **HeaderIncludeNames, - void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { - - return pi2ur::piProgramCompile(Program, NumDevices, DeviceList, Options, - NumInputHeaders, InputHeaders, - HeaderIncludeNames, PFnNotify, UserData); -} - -pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData) { - return pi2ur::piProgramBuild(Program, NumDevices, DeviceList, Options, - PFnNotify, UserData); -} - -pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, - pi_program_build_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetBuildInfo(Program, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramRetain(pi_program Program) { - return pi2ur::piProgramRetain(Program); -} - -pi_result piProgramRelease(pi_program Program) { - return pi2ur::piProgramRelease(Program); -} - -pi_result piextProgramGetNativeHandle(pi_program Program, - pi_native_handle *NativeHandle) { - return pi2ur::piextProgramGetNativeHandle(Program, NativeHandle); -} - -pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_program *Program) { - return pi2ur::piextProgramCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Program); -} - -pi_result piKernelCreate(pi_program Program, const char *KernelName, - pi_kernel *RetKernel) { - - return pi2ur::piKernelCreate(Program, KernelName, RetKernel); -} - -pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, - const void *ArgValue) { - - return pi2ur::piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue); -} - -pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem_obj_property *ArgProperties, - const pi_mem *ArgValue) { - return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, - ArgValue); -} - -pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_sampler *ArgValue) { - - return pi2ur::piextKernelSetArgSampler(Kernel, ArgIndex, ArgValue); -} - -pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetInfo(Kernel, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context Context, - pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, - RetMem); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, - pi_image_handle *RetHandle) { - return pi2ur::piextMemUnsampledImageCreate(Context, Device, ImgMem, - ImageFormat, ImageDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, - pi_image_handle *RetHandle) { - return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, - ImageDesc, Sampler, RetHandle); -} - -__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( - pi_context Context, const pi_sampler_properties *SamplerProperties, - float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, - pi_sampler *RetSampler) { - return pi2ur::piextBindlessImageSamplerCreate( - Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, - MaxAnisotropy, RetSampler); -} - -__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context Context, - pi_device Device, - pi_image_mem_handle MipMem, - unsigned int Level, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); -} - -__SYCL_EXPORT pi_result piextMemImageFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemImageCopy( - pi_queue Queue, void *DstPtr, const void *SrcPtr, - const pi_image_desc *SrcImageDesc, const pi_image_desc *DestImageDesc, - const pi_image_format *SrcImageFormat, - const pi_image_format *DestImageFormat, const pi_image_copy_flags Flags, - pi_image_offset SrcOffset, pi_image_offset DstOffset, - pi_image_region CopyExtent, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextMemImageCopy( - Queue, DstPtr, SrcPtr, SrcImageDesc, DestImageDesc, SrcImageFormat, - DestImageFormat, Flags, SrcOffset, DstOffset, CopyExtent, - NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_context Context, - pi_image_mem_handle MemHandle, - pi_image_info ParamName, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextMemImageGetInfo(Context, MemHandle, ParamName, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextImportExternalMemory( - pi_context Context, pi_device Device, pi_external_mem_descriptor *MemDesc, - pi_interop_mem_handle *RetHandle) { - return pi2ur::piextImportExternalMemory(Context, Device, MemDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemMapExternalArray( - pi_context Context, pi_device Device, pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, pi_interop_mem_handle MemHandle, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, - ImageDesc, MemHandle, RetMem); -} - -__SYCL_EXPORT pi_result piextMemReleaseInterop(pi_context Context, - pi_device Device, - pi_interop_mem_handle ExtMem) { - return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); -} - -__SYCL_EXPORT pi_result -piextImportExternalSemaphore(pi_context Context, pi_device Device, - pi_external_semaphore_descriptor *SemDesc, - pi_interop_semaphore_handle *RetHandle) { - return pi2ur::piextImportExternalSemaphore(Context, Device, SemDesc, - RetHandle); -} - -__SYCL_EXPORT pi_result -piextReleaseExternalSemaphore(pi_context Context, pi_device Device, - pi_interop_semaphore_handle SemHandle) { - return pi2ur::piextReleaseExternalSemaphore(Context, Device, SemHandle); -} - -__SYCL_EXPORT pi_result piextWaitExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasWaitValue, - pi_uint64 WaitValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextWaitExternalSemaphore(Queue, SemHandle, HasWaitValue, - WaitValue, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextSignalExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasSignalValue, - pi_uint64 SignalValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextSignalExternalSemaphore(Queue, SemHandle, HasSignalValue, - SignalValue, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_group_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetGroupInfo(Kernel, Device, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_sub_group_info ParamName, - size_t InputValueSize, const void *InputValue, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetSubGroupInfo( - Kernel, Device, ParamName, InputValueSize, InputValue, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelRetain(pi_kernel Kernel) { - - return pi2ur::piKernelRetain(Kernel); -} - -pi_result piKernelRelease(pi_kernel Kernel) { - - return pi2ur::piKernelRelease(Kernel); -} - -pi_result -piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piEnqueueKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextEnqueueKernelLaunchCustom( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumPropsInLaunchPropList, - const pi_launch_property *LaunchPropList, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - return pi2ur::piextEnqueueKernelLaunchCustom( - Queue, Kernel, WorkDim, GlobalWorkSize, LocalWorkSize, - NumPropsInLaunchPropList, LaunchPropList, NumEventsInWaitList, - EventsWaitList, OutEvent); -} - -pi_result piextEnqueueCooperativeKernelLaunch( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piextEnqueueCooperativeKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - pi_program Program, - bool OwnNativeHandle, - pi_kernel *Kernel) { - - return pi2ur::piextKernelCreateWithNativeHandle( - NativeHandle, Context, Program, OwnNativeHandle, Kernel); -} - -pi_result piextKernelGetNativeHandle(pi_kernel Kernel, - pi_native_handle *NativeHandle) { - return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle); -} - -pi_result piextKernelSuggestMaxCooperativeGroupCount( - pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize, - pi_uint32 *GroupCountRet) { - return pi2ur::piextKernelSuggestMaxCooperativeGroupCount( - Kernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet); -} - -pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - return pi2ur::piEventCreate(Context, RetEvent); -} - -pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piEventGetInfo(Event, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piEventGetProfilingInfo(Event, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { - return pi2ur::piEventsWait(NumEvents, EventList); -} - -pi_result piEventSetCallback(pi_event Event, pi_int32 CommandExecCallbackType, - void (*PFnNotify)(pi_event Event, - pi_int32 EventCommandStatus, - void *UserData), - void *UserData) { - return pi2ur::piEventSetCallback(Event, CommandExecCallbackType, PFnNotify, - UserData); -} - -pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { - return pi2ur::piEventSetStatus(Event, ExecutionStatus); -} - -pi_result piEventRetain(pi_event Event) { return pi2ur::piEventRetain(Event); } - -pi_result piEventRelease(pi_event Event) { - return pi2ur::piEventRelease(Event); -} - -pi_result piextEventGetNativeHandle(pi_event Event, - pi_native_handle *NativeHandle) { - - return pi2ur::piextEventGetNativeHandle(Event, NativeHandle); -} - -pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_event *Event) { - return pi2ur::piextEventCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Event); -} - -pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueTimestampRecordingExp( - Queue, Blocking, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piSamplerCreate(pi_context Context, - const pi_sampler_properties *SamplerProperties, - pi_sampler *RetSampler) { - return pi2ur::piSamplerCreate(Context, SamplerProperties, RetSampler); -} - -pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piSamplerGetInfo(Sampler, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piSamplerRetain(pi_sampler Sampler) { - return pi2ur::piSamplerRetain(Sampler); -} - -pi_result piSamplerRelease(pi_sampler Sampler) { - return pi2ur::piSamplerRelease(Sampler); -} - -pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWait(Queue, NumEventsInWaitList, EventWaitList, - OutEvent); -} - -pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWaitWithBarrier(Queue, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, - pi_bool BlockingRead, size_t Offset, - size_t Size, void *Dst, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferRead(Queue, Src, BlockingRead, Offset, Size, - Dst, NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piEnqueueMemBufferReadRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferReadRect( - Queue, Buffer, BlockingRead, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, - pi_bool BlockingWrite, size_t Offset, - size_t Size, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWrite(Queue, Buffer, BlockingWrite, Offset, - Size, Ptr, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWriteRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWriteRect( - Queue, Buffer, BlockingWrite, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopy(Queue, SrcMem, DstMem, SrcOffset, - DstOffset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopyRect( - pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, - pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, - size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopyRect( - Queue, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferFill(Queue, Buffer, Pattern, PatternSize, - Offset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, - pi_map_flags MapFlags, size_t Offset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent, void **RetMap) { - - return pi2ur::piEnqueueMemBufferMap(Queue, Mem, BlockingMap, MapFlags, Offset, - Size, NumEventsInWaitList, EventWaitList, - OutEvent, RetMap); -} - -pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - - return pi2ur::piEnqueueMemUnmap(Queue, Mem, MappedPtr, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piMemImageGetInfo(Image, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, - pi_bool BlockingRead, pi_image_offset Origin, - pi_image_region Region, size_t RowPitch, - size_t SlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemImageRead( - Queue, Image, BlockingRead, Origin, Region, RowPitch, SlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, - pi_bool BlockingWrite, pi_image_offset Origin, - pi_image_region Region, size_t InputRowPitch, - size_t InputSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemImageWrite( - Queue, Image, BlockingWrite, Origin, Region, InputRowPitch, - InputSlicePitch, Ptr, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result -piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, - pi_image_offset SrcOrigin, pi_image_offset DstOrigin, - pi_image_region Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piEnqueueMemImageCopy(Queue, SrcImage, DstImage, SrcOrigin, - DstOrigin, Region, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, - const void *FillColor, const size_t *Origin, - const size_t *Region, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemImageFill(Queue, Image, FillColor, Origin, Region, - NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, - pi_buffer_create_type BufferCreateType, - void *BufferCreateInfo, pi_mem *RetMem) { - - return pi2ur::piMemBufferPartition(Buffer, Flags, BufferCreateType, - BufferCreateInfo, RetMem); -} - -pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, - const char *FunctionName, - pi_uint64 *FunctionPointerRet) { - return pi2ur::piextGetDeviceFunctionPointer(Device, Program, FunctionName, - FunctionPointerRet); -} - -pi_result piextGetGlobalVariablePointer(pi_device Device, pi_program Program, - const char *GlobalVariableName, - size_t *GlobalVariableSize, - void **GlobalVariablePointerRet) { - return pi2ur::piextGetGlobalVariablePointer( - Device, Program, GlobalVariableName, GlobalVariableSize, - GlobalVariablePointerRet); -} - -pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - - return pi2ur::piextUSMDeviceAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - - return pi2ur::piextUSMSharedAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -__SYCL_EXPORT pi_result piextUSMPitchedAlloc( - void **ResultPtr, size_t *ResultPitch, pi_context Context, pi_device Device, - pi_usm_mem_properties *Properties, size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - - return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, - Properties, WidthInBytes, Height, - ElementSizeBytes); -} - -pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - return pi2ur::piextUSMHostAlloc(ResultPtr, Context, Properties, Size, - Alignment); -} - -pi_result piextUSMFree(pi_context Context, void *Ptr) { - - return pi2ur::piextUSMFree(Context, Ptr); -} - -pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); -} - -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); -} - -pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, - const void *SrcPtr, size_t Size, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy(Queue, Blocking, DstPtr, SrcPtr, Size, - NumEventsInWaitlist, EventsWaitlist, - Event); -} - -pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueuePrefetch( - Queue, Ptr, Size, Flags, NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, - size_t Length, pi_mem_advice Advice, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueueMemAdvise(Queue, Ptr, Length, Advice, OutEvent); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, - size_t Pitch, size_t PatternSize, - const void *Pattern, size_t Width, - size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueFill2D(Queue, Ptr, Pitch, PatternSize, Pattern, - Width, Height, NumEventsWaitList, - EventsWaitList, Event); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, - size_t Pitch, int Value, - size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemset2D(Queue, Ptr, Pitch, Value, Width, Height, - NumEventsWaitList, EventsWaitlist, - Event); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( - pi_queue Queue, pi_bool Blocking, void *DstPtr, size_t DstPitch, - const void *SrcPtr, size_t SrcPitch, size_t Width, size_t Height, - pi_uint32 NumEventsInWaitlist, const pi_event *EventWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy2D( - Queue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, - NumEventsInWaitlist, EventWaitlist, Event); -} - -pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, - pi_mem_alloc_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextUSMGetMemAllocInfo(Context, Ptr, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextUSMImport(const void *HostPtr, size_t Size, pi_context Context) { - return pi2ur::piextUSMImport(HostPtr, Size, Context); -} - -pi_result piextUSMRelease(const void *HostPtr, pi_context Context) { - return pi2ur::piextUSMRelease(HostPtr, Context); -} - -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, - size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - return pi2ur::piextEnqueueDeviceGlobalVariableWrite( - Queue, Program, Name, BlockingWrite, Count, Offset, Src, - NumEventsInWaitList, EventsWaitList, Event); -} - -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, - size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - return pi2ur::piextEnqueueDeviceGlobalVariableRead( - Queue, Program, Name, BlockingRead, Count, Offset, Dst, - NumEventsInWaitList, EventsWaitList, Event); - - return PI_SUCCESS; -} - -pi_result piextEnqueueReadHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - (void)Queue; - (void)Program; - (void)PipeSymbol; - (void)Blocking; - (void)Ptr; - (void)Size; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)Event; - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - die("piextEnqueueReadHostPipe: not implemented"); - return {}; -} - -pi_result piextEnqueueWriteHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - (void)Queue; - (void)Program; - (void)PipeSymbol; - (void)Blocking; - (void)Ptr; - (void)Size; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)Event; - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - die("piextEnqueueWriteHostPipe: not implemented"); - return {}; -} - -pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName, - size_t ParamValueSize, const void *ParamValue) { - - return pi2ur::piKernelSetExecInfo(Kernel, ParamName, ParamValueSize, - ParamValue); -} - -pi_result piextProgramSetSpecializationConstant(pi_program Prog, - pi_uint32 SpecID, size_t Size, - const void *SpecValue) { - return pi2ur::piextProgramSetSpecializationConstant(Prog, SpecID, Size, - SpecValue); -} - -// Command buffer extension -pi_result piextCommandBufferCreate(pi_context Context, pi_device Device, - const pi_ext_command_buffer_desc *Desc, - pi_ext_command_buffer *RetCommandBuffer) { - return pi2ur::piextCommandBufferCreate(Context, Device, Desc, - RetCommandBuffer); -} - -pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRetain(CommandBuffer); -} - -pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRelease(CommandBuffer); -} - -pi_result piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferFinalize(CommandBuffer); -} - -pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint, - pi_ext_command_buffer_command *Command) { - return pi2ur::piextCommandBufferNDRangeKernel( - CommandBuffer, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint, - Command); -} - -pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, - size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemcpyUSM(CommandBuffer, DstPtr, SrcPtr, Size, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopy( - CommandBuffer, SrcMem, DstMem, SrcOffset, DstOffset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, - pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopyRect( - CommandBuffer, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferRead( - CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferReadRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWrite( - CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWriteRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferFill( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, - size_t PatternSize, size_t Offset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferFill( - CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, - pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferFillUSM( - CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferPrefetchUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferPrefetchUSM(CommandBuffer, Ptr, Size, Flags, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferAdviseUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Length, - pi_mem_advice Advice, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferAdviseUSM(CommandBuffer, Ptr, Length, Advice, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, - pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piextEnqueueCommandBuffer( - CommandBuffer, Queue, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command Command, - pi_ext_command_buffer_update_kernel_launch_desc *Desc) { - return pi2ur::piextCommandBufferUpdateKernelLaunch(Command, Desc); -} - -pi_result -piextCommandBufferRetainCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferRetainCommand(Command); -} - -pi_result -piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferReleaseCommand(Command); -} - -pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - return pi2ur::piextPluginGetOpaqueData(opaque_data_param, opaque_data_return); -} - -pi_result piTearDown(void *PluginParameter) { - return pi2ur::piTearDown(PluginParameter); -} - -pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime); -} - -pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextEnablePeerAccess(command_device, peer_device); -} - -pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextDisablePeerAccess(command_device, peer_device); -} - -pi_result piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result -piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, - pi_virtual_mem_granularity_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, - param_value_size, param_value, - param_value_size_ret); -} - -pi_result piextPhysicalMemCreate(pi_context context, pi_device device, - size_t mem_size, - pi_physical_mem *ret_physical_mem) { - return pi2ur::piextPhysicalMemCreate(context, device, mem_size, - ret_physical_mem); -} - -pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { - return pi2ur::piextPhysicalMemRetain(physical_mem); -} - -pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { - return pi2ur::piextPhysicalMemRelease(physical_mem); -} - -pi_result piextVirtualMemReserve(pi_context context, const void *start, - size_t range_size, void **ret_ptr) { - return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); -} - -pi_result piextVirtualMemFree(pi_context context, const void *ptr, - size_t range_size) { - return pi2ur::piextVirtualMemFree(context, ptr, range_size); -} - -pi_result piextVirtualMemMap(pi_context context, const void *ptr, - size_t range_size, pi_physical_mem physical_mem, - size_t offset, pi_virtual_access_flags flags) { - return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, - offset, flags); -} - -pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, - size_t range_size) { - return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); -} - -pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, - size_t range_size, - pi_virtual_access_flags flags) { - return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); -} - -pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, - size_t range_size, - pi_virtual_mem_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, - param_value_size, param_value, - param_value_size_ret); -} - -pi_result -piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, - void *Data, pi_uint32 NumMems, const pi_mem *Mems, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, - NumEventsInWaitList, EventWaitList, - Event); -} - -const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; - -pi_result piPluginInit(pi_plugin *PluginInit) { - // Check that the major version matches in PiVersion and SupportedVersion - _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); - - // PI interface supports higher version or the same version. - size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); - if (strlen(SupportedVersion) >= PluginVersionSize) - return PI_ERROR_INVALID_VALUE; - strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); - - // Set whole function table to zero to make it easier to detect if - // functions are not set up below. - std::memset(&(PluginInit->PiFunctionTable), 0, - sizeof(PluginInit->PiFunctionTable)); - - cuda_tracing_context_t_ *Ctx = createCUDATracingContext(); - enableCUDATracing(Ctx); - -// Forward calls to CUDA RT. -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); -#include -#undef _PI_API - - return PI_SUCCESS; -} - -#ifdef _WIN32 -#define __SYCL_PLUGIN_DLL_NAME "pi_cuda.dll" -#include "../common_win_pi_trace/common_win_pi_trace.hpp" -#undef __SYCL_PLUGIN_DLL_NAME -#endif - -} // extern "C" diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp deleted file mode 100644 index 8c5112f4cc9d1..0000000000000 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ /dev/null @@ -1,89 +0,0 @@ -//===-- pi_cuda.hpp - CUDA Plugin -----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \defgroup sycl_pi_cuda CUDA Plugin -/// \ingroup sycl_pi - -/// \file pi_cuda.hpp -/// Declarations for CUDA Plugin. It is the interface between the -/// device-agnostic SYCL runtime layer and underlying CUDA runtime. -/// -/// \ingroup sycl_pi_cuda - -#ifndef PI_CUDA_HPP -#define PI_CUDA_HPP - -// This version should be incremented for any change made to this file or its -// corresponding .cpp file. -#define _PI_CUDA_PLUGIN_VERSION 1 - -#define _PI_CUDA_PLUGIN_VERSION_STRING \ - _PI_PLUGIN_VERSION_STRING(_PI_CUDA_PLUGIN_VERSION) - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Share code between the PI Plugin and UR Adapter -#include - -using _pi_stream_guard = std::unique_lock; - -struct _pi_platform : ur_platform_handle_t_ { - using ur_platform_handle_t_::ur_platform_handle_t_; -}; - -struct _pi_device : ur_device_handle_t_ { - using ur_device_handle_t_::ur_device_handle_t_; -}; - -struct _pi_context : ur_context_handle_t_ { - using ur_context_handle_t_::ur_context_handle_t_; -}; - -struct _pi_mem : ur_mem_handle_t_ { - using ur_mem_handle_t_::ur_mem_handle_t_; -}; - -struct _pi_queue : ur_queue_handle_t_ { - using ur_queue_handle_t_::ur_queue_handle_t_; -}; - -struct _pi_event : ur_event_handle_t_ { - using ur_event_handle_t_::ur_event_handle_t_; -}; - -struct _pi_program : ur_program_handle_t_ { - using ur_program_handle_t_::ur_program_handle_t_; -}; - -struct _pi_kernel : ur_kernel_handle_t_ { - using ur_kernel_handle_t_::ur_kernel_handle_t_; -}; - -struct _pi_sampler : ur_sampler_handle_t_ { - using ur_sampler_handle_t_::ur_sampler_handle_t_; -}; - -struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { - using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; -}; - -struct _pi_physical_mem : ur_physical_mem_handle_t_ { - using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; -}; - -#endif // PI_CUDA_HPP diff --git a/sycl/plugins/hip/CMakeLists.txt b/sycl/plugins/hip/CMakeLists.txt deleted file mode 100644 index 62fe6b721fea5..0000000000000 --- a/sycl/plugins/hip/CMakeLists.txt +++ /dev/null @@ -1,204 +0,0 @@ - -# Set default PI HIP platform to AMD -set(SYCL_BUILD_PI_HIP_PLATFORM "AMD" CACHE STRING "PI HIP platform, AMD or NVIDIA") - -message(STATUS "Including the PI API HIP backend for ${SYCL_BUILD_PI_HIP_PLATFORM}.") - -# Set default ROCm installation directory -set(SYCL_BUILD_PI_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") - -# Set HIP include and lib dirs -set(SYCL_BUILD_PI_HIP_INCLUDE_DIR "" CACHE STRING "Override HIP include dir path (set to \"\" for default behavior)") -set(SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR "" CACHE STRING "Override HSA include dir path (set to \"\" for default behavior)") - -if(SYCL_ENABLE_KERNEL_FUSION) - set(SYCL_ENABLE_COMGR ON) -else(SYCL_ENABLE_KERNEL_FUSION) - set(SYCL_ENABLE_COMGR OFF) -endif(SYCL_ENABLE_KERNEL_FUSION) - -if("${SYCL_BUILD_PI_HIP_INCLUDE_DIR}" STREQUAL "") - set(PI_HIP_INCLUDE_DIR "${SYCL_BUILD_PI_HIP_ROCM_DIR}/include") -else() - set(PI_HIP_INCLUDE_DIR "${SYCL_BUILD_PI_HIP_INCLUDE_DIR}") -endif() - -if("${SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR}" STREQUAL "") - # pre v6 versions of ROCM prefix their include directory with /hsa but this - # was fixed in v6 to act like a well-behaved package - foreach (SUF hsa/include include) - if (EXISTS "${SYCL_BUILD_PI_HIP_ROCM_DIR}/${SUF}") - set(PI_HIP_HSA_INCLUDE_DIR "${SYCL_BUILD_PI_HIP_ROCM_DIR}/${SUF}") - break() - endif() - endforeach() -else() - set(PI_HIP_HSA_INCLUDE_DIR "${SYCL_BUILD_PI_HIP_INCLUDE_DIR}") -endif() - -# Set HIP lib dir -set(SYCL_BUILD_PI_HIP_LIB_DIR "" CACHE STRING "Override HIP lib dir path (set to \"\" for default behavior)") -if("${SYCL_BUILD_PI_HIP_LIB_DIR}" STREQUAL "") - set(PI_HIP_LIB_DIR "${SYCL_BUILD_PI_HIP_ROCM_DIR}/lib") -else() - set(PI_HIP_LIB_DIR "${SYCL_BUILD_PI_HIP_LIB_DIR}") -endif() - -# Set up defaults for UR -set(UR_HIP_INCLUDE_DIR "${PI_HIP_INCLUDE_DIR}" CACHE PATH - "Custom ROCm HIP include dir") -set(UR_HIP_HSA_INCLUDE_DIRS "${PI_HIP_HSA_INCLUDE_DIR}" CACHE PATH - "Custom ROCm HSA include dir") -set(UR_HIP_LIB_DIR "${PI_HIP_LIB_DIR}" CACHE PATH - "Custom ROCm HIP library dir") - -# Mark override options for advanced usage -mark_as_advanced(SYCL_BUILD_PI_HIP_INCLUDE_DIR SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR SYCL_BUILD_PI_HIP_LIB_DIR) - -# Check if ROCm install paths exists -# N.B. Doesn't check if all override options are set: HSA and HIP include (and HIP lib for AMD platform) -if(("${SYCL_BUILD_PI_HIP_INCLUDE_DIR}" STREQUAL "") OR - ("${SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR}" STREQUAL "") OR - (("${SYCL_BUILD_PI_HIP_LIB_DIR}" STREQUAL "") AND ("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD"))) - - if(NOT EXISTS "${SYCL_BUILD_PI_HIP_ROCM_DIR}") - message(FATAL_ERROR "Couldn't find ROCm installation in '${SYCL_BUILD_PI_HIP_ROCM_DIR}'," - " please set SYCL_BUILD_PI_HIP_ROCM_DIR to the path of the ROCm installation.") - endif() -endif() - -# Check if HIP include path exists -if(NOT EXISTS "${PI_HIP_INCLUDE_DIR}") - if("${SYCL_BUILD_PI_HIP_INCLUDE_DIR}" STREQUAL "") - message(FATAL_ERROR "Couldn't find the HIP include directory at '${PI_HIP_INCLUDE_DIR}'," - " please check ROCm installation and possibly set SYCL_BUILD_PI_HIP_INCLUDE_DIR to the path of the HIP include directory for non-standard install paths.") - else() - message(FATAL_ERROR "Couldn't find the HIP include directory at '${PI_HIP_INCLUDE_DIR}'," - " please set SYCL_BUILD_PI_HIP_INCLUDE_DIR to the path of the HIP include directory from the ROCm installation.") - endif() -endif() - -# Check if HSA include path exists -if(NOT EXISTS "${PI_HIP_HSA_INCLUDE_DIR}") - if("${SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR}" STREQUAL "") - message(FATAL_ERROR "Couldn't find the HSA include directory at '${PI_HIP_HSA_INCLUDE_DIR}'," - " please check ROCm installation and possibly set SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR to the path of the HSA include directory for non-standard install paths.") - else() - message(FATAL_ERROR "Couldn't find the HSA include directory at '${PI_HIP_HSA_INCLUDE_DIR}'," - " please set SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR to the path of the HSA include directory from the ROCm installation.") - endif() -endif() - -# Check if HIP library path exists (AMD platform only) -if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD") - if(NOT EXISTS "${PI_HIP_LIB_DIR}") - if("${SYCL_BUILD_PI_HIP_LIB_DIR}" STREQUAL "") - message(FATAL_ERROR "Couldn't find the HIP library directory at '${PI_HIP_LIB_DIR}'," - " please check ROCm installation and possibly set SYCL_BUILD_PI_HIP_LIB_DIR to the path of the HIP library directory for non-standard install paths.") - else() - message(FATAL_ERROR "Couldn't find the HIP library directory at '${PI_HIP_LIB_DIR}'," - " please set SYCL_BUILD_PI_HIP_LIB_DIR to the path of the HIP library directory from the ROCm installation.") - endif() - endif() -endif() - -# Set includes used in added library (rocmdrv) -set(HIP_HEADERS "${PI_HIP_INCLUDE_DIR};${PI_HIP_HSA_INCLUDE_DIR}") - -# Get the HIP sources so they can be shared with HIP PI plugin -get_target_property(UR_HIP_ADAPTER_SOURCES ur_adapter_hip SOURCES) - -# Create pi_hip library -add_sycl_plugin(hip - SOURCES - # Some code is shared with the UR adapter - "../unified_runtime/pi2ur.hpp" - ${UR_HIP_ADAPTER_SOURCES} - "${sycl_inc_dir}/sycl/detail/pi.h" - "${sycl_inc_dir}/sycl/detail/pi.hpp" - "pi_hip.hpp" - "pi_hip.cpp" - INCLUDE_DIRS - ${sycl_plugin_dir} - ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime - ${UNIFIED_RUNTIME_SOURCE_DIR}/source/ # for adapters/hip - "${SYCL_SOURCE_DIR}/source" # for compiler.hpp - LIBRARIES - UnifiedRuntime-Headers - UnifiedRuntimeCommon - UnifiedMemoryFramework - HEADER - ${CMAKE_CURRENT_SOURCE_DIR}/include/features.hpp -) -set_target_properties(pi_hip PROPERTIES LINKER_LANGUAGE CXX) - -if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD") - - set_target_properties( - rocmdrv PROPERTIES - IMPORTED_LOCATION "${PI_HIP_LIB_DIR}/libamdhip64.so" - INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - ) - target_link_libraries(pi_hip PUBLIC rocmdrv) - - if(SYCL_ENABLE_COMGR) - set(PI_COMGR_VERSION5_HEADER "${PI_HIP_INCLUDE_DIR}/amd_comgr/amd_comgr.h") - set(PI_COMGR_VERSION4_HEADER "${PI_HIP_INCLUDE_DIR}/amd_comgr.h") - # The COMGR header changed location between ROCm version 4 and 5. - # Check for the existence in the version 5 location or fallback to version 4 - if(NOT EXISTS "${PI_COMGR_VERSION5_HEADER}") - if(NOT EXISTS "${PI_COMGR_VERSION4_HEADER}") - message(FATAL_ERROR "Could not find AMD COMGR header at " - "${PI_COMGR_VERSION5_HEADER} or " - "${PI_COMGR_VERSION4_HEADER}, " - "check ROCm installation") - else() - target_compile_definitions(pi_hip PRIVATE UR_COMGR_VERSION4_INCLUDE) - endif() - endif() - - add_library(amd_comgr SHARED IMPORTED GLOBAL) - set_target_properties( - amd_comgr PROPERTIES - IMPORTED_LOCATION "${PI_HIP_LIB_DIR}/libamd_comgr.so" - INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - ) - target_link_libraries(pi_hip PUBLIC amd_comgr) - target_compile_definitions(pi_hip PRIVATE SYCL_ENABLE_KERNEL_FUSION) - endif(SYCL_ENABLE_COMGR) - - # Set HIP define to select AMD platform - target_compile_definitions(pi_hip PRIVATE __HIP_PLATFORM_AMD__) -elseif("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "NVIDIA") - # Import CUDA libraries - find_package(CUDA REQUIRED) - list(APPEND HIP_HEADERS ${CUDA_INCLUDE_DIRS}) - - # cudadrv may be defined by the CUDA plugin - if(NOT TARGET cudadrv) - add_library(cudadrv SHARED IMPORTED GLOBAL) - set_target_properties( - cudadrv PROPERTIES - IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - ) - endif() - - add_library(cudart SHARED IMPORTED GLOBAL) - set_target_properties( - cudart PROPERTIES - IMPORTED_LOCATION ${CUDA_CUDART_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - ) - target_link_libraries(pi_hip PUBLIC cudadrv cudart) - - # Set HIP define to select NVIDIA platform - target_compile_definitions(pi_hip PRIVATE __HIP_PLATFORM_NVIDIA__) -else() - message(FATAL_ERROR "Unspecified PI HIP platform please set SYCL_BUILD_PI_HIP_PLATFORM to 'AMD' or 'NVIDIA'") -endif() - diff --git a/sycl/plugins/hip/include/features.hpp b/sycl/plugins/hip/include/features.hpp deleted file mode 100644 index 5cb65cfb918d8..0000000000000 --- a/sycl/plugins/hip/include/features.hpp +++ /dev/null @@ -1,11 +0,0 @@ -//===-- features.hpp - HIP Plugin feature macros --------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#define SYCL_EXT_ONEAPI_BACKEND_HIP 1 diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp deleted file mode 100644 index 83688c2e19cf4..0000000000000 --- a/sycl/plugins/hip/pi_hip.cpp +++ /dev/null @@ -1,1412 +0,0 @@ -//==---------- pi_hip.cpp - HIP Plugin ------------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \file pi_hip.cpp -/// Implementation of HIP Plugin. -/// -/// \ingroup sycl_pi_hip - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -//-- PI API implementation -extern "C" { - -pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, - pi_uint32 *NumPlatforms) { - return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms); -} - -pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piPlatformGetInfo(Platform, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextPlatformGetNativeHandle(pi_platform Platform, - pi_native_handle *NativeHandle) { - return pi2ur::piextPlatformGetNativeHandle(Platform, NativeHandle); -} - -pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform *Platform) { - return pi2ur::piextPlatformCreateWithNativeHandle(NativeHandle, Platform); -} - -pi_result piPluginGetLastError(char **message) { - return pi2ur::piPluginGetLastError(message); -} - -pi_result piPluginGetBackendOption(pi_platform platform, - const char *frontend_option, - const char **backend_option) { - return pi2ur::piPluginGetBackendOption(platform, frontend_option, - backend_option); -} - -pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, - pi_uint32 NumEntries, pi_device *Devices, - pi_uint32 *NumDevices) { - return pi2ur::piDevicesGet(Platform, DeviceType, NumEntries, Devices, - NumDevices); -} - -pi_result piDeviceRetain(pi_device Device) { - return pi2ur::piDeviceRetain(Device); -} - -pi_result piDeviceRelease(pi_device Device) { - return pi2ur::piDeviceRelease(Device); -} - -pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piDeviceGetInfo(Device, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piDevicePartition(pi_device Device, - const pi_device_partition_property *Properties, - pi_uint32 NumDevices, pi_device *OutDevices, - pi_uint32 *OutNumDevices) { - return pi2ur::piDevicePartition(Device, Properties, NumDevices, OutDevices, - OutNumDevices); -} - -pi_result piextDeviceSelectBinary(pi_device Device, pi_device_binary *Binaries, - pi_uint32 NumBinaries, - pi_uint32 *SelectedBinaryInd) { - return pi2ur::piextDeviceSelectBinary(Device, Binaries, NumBinaries, - SelectedBinaryInd); -} - -pi_result piextDeviceGetNativeHandle(pi_device Device, - pi_native_handle *NativeHandle) { - - return pi2ur::piextDeviceGetNativeHandle(Device, NativeHandle); -} - -pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform Platform, - pi_device *Device) { - - return pi2ur::piextDeviceCreateWithNativeHandle(NativeHandle, Platform, - Device); -} - -pi_result piContextCreate(const pi_context_properties *Properties, - pi_uint32 NumDevices, const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, - const void *PrivateInfo, size_t CB, - void *UserData), - void *UserData, pi_context *RetContext) { - return pi2ur::piContextCreate(Properties, NumDevices, Devices, PFnNotify, - UserData, RetContext); -} - -pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piContextGetInfo(Context, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piextContextSetExtendedDeleter(pi_context Context, - pi_context_extended_deleter Function, - void *UserData) { - return pi2ur::piextContextSetExtendedDeleter(Context, Function, UserData); -} - -pi_result piextContextGetNativeHandle(pi_context Context, - pi_native_handle *NativeHandle) { - return pi2ur::piextContextGetNativeHandle(Context, NativeHandle); -} - -pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_uint32 NumDevices, - const pi_device *Devices, - bool OwnNativeHandle, - pi_context *RetContext) { - return pi2ur::piextContextCreateWithNativeHandle( - NativeHandle, NumDevices, Devices, OwnNativeHandle, RetContext); -} - -pi_result piContextRetain(pi_context Context) { - - return pi2ur::piContextRetain(Context); -} - -pi_result piContextRelease(pi_context Context) { - return pi2ur::piContextRelease(Context); -} - -pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Flags, pi_queue *Queue) { - pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; - return piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, pi_queue *Queue) { - return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piQueueGetInfo(Queue, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piQueueRetain(pi_queue Queue) { return pi2ur::piQueueRetain(Queue); } - -pi_result piQueueRelease(pi_queue Queue) { - return pi2ur::piQueueRelease(Queue); -} - -pi_result piQueueFinish(pi_queue Queue) { return pi2ur::piQueueFinish(Queue); } - -pi_result piQueueFlush(pi_queue Queue) { return pi2ur::piQueueFlush(Queue); } - -pi_result piextQueueGetNativeHandle(pi_queue Queue, - pi_native_handle *NativeHandle, - int32_t *NativeHandleDesc) { - - return pi2ur::piextQueueGetNativeHandle(Queue, NativeHandle, - NativeHandleDesc); -} - -pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, - int32_t NativeHandleDesc, - pi_context Context, pi_device Device, - bool OwnNativeHandle, - pi_queue_properties *Properties, - pi_queue *Queue) { - - return pi2ur::piextQueueCreateWithNativeHandle( - NativeHandle, NativeHandleDesc, Context, Device, OwnNativeHandle, - Properties, Queue); -} - -pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, - void *HostPtr, pi_mem *RetMem, - const pi_mem_properties *properties) { - return pi2ur::piMemBufferCreate(Context, Flags, Size, HostPtr, RetMem, - properties); -} - -pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piMemGetInfo(Mem, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piMemRetain(pi_mem Mem) { return pi2ur::piMemRetain(Mem); } - -pi_result piMemRelease(pi_mem Mem) { return pi2ur::piMemRelease(Mem); } - -pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, - const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, void *HostPtr, - pi_mem *RetImage) { - - return pi2ur::piMemImageCreate(Context, Flags, ImageFormat, ImageDesc, - HostPtr, RetImage); -} - -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, - pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); -} - -pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool ownNativeHandle, pi_mem *Mem) { - return pi2ur::piextMemCreateWithNativeHandle(NativeHandle, Context, - ownNativeHandle, Mem); -} - -pi_result piProgramCreate(pi_context Context, const void *ILBytes, - size_t Length, pi_program *Program) { - return pi2ur::piProgramCreate(Context, ILBytes, Length, Program); -} - -pi_result piProgramCreateWithBinary( - pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, - const size_t *Lengths, const unsigned char **Binaries, - size_t NumMetadataEntries, const pi_device_binary_property *Metadata, - pi_int32 *BinaryStatus, pi_program *Program) { - - return pi2ur::piProgramCreateWithBinary(Context, NumDevices, DeviceList, - Lengths, Binaries, NumMetadataEntries, - Metadata, BinaryStatus, Program); -} - -pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *Img) { - return pi2ur::piextMemImageCreateWithNativeHandle( - NativeHandle, Context, OwnNativeHandle, ImageFormat, ImageDesc, Img); -} - -pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetInfo(Program, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - pi_uint32 NumInputPrograms, - const pi_program *InputPrograms, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData, pi_program *RetProgram) { - return pi2ur::piProgramLink(Context, NumDevices, DeviceList, Options, - NumInputPrograms, InputPrograms, PFnNotify, - UserData, RetProgram); -} - -pi_result piProgramCompile( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, pi_uint32 NumInputHeaders, - const pi_program *InputHeaders, const char **HeaderIncludeNames, - void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { - - return pi2ur::piProgramCompile(Program, NumDevices, DeviceList, Options, - NumInputHeaders, InputHeaders, - HeaderIncludeNames, PFnNotify, UserData); -} - -pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData) { - return pi2ur::piProgramBuild(Program, NumDevices, DeviceList, Options, - PFnNotify, UserData); -} - -pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, - pi_program_build_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetBuildInfo(Program, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramRetain(pi_program Program) { - return pi2ur::piProgramRetain(Program); -} - -pi_result piProgramRelease(pi_program Program) { - return pi2ur::piProgramRelease(Program); -} - -pi_result piextProgramGetNativeHandle(pi_program Program, - pi_native_handle *NativeHandle) { - return pi2ur::piextProgramGetNativeHandle(Program, NativeHandle); -} - -pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_program *Program) { - return pi2ur::piextProgramCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Program); -} - -pi_result piKernelCreate(pi_program Program, const char *KernelName, - pi_kernel *RetKernel) { - - return pi2ur::piKernelCreate(Program, KernelName, RetKernel); -} - -pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, - const void *ArgValue) { - - return pi2ur::piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue); -} - -pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem_obj_property *ArgProperties, - const pi_mem *ArgValue) { - return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, - ArgValue); -} - -pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_sampler *ArgValue) { - - return pi2ur::piextKernelSetArgSampler(Kernel, ArgIndex, ArgValue); -} - -pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetInfo(Kernel, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context Context, - pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, - RetMem); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, - pi_image_handle *RetHandle) { - return pi2ur::piextMemUnsampledImageCreate(Context, Device, ImgMem, - ImageFormat, ImageDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, - pi_image_handle *RetHandle) { - return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, - ImageDesc, Sampler, RetHandle); -} - -__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( - pi_context Context, const pi_sampler_properties *SamplerProperties, - float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, - pi_sampler *RetSampler) { - return pi2ur::piextBindlessImageSamplerCreate( - Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, - MaxAnisotropy, RetSampler); -} - -__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context Context, - pi_device Device, - pi_image_mem_handle MipMem, - unsigned int Level, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); -} - -__SYCL_EXPORT pi_result piextMemImageFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemImageCopy( - pi_queue Queue, void *DstPtr, const void *SrcPtr, - const pi_image_desc *SrcImageDesc, const pi_image_desc *DestImageDesc, - const pi_image_format *SrcImageFormat, - const pi_image_format *DestImageFormat, const pi_image_copy_flags Flags, - pi_image_offset SrcOffset, pi_image_offset DstOffset, - pi_image_region CopyExtent, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextMemImageCopy( - Queue, DstPtr, SrcPtr, SrcImageDesc, DestImageDesc, SrcImageFormat, - DestImageFormat, Flags, SrcOffset, DstOffset, CopyExtent, - NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_context Context, - pi_image_mem_handle MemHandle, - pi_image_info ParamName, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextMemImageGetInfo(Context, MemHandle, ParamName, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextImportExternalMemory( - pi_context Context, pi_device Device, pi_external_mem_descriptor *MemDesc, - pi_interop_mem_handle *RetHandle) { - return pi2ur::piextImportExternalMemory(Context, Device, MemDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemMapExternalArray( - pi_context Context, pi_device Device, pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, pi_interop_mem_handle MemHandle, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, - ImageDesc, MemHandle, RetMem); -} - -__SYCL_EXPORT pi_result piextMemReleaseInterop(pi_context Context, - pi_device Device, - pi_interop_mem_handle ExtMem) { - return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); -} - -__SYCL_EXPORT pi_result -piextImportExternalSemaphore(pi_context Context, pi_device Device, - pi_external_semaphore_descriptor *SemDesc, - pi_interop_semaphore_handle *RetHandle) { - return pi2ur::piextImportExternalSemaphore(Context, Device, SemDesc, - RetHandle); -} - -__SYCL_EXPORT pi_result -piextReleaseExternalSemaphore(pi_context Context, pi_device Device, - pi_interop_semaphore_handle SemHandle) { - return pi2ur::piextReleaseExternalSemaphore(Context, Device, SemHandle); -} - -__SYCL_EXPORT pi_result piextWaitExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasWaitValue, - pi_uint64 WaitValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextWaitExternalSemaphore(Queue, SemHandle, HasWaitValue, - WaitValue, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextSignalExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasSignalValue, - pi_uint64 SignalValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextSignalExternalSemaphore(Queue, SemHandle, HasSignalValue, - SignalValue, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_group_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetGroupInfo(Kernel, Device, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_sub_group_info ParamName, - size_t InputValueSize, const void *InputValue, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetSubGroupInfo( - Kernel, Device, ParamName, InputValueSize, InputValue, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelRetain(pi_kernel Kernel) { - - return pi2ur::piKernelRetain(Kernel); -} - -pi_result piKernelRelease(pi_kernel Kernel) { - - return pi2ur::piKernelRelease(Kernel); -} - -pi_result -piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piEnqueueKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextEnqueueCooperativeKernelLaunch( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piextEnqueueCooperativeKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextEnqueueKernelLaunchCustom( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumPropsInLaunchPropList, - const pi_launch_property *LaunchPropList, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - (void)Queue; - (void)Kernel; - (void)WorkDim; - (void)GlobalWorkSize; - (void)LocalWorkSize; - (void)NumPropsInLaunchPropList; - (void)LaunchPropList; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)OutEvent; - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - return PI_ERROR_UNSUPPORTED_FEATURE; -} - -pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - pi_program Program, - bool OwnNativeHandle, - pi_kernel *Kernel) { - - return pi2ur::piextKernelCreateWithNativeHandle( - NativeHandle, Context, Program, OwnNativeHandle, Kernel); -} - -pi_result piextKernelGetNativeHandle(pi_kernel Kernel, - pi_native_handle *NativeHandle) { - return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle); -} - -pi_result piextKernelSuggestMaxCooperativeGroupCount( - pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize, - pi_uint32 *GroupCountRet) { - return pi2ur::piextKernelSuggestMaxCooperativeGroupCount( - Kernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet); -} - -pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - return pi2ur::piEventCreate(Context, RetEvent); -} - -pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piEventGetInfo(Event, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piEventGetProfilingInfo(Event, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { - return pi2ur::piEventsWait(NumEvents, EventList); -} - -pi_result piEventSetCallback(pi_event Event, pi_int32 CommandExecCallbackType, - void (*PFnNotify)(pi_event Event, - pi_int32 EventCommandStatus, - void *UserData), - void *UserData) { - return pi2ur::piEventSetCallback(Event, CommandExecCallbackType, PFnNotify, - UserData); -} - -pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { - return pi2ur::piEventSetStatus(Event, ExecutionStatus); -} - -pi_result piEventRetain(pi_event Event) { return pi2ur::piEventRetain(Event); } - -pi_result piEventRelease(pi_event Event) { - return pi2ur::piEventRelease(Event); -} - -pi_result piextEventGetNativeHandle(pi_event Event, - pi_native_handle *NativeHandle) { - - return pi2ur::piextEventGetNativeHandle(Event, NativeHandle); -} - -pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_event *Event) { - return pi2ur::piextEventCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Event); -} - -pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueTimestampRecordingExp( - Queue, Blocking, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piSamplerCreate(pi_context Context, - const pi_sampler_properties *SamplerProperties, - pi_sampler *RetSampler) { - return pi2ur::piSamplerCreate(Context, SamplerProperties, RetSampler); -} - -pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piSamplerGetInfo(Sampler, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piSamplerRetain(pi_sampler Sampler) { - return pi2ur::piSamplerRetain(Sampler); -} - -pi_result piSamplerRelease(pi_sampler Sampler) { - return pi2ur::piSamplerRelease(Sampler); -} - -pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWait(Queue, NumEventsInWaitList, EventWaitList, - OutEvent); -} - -pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWaitWithBarrier(Queue, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, - pi_bool BlockingRead, size_t Offset, - size_t Size, void *Dst, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferRead(Queue, Src, BlockingRead, Offset, Size, - Dst, NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piEnqueueMemBufferReadRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferReadRect( - Queue, Buffer, BlockingRead, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, - pi_bool BlockingWrite, size_t Offset, - size_t Size, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWrite(Queue, Buffer, BlockingWrite, Offset, - Size, Ptr, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWriteRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWriteRect( - Queue, Buffer, BlockingWrite, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopy(Queue, SrcMem, DstMem, SrcOffset, - DstOffset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopyRect( - pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, - pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, - size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopyRect( - Queue, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferFill(Queue, Buffer, Pattern, PatternSize, - Offset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, - pi_map_flags MapFlags, size_t Offset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent, void **RetMap) { - - return pi2ur::piEnqueueMemBufferMap(Queue, Mem, BlockingMap, MapFlags, Offset, - Size, NumEventsInWaitList, EventWaitList, - OutEvent, RetMap); -} - -pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - - return pi2ur::piEnqueueMemUnmap(Queue, Mem, MappedPtr, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piMemImageGetInfo(Image, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, - pi_bool BlockingRead, pi_image_offset Origin, - pi_image_region Region, size_t RowPitch, - size_t SlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemImageRead( - Queue, Image, BlockingRead, Origin, Region, RowPitch, SlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, - pi_bool BlockingWrite, pi_image_offset Origin, - pi_image_region Region, size_t InputRowPitch, - size_t InputSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemImageWrite( - Queue, Image, BlockingWrite, Origin, Region, InputRowPitch, - InputSlicePitch, Ptr, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result -piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, - pi_image_offset SrcOrigin, pi_image_offset DstOrigin, - pi_image_region Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piEnqueueMemImageCopy(Queue, SrcImage, DstImage, SrcOrigin, - DstOrigin, Region, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, - const void *FillColor, const size_t *Origin, - const size_t *Region, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemImageFill(Queue, Image, FillColor, Origin, Region, - NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, - pi_buffer_create_type BufferCreateType, - void *BufferCreateInfo, pi_mem *RetMem) { - - return pi2ur::piMemBufferPartition(Buffer, Flags, BufferCreateType, - BufferCreateInfo, RetMem); -} - -pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, - const char *FunctionName, - pi_uint64 *FunctionPointerRet) { - return pi2ur::piextGetDeviceFunctionPointer(Device, Program, FunctionName, - FunctionPointerRet); -} - -pi_result piextGetGlobalVariablePointer(pi_device Device, pi_program Program, - const char *GlobalVariableName, - size_t *GlobalVariableSize, - void **GlobalVariablePointerRet) { - return pi2ur::piextGetGlobalVariablePointer( - Device, Program, GlobalVariableName, GlobalVariableSize, - GlobalVariablePointerRet); -} - -pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - - return pi2ur::piextUSMDeviceAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - - return pi2ur::piextUSMSharedAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -__SYCL_EXPORT pi_result piextUSMPitchedAlloc( - void **ResultPtr, size_t *ResultPitch, pi_context Context, pi_device Device, - pi_usm_mem_properties *Properties, size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - - return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, - Properties, WidthInBytes, Height, - ElementSizeBytes); -} - -pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - return pi2ur::piextUSMHostAlloc(ResultPtr, Context, Properties, Size, - Alignment); -} - -pi_result piextUSMFree(pi_context Context, void *Ptr) { - - return pi2ur::piextUSMFree(Context, Ptr); -} - -pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); -} - -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); -} - -pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, - const void *SrcPtr, size_t Size, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy(Queue, Blocking, DstPtr, SrcPtr, Size, - NumEventsInWaitlist, EventsWaitlist, - Event); -} - -pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueuePrefetch( - Queue, Ptr, Size, Flags, NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, - size_t Length, pi_mem_advice Advice, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueueMemAdvise(Queue, Ptr, Length, Advice, OutEvent); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, - size_t Pitch, size_t PatternSize, - const void *Pattern, size_t Width, - size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueFill2D(Queue, Ptr, Pitch, PatternSize, Pattern, - Width, Height, NumEventsWaitList, - EventsWaitList, Event); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, - size_t Pitch, int Value, - size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemset2D(Queue, Ptr, Pitch, Value, Width, Height, - NumEventsWaitList, EventsWaitlist, - Event); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( - pi_queue Queue, pi_bool Blocking, void *DstPtr, size_t DstPitch, - const void *SrcPtr, size_t SrcPitch, size_t Width, size_t Height, - pi_uint32 NumEventsInWaitlist, const pi_event *EventWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy2D( - Queue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, - NumEventsInWaitlist, EventWaitlist, Event); -} - -pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, - pi_mem_alloc_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextUSMGetMemAllocInfo(Context, Ptr, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextUSMImport(const void *HostPtr, size_t Size, pi_context Context) { - return pi2ur::piextUSMImport(HostPtr, Size, Context); -} - -pi_result piextUSMRelease(const void *HostPtr, pi_context Context) { - return pi2ur::piextUSMRelease(HostPtr, Context); -} - -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, - size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - return pi2ur::piextEnqueueDeviceGlobalVariableWrite( - Queue, Program, Name, BlockingWrite, Count, Offset, Src, - NumEventsInWaitList, EventsWaitList, Event); -} - -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, - size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - return pi2ur::piextEnqueueDeviceGlobalVariableRead( - Queue, Program, Name, BlockingRead, Count, Offset, Dst, - NumEventsInWaitList, EventsWaitList, Event); - - return PI_SUCCESS; -} - -pi_result piextEnqueueReadHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - (void)Queue; - (void)Program; - (void)PipeSymbol; - (void)Blocking; - (void)Ptr; - (void)Size; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)Event; - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - die("piextEnqueueReadHostPipe: not implemented"); - return {}; -} - -pi_result piextEnqueueWriteHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - (void)Queue; - (void)Program; - (void)PipeSymbol; - (void)Blocking; - (void)Ptr; - (void)Size; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)Event; - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - die("piextEnqueueWriteHostPipe: not implemented"); - return {}; -} - -pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName, - size_t ParamValueSize, const void *ParamValue) { - - return pi2ur::piKernelSetExecInfo(Kernel, ParamName, ParamValueSize, - ParamValue); -} - -pi_result piextProgramSetSpecializationConstant(pi_program Prog, - pi_uint32 SpecID, size_t Size, - const void *SpecValue) { - return pi2ur::piextProgramSetSpecializationConstant(Prog, SpecID, Size, - SpecValue); -} - -// Command buffer extension -pi_result piextCommandBufferCreate(pi_context Context, pi_device Device, - const pi_ext_command_buffer_desc *Desc, - pi_ext_command_buffer *RetCommandBuffer) { - return pi2ur::piextCommandBufferCreate(Context, Device, Desc, - RetCommandBuffer); -} - -pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRetain(CommandBuffer); -} - -pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRelease(CommandBuffer); -} - -pi_result piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferFinalize(CommandBuffer); -} - -pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint, - pi_ext_command_buffer_command *Command) { - return pi2ur::piextCommandBufferNDRangeKernel( - CommandBuffer, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint, - Command); -} - -pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, - size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemcpyUSM(CommandBuffer, DstPtr, SrcPtr, Size, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopy( - CommandBuffer, SrcMem, DstMem, SrcOffset, DstOffset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, - pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopyRect( - CommandBuffer, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferRead( - CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferReadRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWrite( - CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWriteRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferFill( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, - size_t PatternSize, size_t Offset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferFill( - CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, - pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferFillUSM( - CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferPrefetchUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferPrefetchUSM(CommandBuffer, Ptr, Size, Flags, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferAdviseUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Length, - pi_mem_advice Advice, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferAdviseUSM(CommandBuffer, Ptr, Length, Advice, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, - pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piextEnqueueCommandBuffer( - CommandBuffer, Queue, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command Command, - pi_ext_command_buffer_update_kernel_launch_desc *Desc) { - return pi2ur::piextCommandBufferUpdateKernelLaunch(Command, Desc); -} - -pi_result -piextCommandBufferRetainCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferRetainCommand(Command); -} - -pi_result -piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferReleaseCommand(Command); -} - -pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - return pi2ur::piextPluginGetOpaqueData(opaque_data_param, opaque_data_return); -} - -pi_result piTearDown(void *PluginParameter) { - return pi2ur::piTearDown(PluginParameter); -} - -pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime); -} - -pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextEnablePeerAccess(command_device, peer_device); -} - -pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextDisablePeerAccess(command_device, peer_device); -} - -pi_result piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result -piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, - pi_virtual_mem_granularity_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, - param_value_size, param_value, - param_value_size_ret); -} - -pi_result piextPhysicalMemCreate(pi_context context, pi_device device, - size_t mem_size, - pi_physical_mem *ret_physical_mem) { - return pi2ur::piextPhysicalMemCreate(context, device, mem_size, - ret_physical_mem); -} - -pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { - return pi2ur::piextPhysicalMemRetain(physical_mem); -} - -pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { - return pi2ur::piextPhysicalMemRelease(physical_mem); -} - -pi_result piextVirtualMemReserve(pi_context context, const void *start, - size_t range_size, void **ret_ptr) { - return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); -} - -pi_result piextVirtualMemFree(pi_context context, const void *ptr, - size_t range_size) { - return pi2ur::piextVirtualMemFree(context, ptr, range_size); -} - -pi_result piextVirtualMemMap(pi_context context, const void *ptr, - size_t range_size, pi_physical_mem physical_mem, - size_t offset, pi_virtual_access_flags flags) { - return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, - offset, flags); -} - -pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, - size_t range_size) { - return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); -} - -pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, - size_t range_size, - pi_virtual_access_flags flags) { - return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); -} - -pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, - size_t range_size, - pi_virtual_mem_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, - param_value_size, param_value, - param_value_size_ret); -} - -pi_result -piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, - void *Data, pi_uint32 NumMems, const pi_mem *Mems, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, - NumEventsInWaitList, EventWaitList, - Event); -} - -const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; - -pi_result piPluginInit(pi_plugin *PluginInit) { - // Check that the major version matches in PiVersion and SupportedVersion - _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); - - // PI interface supports higher version or the same version. - size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); - if (strlen(SupportedVersion) >= PluginVersionSize) - return PI_ERROR_INVALID_VALUE; - strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); - - // Set whole function table to zero to make it easier to detect if - // functions are not set up below. - std::memset(&(PluginInit->PiFunctionTable), 0, - sizeof(PluginInit->PiFunctionTable)); - -// Forward calls to HIP RT. -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); -#include -#undef _PI_API - - return PI_SUCCESS; -} - -#ifdef _WIN32 -#define __SYCL_PLUGIN_DLL_NAME "pi_hip.dll" -#include "../common_win_pi_trace/common_win_pi_trace.hpp" -#undef __SYCL_PLUGIN_DLL_NAME -#endif - -} // extern "C" diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp deleted file mode 100644 index bec26c9866fdb..0000000000000 --- a/sycl/plugins/hip/pi_hip.hpp +++ /dev/null @@ -1,102 +0,0 @@ -//===-- pi_hip.hpp - HIP Plugin -------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \defgroup sycl_pi_hip HIP Plugin -/// \ingroup sycl_pi - -/// \file pi_hip.hpp -/// Declarations for HIP Plugin. It is the interface between the -/// device-agnostic SYCL runtime layer and underlying HIP runtime. -/// -/// \ingroup sycl_pi_hip - -#ifndef PI_HIP_HPP -#define PI_HIP_HPP - -// This version should be incremented for any change made to this file or its -// corresponding .cpp file. -#define _PI_HIP_PLUGIN_VERSION 1 - -#define _PI_HIP_PLUGIN_VERSION_STRING \ - _PI_PLUGIN_VERSION_STRING(_PI_HIP_PLUGIN_VERSION) - -#include "sycl/detail/pi.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pi2ur.hpp" - -using _pi_stream_guard = std::unique_lock; - -struct _pi_platform : ur_platform_handle_t_ { - using ur_platform_handle_t_::ur_platform_handle_t_; -}; - -struct _pi_device : ur_device_handle_t_ { - using ur_device_handle_t_::ur_device_handle_t_; -}; - -struct _pi_context : ur_context_handle_t_ { - using ur_context_handle_t_::ur_context_handle_t_; -}; - -struct _pi_mem : ur_mem_handle_t_ { - using ur_mem_handle_t_::ur_mem_handle_t_; -}; - -struct _pi_queue : ur_queue_handle_t_ { - using ur_queue_handle_t_::ur_queue_handle_t_; -}; - -struct _pi_event : ur_event_handle_t_ { - using ur_event_handle_t_::ur_event_handle_t_; -}; - -struct _pi_program : ur_program_handle_t_ { - using ur_program_handle_t_::ur_program_handle_t_; -}; - -struct _pi_kernel : ur_kernel_handle_t_ { - using ur_kernel_handle_t_::ur_kernel_handle_t_; -}; - -struct _pi_sampler : ur_sampler_handle_t_ { - using ur_sampler_handle_t_::ur_sampler_handle_t_; -}; - -struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { - using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; -}; - -struct _pi_physical_mem : ur_physical_mem_handle_t_ { - using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; -}; - -#endif // PI_HIP_HPP diff --git a/sycl/plugins/ld-version-script.txt b/sycl/plugins/ld-version-script.txt deleted file mode 100644 index 1ad2c6d5f8390..0000000000000 --- a/sycl/plugins/ld-version-script.txt +++ /dev/null @@ -1,10 +0,0 @@ -{ - /* in CMakelists.txt, we pass -fvisibility=hidden compiler flag */ - /* This file is used to give exception of the hidden visibility */ - /* Export only pi* function symbols which are individually marked 'default' visibility */ - - global: pi*; - - /* all other symbols are local scope, meaning not exported */ - local: *; -}; diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt deleted file mode 100644 index d4dc4c2eafca4..0000000000000 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ /dev/null @@ -1,73 +0,0 @@ -# PI Level Zero plugin library - -if (SYCL_ENABLE_XPTI_TRACING) - set(XPTI_PROXY_SRC "${CMAKE_SOURCE_DIR}/../xpti/src/xpti_proxy.cpp") - set(XPTI_INCLUDE "${CMAKE_SOURCE_DIR}/../xpti/include") - set(XPTI_LIBS "${CMAKE_DL_LIBS}") -endif() - -find_package(Python3 REQUIRED) - -get_target_property(LEVEL_ZERO_INCLUDE_DIR LevelZeroLoader-Headers INTERFACE_INCLUDE_DIRECTORIES) - -add_custom_target(ze-api DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ze_api.def) -add_custom_command( - OUTPUT - ${CMAKE_CURRENT_BINARY_DIR}/ze_api.def - COMMAND ${Python3_EXECUTABLE} - ${CMAKE_CURRENT_SOURCE_DIR}/ze_api_generator.py - ${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h - DEPENDS - ${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h - ) - -find_package(Threads REQUIRED) - -# Get the L0 adapter sources so they can be shared with the L0 PI plugin -get_target_property(UR_L0_ADAPTER_SOURCES ur_adapter_level_zero SOURCES) - -add_sycl_plugin(level_zero - SOURCES - # These are short-term shared with Unified Runtime - # The two plugins define a few things differrently so must - # be built separately. This difference is spelled in - # their "ur_bindings.hpp" files. - # - "ur_bindings.hpp" - "../unified_runtime/pi2ur.hpp" - ${UR_L0_ADAPTER_SOURCES} - # Following are the PI Level-Zero Plugin only codes. - "pi_level_zero.cpp" - "pi_level_zero.hpp" - "tracing.cpp" - ${XPTI_PROXY_SRC} - INCLUDE_DIRS - ${CMAKE_CURRENT_BINARY_DIR} # for ze_api.def - ${CMAKE_CURRENT_SOURCE_DIR} # for Level-Zero Plugin "ur_bindings.hpp" - ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime # for Unified Runtime - ${UNIFIED_RUNTIME_SOURCE_DIR}/source/ # for adapters/level_zero - ${XPTI_INCLUDE} - "${SYCL_SOURCE_DIR}/source" # for compiler.hpp - LIBRARIES - LevelZeroLoader-Headers - UnifiedRuntime-Headers - UnifiedRuntimeCommon - UnifiedMemoryFramework - LevelZeroLoader - Threads::Threads - ${XPTI_LIBS} -) - -if (WIN32) - # 0x800: Search for the DLL only in the System32 folder - target_link_options(pi_level_zero PUBLIC /DEPENDENTLOADFLAG:0x800) -endif() - -add_dependencies(pi_level_zero ze-api) - -if (SYCL_ENABLE_XPTI_TRACING) - target_compile_definitions(pi_level_zero PRIVATE - XPTI_ENABLE_INSTRUMENTATION - XPTI_STATIC_LIBRARY - ) -endif() diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp deleted file mode 100644 index 64f23f7483276..0000000000000 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ /dev/null @@ -1,1649 +0,0 @@ -//===-------- pi_level_zero.cpp - Level Zero Plugin --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===------------------------------------------------------------------===// - -/// \file pi_level_zero.cpp -/// Implementation of Level Zero Plugin. -/// -/// \ingroup sycl_pi_level_zero - -#include "pi_level_zero.hpp" -#include "ur_bindings.hpp" - -// Defined in tracing.cpp -void enableZeTracing(); -void disableZeTracing(); - -extern "C" { - -// Forward declarations -decltype(piEventCreate) piEventCreate; - -pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, - pi_uint32 *NumPlatforms) { - return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms); -} - -pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - // To distinguish this L0 platform from Unified Runtime one. - if (ParamName == PI_PLATFORM_INFO_NAME) { - ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); - return ReturnValue("Intel(R) Level-Zero"); - } - return pi2ur::piPlatformGetInfo(Platform, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextPlatformGetNativeHandle(pi_platform Platform, - pi_native_handle *NativeHandle) { - - return pi2ur::piextPlatformGetNativeHandle(Platform, NativeHandle); -} - -pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform *Platform) { - - return pi2ur::piextPlatformCreateWithNativeHandle(NativeHandle, Platform); -} - -pi_result piPluginGetLastError(char **message) { - return pi2ur::piPluginGetLastError(message); -} - -// Returns plugin specific backend option. -// Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=2' for frontend_option = -O1, O2 or -O3. -// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for -// frontend_option = -ftarget-compile-fast. -pi_result piPluginGetBackendOption(pi_platform platform, - const char *frontend_option, - const char **backend_option) { - return pi2ur::piPluginGetBackendOption(platform, frontend_option, - backend_option); -} - -pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, - pi_uint32 NumEntries, pi_device *Devices, - pi_uint32 *NumDevices) { - return pi2ur::piDevicesGet(Platform, DeviceType, NumEntries, Devices, - NumDevices); -} - -pi_result piDeviceRetain(pi_device Device) { - return pi2ur::piDeviceRetain(Device); -} - -pi_result piDeviceRelease(pi_device Device) { - return pi2ur::piDeviceRelease(Device); -} - -pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piDeviceGetInfo(Device, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piDevicePartition(pi_device Device, - const pi_device_partition_property *Properties, - pi_uint32 NumDevices, pi_device *OutDevices, - pi_uint32 *OutNumDevices) { - return pi2ur::piDevicePartition(Device, Properties, NumDevices, OutDevices, - OutNumDevices); -} - -pi_result -piextDeviceSelectBinary(pi_device Device, // TODO: does this need to be context? - pi_device_binary *Binaries, pi_uint32 NumBinaries, - pi_uint32 *SelectedBinaryInd) { - return pi2ur::piextDeviceSelectBinary(Device, Binaries, NumBinaries, - SelectedBinaryInd); -} - -pi_result piextDeviceGetNativeHandle(pi_device Device, - pi_native_handle *NativeHandle) { - - return pi2ur::piextDeviceGetNativeHandle(Device, NativeHandle); -} - -pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform Platform, - pi_device *Device) { - - return pi2ur::piextDeviceCreateWithNativeHandle(NativeHandle, Platform, - Device); -} - -pi_result piContextCreate(const pi_context_properties *Properties, - pi_uint32 NumDevices, const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, - const void *PrivateInfo, size_t CB, - void *UserData), - void *UserData, pi_context *RetContext) { - return pi2ur::piContextCreate(Properties, NumDevices, Devices, PFnNotify, - UserData, RetContext); -} - -pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piContextGetInfo(Context, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -// FIXME: Dummy implementation to prevent link fail -pi_result piextContextSetExtendedDeleter(pi_context Context, - pi_context_extended_deleter Function, - void *UserData) { - return pi2ur::piextContextSetExtendedDeleter(Context, Function, UserData); -} - -pi_result piextContextGetNativeHandle(pi_context Context, - pi_native_handle *NativeHandle) { - return pi2ur::piextContextGetNativeHandle(Context, NativeHandle); -} - -pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_uint32 NumDevices, - const pi_device *Devices, - bool OwnNativeHandle, - pi_context *RetContext) { - return pi2ur::piextContextCreateWithNativeHandle( - NativeHandle, NumDevices, Devices, OwnNativeHandle, RetContext); -} - -pi_result piContextRetain(pi_context Context) { - - return pi2ur::piContextRetain(Context); -} - -pi_result piContextRelease(pi_context Context) { - return pi2ur::piContextRelease(Context); -} - -pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Flags, pi_queue *Queue) { - pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; - return piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, pi_queue *Queue) { - return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piQueueGetInfo(Queue, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piQueueRetain(pi_queue Queue) { return pi2ur::piQueueRetain(Queue); } - -pi_result piQueueRelease(pi_queue Queue) { - return pi2ur::piQueueRelease(Queue); -} - -pi_result piQueueFinish(pi_queue Queue) { return pi2ur::piQueueFinish(Queue); } - -pi_result piQueueFlush(pi_queue Queue) { return pi2ur::piQueueFlush(Queue); } - -pi_result piextQueueGetNativeHandle(pi_queue Queue, - pi_native_handle *NativeHandle, - int32_t *NativeHandleDesc) { - - return pi2ur::piextQueueGetNativeHandle(Queue, NativeHandle, - NativeHandleDesc); -} - -pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, - int32_t NativeHandleDesc, - pi_context Context, pi_device Device, - bool OwnNativeHandle, - pi_queue_properties *Properties, - pi_queue *Queue) { - - return pi2ur::piextQueueCreateWithNativeHandle( - NativeHandle, NativeHandleDesc, Context, Device, OwnNativeHandle, - Properties, Queue); -} - -pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, - void *HostPtr, pi_mem *RetMem, - const pi_mem_properties *properties) { - return pi2ur::piMemBufferCreate(Context, Flags, Size, HostPtr, RetMem, - properties); -} - -pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piMemGetInfo(Mem, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piMemRetain(pi_mem Mem) { return pi2ur::piMemRetain(Mem); } - -pi_result piMemRelease(pi_mem Mem) { return pi2ur::piMemRelease(Mem); } - -pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, - const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, void *HostPtr, - pi_mem *RetImage) { - - return pi2ur::piMemImageCreate(Context, Flags, ImageFormat, ImageDesc, - HostPtr, RetImage); -} - -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, - pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); -} - -pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool ownNativeHandle, pi_mem *Mem) { - return pi2ur::piextMemCreateWithNativeHandle(NativeHandle, Context, - ownNativeHandle, Mem); -} - -pi_result piProgramCreate(pi_context Context, const void *ILBytes, - size_t Length, pi_program *Program) { - return pi2ur::piProgramCreate(Context, ILBytes, Length, Program); -} - -pi_result piProgramCreateWithBinary( - pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, - const size_t *Lengths, const unsigned char **Binaries, - size_t NumMetadataEntries, const pi_device_binary_property *Metadata, - pi_int32 *BinaryStatus, pi_program *Program) { - - return pi2ur::piProgramCreateWithBinary(Context, NumDevices, DeviceList, - Lengths, Binaries, NumMetadataEntries, - Metadata, BinaryStatus, Program); -} - -pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *Img) { - return pi2ur::piextMemImageCreateWithNativeHandle( - NativeHandle, Context, OwnNativeHandle, ImageFormat, ImageDesc, Img); -} - -pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetInfo(Program, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - pi_uint32 NumInputPrograms, - const pi_program *InputPrograms, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData, pi_program *RetProgram) { - return pi2ur::piProgramLink(Context, NumDevices, DeviceList, Options, - NumInputPrograms, InputPrograms, PFnNotify, - UserData, RetProgram); -} - -pi_result piProgramCompile( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, pi_uint32 NumInputHeaders, - const pi_program *InputHeaders, const char **HeaderIncludeNames, - void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { - - return pi2ur::piProgramCompile(Program, NumDevices, DeviceList, Options, - NumInputHeaders, InputHeaders, - HeaderIncludeNames, PFnNotify, UserData); -} - -pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData) { - return pi2ur::piProgramBuild(Program, NumDevices, DeviceList, Options, - PFnNotify, UserData); -} - -pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, - pi_program_build_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetBuildInfo(Program, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramRetain(pi_program Program) { - return pi2ur::piProgramRetain(Program); -} - -pi_result piProgramRelease(pi_program Program) { - return pi2ur::piProgramRelease(Program); -} - -pi_result piextProgramGetNativeHandle(pi_program Program, - pi_native_handle *NativeHandle) { - return pi2ur::piextProgramGetNativeHandle(Program, NativeHandle); -} - -pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_program *Program) { - return pi2ur::piextProgramCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Program); -} - -pi_result piKernelCreate(pi_program Program, const char *KernelName, - pi_kernel *RetKernel) { - - return pi2ur::piKernelCreate(Program, KernelName, RetKernel); -} - -pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, - const void *ArgValue) { - - return pi2ur::piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue); -} - -// Special version of piKernelSetArg to accept pi_mem. -pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem_obj_property *ArgProperties, - const pi_mem *ArgValue) { - return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, - ArgValue); -} - -// Special version of piKernelSetArg to accept pi_sampler. -pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_sampler *ArgValue) { - - return pi2ur::piextKernelSetArgSampler(Kernel, ArgIndex, ArgValue); -} - -pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetInfo(Kernel, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context Context, - pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, - RetMem); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, - pi_image_handle *RetHandle) { - return pi2ur::piextMemUnsampledImageCreate(Context, Device, ImgMem, - ImageFormat, ImageDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, - pi_image_handle *RetHandle) { - return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, - ImageDesc, Sampler, RetHandle); -} - -__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( - pi_context Context, const pi_sampler_properties *SamplerProperties, - float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, - pi_sampler *RetSampler) { - return pi2ur::piextBindlessImageSamplerCreate( - Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, - MaxAnisotropy, RetSampler); -} - -__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context Context, - pi_device Device, - pi_image_mem_handle MipMem, - unsigned int Level, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); -} - -__SYCL_EXPORT pi_result piextMemImageFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemImageCopy( - pi_queue Queue, void *DstPtr, const void *SrcPtr, - const pi_image_desc *SrcImageDesc, const pi_image_desc *DestImageDesc, - const pi_image_format *SrcImageFormat, - const pi_image_format *DestImageFormat, const pi_image_copy_flags Flags, - pi_image_offset SrcOffset, pi_image_offset DstOffset, - pi_image_region CopyExtent, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextMemImageCopy( - Queue, DstPtr, SrcPtr, SrcImageDesc, DestImageDesc, SrcImageFormat, - DestImageFormat, Flags, SrcOffset, DstOffset, CopyExtent, - NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_context Context, - pi_image_mem_handle MemHandle, - pi_image_info ParamName, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextMemImageGetInfo(Context, MemHandle, ParamName, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextImportExternalMemory( - pi_context Context, pi_device Device, pi_external_mem_descriptor *MemDesc, - pi_interop_mem_handle *RetHandle) { - return pi2ur::piextImportExternalMemory(Context, Device, MemDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemMapExternalArray( - pi_context Context, pi_device Device, pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, pi_interop_mem_handle MemHandle, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, - ImageDesc, MemHandle, RetMem); -} - -__SYCL_EXPORT pi_result piextMemReleaseInterop(pi_context Context, - pi_device Device, - pi_interop_mem_handle ExtMem) { - return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); -} - -__SYCL_EXPORT pi_result -piextImportExternalSemaphore(pi_context Context, pi_device Device, - pi_external_semaphore_descriptor *SemDesc, - pi_interop_semaphore_handle *RetHandle) { - return pi2ur::piextImportExternalSemaphore(Context, Device, SemDesc, - RetHandle); -} - -__SYCL_EXPORT pi_result -piextReleaseExternalSemaphore(pi_context Context, pi_device Device, - pi_interop_semaphore_handle SemHandle) { - return pi2ur::piextReleaseExternalSemaphore(Context, Device, SemHandle); -} - -__SYCL_EXPORT pi_result piextWaitExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasWaitValue, - pi_uint64 WaitValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextWaitExternalSemaphore(Queue, SemHandle, HasWaitValue, - WaitValue, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextSignalExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasSignalValue, - pi_uint64 SignalValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextSignalExternalSemaphore(Queue, SemHandle, HasSignalValue, - SignalValue, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_group_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetGroupInfo(Kernel, Device, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_sub_group_info ParamName, - size_t InputValueSize, const void *InputValue, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetSubGroupInfo( - Kernel, Device, ParamName, InputValueSize, InputValue, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelRetain(pi_kernel Kernel) { - - return pi2ur::piKernelRetain(Kernel); -} - -pi_result piKernelRelease(pi_kernel Kernel) { - - return pi2ur::piKernelRelease(Kernel); -} - -pi_result -piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piEnqueueKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextEnqueueKernelLaunchCustom( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumPropsInLaunchPropList, - const pi_launch_property *LaunchPropList, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - (void)Queue; - (void)Kernel; - (void)WorkDim; - (void)GlobalWorkSize; - (void)LocalWorkSize; - (void)NumPropsInLaunchPropList; - (void)LaunchPropList; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)OutEvent; - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - return PI_ERROR_UNSUPPORTED_FEATURE; -} - -pi_result piextEnqueueCooperativeKernelLaunch( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piextEnqueueCooperativeKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - pi_program Program, - bool OwnNativeHandle, - pi_kernel *Kernel) { - - return pi2ur::piextKernelCreateWithNativeHandle( - NativeHandle, Context, Program, OwnNativeHandle, Kernel); -} - -pi_result piextKernelGetNativeHandle(pi_kernel Kernel, - pi_native_handle *NativeHandle) { - return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle); -} - -pi_result piextKernelSuggestMaxCooperativeGroupCount( - pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize, - pi_uint32 *GroupCountRet) { - return pi2ur::piextKernelSuggestMaxCooperativeGroupCount( - Kernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet); -} - -// -// Events -// - -// External PI API entry -pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - return pi2ur::piEventCreate(Context, RetEvent); -} - -pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piEventGetInfo(Event, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piEventGetProfilingInfo(Event, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { - return pi2ur::piEventsWait(NumEvents, EventList); -} - -pi_result piEventSetCallback(pi_event Event, pi_int32 CommandExecCallbackType, - void (*PFnNotify)(pi_event Event, - pi_int32 EventCommandStatus, - void *UserData), - void *UserData) { - return pi2ur::piEventSetCallback(Event, CommandExecCallbackType, PFnNotify, - UserData); -} - -pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { - return pi2ur::piEventSetStatus(Event, ExecutionStatus); -} - -pi_result piEventRetain(pi_event Event) { return pi2ur::piEventRetain(Event); } - -pi_result piEventRelease(pi_event Event) { - return pi2ur::piEventRelease(Event); -} - -pi_result piextEventGetNativeHandle(pi_event Event, - pi_native_handle *NativeHandle) { - - return pi2ur::piextEventGetNativeHandle(Event, NativeHandle); -} - -pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_event *Event) { - return pi2ur::piextEventCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Event); -} - -pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueTimestampRecordingExp( - Queue, Blocking, NumEventsInWaitList, EventWaitList, Event); -} - -// -// Sampler -// -pi_result piSamplerCreate(pi_context Context, - const pi_sampler_properties *SamplerProperties, - pi_sampler *RetSampler) { - return pi2ur::piSamplerCreate(Context, SamplerProperties, RetSampler); -} - -pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piSamplerGetInfo(Sampler, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piSamplerRetain(pi_sampler Sampler) { - return pi2ur::piSamplerRetain(Sampler); -} - -pi_result piSamplerRelease(pi_sampler Sampler) { - return pi2ur::piSamplerRelease(Sampler); -} - -// -// Queue Commands -// -pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWait(Queue, NumEventsInWaitList, EventWaitList, - OutEvent); -} - -pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWaitWithBarrier(Queue, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, - pi_bool BlockingRead, size_t Offset, - size_t Size, void *Dst, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferRead(Queue, Src, BlockingRead, Offset, Size, - Dst, NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piEnqueueMemBufferReadRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferReadRect( - Queue, Buffer, BlockingRead, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, - pi_bool BlockingWrite, size_t Offset, - size_t Size, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWrite(Queue, Buffer, BlockingWrite, Offset, - Size, Ptr, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWriteRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWriteRect( - Queue, Buffer, BlockingWrite, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopy(Queue, SrcMem, DstMem, SrcOffset, - DstOffset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopyRect( - pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, - pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, - size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopyRect( - Queue, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferFill(Queue, Buffer, Pattern, PatternSize, - Offset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, - pi_map_flags MapFlags, size_t Offset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent, void **RetMap) { - - return pi2ur::piEnqueueMemBufferMap(Queue, Mem, BlockingMap, MapFlags, Offset, - Size, NumEventsInWaitList, EventWaitList, - OutEvent, RetMap); -} - -pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - - return pi2ur::piEnqueueMemUnmap(Queue, Mem, MappedPtr, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piMemImageGetInfo(Image, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, - pi_bool BlockingRead, pi_image_offset Origin, - pi_image_region Region, size_t RowPitch, - size_t SlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemImageRead( - Queue, Image, BlockingRead, Origin, Region, RowPitch, SlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, - pi_bool BlockingWrite, pi_image_offset Origin, - pi_image_region Region, size_t InputRowPitch, - size_t InputSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemImageWrite( - Queue, Image, BlockingWrite, Origin, Region, InputRowPitch, - InputSlicePitch, Ptr, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result -piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, - pi_image_offset SrcOrigin, pi_image_offset DstOrigin, - pi_image_region Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piEnqueueMemImageCopy(Queue, SrcImage, DstImage, SrcOrigin, - DstOrigin, Region, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, - const void *FillColor, const size_t *Origin, - const size_t *Region, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemImageFill(Queue, Image, FillColor, Origin, Region, - NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, - pi_buffer_create_type BufferCreateType, - void *BufferCreateInfo, pi_mem *RetMem) { - - return pi2ur::piMemBufferPartition(Buffer, Flags, BufferCreateType, - BufferCreateInfo, RetMem); -} - -// TODO: Check if the function_pointer_ret type can be converted to void**. -pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, - const char *FunctionName, - pi_uint64 *FunctionPointerRet) { - - return pi2ur::piextGetDeviceFunctionPointer(Device, Program, FunctionName, - FunctionPointerRet); -} - -pi_result piextGetGlobalVariablePointer(pi_device Device, pi_program Program, - const char *GlobalVariableName, - size_t *GlobalVariableSize, - void **GlobalVariablePointerRet) { - return pi2ur::piextGetGlobalVariablePointer( - Device, Program, GlobalVariableName, GlobalVariableSize, - GlobalVariablePointerRet); -} - -pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - - return pi2ur::piextUSMDeviceAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - - return pi2ur::piextUSMSharedAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -__SYCL_EXPORT pi_result piextUSMPitchedAlloc( - void **ResultPtr, size_t *ResultPitch, pi_context Context, pi_device Device, - pi_usm_mem_properties *Properties, size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - - return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, - Properties, WidthInBytes, Height, - ElementSizeBytes); -} - -pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - return pi2ur::piextUSMHostAlloc(ResultPtr, Context, Properties, Size, - Alignment); -} - -pi_result piextUSMFree(pi_context Context, void *Ptr) { - - return pi2ur::piextUSMFree(Context, Ptr); -} - -pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); -} - -/// USM Fill API -/// -/// @param Queue is the queue to submit to -/// @param Ptr is the ptr to fill -/// \param Pattern is the ptr with the bytes of the pattern to set -/// \param PatternSize is the size in bytes of the pattern to set -/// @param Count is the size in bytes to fill -/// @param NumEventsInWaitlist is the number of events to wait on -/// @param EventsWaitlist is an array of events to wait on -/// @param Event is the event that represents this operation -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); -} - -pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, - const void *SrcPtr, size_t Size, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy(Queue, Blocking, DstPtr, SrcPtr, Size, - NumEventsInWaitlist, EventsWaitlist, - Event); -} - -/// Hint to migrate memory to the device -/// -/// @param Queue is the queue to submit to -/// @param Ptr points to the memory to migrate -/// @param Size is the number of bytes to migrate -/// @param Flags is a bitfield used to specify memory migration options -/// @param NumEventsInWaitlist is the number of events to wait on -/// @param EventsWaitlist is an array of events to wait on -/// @param Event is the event that represents this operation -pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueuePrefetch( - Queue, Ptr, Size, Flags, NumEventsInWaitList, EventWaitList, OutEvent); -} - -/// USM memadvise API to govern behavior of automatic migration mechanisms -/// -/// @param Queue is the queue to submit to -/// @param Ptr is the data to be advised -/// @param Length is the size in bytes of the meory to advise -/// @param Advice is device specific advice -/// @param Event is the event that represents this operation -/// -pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, - size_t Length, pi_mem_advice Advice, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueueMemAdvise(Queue, Ptr, Length, Advice, OutEvent); -} - -/// USM 2D Fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, - size_t Pitch, size_t PatternSize, - const void *Pattern, size_t Width, - size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueFill2D(Queue, Ptr, Pitch, PatternSize, Pattern, - Width, Height, NumEventsWaitList, - EventsWaitList, Event); -} - -/// USM 2D Memset API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, - size_t Pitch, int Value, - size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemset2D(Queue, Ptr, Pitch, Value, Width, Height, - NumEventsWaitList, EventsWaitlist, - Event); -} - -/// USM 2D Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param dst_ptr is the location the data will be copied -/// \param dst_pitch is the total width of the destination memory including -/// padding -/// \param src_ptr is the data to be copied -/// \param dst_pitch is the total width of the source memory including padding -/// \param width is width in bytes of each row to be copied -/// \param height is height the columns to be copied -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( - pi_queue Queue, pi_bool Blocking, void *DstPtr, size_t DstPitch, - const void *SrcPtr, size_t SrcPitch, size_t Width, size_t Height, - pi_uint32 NumEventsInWaitlist, const pi_event *EventWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy2D( - Queue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, - NumEventsInWaitlist, EventWaitlist, Event); -} - -/// API to query information about USM allocated pointers. -/// Valid Queries: -/// PI_MEM_ALLOC_TYPE returns host/device/shared pi_usm_type value -/// PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if -/// the queried pointer fell inside an allocation. -/// Result must fit in void * -/// PI_MEM_ALLOC_SIZE returns how big the queried pointer's -/// allocation is in bytes. Result is a size_t. -/// PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against -/// -/// @param Context is the pi_context -/// @param Ptr is the pointer to query -/// @param ParamName is the type of query to perform -/// @param ParamValueSize is the size of the result in bytes -/// @param ParamValue is the result -/// @param ParamValueRet is how many bytes were written -pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, - pi_mem_alloc_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextUSMGetMemAllocInfo(Context, Ptr, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextUSMImport(const void *HostPtr, size_t Size, pi_context Context) { - return pi2ur::piextUSMImport(HostPtr, Size, Context); -} - -pi_result piextUSMRelease(const void *HostPtr, pi_context Context) { - return pi2ur::piextUSMRelease(HostPtr, Context); -} - -/// API for writing data from host to a device global variable. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device global variable -/// \param Name is the unique identifier for the device global variable -/// \param BlockingWrite is true if the write should block -/// \param Count is the number of bytes to copy -/// \param Offset is the byte offset into the device global variable to start -/// copying -/// \param Src is a pointer to where the data must be copied from -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, - size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - return pi2ur::piextEnqueueDeviceGlobalVariableWrite( - Queue, Program, Name, BlockingWrite, Count, Offset, Src, - NumEventsInWaitList, EventsWaitList, Event); -} - -/// API reading data from a device global variable to host. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device global variable -/// \param Name is the unique identifier for the device global variable -/// \param BlockingRead is true if the read should block -/// \param Count is the number of bytes to copy -/// \param Offset is the byte offset into the device global variable to start -/// copying -/// \param Dst is a pointer to where the data must be copied to -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, - size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - - return pi2ur::piextEnqueueDeviceGlobalVariableRead( - Queue, Program, Name, BlockingRead, Count, Offset, Dst, - NumEventsInWaitList, EventsWaitList, Event); - - return PI_SUCCESS; -} -/// API for Read from host pipe. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device variable -/// \param PipeSymbol is the unique identifier for the device variable -/// \param Blocking is true if the write should block -/// \param Ptr is a pointer to where the data will be copied to -/// \param Size is size of the data that is read/written from/to pipe -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -pi_result piextEnqueueReadHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - (void)Queue; - (void)Program; - (void)PipeSymbol; - (void)Blocking; - (void)Ptr; - (void)Size; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)Event; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - die("piextEnqueueReadHostPipe: not implemented"); - return {}; -} - -/// API for write to pipe of a given name. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device variable -/// \param PipeSymbol is the unique identifier for the device variable -/// \param Blocking is true if the write should block -/// \param Ptr is a pointer to where the data must be copied from -/// \param Size is size of the data that is read/written from/to pipe -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -pi_result piextEnqueueWriteHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - (void)Queue; - (void)Program; - (void)PipeSymbol; - (void)Blocking; - (void)Ptr; - (void)Size; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)Event; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - die("piextEnqueueWriteHostPipe: not implemented"); - return {}; -} - -pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName, - size_t ParamValueSize, const void *ParamValue) { - - return pi2ur::piKernelSetExecInfo(Kernel, ParamName, ParamValueSize, - ParamValue); -} - -pi_result piextProgramSetSpecializationConstant(pi_program Prog, - pi_uint32 SpecID, size_t Size, - const void *SpecValue) { - return pi2ur::piextProgramSetSpecializationConstant(Prog, SpecID, Size, - SpecValue); -} - -// Command buffer extension -pi_result piextCommandBufferCreate(pi_context Context, pi_device Device, - const pi_ext_command_buffer_desc *Desc, - pi_ext_command_buffer *RetCommandBuffer) { - return pi2ur::piextCommandBufferCreate(Context, Device, Desc, - RetCommandBuffer); -} - -pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRetain(CommandBuffer); -} - -pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRelease(CommandBuffer); -} - -pi_result piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferFinalize(CommandBuffer); -} - -pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint, - pi_ext_command_buffer_command *Command) { - return pi2ur::piextCommandBufferNDRangeKernel( - CommandBuffer, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint, - Command); -} - -pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, - size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemcpyUSM(CommandBuffer, DstPtr, SrcPtr, Size, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopy( - CommandBuffer, SrcMem, DstMem, SrcOffset, DstOffset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, - pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopyRect( - CommandBuffer, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferRead( - CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferReadRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWrite( - CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWriteRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferFill( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, - size_t PatternSize, size_t Offset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferFill( - CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, - pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferFillUSM( - CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferPrefetchUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferPrefetchUSM(CommandBuffer, Ptr, Size, Flags, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferAdviseUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Length, - pi_mem_advice Advice, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferAdviseUSM(CommandBuffer, Ptr, Length, Advice, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, - pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piextEnqueueCommandBuffer( - CommandBuffer, Queue, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command Command, - pi_ext_command_buffer_update_kernel_launch_desc *Desc) { - return pi2ur::piextCommandBufferUpdateKernelLaunch(Command, Desc); -} - -pi_result -piextCommandBufferRetainCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferRetainCommand(Command); -} - -pi_result -piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferReleaseCommand(Command); -} - -/// API for getting information about the minimum and recommended granularity -/// of physical and virtual memory. -/// -/// \param Context is the context to get the granularity from. -/// \param Device is the device to get the granularity from. -/// \param MemSize is the potentially unadjusted size to get granularity for. -/// \param ParamName is the type of query to perform. -/// \param ParamValueSize is the size of the result in bytes. -/// \param ParamValue is the result. -/// \param ParamValueSizeRet is how many bytes were written. -pi_result -piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, - pi_virtual_mem_granularity_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -/// API for creating a physical memory handle that virtual memory can be mapped -/// to. -/// -/// \param Context is the context within which the physical memory is allocated. -/// \param Device is the device the physical memory is on. -/// \param MemSize is the size of physical memory to allocate. This must be a -/// multiple of the minimum virtual memory granularity. -/// \param RetPhysicalMem is the handle for the resulting physical memory. -pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, - size_t MemSize, - pi_physical_mem *RetPhysicalMem) { - return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, - RetPhysicalMem); -} - -/// API for retaining a physical memory handle. -/// -/// \param PhysicalMem is the handle for the physical memory to retain. -pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { - return pi2ur::piextPhysicalMemRetain(PhysicalMem); -} - -/// API for releasing a physical memory handle. -/// -/// \param PhysicalMem is the handle for the physical memory to free. -pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { - return pi2ur::piextPhysicalMemRelease(PhysicalMem); -} - -/// API for reserving a virtual memory range. -/// -/// \param Context is the context within which the virtual memory range is -/// reserved. -/// \param Start is a pointer to the start of the region to reserve. If nullptr -/// the implementation selects a start address. -/// \param RangeSize is the size of the virtual address range to reserve in -/// bytes. -/// \param RetPtr is the pointer to the start of the resulting virtual memory -/// range. -pi_result piextVirtualMemReserve(pi_context Context, const void *Start, - size_t RangeSize, void **RetPtr) { - return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); -} - -/// API for freeing a virtual memory range. -/// -/// \param Context is the context within which the virtual memory range is -/// reserved. -/// \param Ptr is the pointer to the start of the virtual memory range. -/// \param RangeSize is the size of the virtual address range. -pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, - size_t RangeSize) { - return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); -} - -/// API for mapping a virtual memory range to a a physical memory allocation at -/// a given offset. -/// -/// \param Context is the context within which both the virtual memory range is -/// reserved and the physical memory is allocated. -/// \param Ptr is the pointer to the start of the virtual memory range. -/// \param RangeSize is the size of the virtual address range. -/// \param PhysicalMem is the handle for the physical memory to map Ptr to. -/// \param Offset is the offset into PhysicalMem in bytes to map Ptr to. -/// \param Flags is the access flags to set for the mapping. -pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, - size_t RangeSize, pi_physical_mem PhysicalMem, - size_t Offset, pi_virtual_access_flags Flags) { - return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, - Flags); -} - -/// API for unmapping a virtual memory range previously mapped in a context. -/// After a call to this function, the virtual memory range is left in a state -/// ready to be remapped. -/// -/// \param Context is the context within which the virtual memory range is -/// currently mapped. -/// \param Ptr is the pointer to the start of the virtual memory range. -/// \param RangeSize is the size of the virtual address range in bytes. -pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, - size_t RangeSize) { - return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); -} - -/// API for setting the access mode of a mapped virtual memory range. -/// -/// \param Context is the context within which the virtual memory range is -/// currently mapped. -/// \param Ptr is the pointer to the start of the virtual memory range. -/// \param RangeSize is the size of the virtual address range in bytes. -/// \param Flags is the access flags to set for the mapped virtual access range. -pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, - size_t RangeSize, - pi_virtual_access_flags Flags) { - return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); -} - -/// API for getting info about a mapped virtual memory range. -/// -/// \param Context is the context within which the virtual memory range is -/// currently mapped. -/// \param Ptr is the pointer to the start of the virtual memory range. -/// \param RangeSize is the size of the virtual address range in bytes. -/// \param ParamName is the type of query to perform. -/// \param ParamValueSize is the size of the result in bytes. -/// \param ParamValue is the result. -/// \param ParamValueSizeRet is how many bytes were written. -pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, - size_t RangeSize, - pi_virtual_mem_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result -piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, - void *Data, pi_uint32 NumMems, const pi_mem *Mems, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, - NumEventsInWaitList, EventWaitList, - Event); -} - -const char SupportedVersion[] = _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING; - -pi_result piPluginInit(pi_plugin *PluginInit) { // missing - PI_ASSERT(PluginInit, PI_ERROR_INVALID_VALUE); - - // Check that the major version matches in PiVersion and SupportedVersion - _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); - - // TODO: handle versioning/targets properly. - size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); - - PI_ASSERT(strlen(_PI_LEVEL_ZERO_PLUGIN_VERSION_STRING) < PluginVersionSize, - PI_ERROR_INVALID_VALUE); - - strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); - -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); -#include - - enableZeTracing(); - return PI_SUCCESS; -} - -pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - return pi2ur::piextPluginGetOpaqueData(opaque_data_param, opaque_data_return); -} - -// SYCL RT calls this api to notify the end of plugin lifetime. -// Windows: dynamically loaded plugins might have been unloaded already -// when this is called. Sycl RT holds onto the PI plugin so it can be -// called safely. But this is not transitive. If the PI plugin in turn -// dynamically loaded a different DLL, that may have been unloaded. -// It can include all the jobs to tear down resources before -// the plugin is unloaded from memory. -pi_result piTearDown(void *PluginParameter) { - return pi2ur::piTearDown(PluginParameter); -} - -pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime); -} - -pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextEnablePeerAccess(command_device, peer_device); -} - -pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextDisablePeerAccess(command_device, peer_device); -} - -pi_result piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -#ifdef _WIN32 -#define __SYCL_PLUGIN_DLL_NAME "pi_level_zero.dll" -#include "../common_win_pi_trace/common_win_pi_trace.hpp" -#undef __SYCL_PLUGIN_DLL_NAME -#endif -} // extern "C" diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp deleted file mode 100644 index 83163ecd85168..0000000000000 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ /dev/null @@ -1,32 +0,0 @@ -//===--------- pi_level_zero.hpp - Level Zero Plugin ----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===-----------------------------------------------------------------===// - -/// \defgroup sycl_pi_level_zero Level Zero Plugin -/// \ingroup sycl_pi - -/// \file pi_level_zero.hpp -/// Declarations for Level Zero Plugin. It is the interface between the -/// device-agnostic SYCL runtime layer and underlying Level Zero runtime. -/// -/// \ingroup sycl_pi_level_zero - -#ifndef PI_LEVEL_ZERO_HPP -#define PI_LEVEL_ZERO_HPP - -// This version should be incremented for any change made to this file or its -// corresponding .cpp file. -#define _PI_LEVEL_ZERO_PLUGIN_VERSION 1 - -#define _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING \ - _PI_PLUGIN_VERSION_STRING(_PI_LEVEL_ZERO_PLUGIN_VERSION) - -// Share code between this PI L0 Plugin and UR L0 Adapter -#include -#include - -#endif // PI_LEVEL_ZERO_HPP diff --git a/sycl/plugins/level_zero/tracing.cpp b/sycl/plugins/level_zero/tracing.cpp deleted file mode 100644 index 87331f100f3e2..0000000000000 --- a/sycl/plugins/level_zero/tracing.cpp +++ /dev/null @@ -1,169 +0,0 @@ -//===-------------- tracing.cpp - Level-Zero Host API Tracing --------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifdef XPTI_ENABLE_INSTRUMENTATION -#include -#include -#endif - -#include -#include -#include - -#include - -constexpr auto ZE_CALL_STREAM_NAME = "sycl.experimental.level_zero.call"; -constexpr auto ZE_DEBUG_STREAM_NAME = "sycl.experimental.level_zero.debug"; - -thread_local uint64_t CallCorrelationID = 0; -thread_local uint64_t DebugCorrelationID = 0; - -constexpr auto GVerStr = "0.1"; -constexpr int GMajVer = 0; -constexpr int GMinVer = 1; - -#ifdef XPTI_ENABLE_INSTRUMENTATION -static xpti_td *GCallEvent = nullptr; -static xpti_td *GDebugEvent = nullptr; -static uint8_t GCallStreamID = 0; -static uint8_t GDebugStreamID = 0; -#endif // XPTI_ENABLE_INSTRUMENTATION - -enum class ZEApiKind { -#define _ZE_API(call, domain, cb, params_type) call, -#include "ze_api.def" -#undef _ZE_API -}; - -void enableZeTracing() { -#ifdef XPTI_ENABLE_INSTRUMENTATION - if (!xptiTraceEnabled()) - return; - - // Initialize the required streams and stream ID for use - GCallStreamID = xptiRegisterStream(ZE_CALL_STREAM_NAME); - xptiInitialize(ZE_CALL_STREAM_NAME, GMajVer, GMinVer, GVerStr); - GDebugStreamID = xptiRegisterStream(ZE_DEBUG_STREAM_NAME); - xptiInitialize(ZE_DEBUG_STREAM_NAME, GMajVer, GMinVer, GVerStr); - - uint64_t Dummy; - xpti::payload_t ZePayload("Level Zero Plugin Layer"); - GCallEvent = - xptiMakeEvent("Level Zero Plugin Layer", &ZePayload, - xpti::trace_algorithm_event, xpti_at::active, &Dummy); - - xpti::payload_t ZeDebugPayload("Level Zero Plugin Debug Layer"); - GDebugEvent = - xptiMakeEvent("Level Zero Plugin Debug Layer", &ZeDebugPayload, - xpti::trace_algorithm_event, xpti_at::active, &Dummy); - - ze_result_t Status = zeInit(0); - if (Status != ZE_RESULT_SUCCESS) { - // Most likey there are no Level Zero devices. - return; - } - - int Foo = 0; - zel_tracer_desc_t TracerDesc = {ZEL_STRUCTURE_TYPE_TRACER_EXP_DESC, nullptr, - &Foo}; - zel_tracer_handle_t Tracer = nullptr; - - Status = zelTracerCreate(&TracerDesc, &Tracer); - - if (Status != ZE_RESULT_SUCCESS || Tracer == nullptr) { - std::cerr << "[WARNING] Failed to create Level Zero tracer: " << Status - << "\n"; - return; - } - - zel_core_callbacks_t Prologue = {}; - zel_core_callbacks_t Epilogue = {}; - -#define _ZE_API(call, domain, cb, params_type) \ - Prologue.domain.cb = [](params_type *Params, ze_result_t, void *, void **) { \ - if (xptiTraceEnabled()) { \ - const char *FuncName = #call; \ - if (xptiCheckTraceEnabled( \ - GCallStreamID, \ - (uint16_t)xpti::trace_point_type_t::function_begin)) { \ - CallCorrelationID = xptiGetUniqueId(); \ - xptiNotifySubscribers( \ - GCallStreamID, (uint16_t)xpti::trace_point_type_t::function_begin, \ - GCallEvent, nullptr, CallCorrelationID, FuncName); \ - } \ - if (xptiCheckTraceEnabled( \ - GDebugStreamID, \ - (uint16_t)xpti::trace_point_type_t::function_with_args_begin)) { \ - DebugCorrelationID = xptiGetUniqueId(); \ - uint32_t FuncID = static_cast(ZEApiKind::call); \ - xpti::function_with_args_t Payload{FuncID, FuncName, Params, nullptr, \ - nullptr}; \ - xptiNotifySubscribers( \ - GDebugStreamID, \ - (uint16_t)xpti::trace_point_type_t::function_with_args_begin, \ - GDebugEvent, nullptr, DebugCorrelationID, &Payload); \ - } \ - } \ - }; \ - Epilogue.domain.cb = [](params_type *Params, ze_result_t Result, void *, \ - void **) { \ - if (xptiTraceEnabled()) { \ - const char *FuncName = #call; \ - if (xptiCheckTraceEnabled( \ - GCallStreamID, \ - (uint16_t)xpti::trace_point_type_t::function_end)) { \ - xptiNotifySubscribers( \ - GCallStreamID, (uint16_t)xpti::trace_point_type_t::function_end, \ - GCallEvent, nullptr, CallCorrelationID, FuncName); \ - } \ - if (xptiCheckTraceEnabled( \ - GDebugStreamID, \ - (uint16_t)xpti::trace_point_type_t::function_with_args_end)) { \ - uint32_t FuncID = static_cast(ZEApiKind::call); \ - xpti::function_with_args_t Payload{FuncID, FuncName, Params, &Result, \ - nullptr}; \ - xptiNotifySubscribers( \ - GDebugStreamID, \ - (uint16_t)xpti::trace_point_type_t::function_with_args_end, \ - GDebugEvent, nullptr, DebugCorrelationID, &Payload); \ - } \ - } \ - }; - -#include "ze_api.def" - -#undef _ZE_API - - Status = zelTracerSetPrologues(Tracer, &Prologue); - if (Status != ZE_RESULT_SUCCESS) { - std::cerr << "Failed to enable Level Zero tracing\n"; - std::terminate(); - } - Status = zelTracerSetEpilogues(Tracer, &Epilogue); - if (Status != ZE_RESULT_SUCCESS) { - std::cerr << "Failed to enable Level Zero tracing\n"; - std::terminate(); - } - - Status = zelTracerSetEnabled(Tracer, true); - if (Status != ZE_RESULT_SUCCESS) { - std::cerr << "Failed to enable Level Zero tracing\n"; - std::terminate(); - } -#endif // XPTI_ENABLE_INSTRUMENTATION -} - -void disableZeTracing() { -#ifdef XPTI_ENABLE_INSTRUMENTATION - if (!xptiTraceEnabled()) - return; - - xptiFinalize(ZE_CALL_STREAM_NAME); - xptiFinalize(ZE_DEBUG_STREAM_NAME); -#endif // XPTI_ENABLE_INSTRUMENTATION -} diff --git a/sycl/plugins/level_zero/ur_bindings.hpp b/sycl/plugins/level_zero/ur_bindings.hpp deleted file mode 100644 index faaab6d5e925b..0000000000000 --- a/sycl/plugins/level_zero/ur_bindings.hpp +++ /dev/null @@ -1,11 +0,0 @@ -//===------ ur_bindings.hpp - Complete definitions of UR handles -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===------------------------------------------------------------------===// -#pragma once - -#include "pi_level_zero.hpp" -#include diff --git a/sycl/plugins/native_cpu/CMakeLists.txt b/sycl/plugins/native_cpu/CMakeLists.txt deleted file mode 100644 index c995744646bd2..0000000000000 --- a/sycl/plugins/native_cpu/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -# Plugin for SYCL Native CPU -# Create shared library for libpi_nativecpu.so - -# Get the Native CPU adapter sources so they can be shared with the Native CPU PI plugin -get_target_property(UR_NATIVE_CPU_ADAPTER_SOURCES ur_adapter_native_cpu SOURCES) - -add_sycl_plugin(native_cpu - SOURCES - ${UR_NATIVE_CPU_ADAPTER_SOURCES} - # Some code is shared with the UR adapter - "../unified_runtime/pi2ur.hpp" - "${sycl_inc_dir}/sycl/detail/pi.h" - "${sycl_inc_dir}/sycl/detail/pi.hpp" - "pi_native_cpu.cpp" - "pi_native_cpu.hpp" - INCLUDE_DIRS - ${sycl_inc_dir} - ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime # for Unified Runtime - ${UNIFIED_RUNTIME_SOURCE_DIR}/source/ # for adapters/native_cpu - "${SYCL_SOURCE_DIR}/source" # for compiler.hpp - LIBRARIES - sycl - UnifiedRuntime-Headers - UnifiedRuntimeCommon -) - - -if(NATIVECPU_USE_OCK) - target_compile_definitions(pi_native_cpu PRIVATE NATIVECPU_USE_OCK) -endif() - -set_target_properties(pi_native_cpu PROPERTIES LINKER_LANGUAGE CXX) diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp deleted file mode 100644 index d98803ef2b8b0..0000000000000 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ /dev/null @@ -1,1414 +0,0 @@ -//==---------- pi_native_cpu.cpp - Native CPU Plugin -----------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "pi_native_cpu.hpp" - -extern "C" { - -#define EXPORT_PI_API_SYMBOLS -// TODO: consider undefining EXPORT_PI_API_SYMBOLS because exporting these -// symbols does not appear to be needed at the moment. Undefining -// EXPORT_PI_API_SYMBOLS will initialize the function table directly with pi2ur -// functions instead of stubs that call the corresponding pi2ur functions. While -// this no longer exports the PI API names (and therefore prevents symbol -// checking using the abi_check.py scripts), it's less code/symbols, likely more -// efficient and since the table is initialized using all -// of the PI API should already be covered assuming pi.def is kept up to date. - -#ifdef EXPORT_PI_API_SYMBOLS -// define stubs and export them with the PI API names to enable symbol checking. - -// First, forward-declare stubs to enable checking that they match the pi2ur -// decls. -#define _PI_API(api) decltype(pi2ur::api) api; -#include -#undef _PI_API - -// stubs taken from LevelZero -pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, - pi_uint32 *NumPlatforms) { - return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms); -} - -pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piPlatformGetInfo(Platform, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextPlatformGetNativeHandle(pi_platform Platform, - pi_native_handle *NativeHandle) { - - return pi2ur::piextPlatformGetNativeHandle(Platform, NativeHandle); -} - -pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform *Platform) { - - return pi2ur::piextPlatformCreateWithNativeHandle(NativeHandle, Platform); -} - -pi_result piPluginGetLastError(char **message) { - return pi2ur::piPluginGetLastError(message); -} - -pi_result piPluginGetBackendOption(pi_platform platform, - const char *frontend_option, - const char **backend_option) { - return pi2ur::piPluginGetBackendOption(platform, frontend_option, - backend_option); -} - -pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, - pi_uint32 NumEntries, pi_device *Devices, - pi_uint32 *NumDevices) { - return pi2ur::piDevicesGet(Platform, DeviceType, NumEntries, Devices, - NumDevices); -} - -pi_result piDeviceRetain(pi_device Device) { - return pi2ur::piDeviceRetain(Device); -} - -pi_result piDeviceRelease(pi_device Device) { - return pi2ur::piDeviceRelease(Device); -} - -pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piDeviceGetInfo(Device, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piDevicePartition(pi_device Device, - const pi_device_partition_property *Properties, - pi_uint32 NumDevices, pi_device *OutDevices, - pi_uint32 *OutNumDevices) { - return pi2ur::piDevicePartition(Device, Properties, NumDevices, OutDevices, - OutNumDevices); -} - -pi_result -piextDeviceSelectBinary(pi_device Device, // TODO: does this need to be context? - pi_device_binary *Binaries, pi_uint32 NumBinaries, - pi_uint32 *SelectedBinaryInd) { - return pi2ur::piextDeviceSelectBinary(Device, Binaries, NumBinaries, - SelectedBinaryInd); -} - -pi_result piextDeviceGetNativeHandle(pi_device Device, - pi_native_handle *NativeHandle) { - - return pi2ur::piextDeviceGetNativeHandle(Device, NativeHandle); -} - -pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform Platform, - pi_device *Device) { - - return pi2ur::piextDeviceCreateWithNativeHandle(NativeHandle, Platform, - Device); -} - -pi_result piContextCreate(const pi_context_properties *Properties, - pi_uint32 NumDevices, const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, - const void *PrivateInfo, size_t CB, - void *UserData), - void *UserData, pi_context *RetContext) { - return pi2ur::piContextCreate(Properties, NumDevices, Devices, PFnNotify, - UserData, RetContext); -} - -pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piContextGetInfo(Context, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -// FIXME: Dummy implementation to prevent link fail -pi_result piextContextSetExtendedDeleter(pi_context Context, - pi_context_extended_deleter Function, - void *UserData) { - return pi2ur::piextContextSetExtendedDeleter(Context, Function, UserData); -} - -pi_result piextContextGetNativeHandle(pi_context Context, - pi_native_handle *NativeHandle) { - return pi2ur::piextContextGetNativeHandle(Context, NativeHandle); -} - -pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_uint32 NumDevices, - const pi_device *Devices, - bool OwnNativeHandle, - pi_context *RetContext) { - return pi2ur::piextContextCreateWithNativeHandle( - NativeHandle, NumDevices, Devices, OwnNativeHandle, RetContext); -} - -pi_result piContextRetain(pi_context Context) { - - return pi2ur::piContextRetain(Context); -} - -pi_result piContextRelease(pi_context Context) { - return pi2ur::piContextRelease(Context); -} - -pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Flags, pi_queue *Queue) { - pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; - return piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, pi_queue *Queue) { - return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piQueueGetInfo(Queue, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piQueueRetain(pi_queue Queue) { return pi2ur::piQueueRetain(Queue); } - -pi_result piQueueRelease(pi_queue Queue) { - return pi2ur::piQueueRelease(Queue); -} - -pi_result piQueueFinish(pi_queue Queue) { return pi2ur::piQueueFinish(Queue); } - -pi_result piQueueFlush(pi_queue Queue) { return pi2ur::piQueueFlush(Queue); } - -pi_result piextQueueGetNativeHandle(pi_queue Queue, - pi_native_handle *NativeHandle, - int32_t *NativeHandleDesc) { - - return pi2ur::piextQueueGetNativeHandle(Queue, NativeHandle, - NativeHandleDesc); -} - -pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, - int32_t NativeHandleDesc, - pi_context Context, pi_device Device, - bool OwnNativeHandle, - pi_queue_properties *Properties, - pi_queue *Queue) { - - return pi2ur::piextQueueCreateWithNativeHandle( - NativeHandle, NativeHandleDesc, Context, Device, OwnNativeHandle, - Properties, Queue); -} - -pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, - void *HostPtr, pi_mem *RetMem, - const pi_mem_properties *properties) { - return pi2ur::piMemBufferCreate(Context, Flags, Size, HostPtr, RetMem, - properties); -} - -pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piMemGetInfo(Mem, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piMemRetain(pi_mem Mem) { return pi2ur::piMemRetain(Mem); } - -pi_result piMemRelease(pi_mem Mem) { return pi2ur::piMemRelease(Mem); } - -pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, - const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, void *HostPtr, - pi_mem *RetImage) { - - return pi2ur::piMemImageCreate(Context, Flags, ImageFormat, ImageDesc, - HostPtr, RetImage); -} - -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, - pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); -} - -pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool ownNativeHandle, pi_mem *Mem) { - return pi2ur::piextMemCreateWithNativeHandle(NativeHandle, Context, - ownNativeHandle, Mem); -} - -pi_result piProgramCreate(pi_context Context, const void *ILBytes, - size_t Length, pi_program *Program) { - return pi2ur::piProgramCreate(Context, ILBytes, Length, Program); -} - -pi_result piProgramCreateWithBinary( - pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, - const size_t *Lengths, const unsigned char **Binaries, - size_t NumMetadataEntries, const pi_device_binary_property *Metadata, - pi_int32 *BinaryStatus, pi_program *Program) { - - return pi2ur::piProgramCreateWithBinary(Context, NumDevices, DeviceList, - Lengths, Binaries, NumMetadataEntries, - Metadata, BinaryStatus, Program); -} - -pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *Img) { - return pi2ur::piextMemImageCreateWithNativeHandle( - NativeHandle, Context, OwnNativeHandle, ImageFormat, ImageDesc, Img); -} - -pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetInfo(Program, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - pi_uint32 NumInputPrograms, - const pi_program *InputPrograms, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData, pi_program *RetProgram) { - return pi2ur::piProgramLink(Context, NumDevices, DeviceList, Options, - NumInputPrograms, InputPrograms, PFnNotify, - UserData, RetProgram); -} - -pi_result piProgramCompile( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, pi_uint32 NumInputHeaders, - const pi_program *InputHeaders, const char **HeaderIncludeNames, - void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { - - return pi2ur::piProgramCompile(Program, NumDevices, DeviceList, Options, - NumInputHeaders, InputHeaders, - HeaderIncludeNames, PFnNotify, UserData); -} - -pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData) { - return pi2ur::piProgramBuild(Program, NumDevices, DeviceList, Options, - PFnNotify, UserData); -} - -pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, - pi_program_build_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetBuildInfo(Program, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramRetain(pi_program Program) { - return pi2ur::piProgramRetain(Program); -} - -pi_result piProgramRelease(pi_program Program) { - return pi2ur::piProgramRelease(Program); -} - -pi_result piextProgramGetNativeHandle(pi_program Program, - pi_native_handle *NativeHandle) { - return pi2ur::piextProgramGetNativeHandle(Program, NativeHandle); -} - -pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_program *Program) { - return pi2ur::piextProgramCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Program); -} - -pi_result piKernelCreate(pi_program Program, const char *KernelName, - pi_kernel *RetKernel) { - - return pi2ur::piKernelCreate(Program, KernelName, RetKernel); -} - -pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, - const void *ArgValue) { - - return pi2ur::piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue); -} - -pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem_obj_property *ArgProperties, - const pi_mem *ArgValue) { - return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, - ArgValue); -} - -pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_sampler *ArgValue) { - - return pi2ur::piextKernelSetArgSampler(Kernel, ArgIndex, ArgValue); -} - -pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetInfo(Kernel, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context Context, - pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, - RetMem); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, - pi_image_handle *RetHandle) { - return pi2ur::piextMemUnsampledImageCreate(Context, Device, ImgMem, - ImageFormat, ImageDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, - pi_image_handle *RetHandle) { - return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, - ImageDesc, Sampler, RetHandle); -} - -__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( - pi_context Context, const pi_sampler_properties *SamplerProperties, - float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, - pi_sampler *RetSampler) { - return pi2ur::piextBindlessImageSamplerCreate( - Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, - MaxAnisotropy, RetSampler); -} - -__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context Context, - pi_device Device, - pi_image_mem_handle MipMem, - unsigned int Level, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); -} - -__SYCL_EXPORT pi_result piextMemImageFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemImageCopy( - pi_queue Queue, void *DstPtr, const void *SrcPtr, - const pi_image_desc *SrcImageDesc, const pi_image_desc *DestImageDesc, - const pi_image_format *SrcImageFormat, - const pi_image_format *DestImageFormat, const pi_image_copy_flags Flags, - pi_image_offset SrcOffset, pi_image_offset DstOffset, - pi_image_region CopyExtent, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextMemImageCopy( - Queue, DstPtr, SrcPtr, SrcImageDesc, DestImageDesc, SrcImageFormat, - DestImageFormat, Flags, SrcOffset, DstOffset, CopyExtent, - NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_context Context, - pi_image_mem_handle MemHandle, - pi_image_info ParamName, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextMemImageGetInfo(Context, MemHandle, ParamName, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextImportExternalMemory( - pi_context Context, pi_device Device, pi_external_mem_descriptor *MemDesc, - pi_interop_mem_handle *RetHandle) { - return pi2ur::piextImportExternalMemory(Context, Device, MemDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemMapExternalArray( - pi_context Context, pi_device Device, pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, pi_interop_mem_handle MemHandle, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, - ImageDesc, MemHandle, RetMem); -} - -__SYCL_EXPORT pi_result piextMemReleaseInterop(pi_context Context, - pi_device Device, - pi_interop_mem_handle ExtMem) { - return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); -} - -__SYCL_EXPORT pi_result -piextImportExternalSemaphore(pi_context Context, pi_device Device, - pi_external_semaphore_descriptor *SemDesc, - pi_interop_semaphore_handle *RetHandle) { - return pi2ur::piextImportExternalSemaphore(Context, Device, SemDesc, - RetHandle); -} - -__SYCL_EXPORT pi_result -piextReleaseExternalSemaphore(pi_context Context, pi_device Device, - pi_interop_semaphore_handle SemHandle) { - return pi2ur::piextReleaseExternalSemaphore(Context, Device, SemHandle); -} - -__SYCL_EXPORT pi_result piextWaitExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasWaitValue, - pi_uint64 WaitValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextWaitExternalSemaphore(Queue, SemHandle, HasWaitValue, - WaitValue, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextSignalExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasSignalValue, - pi_uint64 SignalValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextSignalExternalSemaphore(Queue, SemHandle, HasSignalValue, - SignalValue, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_group_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetGroupInfo(Kernel, Device, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_sub_group_info ParamName, - size_t InputValueSize, const void *InputValue, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetSubGroupInfo( - Kernel, Device, ParamName, InputValueSize, InputValue, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelRetain(pi_kernel Kernel) { - - return pi2ur::piKernelRetain(Kernel); -} - -pi_result piKernelRelease(pi_kernel Kernel) { - - return pi2ur::piKernelRelease(Kernel); -} - -pi_result -piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piEnqueueKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextEnqueueKernelLaunchCustom( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumPropsInLaunchPropList, - const pi_launch_property *LaunchPropList, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - (void)Queue; - (void)Kernel; - (void)WorkDim; - (void)GlobalWorkSize; - (void)LocalWorkSize; - (void)NumPropsInLaunchPropList; - (void)LaunchPropList; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)OutEvent; - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - return PI_ERROR_UNSUPPORTED_FEATURE; -} - -pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - pi_program Program, - bool OwnNativeHandle, - pi_kernel *Kernel) { - - return pi2ur::piextKernelCreateWithNativeHandle( - NativeHandle, Context, Program, OwnNativeHandle, Kernel); -} - -pi_result piextKernelGetNativeHandle(pi_kernel Kernel, - pi_native_handle *NativeHandle) { - return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle); -} - -// -// Events -// - -// External PI API entry -pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - return pi2ur::piEventCreate(Context, RetEvent); -} - -pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piEventGetInfo(Event, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piEventGetProfilingInfo(Event, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { - return pi2ur::piEventsWait(NumEvents, EventList); -} - -pi_result piEventSetCallback(pi_event Event, pi_int32 CommandExecCallbackType, - void (*PFnNotify)(pi_event Event, - pi_int32 EventCommandStatus, - void *UserData), - void *UserData) { - return pi2ur::piEventSetCallback(Event, CommandExecCallbackType, PFnNotify, - UserData); -} - -pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { - return pi2ur::piEventSetStatus(Event, ExecutionStatus); -} - -pi_result piEventRetain(pi_event Event) { return pi2ur::piEventRetain(Event); } - -pi_result piEventRelease(pi_event Event) { - return pi2ur::piEventRelease(Event); -} - -pi_result piextEventGetNativeHandle(pi_event Event, - pi_native_handle *NativeHandle) { - - return pi2ur::piextEventGetNativeHandle(Event, NativeHandle); -} - -pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_event *Event) { - return pi2ur::piextEventCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Event); -} - -pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueTimestampRecordingExp( - Queue, Blocking, NumEventsInWaitList, EventWaitList, Event); -} - -// -// Sampler -// -pi_result piSamplerCreate(pi_context Context, - const pi_sampler_properties *SamplerProperties, - pi_sampler *RetSampler) { - return pi2ur::piSamplerCreate(Context, SamplerProperties, RetSampler); -} - -pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piSamplerGetInfo(Sampler, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piSamplerRetain(pi_sampler Sampler) { - return pi2ur::piSamplerRetain(Sampler); -} - -pi_result piSamplerRelease(pi_sampler Sampler) { - return pi2ur::piSamplerRelease(Sampler); -} - -// -// Queue Commands -// -pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWait(Queue, NumEventsInWaitList, EventWaitList, - OutEvent); -} - -pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWaitWithBarrier(Queue, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, - pi_bool BlockingRead, size_t Offset, - size_t Size, void *Dst, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferRead(Queue, Src, BlockingRead, Offset, Size, - Dst, NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piEnqueueMemBufferReadRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferReadRect( - Queue, Buffer, BlockingRead, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, - pi_bool BlockingWrite, size_t Offset, - size_t Size, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWrite(Queue, Buffer, BlockingWrite, Offset, - Size, Ptr, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWriteRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWriteRect( - Queue, Buffer, BlockingWrite, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopy(Queue, SrcMem, DstMem, SrcOffset, - DstOffset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopyRect( - pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, - pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, - size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopyRect( - Queue, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferFill(Queue, Buffer, Pattern, PatternSize, - Offset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, - pi_map_flags MapFlags, size_t Offset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent, void **RetMap) { - - return pi2ur::piEnqueueMemBufferMap(Queue, Mem, BlockingMap, MapFlags, Offset, - Size, NumEventsInWaitList, EventWaitList, - OutEvent, RetMap); -} - -pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - - return pi2ur::piEnqueueMemUnmap(Queue, Mem, MappedPtr, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piMemImageGetInfo(Image, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, - pi_bool BlockingRead, pi_image_offset Origin, - pi_image_region Region, size_t RowPitch, - size_t SlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemImageRead( - Queue, Image, BlockingRead, Origin, Region, RowPitch, SlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, - pi_bool BlockingWrite, pi_image_offset Origin, - pi_image_region Region, size_t InputRowPitch, - size_t InputSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemImageWrite( - Queue, Image, BlockingWrite, Origin, Region, InputRowPitch, - InputSlicePitch, Ptr, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result -piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, - pi_image_offset SrcOrigin, pi_image_offset DstOrigin, - pi_image_region Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piEnqueueMemImageCopy(Queue, SrcImage, DstImage, SrcOrigin, - DstOrigin, Region, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, - const void *FillColor, const size_t *Origin, - const size_t *Region, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemImageFill(Queue, Image, FillColor, Origin, Region, - NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, - pi_buffer_create_type BufferCreateType, - void *BufferCreateInfo, pi_mem *RetMem) { - - return pi2ur::piMemBufferPartition(Buffer, Flags, BufferCreateType, - BufferCreateInfo, RetMem); -} - -pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, - const char *FunctionName, - pi_uint64 *FunctionPointerRet) { - - return pi2ur::piextGetDeviceFunctionPointer(Device, Program, FunctionName, - FunctionPointerRet); -} - -pi_result piextGetGlobalVariablePointer(pi_device Device, pi_program Program, - const char *GlobalVariableName, - size_t *GlobalVariableSize, - void **GlobalVariablePointerRet) { - return pi2ur::piextGetGlobalVariablePointer( - Device, Program, GlobalVariableName, GlobalVariableSize, - GlobalVariablePointerRet); -} - -pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - - return pi2ur::piextUSMDeviceAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - - return pi2ur::piextUSMSharedAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -__SYCL_EXPORT pi_result piextUSMPitchedAlloc( - void **ResultPtr, size_t *ResultPitch, pi_context Context, pi_device Device, - pi_usm_mem_properties *Properties, size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, - Properties, WidthInBytes, Height, - ElementSizeBytes); -} - -pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - return pi2ur::piextUSMHostAlloc(ResultPtr, Context, Properties, Size, - Alignment); -} - -pi_result piextUSMFree(pi_context Context, void *Ptr) { - - return pi2ur::piextUSMFree(Context, Ptr); -} - -pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); -} - -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); -} - -pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, - const void *SrcPtr, size_t Size, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy(Queue, Blocking, DstPtr, SrcPtr, Size, - NumEventsInWaitlist, EventsWaitlist, - Event); -} - -pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueuePrefetch( - Queue, Ptr, Size, Flags, NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, - size_t Length, pi_mem_advice Advice, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueueMemAdvise(Queue, Ptr, Length, Advice, OutEvent); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, - size_t Pitch, size_t PatternSize, - const void *Pattern, size_t Width, - size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueFill2D(Queue, Ptr, Pitch, PatternSize, Pattern, - Width, Height, NumEventsWaitList, - EventsWaitList, Event); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, - size_t Pitch, int Value, - size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemset2D(Queue, Ptr, Pitch, Value, Width, Height, - NumEventsWaitList, EventsWaitlist, - Event); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( - pi_queue Queue, pi_bool Blocking, void *DstPtr, size_t DstPitch, - const void *SrcPtr, size_t SrcPitch, size_t Width, size_t Height, - pi_uint32 NumEventsInWaitlist, const pi_event *EventWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy2D( - Queue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, - NumEventsInWaitlist, EventWaitlist, Event); -} - -pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, - pi_mem_alloc_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextUSMGetMemAllocInfo(Context, Ptr, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextUSMImport(const void *HostPtr, size_t Size, pi_context Context) { - return pi2ur::piextUSMImport(HostPtr, Size, Context); -} - -pi_result piextUSMRelease(const void *HostPtr, pi_context Context) { - return pi2ur::piextUSMRelease(HostPtr, Context); -} - -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, - size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - return pi2ur::piextEnqueueDeviceGlobalVariableWrite( - Queue, Program, Name, BlockingWrite, Count, Offset, Src, - NumEventsInWaitList, EventsWaitList, Event); -} - -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, - size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - - return pi2ur::piextEnqueueDeviceGlobalVariableRead( - Queue, Program, Name, BlockingRead, Count, Offset, Dst, - NumEventsInWaitList, EventsWaitList, Event); - - return PI_SUCCESS; -} - -pi_result piextEnqueueReadHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - (void)Queue; - (void)Program; - (void)PipeSymbol; - (void)Blocking; - (void)Ptr; - (void)Size; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)Event; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - die("piextEnqueueReadHostPipe: not implemented"); - return {}; -} - -pi_result piextEnqueueWriteHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - (void)Queue; - (void)Program; - (void)PipeSymbol; - (void)Blocking; - (void)Ptr; - (void)Size; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)Event; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - die("piextEnqueueWriteHostPipe: not implemented"); - return {}; -} - -pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName, - size_t ParamValueSize, const void *ParamValue) { - - return pi2ur::piKernelSetExecInfo(Kernel, ParamName, ParamValueSize, - ParamValue); -} - -pi_result piextProgramSetSpecializationConstant(pi_program Prog, - pi_uint32 SpecID, size_t Size, - const void *SpecValue) { - return pi2ur::piextProgramSetSpecializationConstant(Prog, SpecID, Size, - SpecValue); -} - -// Command buffer extension -pi_result piextCommandBufferCreate(pi_context Context, pi_device Device, - const pi_ext_command_buffer_desc *Desc, - pi_ext_command_buffer *RetCommandBuffer) { - return pi2ur::piextCommandBufferCreate(Context, Device, Desc, - RetCommandBuffer); -} - -pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRetain(CommandBuffer); -} - -pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRelease(CommandBuffer); -} - -pi_result piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferFinalize(CommandBuffer); -} - -pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint, - pi_ext_command_buffer_command *Command) { - return pi2ur::piextCommandBufferNDRangeKernel( - CommandBuffer, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint, - Command); -} - -pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, - size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemcpyUSM(CommandBuffer, DstPtr, SrcPtr, Size, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopy( - CommandBuffer, SrcMem, DstMem, SrcOffset, DstOffset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, - pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopyRect( - CommandBuffer, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferRead( - CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferReadRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWrite( - CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWriteRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, - pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piextEnqueueCommandBuffer( - CommandBuffer, Queue, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command Command, - pi_ext_command_buffer_update_kernel_launch_desc *Desc) { - return pi2ur::piextCommandBufferUpdateKernelLaunch(Command, Desc); -} - -pi_result -piextCommandBufferRetainCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferRetainCommand(Command); -} - -pi_result -piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferReleaseCommand(Command); -} - -pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - return pi2ur::piextPluginGetOpaqueData(opaque_data_param, opaque_data_return); -} - -pi_result piTearDown(void *PluginParameter) { - return pi2ur::piTearDown(PluginParameter); -} - -pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime); -} - -pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextEnablePeerAccess(command_device, peer_device); -} - -pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextDisablePeerAccess(command_device, peer_device); -} - -pi_result piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piextCommandBufferMemBufferFill( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, - size_t PatternSize, size_t Offset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferFill( - CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, - pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferFillUSM( - CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferPrefetchUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferPrefetchUSM(CommandBuffer, Ptr, Size, Flags, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferAdviseUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Length, - pi_mem_advice Advice, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferAdviseUSM(CommandBuffer, Ptr, Length, Advice, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextEnqueueCooperativeKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *, pi_uint32, - const pi_event *, pi_event *) { - return PI_ERROR_UNSUPPORTED_FEATURE; -} - -pi_result piextKernelSuggestMaxCooperativeGroupCount(pi_kernel, size_t, size_t, - pi_uint32 *) { - return PI_ERROR_UNSUPPORTED_FEATURE; -} - -pi_result -piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, - pi_virtual_mem_granularity_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, - param_value_size, param_value, - param_value_size_ret); -} - -pi_result piextPhysicalMemCreate(pi_context context, pi_device device, - size_t mem_size, - pi_physical_mem *ret_physical_mem) { - return pi2ur::piextPhysicalMemCreate(context, device, mem_size, - ret_physical_mem); -} - -pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { - return pi2ur::piextPhysicalMemRetain(physical_mem); -} - -pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { - return pi2ur::piextPhysicalMemRelease(physical_mem); -} - -pi_result piextVirtualMemReserve(pi_context context, const void *start, - size_t range_size, void **ret_ptr) { - return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); -} - -pi_result piextVirtualMemFree(pi_context context, const void *ptr, - size_t range_size) { - return pi2ur::piextVirtualMemFree(context, ptr, range_size); -} - -pi_result piextVirtualMemMap(pi_context context, const void *ptr, - size_t range_size, pi_physical_mem physical_mem, - size_t offset, pi_virtual_access_flags flags) { - return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, - offset, flags); -} - -pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, - size_t range_size) { - return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); -} - -pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, - size_t range_size, - pi_virtual_access_flags flags) { - return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); -} - -pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, - size_t range_size, - pi_virtual_mem_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, - param_value_size, param_value, - param_value_size_ret); -} - -pi_result -piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, - void *Data, pi_uint32 NumMems, const pi_mem *Mems, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, - NumEventsInWaitList, EventWaitList, - Event); -} - -// Initialize function table with stubs. -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); -#else -// Initialize function table directly with pi2ur functions. -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&pi2ur::api); -#endif - -pi_result piPluginInit(pi_plugin *PluginInit) { - -#include -#undef _PI_API - - return PI_SUCCESS; -} -} diff --git a/sycl/plugins/native_cpu/pi_native_cpu.hpp b/sycl/plugins/native_cpu/pi_native_cpu.hpp deleted file mode 100644 index 287b3c03115b6..0000000000000 --- a/sycl/plugins/native_cpu/pi_native_cpu.hpp +++ /dev/null @@ -1,50 +0,0 @@ -//===------ pi_native_cpu.hpp - Native CPU Plugin -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -struct _pi_context : ur_context_handle_t_ { - using ur_context_handle_t_::ur_context_handle_t_; -}; - -struct _pi_device : ur_device_handle_t_ { - using ur_device_handle_t_::ur_device_handle_t_; -}; - -struct _pi_kernel : ur_kernel_handle_t_ { - using ur_kernel_handle_t_::ur_kernel_handle_t_; -}; - -struct _pi_mem : ur_mem_handle_t_ { - using ur_mem_handle_t_::ur_mem_handle_t_; -}; - -struct _pi_platform : ur_platform_handle_t_ { - using ur_platform_handle_t_::ur_platform_handle_t_; -}; - -struct _pi_program : ur_program_handle_t_ { - using ur_program_handle_t_::ur_program_handle_t_; -}; - -struct _pi_queue : ur_queue_handle_t_ { - using ur_queue_handle_t_::ur_queue_handle_t_; -}; - -struct _pi_physical_mem : ur_physical_mem_handle_t_ { - using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; -}; diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt deleted file mode 100644 index 582adba435a49..0000000000000 --- a/sycl/plugins/opencl/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -# Plugin for OpenCL -# Create Shared library for libpi_opencl.so. - -# Get the OpenCL adapter sources so they can be shared with the OpenCL PI plugin -get_target_property(UR_OPENCL_ADAPTER_SOURCES ur_adapter_opencl SOURCES) - -add_sycl_plugin(opencl - SOURCES - ${UR_OPENCL_ADAPTER_SOURCES} - # Some code is shared with the UR adapter - "../unified_runtime/pi2ur.hpp" - "${sycl_inc_dir}/sycl/detail/pi.h" - "${sycl_inc_dir}/sycl/detail/pi.hpp" - "pi_opencl.cpp" - "pi_opencl.hpp" - INCLUDE_DIRS - ${sycl_inc_dir} - ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime # for Unified Runtime - ${UNIFIED_RUNTIME_SOURCE_DIR}/source/ # for adapters/opencl - "${SYCL_SOURCE_DIR}/source" # for compiler.hpp - LIBRARIES - UnifiedRuntime-Headers - UnifiedRuntimeCommon - OpenCL-ICD -) - -set_target_properties(pi_opencl PROPERTIES LINKER_LANGUAGE CXX) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp deleted file mode 100644 index 4726454084ce3..0000000000000 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ /dev/null @@ -1,1335 +0,0 @@ -//==---------- pi_opencl.cpp - OpenCL Plugin -------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \defgroup sycl_pi_ocl OpenCL Plugin -/// \ingroup sycl_pi - -/// \file pi_opencl.cpp -/// Implementation of OpenCL Plugin. It is the interface between device-agnostic -/// SYCL runtime layer and underlying OpenCL runtime. -/// -/// \ingroup sycl_pi_ocl - -#define CL_USE_DEPRECATED_OPENCL_1_2_APIS - -#include -#include -#include - -extern "C" { - -const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; - -pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, - pi_uint32 *NumPlatforms) { - return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms); -} - -pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piPlatformGetInfo(Platform, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextPlatformGetNativeHandle(pi_platform Platform, - pi_native_handle *NativeHandle) { - return pi2ur::piextPlatformGetNativeHandle(Platform, NativeHandle); -} - -pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform *Platform) { - return pi2ur::piextPlatformCreateWithNativeHandle(NativeHandle, Platform); -} - -pi_result piPluginGetLastError(char **message) { - return pi2ur::piPluginGetLastError(message); -} - -pi_result piPluginGetBackendOption(pi_platform platform, - const char *frontend_option, - const char **backend_option) { - return pi2ur::piPluginGetBackendOption(platform, frontend_option, - backend_option); -} - -pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, - pi_uint32 NumEntries, pi_device *Devices, - pi_uint32 *NumDevices) { - return pi2ur::piDevicesGet(Platform, DeviceType, NumEntries, Devices, - NumDevices); -} - -pi_result piDeviceRetain(pi_device Device) { - return pi2ur::piDeviceRetain(Device); -} - -pi_result piDeviceRelease(pi_device Device) { - return pi2ur::piDeviceRelease(Device); -} - -pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piDeviceGetInfo(Device, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piDevicePartition(pi_device Device, - const pi_device_partition_property *Properties, - pi_uint32 NumDevices, pi_device *OutDevices, - pi_uint32 *OutNumDevices) { - return pi2ur::piDevicePartition(Device, Properties, NumDevices, OutDevices, - OutNumDevices); -} - -pi_result piextDeviceSelectBinary(pi_device Device, pi_device_binary *Binaries, - pi_uint32 NumBinaries, - pi_uint32 *SelectedBinaryInd) { - return pi2ur::piextDeviceSelectBinary(Device, Binaries, NumBinaries, - SelectedBinaryInd); -} - -pi_result piextDeviceGetNativeHandle(pi_device Device, - pi_native_handle *NativeHandle) { - return pi2ur::piextDeviceGetNativeHandle(Device, NativeHandle); -} - -pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform Platform, - pi_device *Device) { - return pi2ur::piextDeviceCreateWithNativeHandle(NativeHandle, Platform, - Device); -} - -pi_result piContextCreate(const pi_context_properties *Properties, - pi_uint32 NumDevices, const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, - const void *PrivateInfo, size_t CB, - void *UserData), - void *UserData, pi_context *RetContext) { - return pi2ur::piContextCreate(Properties, NumDevices, Devices, PFnNotify, - UserData, RetContext); -} - -pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piContextGetInfo(Context, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piextContextSetExtendedDeleter(pi_context Context, - pi_context_extended_deleter Function, - void *UserData) { - return pi2ur::piextContextSetExtendedDeleter(Context, Function, UserData); -} - -pi_result piextContextGetNativeHandle(pi_context Context, - pi_native_handle *NativeHandle) { - return pi2ur::piextContextGetNativeHandle(Context, NativeHandle); -} - -pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_uint32 NumDevices, - const pi_device *Devices, - bool OwnNativeHandle, - pi_context *RetContext) { - return pi2ur::piextContextCreateWithNativeHandle( - NativeHandle, NumDevices, Devices, OwnNativeHandle, RetContext); -} - -pi_result piContextRetain(pi_context Context) { - return pi2ur::piContextRetain(Context); -} - -pi_result piContextRelease(pi_context Context) { - return pi2ur::piContextRelease(Context); -} - -pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Flags, pi_queue *Queue) { - pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; - return piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, pi_queue *Queue) { - return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); -} - -pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piQueueGetInfo(Queue, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piQueueRetain(pi_queue Queue) { return pi2ur::piQueueRetain(Queue); } - -pi_result piQueueRelease(pi_queue Queue) { - return pi2ur::piQueueRelease(Queue); -} - -pi_result piQueueFinish(pi_queue Queue) { return pi2ur::piQueueFinish(Queue); } - -pi_result piQueueFlush(pi_queue Queue) { return pi2ur::piQueueFlush(Queue); } - -pi_result piextQueueGetNativeHandle(pi_queue Queue, - pi_native_handle *NativeHandle, - int32_t *NativeHandleDesc) { - return pi2ur::piextQueueGetNativeHandle(Queue, NativeHandle, - NativeHandleDesc); -} - -pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, - int32_t NativeHandleDesc, - pi_context Context, pi_device Device, - bool OwnNativeHandle, - pi_queue_properties *Properties, - pi_queue *Queue) { - return pi2ur::piextQueueCreateWithNativeHandle( - NativeHandle, NativeHandleDesc, Context, Device, OwnNativeHandle, - Properties, Queue); -} - -pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, - void *HostPtr, pi_mem *RetMem, - const pi_mem_properties *properties) { - return pi2ur::piMemBufferCreate(Context, Flags, Size, HostPtr, RetMem, - properties); -} - -pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piMemGetInfo(Mem, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piMemRetain(pi_mem Mem) { return pi2ur::piMemRetain(Mem); } - -pi_result piMemRelease(pi_mem Mem) { return pi2ur::piMemRelease(Mem); } - -pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, - const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, void *HostPtr, - pi_mem *RetImage) { - return pi2ur::piMemImageCreate(Context, Flags, ImageFormat, ImageDesc, - HostPtr, RetImage); -} - -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, - pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); -} - -pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool ownNativeHandle, pi_mem *Mem) { - return pi2ur::piextMemCreateWithNativeHandle(NativeHandle, Context, - ownNativeHandle, Mem); -} - -pi_result piProgramCreate(pi_context Context, const void *ILBytes, - size_t Length, pi_program *Program) { - return pi2ur::piProgramCreate(Context, ILBytes, Length, Program); -} - -pi_result piProgramCreateWithBinary( - pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, - const size_t *Lengths, const unsigned char **Binaries, - size_t NumMetadataEntries, const pi_device_binary_property *Metadata, - pi_int32 *BinaryStatus, pi_program *Program) { - return pi2ur::piProgramCreateWithBinary(Context, NumDevices, DeviceList, - Lengths, Binaries, NumMetadataEntries, - Metadata, BinaryStatus, Program); -} - -pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *Img) { - return pi2ur::piextMemImageCreateWithNativeHandle( - NativeHandle, Context, OwnNativeHandle, ImageFormat, ImageDesc, Img); -} - -pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piProgramGetInfo(Program, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - pi_uint32 NumInputPrograms, - const pi_program *InputPrograms, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData, pi_program *RetProgram) { - return pi2ur::piProgramLink(Context, NumDevices, DeviceList, Options, - NumInputPrograms, InputPrograms, PFnNotify, - UserData, RetProgram); -} - -pi_result piProgramCompile( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, pi_uint32 NumInputHeaders, - const pi_program *InputHeaders, const char **HeaderIncludeNames, - void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { - return pi2ur::piProgramCompile(Program, NumDevices, DeviceList, Options, - NumInputHeaders, InputHeaders, - HeaderIncludeNames, PFnNotify, UserData); -} - -pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData) { - return pi2ur::piProgramBuild(Program, NumDevices, DeviceList, Options, - PFnNotify, UserData); -} - -pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, - pi_program_build_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piProgramGetBuildInfo(Program, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piProgramRetain(pi_program Program) { - return pi2ur::piProgramRetain(Program); -} - -pi_result piProgramRelease(pi_program Program) { - return pi2ur::piProgramRelease(Program); -} - -pi_result piextProgramGetNativeHandle(pi_program Program, - pi_native_handle *NativeHandle) { - return pi2ur::piextProgramGetNativeHandle(Program, NativeHandle); -} - -pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_program *Program) { - return pi2ur::piextProgramCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Program); -} - -pi_result piKernelCreate(pi_program Program, const char *KernelName, - pi_kernel *RetKernel) { - return pi2ur::piKernelCreate(Program, KernelName, RetKernel); -} - -pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, - const void *ArgValue) { - return pi2ur::piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue); -} - -pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem_obj_property *ArgProperties, - const pi_mem *ArgValue) { - return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, - ArgValue); -} - -pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_sampler *ArgValue) { - return pi2ur::piextKernelSetArgSampler(Kernel, ArgIndex, ArgValue); -} - -pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetInfo(Kernel, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piextMemImageAllocate(pi_context Context, pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, - RetMem); -} - -pi_result piextMemUnsampledImageCreate(pi_context Context, pi_device Device, - pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_handle *RetHandle) { - return pi2ur::piextMemUnsampledImageCreate(Context, Device, ImgMem, - ImageFormat, ImageDesc, RetHandle); -} - -pi_result piextMemSampledImageCreate(pi_context Context, pi_device Device, - pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_sampler Sampler, - pi_image_handle *RetHandle) { - return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, - ImageDesc, Sampler, RetHandle); -} - -pi_result piextBindlessImageSamplerCreate( - pi_context Context, const pi_sampler_properties *SamplerProperties, - float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, - pi_sampler *RetSampler) { - return pi2ur::piextBindlessImageSamplerCreate( - Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, - MaxAnisotropy, RetSampler); -} - -pi_result piextMemMipmapGetLevel(pi_context Context, pi_device Device, - pi_image_mem_handle MipMem, unsigned int Level, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); -} - -pi_result piextMemImageFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); -} - -pi_result piextMemMipmapFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); -} - -pi_result piextMemImageCopy( - pi_queue Queue, void *DstPtr, const void *SrcPtr, - const pi_image_desc *SrcImageDesc, const pi_image_desc *DestImageDesc, - const pi_image_format *SrcImageFormat, - const pi_image_format *DestImageFormat, const pi_image_copy_flags Flags, - pi_image_offset SrcOffset, pi_image_offset DstOffset, - pi_image_region CopyExtent, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextMemImageCopy( - Queue, DstPtr, SrcPtr, SrcImageDesc, DestImageDesc, SrcImageFormat, - DestImageFormat, Flags, SrcOffset, DstOffset, CopyExtent, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piextMemUnsampledImageHandleDestroy(pi_context Context, - pi_device Device, - pi_image_handle Handle) { - return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); -} - -pi_result piextMemSampledImageHandleDestroy(pi_context Context, - pi_device Device, - pi_image_handle Handle) { - return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_context Context, - pi_image_mem_handle MemHandle, - pi_image_info ParamName, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextMemImageGetInfo(Context, MemHandle, ParamName, ParamValue, - ParamValueSizeRet); -} - -pi_result piextImportExternalMemory(pi_context Context, pi_device Device, - pi_external_mem_descriptor *MemDesc, - pi_interop_mem_handle *RetHandle) { - return pi2ur::piextImportExternalMemory(Context, Device, MemDesc, RetHandle); -} - -pi_result piextMemMapExternalArray(pi_context Context, pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_interop_mem_handle MemHandle, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, - ImageDesc, MemHandle, RetMem); -} - -pi_result piextMemReleaseInterop(pi_context Context, pi_device Device, - pi_interop_mem_handle ExtMem) { - return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); -} - -pi_result -piextImportExternalSemaphore(pi_context Context, pi_device Device, - pi_external_semaphore_descriptor *SemDesc, - pi_interop_semaphore_handle *RetHandle) { - return pi2ur::piextImportExternalSemaphore(Context, Device, SemDesc, - RetHandle); -} - -pi_result piextReleaseExternalSemaphore(pi_context Context, pi_device Device, - pi_interop_semaphore_handle SemHandle) { - return pi2ur::piextReleaseExternalSemaphore(Context, Device, SemHandle); -} - -__SYCL_EXPORT pi_result piextWaitExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasWaitValue, - pi_uint64 WaitValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextWaitExternalSemaphore(Queue, SemHandle, HasWaitValue, - WaitValue, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextSignalExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasSignalValue, - pi_uint64 SignalValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextSignalExternalSemaphore(Queue, SemHandle, HasSignalValue, - SignalValue, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_group_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetGroupInfo(Kernel, Device, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_sub_group_info ParamName, - size_t InputValueSize, const void *InputValue, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetSubGroupInfo( - Kernel, Device, ParamName, InputValueSize, InputValue, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piKernelRetain(pi_kernel Kernel) { - return pi2ur::piKernelRetain(Kernel); -} - -pi_result piKernelRelease(pi_kernel Kernel) { - return pi2ur::piKernelRelease(Kernel); -} - -pi_result -piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piEnqueueKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextEnqueueCooperativeKernelLaunch( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piextEnqueueCooperativeKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextEnqueueKernelLaunchCustom( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumPropsInLaunchPropList, - const pi_launch_property *LaunchPropList, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - (void)Queue; - (void)Kernel; - (void)WorkDim; - (void)GlobalWorkSize; - (void)LocalWorkSize; - (void)NumPropsInLaunchPropList; - (void)LaunchPropList; - (void)NumEventsInWaitList; - (void)EventsWaitList; - (void)OutEvent; - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - return PI_ERROR_UNSUPPORTED_FEATURE; -} - -pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - pi_program Program, - bool OwnNativeHandle, - pi_kernel *Kernel) { - return pi2ur::piextKernelCreateWithNativeHandle( - NativeHandle, Context, Program, OwnNativeHandle, Kernel); -} - -pi_result piextKernelGetNativeHandle(pi_kernel Kernel, - pi_native_handle *NativeHandle) { - return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle); -} - -pi_result piextKernelSuggestMaxCooperativeGroupCount( - pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize, - pi_uint32 *GroupCountRet) { - return pi2ur::piextKernelSuggestMaxCooperativeGroupCount( - Kernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet); -} - -pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - return pi2ur::piEventCreate(Context, RetEvent); -} - -pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piEventGetInfo(Event, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piEventGetProfilingInfo(Event, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { - return pi2ur::piEventsWait(NumEvents, EventList); -} - -pi_result piEventSetCallback(pi_event Event, pi_int32 CommandExecCallbackType, - void (*PFnNotify)(pi_event Event, - pi_int32 EventCommandStatus, - void *UserData), - void *UserData) { - return pi2ur::piEventSetCallback(Event, CommandExecCallbackType, PFnNotify, - UserData); -} - -pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { - return pi2ur::piEventSetStatus(Event, ExecutionStatus); -} - -pi_result piEventRetain(pi_event Event) { return pi2ur::piEventRetain(Event); } - -pi_result piEventRelease(pi_event Event) { - return pi2ur::piEventRelease(Event); -} - -pi_result piextEventGetNativeHandle(pi_event Event, - pi_native_handle *NativeHandle) { - return pi2ur::piextEventGetNativeHandle(Event, NativeHandle); -} - -pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_event *Event) { - return pi2ur::piextEventCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Event); -} - -pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueTimestampRecordingExp( - Queue, Blocking, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piSamplerCreate(pi_context Context, - const pi_sampler_properties *SamplerProperties, - pi_sampler *RetSampler) { - return pi2ur::piSamplerCreate(Context, SamplerProperties, RetSampler); -} - -pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piSamplerGetInfo(Sampler, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piSamplerRetain(pi_sampler Sampler) { - return pi2ur::piSamplerRetain(Sampler); -} - -pi_result piSamplerRelease(pi_sampler Sampler) { - return pi2ur::piSamplerRelease(Sampler); -} - -pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - return pi2ur::piEnqueueEventsWait(Queue, NumEventsInWaitList, EventWaitList, - OutEvent); -} - -pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - return pi2ur::piEnqueueEventsWaitWithBarrier(Queue, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, - pi_bool BlockingRead, size_t Offset, - size_t Size, void *Dst, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemBufferRead(Queue, Src, BlockingRead, Offset, Size, - Dst, NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piEnqueueMemBufferReadRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemBufferReadRect( - Queue, Buffer, BlockingRead, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, - pi_bool BlockingWrite, size_t Offset, - size_t Size, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemBufferWrite(Queue, Buffer, BlockingWrite, Offset, - Size, Ptr, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferWriteRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemBufferWriteRect( - Queue, Buffer, BlockingWrite, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemBufferCopy(Queue, SrcMem, DstMem, SrcOffset, - DstOffset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferCopyRect( - pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, - pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, - size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopyRect( - Queue, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemBufferFill(Queue, Buffer, Pattern, PatternSize, - Offset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, - pi_map_flags MapFlags, size_t Offset, - size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent, void **RetMap) { - return pi2ur::piEnqueueMemBufferMap(Queue, Mem, BlockingMap, MapFlags, Offset, - Size, NumEventsInWaitList, EventWaitList, - OutEvent, RetMap); -} - -pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - return pi2ur::piEnqueueMemUnmap(Queue, Mem, MappedPtr, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piMemImageGetInfo(Image, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, - pi_bool BlockingRead, pi_image_offset Origin, - pi_image_region Region, size_t RowPitch, - size_t SlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemImageRead( - Queue, Image, BlockingRead, Origin, Region, RowPitch, SlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, - pi_bool BlockingWrite, pi_image_offset Origin, - pi_image_region Region, size_t InputRowPitch, - size_t InputSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemImageWrite( - Queue, Image, BlockingWrite, Origin, Region, InputRowPitch, - InputSlicePitch, Ptr, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result -piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, - pi_image_offset SrcOrigin, pi_image_offset DstOrigin, - pi_image_region Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piEnqueueMemImageCopy(Queue, SrcImage, DstImage, SrcOrigin, - DstOrigin, Region, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, - const void *FillColor, const size_t *Origin, - const size_t *Region, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemImageFill(Queue, Image, FillColor, Origin, Region, - NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, - pi_buffer_create_type BufferCreateType, - void *BufferCreateInfo, pi_mem *RetMem) { - return pi2ur::piMemBufferPartition(Buffer, Flags, BufferCreateType, - BufferCreateInfo, RetMem); -} - -pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, - const char *FunctionName, - pi_uint64 *FunctionPointerRet) { - return pi2ur::piextGetDeviceFunctionPointer(Device, Program, FunctionName, - FunctionPointerRet); -} - -pi_result piextGetGlobalVariablePointer(pi_device Device, pi_program Program, - const char *GlobalVariableName, - size_t *GlobalVariableSize, - void **GlobalVariablePointerRet) { - return pi2ur::piextGetGlobalVariablePointer( - Device, Program, GlobalVariableName, GlobalVariableSize, - GlobalVariablePointerRet); -} - -pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - return pi2ur::piextUSMDeviceAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - return pi2ur::piextUSMSharedAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -pi_result piextUSMPitchedAlloc(void **ResultPtr, size_t *ResultPitch, - pi_context Context, pi_device Device, - pi_usm_mem_properties *Properties, - size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, - Properties, WidthInBytes, Height, - ElementSizeBytes); -} - -pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, - pi_usm_mem_properties *Properties, size_t Size, - pi_uint32 Alignment) { - return pi2ur::piextUSMHostAlloc(ResultPtr, Context, Properties, Size, - Alignment); -} - -pi_result piextUSMFree(pi_context Context, void *Ptr) { - return pi2ur::piextUSMFree(Context, Ptr); -} - -pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); -} - -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); -} - -pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, - const void *SrcPtr, size_t Size, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemcpy(Queue, Blocking, DstPtr, SrcPtr, Size, - NumEventsInWaitlist, EventsWaitlist, - Event); -} - -pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - return pi2ur::piextUSMEnqueuePrefetch( - Queue, Ptr, Size, Flags, NumEventsInWaitList, EventWaitList, OutEvent); -} - -pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, - size_t Length, pi_mem_advice Advice, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueueMemAdvise(Queue, Ptr, Length, Advice, OutEvent); -} - -pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, size_t Pitch, - size_t PatternSize, const void *Pattern, - size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - return pi2ur::piextUSMEnqueueFill2D(Queue, Ptr, Pitch, PatternSize, Pattern, - Width, Height, NumEventsWaitList, - EventsWaitList, Event); -} - -pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, size_t Pitch, - int Value, size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset2D(Queue, Ptr, Pitch, Value, Width, Height, - NumEventsWaitList, EventsWaitlist, - Event); -} - -pi_result piextUSMEnqueueMemcpy2D(pi_queue Queue, pi_bool Blocking, - void *DstPtr, size_t DstPitch, - const void *SrcPtr, size_t SrcPitch, - size_t Width, size_t Height, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemcpy2D( - Queue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, - NumEventsInWaitlist, EventWaitlist, Event); -} - -pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, - pi_mem_alloc_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextUSMGetMemAllocInfo(Context, Ptr, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -pi_result piextUSMImport(const void *HostPtr, size_t Size, pi_context Context) { - return pi2ur::piextUSMImport(HostPtr, Size, Context); -} - -pi_result piextUSMRelease(const void *HostPtr, pi_context Context) { - return pi2ur::piextUSMRelease(HostPtr, Context); -} - -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, - size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - return pi2ur::piextEnqueueDeviceGlobalVariableWrite( - Queue, Program, Name, BlockingWrite, Count, Offset, Src, - NumEventsInWaitList, EventsWaitList, Event); -} - -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, - size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - - return pi2ur::piextEnqueueDeviceGlobalVariableRead( - Queue, Program, Name, BlockingRead, Count, Offset, Dst, - NumEventsInWaitList, EventsWaitList, Event); - - return PI_SUCCESS; -} - -pi_result piextEnqueueReadHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - return pi2ur::piextEnqueueReadHostPipe(Queue, Program, PipeSymbol, Blocking, - Ptr, Size, NumEventsInWaitList, - EventsWaitList, Event); -} - -pi_result piextEnqueueWriteHostPipe(pi_queue Queue, pi_program Program, - const char *PipeSymbol, pi_bool Blocking, - void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - return pi2ur::piextEnqueueWriteHostPipe(Queue, Program, PipeSymbol, Blocking, - Ptr, Size, NumEventsInWaitList, - EventsWaitList, Event); -} - -pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName, - size_t ParamValueSize, const void *ParamValue) { - - return pi2ur::piKernelSetExecInfo(Kernel, ParamName, ParamValueSize, - ParamValue); -} - -pi_result piextProgramSetSpecializationConstant(pi_program Prog, - pi_uint32 SpecID, size_t Size, - const void *SpecValue) { - return pi2ur::piextProgramSetSpecializationConstant(Prog, SpecID, Size, - SpecValue); -} - -pi_result piextCommandBufferCreate(pi_context Context, pi_device Device, - const pi_ext_command_buffer_desc *Desc, - pi_ext_command_buffer *RetCommandBuffer) { - return pi2ur::piextCommandBufferCreate(Context, Device, Desc, - RetCommandBuffer); -} - -pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRetain(CommandBuffer); -} - -pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRelease(CommandBuffer); -} - -pi_result piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferFinalize(CommandBuffer); -} - -pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint, - pi_ext_command_buffer_command *Command) { - return pi2ur::piextCommandBufferNDRangeKernel( - CommandBuffer, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint, - Command); -} - -pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, - size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemcpyUSM(CommandBuffer, DstPtr, SrcPtr, Size, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopy( - CommandBuffer, SrcMem, DstMem, SrcOffset, DstOffset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, - pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopyRect( - CommandBuffer, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferRead( - CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferReadRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWrite( - CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWriteRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferFill( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, - size_t PatternSize, size_t Offset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferFill( - CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, - pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferFillUSM( - CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferPrefetchUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferPrefetchUSM(CommandBuffer, Ptr, Size, Flags, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferAdviseUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Length, - pi_mem_advice Advice, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferAdviseUSM(CommandBuffer, Ptr, Length, Advice, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, - pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piextEnqueueCommandBuffer( - CommandBuffer, Queue, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command Command, - pi_ext_command_buffer_update_kernel_launch_desc *Desc) { - return pi2ur::piextCommandBufferUpdateKernelLaunch(Command, Desc); -} - -pi_result -piextCommandBufferRetainCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferRetainCommand(Command); -} - -pi_result -piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferReleaseCommand(Command); -} - -pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - return pi2ur::piextPluginGetOpaqueData(opaque_data_param, opaque_data_return); -} - -pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime); -} - -pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - return pi2ur::piextEnablePeerAccess(command_device, peer_device); -} - -pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - return pi2ur::piextDisablePeerAccess(command_device, peer_device); -} - -pi_result piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result -piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, - pi_virtual_mem_granularity_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, - size_t MemSize, - pi_physical_mem *RetPhysicalMem) { - return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, - RetPhysicalMem); -} - -pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { - return pi2ur::piextPhysicalMemRetain(PhysicalMem); -} - -pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { - return pi2ur::piextPhysicalMemRelease(PhysicalMem); -} - -pi_result piextVirtualMemReserve(pi_context Context, const void *Start, - size_t RangeSize, void **RetPtr) { - return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); -} - -pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, - size_t RangeSize) { - return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); -} - -pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, - size_t RangeSize, pi_physical_mem PhysicalMem, - size_t Offset, pi_virtual_access_flags Flags) { - return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, - Flags); -} - -pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, - size_t RangeSize) { - return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); -} - -pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, - size_t RangeSize, - pi_virtual_access_flags Flags) { - return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); -} - -pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, - size_t RangeSize, - pi_virtual_mem_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -pi_result -piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, - void *Data, pi_uint32 NumMems, const pi_mem *Mems, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, - NumEventsInWaitList, EventWaitList, - Event); -} - -pi_result piTearDown(void *PluginParameter) { - return pi2ur::piTearDown(PluginParameter); -} - -pi_result piPluginInit(pi_plugin *PluginInit) { - // Check that the major version matches in PiVersion and SupportedVersion - _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); - - // PI interface supports higher version or the same version. - size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); - if (strlen(SupportedVersion) >= PluginVersionSize) - return PI_ERROR_INVALID_VALUE; - strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); - -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); -#include -#undef _PI_API - - return PI_SUCCESS; -} - -#ifdef _WIN32 -#define __SYCL_PLUGIN_DLL_NAME "pi_opencl.dll" -#include "../common_win_pi_trace/common_win_pi_trace.hpp" -#undef __SYCL_PLUGIN_DLL_NAME -#endif - -} // end extern 'C' diff --git a/sycl/plugins/opencl/pi_opencl.hpp b/sycl/plugins/opencl/pi_opencl.hpp deleted file mode 100644 index 6894d5cb65d30..0000000000000 --- a/sycl/plugins/opencl/pi_opencl.hpp +++ /dev/null @@ -1,34 +0,0 @@ -//==---------- pi_opencl.hpp - OpenCL Plugin -------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \defgroup sycl_pi_ocl OpenCL Plugin -/// \ingroup sycl_pi - -/// \file pi_opencl.hpp -/// Declarations for vOpenCL Plugin. It is the interface between device-agnostic -/// SYCL runtime layer and underlying OpenCL runtime. -/// -/// \ingroup sycl_pi_ocl - -#ifndef PI_OPENCL_HPP -#define PI_OPENCL_HPP - -#include -#include -#include - -// Share code between the PI Plugin and UR Adapter -#include - -// This version should be incremented for any change made to this file or its -// corresponding .cpp file. -#define _PI_OPENCL_PLUGIN_VERSION 1 - -#define _PI_OPENCL_PLUGIN_VERSION_STRING \ - _PI_PLUGIN_VERSION_STRING(_PI_OPENCL_PLUGIN_VERSION) - -#endif // PI_OPENCL_HPP diff --git a/sycl/plugins/unified_runtime/README.md b/sycl/plugins/unified_runtime/README.md deleted file mode 100644 index ecb68af22a934..0000000000000 --- a/sycl/plugins/unified_runtime/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Unified Runtime - -This directory contains the implementation of the PI plugin for Unified Runtime, -including the pi2ur translation layer, as well as the sources for the individual -Unified Runtime adapters. - -## Making changes to Unified Runtime -If you introduce changes to PI (e.g. new entry points, new enum values) you -should introduce matching changes to the Unified Runtime spec. - -To do this, open a Pull Request adding the changes to the -[Unified Runtime](https://github.com/oneapi-src/unified-runtime) -repository, making sure to follow the -[Contribution Guide](https://oneapi-src.github.io/unified-runtime/core/CONTRIB.html). - -When your changes to Unified Runtime are merged, you should: -* Update the UR commit used by changing the `UNIFIED_RUNTIME_TAG` value in - [`CMakeLists.txt`](CMakeLists.txt) -* Make changes to [`pi2ur.hpp`](pi2ur.hpp) to ensure correct mapping from PI to - UR -* Make changes to the affected adapter implementations in the -[`ur/adapters`](ur/adapters) folder diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp deleted file mode 100644 index f4798431c2581..0000000000000 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ /dev/null @@ -1,5901 +0,0 @@ -//===---------------- pi2ur.hpp - PI API to UR API ------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include "ur_api.h" -#include -#include -#include -#include -#include - -// Map of UR error codes to PI error codes -static pi_result ur2piResult(ur_result_t urResult) { - if (urResult == UR_RESULT_SUCCESS) - return PI_SUCCESS; - - switch (urResult) { - case UR_RESULT_ERROR_INVALID_OPERATION: - return PI_ERROR_INVALID_OPERATION; - case UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES: - return PI_ERROR_INVALID_QUEUE_PROPERTIES; - case UR_RESULT_ERROR_INVALID_QUEUE: - return PI_ERROR_INVALID_QUEUE; - case UR_RESULT_ERROR_INVALID_VALUE: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_CONTEXT: - return PI_ERROR_INVALID_CONTEXT; - case UR_RESULT_ERROR_INVALID_PLATFORM: - return PI_ERROR_INVALID_PLATFORM; - case UR_RESULT_ERROR_INVALID_BINARY: - return PI_ERROR_INVALID_BINARY; - case UR_RESULT_ERROR_INVALID_PROGRAM: - return PI_ERROR_INVALID_PROGRAM; - case UR_RESULT_ERROR_INVALID_SAMPLER: - return PI_ERROR_INVALID_SAMPLER; - case UR_RESULT_ERROR_INVALID_MEM_OBJECT: - return PI_ERROR_INVALID_MEM_OBJECT; - case UR_RESULT_ERROR_INVALID_EVENT: - return PI_ERROR_INVALID_EVENT; - case UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: - return PI_ERROR_INVALID_EVENT_WAIT_LIST; - case UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET: - return PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET; - case UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE: - return PI_ERROR_INVALID_WORK_GROUP_SIZE; - case UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE: - return PI_ERROR_COMPILER_NOT_AVAILABLE; - case UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: - return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE; - case UR_RESULT_ERROR_DEVICE_NOT_FOUND: - return PI_ERROR_DEVICE_NOT_FOUND; - case UR_RESULT_ERROR_INVALID_DEVICE: - return PI_ERROR_INVALID_DEVICE; - case UR_RESULT_ERROR_DEVICE_REQUIRES_RESET: - case UR_RESULT_ERROR_DEVICE_LOST: - case UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE: - return PI_ERROR_DEVICE_NOT_AVAILABLE; - case UR_RESULT_ERROR_DEVICE_PARTITION_FAILED: - return PI_ERROR_DEVICE_PARTITION_FAILED; - case UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT: - return PI_ERROR_INVALID_DEVICE_PARTITION_COUNT; - case UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE: - return PI_ERROR_INVALID_WORK_ITEM_SIZE; - case UR_RESULT_ERROR_INVALID_WORK_DIMENSION: - return PI_ERROR_INVALID_WORK_DIMENSION; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGS: - return PI_ERROR_INVALID_KERNEL_ARGS; - case UR_RESULT_ERROR_INVALID_KERNEL: - return PI_ERROR_INVALID_KERNEL; - case UR_RESULT_ERROR_INVALID_KERNEL_NAME: - return PI_ERROR_INVALID_KERNEL_NAME; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX: - return PI_ERROR_INVALID_ARG_INDEX; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE: - return PI_ERROR_INVALID_ARG_SIZE; - case UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_IMAGE_SIZE: - return PI_ERROR_INVALID_IMAGE_SIZE; - case UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return PI_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; - case UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE: - return PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE; - case UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE: - return PI_ERROR_INVALID_PROGRAM_EXECUTABLE; - case UR_RESULT_ERROR_UNINITIALIZED: - return PI_ERROR_UNINITIALIZED; - case UR_RESULT_ERROR_OUT_OF_HOST_MEMORY: - return PI_ERROR_OUT_OF_HOST_MEMORY; - case UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY: - case UR_RESULT_ERROR_OUT_OF_RESOURCES: - return PI_ERROR_OUT_OF_RESOURCES; - case UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE: - return PI_ERROR_BUILD_PROGRAM_FAILURE; - case UR_RESULT_ERROR_PROGRAM_LINK_FAILURE: - return PI_ERROR_LINK_PROGRAM_FAILURE; - case UR_RESULT_ERROR_UNSUPPORTED_VERSION: - return PI_ERROR_INVALID_OPERATION; - case UR_RESULT_ERROR_UNSUPPORTED_FEATURE: - return PI_ERROR_UNSUPPORTED_FEATURE; - case UR_RESULT_ERROR_INVALID_ARGUMENT: - case UR_RESULT_ERROR_INVALID_NULL_HANDLE: - case UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE: - case UR_RESULT_ERROR_INVALID_NULL_POINTER: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_SIZE: - case UR_RESULT_ERROR_UNSUPPORTED_SIZE: - return PI_ERROR_INVALID_BUFFER_SIZE; - case UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT: - case UR_RESULT_ERROR_INVALID_ENUMERATION: - case UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT: - return PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; - case UR_RESULT_ERROR_INVALID_NATIVE_BINARY: - return PI_ERROR_INVALID_BINARY; - case UR_RESULT_ERROR_INVALID_GLOBAL_NAME: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_FUNCTION_ADDRESS_NOT_AVAILABLE: - return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE; - case UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION: - return PI_ERROR_INVALID_WORK_DIMENSION; - case UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_PROGRAM_UNLINKED: - return PI_ERROR_INVALID_PROGRAM_EXECUTABLE; - case UR_RESULT_ERROR_OVERLAPPING_REGIONS: - return PI_ERROR_MEM_COPY_OVERLAP; - case UR_RESULT_ERROR_INVALID_HOST_PTR: - return PI_ERROR_INVALID_HOST_PTR; - case UR_RESULT_ERROR_INVALID_USM_SIZE: - return PI_ERROR_INVALID_BUFFER_SIZE; - case UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE: - return PI_ERROR_OUT_OF_RESOURCES; - case UR_RESULT_ERROR_ADAPTER_SPECIFIC: - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; - case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP: - return PI_ERROR_INVALID_COMMAND_BUFFER_KHR; - case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: - return PI_ERROR_INVALID_SYNC_POINT_WAIT_LIST_KHR; - case UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: - return PI_ERROR_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; - case UR_RESULT_ERROR_UNKNOWN: - default: - return PI_ERROR_UNKNOWN; - }; -} - -// Helper for one-liner validation -#define PI_ASSERT(condition, error) \ - if (!(condition)) \ - return error; - -// Early exits on any error -#define HANDLE_ERRORS(urCall) \ - if (auto Result = urCall) \ - return ur2piResult(Result); - -// A version of return helper that returns pi_result and not ur_result_t -class ReturnHelper : public UrReturnHelper { -public: - using UrReturnHelper::UrReturnHelper; - - template pi_result operator()(const T &t) { - return ur2piResult(UrReturnHelper::operator()(t)); - } - // Array return value - template pi_result operator()(const T *t, size_t s) { - return ur2piResult(UrReturnHelper::operator()(t, s)); - } - // Array return value where element type is different from T - template pi_result operator()(const T *t, size_t s) { - return ur2piResult(UrReturnHelper::operator()(t, s)); - } -}; - -// A version of return helper that supports conversion through a map -class ConvertHelper : public ReturnHelper { - using ReturnHelper::ReturnHelper; - -public: - // Convert the value using a conversion map - template - pi_result convert(std::function Func) { - *param_value_size_ret = sizeof(TypePI); - - // There is no value to convert. - if (!param_value) - return PI_SUCCESS; - - auto pValueUR = static_cast(param_value); - auto pValuePI = static_cast(param_value); - - // Cannot convert to a smaller storage type - PI_ASSERT(sizeof(TypePI) >= sizeof(TypeUR), PI_ERROR_UNKNOWN); - - *pValuePI = Func(*pValueUR); - return PI_SUCCESS; - } - - // Convert the array using a conversion map - template - pi_result convertArray(std::function Func) { - // Cannot convert to a smaller element storage type - PI_ASSERT(sizeof(TypePI) >= sizeof(TypeUR), PI_ERROR_UNKNOWN); - - const uint32_t NumberElements = - *param_value_size_ret / sizeof(ur_device_partition_t); - - *param_value_size_ret *= sizeof(TypePI) / sizeof(TypeUR); - - // There is no value to convert. Adjust to a possibly bigger PI storage. - if (!param_value) - return PI_SUCCESS; - - PI_ASSERT(*param_value_size_ret % sizeof(TypePI) == 0, PI_ERROR_UNKNOWN); - - // Make a copy of the input UR array as we may possibly overwrite - // following elements while converting previous ones (if extending). - auto ValueUR = new char[*param_value_size_ret]; - auto pValueUR = reinterpret_cast(ValueUR); - auto pValuePI = static_cast(param_value); - memcpy(pValueUR, param_value, *param_value_size_ret); - - for (uint32_t I = 0; I < NumberElements; ++I) { - *pValuePI = Func(*pValueUR); - ++pValuePI; - ++pValueUR; - } - - delete[] ValueUR; - return PI_SUCCESS; - } - - // Convert the bitset using a conversion map - template - pi_result convertBitSet(std::function Func) { - // There is no value to convert. - if (!param_value) - return PI_SUCCESS; - - auto pValuePI = static_cast(param_value); - auto pValueUR = static_cast(param_value); - - // Cannot handle biteset large than size_t - PI_ASSERT(sizeof(TypeUR) <= sizeof(size_t), PI_ERROR_UNKNOWN); - size_t In = *pValueUR; - TypePI Out = 0; - - size_t Val; - while ((Val = In & -In)) { // Val is the rightmost set bit in In - In &= In - 1; // Reset the rightmost set bit - - // Convert the Val alone and merge it into Out - *pValueUR = TypeUR(Val); - if (auto Res = convert(Func)) - return Res; - Out |= *pValuePI; - } - *pValuePI = TypePI(Out); - return PI_SUCCESS; - } -}; - -// Handle mismatched PI and UR type return sizes for info queries -inline void fixupInfoValueTypes(size_t ParamValueSizeRetUR, - size_t *ParamValueSizeRetPI, - size_t ParamValueSize, void *ParamValue) { - if (ParamValueSizeRetUR == 1 && ParamValueSize == 4) { - // extend bool to pi_bool (uint32_t) - if (ParamValue) { - auto *ValIn = static_cast(ParamValue); - auto *ValOut = static_cast(ParamValue); - *ValOut = static_cast(*ValIn); - } - if (ParamValueSizeRetPI) { - *ParamValueSizeRetPI = sizeof(pi_bool); - } - } -} - -// Translate UR platform info values to PI info values -inline pi_result ur2piPlatformInfoValue(ur_platform_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - - switch (ParamName) { - case UR_PLATFORM_INFO_EXTENSIONS: - case UR_PLATFORM_INFO_NAME: - case UR_PLATFORM_INFO_PROFILE: - case UR_PLATFORM_INFO_VENDOR_NAME: - case UR_PLATFORM_INFO_VERSION: - // These ones do not need ur2pi translations - break; - case UR_PLATFORM_INFO_BACKEND: { - auto ConvertFunc = [](ur_platform_backend_t UrValue) { - switch (UrValue) { - case UR_PLATFORM_BACKEND_UNKNOWN: - return PI_EXT_PLATFORM_BACKEND_UNKNOWN; - case UR_PLATFORM_BACKEND_LEVEL_ZERO: - return PI_EXT_PLATFORM_BACKEND_LEVEL_ZERO; - case UR_PLATFORM_BACKEND_OPENCL: - return PI_EXT_PLATFORM_BACKEND_OPENCL; - case UR_PLATFORM_BACKEND_CUDA: - return PI_EXT_PLATFORM_BACKEND_CUDA; - case UR_PLATFORM_BACKEND_HIP: - return PI_EXT_PLATFORM_BACKEND_HIP; - case UR_PLATFORM_BACKEND_NATIVE_CPU: - return PI_EXT_PLATFORM_BACKEND_NATIVE_CPU; - default: - die("UR_PLATFORM_INFO_BACKEND: unhandled value"); - } - }; - return Value.convert( - ConvertFunc); - } - default: - return PI_ERROR_UNKNOWN; - } - - if (ParamValueSizePI && ParamValueSizePI != *ParamValueSizeUR) { - fprintf(stderr, "UR PlatformInfoType=%d PI=%d but UR=%d\n", ParamName, - (int)ParamValueSizePI, (int)*ParamValueSizeUR); - die("ur2piPlatformInfoValue: size mismatch"); - } - return PI_SUCCESS; -} - -/** - * Translate UR device info values to PI info values - * @param ParamName The name of the parameter - * @param ParamValueSize[in] The size of ParamValue passed to the PI plugin. - * @param ParamValue[in, out] Input: The ParamValue returned by the UR adapter. - * Output: The UR output converted to PI. - * @param ParamValueSizeRet[in, out] Input: The value of ParamValueSizeRet that - * UR returned. Output: The value of ParamValueSizeRet after conversion. - */ -inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - /* Helper function to perform conversions in-place */ - ConvertHelper Value(ParamValueSize, ParamValue, ParamValueSizeRet); - - pi_result Error = PI_SUCCESS; - if (ParamName == UR_DEVICE_INFO_TYPE) { - auto ConvertFunc = [](ur_device_type_t UrValue) { - switch (UrValue) { - case UR_DEVICE_TYPE_CPU: - return PI_DEVICE_TYPE_CPU; - case UR_DEVICE_TYPE_GPU: - return PI_DEVICE_TYPE_GPU; - case UR_DEVICE_TYPE_FPGA: - return PI_DEVICE_TYPE_ACC; - default: - die("UR_DEVICE_INFO_TYPE: unhandled value"); - } - }; - return Value.convert(ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_QUEUE_PROPERTIES) { - auto ConvertFunc = [](ur_queue_flag_t UrValue) { - switch (UrValue) { - case UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE: - return PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; - case UR_QUEUE_FLAG_PROFILING_ENABLE: - return PI_QUEUE_FLAG_PROFILING_ENABLE; - case UR_QUEUE_FLAG_ON_DEVICE: - return PI_QUEUE_FLAG_ON_DEVICE; - case UR_QUEUE_FLAG_ON_DEVICE_DEFAULT: - return PI_QUEUE_FLAG_ON_DEVICE_DEFAULT; - case UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM: - return static_cast(__SYCL_PI_CUDA_SYNC_WITH_DEFAULT); - case UR_QUEUE_FLAG_USE_DEFAULT_STREAM: - return static_cast(__SYCL_PI_CUDA_USE_DEFAULT_STREAM); - default: - die("UR_DEVICE_INFO_QUEUE_PROPERTIES: unhandled value"); - } - }; - return Value.convertBitSet( - ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_EXECUTION_CAPABILITIES) { - auto ConvertFunc = [](ur_device_exec_capability_flag_t UrValue) { - switch (UrValue) { - case UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL: - return PI_DEVICE_EXEC_CAPABILITIES_KERNEL; - case UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL: - return PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL; - default: - die("UR_DEVICE_INFO_EXECUTION_CAPABILITIES: unhandled value"); - } - }; - return Value - .convertBitSet( - ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - auto ConvertFunc = [](ur_device_affinity_domain_flag_t UrValue) { - switch (UrValue) { - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA: - return PI_DEVICE_AFFINITY_DOMAIN_NUMA; - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE: - return PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; - default: - die("UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: unhandled value"); - } - }; - return Value.convertBitSet(ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_PARTITION_TYPE) { - - auto ConvertFunc = [](ur_device_partition_t UrValue) { - switch (static_cast(UrValue)) { - case UR_DEVICE_PARTITION_EQUALLY: - return PI_DEVICE_PARTITION_EQUALLY; - case UR_DEVICE_PARTITION_BY_COUNTS: - return PI_DEVICE_PARTITION_BY_COUNTS; - case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: - return PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - case UR_DEVICE_PARTITION_BY_CSLICE: - return PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE; - default: - die("UR_DEVICE_INFO_PARTITION_TYPE: unhandled value"); - } - }; - - /* - * This property returns the argument specified in piCreateSubDevices. - * Each partition name is immediately followed by a value. The list is - * terminated with 0. In the case where the properties argument to - * piCreateSubDevices is [PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - * PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE], the affinity domain used - * to perform the partition will be returned. */ - - PI_ASSERT(sizeof(pi_device_partition_property) == - sizeof(ur_device_partition_property_t), - PI_ERROR_UNKNOWN); - - const uint32_t UrNumberElements = - *ParamValueSizeRet / sizeof(ur_device_partition_property_t); - - if (ParamValue) { - auto ParamValueCopy = - std::make_unique(UrNumberElements); - std::memcpy(ParamValueCopy.get(), ParamValue, - UrNumberElements * sizeof(ur_device_partition_property_t)); - pi_device_partition_property *pValuePI = - reinterpret_cast(ParamValue); - ur_device_partition_property_t *pValueUR = - reinterpret_cast( - ParamValueCopy.get()); - const ur_device_partition_t Type = pValueUR->type; - *pValuePI = ConvertFunc(Type); - ++pValuePI; - - for (uint32_t i = 0; i < UrNumberElements; ++i) { - switch (pValueUR->type) { - case UR_DEVICE_PARTITION_EQUALLY: { - *pValuePI = pValueUR->value.equally; - break; - } - case UR_DEVICE_PARTITION_BY_COUNTS: { - *pValuePI = pValueUR->value.count; - break; - } - case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { - *pValuePI = pValueUR->value.affinity_domain; - break; - } - case UR_DEVICE_PARTITION_BY_CSLICE: { - *pValuePI = 0; - break; - } - default: - die("UR_DEVICE_INFO_PARTITION_TYPE query returned unsupported type"); - } - ++pValuePI; - ++pValueUR; - } - *pValuePI = 0; - } - - if (ParamValueSizeRet && *ParamValueSizeRet != 0) { - /* Add 2 extra elements to the return value (one for the type at the - * beginning and another to terminate the array with a 0 */ - *ParamValueSizeRet = - (UrNumberElements + 2) * sizeof(pi_device_partition_property); - } - } - - else if (ParamName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { - auto ConvertFunc = [](ur_device_partition_t UrValue) { - switch (static_cast(UrValue)) { - case UR_DEVICE_PARTITION_EQUALLY: - return PI_DEVICE_PARTITION_EQUALLY; - case UR_DEVICE_PARTITION_BY_COUNTS: - return PI_DEVICE_PARTITION_BY_COUNTS; - case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: - return PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - case UR_DEVICE_PARTITION_BY_CSLICE: - return PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE; - default: - die("UR_DEVICE_INFO_SUPPORTED_PARTITIONS: unhandled value"); - } - }; - - Value.convertArray( - ConvertFunc); - - if (ParamValue) { - const uint32_t NumberElements = - *ParamValueSizeRet / sizeof(pi_device_partition_property); - reinterpret_cast( - ParamValue)[NumberElements] = 0; - } - - if (ParamValueSizeRet && *ParamValueSizeRet != 0) { - *ParamValueSizeRet += sizeof(pi_device_partition_property); - } - - } else if (ParamName == UR_DEVICE_INFO_LOCAL_MEM_TYPE) { - auto ConvertFunc = [](ur_device_local_mem_type_t UrValue) { - switch (UrValue) { - case UR_DEVICE_LOCAL_MEM_TYPE_LOCAL: - return PI_DEVICE_LOCAL_MEM_TYPE_LOCAL; - case UR_DEVICE_LOCAL_MEM_TYPE_GLOBAL: - return PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL; - default: - die("UR_DEVICE_INFO_LOCAL_MEM_TYPE: unhandled value"); - } - }; - return Value.convert( - ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES || - ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { - auto ConvertFunc = [](ur_memory_order_capability_flag_t UrValue) { - switch (UrValue) { - case UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED: - return PI_MEMORY_ORDER_RELAXED; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE: - return PI_MEMORY_ORDER_ACQUIRE; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE: - return PI_MEMORY_ORDER_RELEASE; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL: - return PI_MEMORY_ORDER_ACQ_REL; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST: - return PI_MEMORY_ORDER_SEQ_CST; - default: - die("UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: unhandled " - "value"); - } - }; - return Value.convertBitSet(ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES || - ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { - auto ConvertFunc = [](ur_memory_scope_capability_flag_t UrValue) { - switch (UrValue) { - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM: - return PI_MEMORY_SCOPE_WORK_ITEM; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP: - return PI_MEMORY_SCOPE_SUB_GROUP; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP: - return PI_MEMORY_SCOPE_WORK_GROUP; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE: - return PI_MEMORY_SCOPE_DEVICE; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM: - return PI_MEMORY_SCOPE_SYSTEM; - default: - die("UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: unhandled " - "value"); - } - }; - return Value.convertBitSet(ConvertFunc); - } else if (*ParamValueSizeRet == 1 && ParamValueSize == 4) { - /* PI type: pi_bool - * UR type: ur_bool_t - * Need to convert from pi_bool (4 bytes) to ur_bool_t (1 byte) - */ - fixupInfoValueTypes(*ParamValueSizeRet, ParamValueSizeRet, ParamValueSize, - ParamValue); - } else if (ParamName == UR_DEVICE_INFO_QUEUE_PROPERTIES || - ParamName == UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES || - ParamName == UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES || - ParamName == UR_DEVICE_INFO_EXECUTION_CAPABILITIES || - ParamName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN || - ParamName == UR_DEVICE_INFO_USM_HOST_SUPPORT || - ParamName == UR_DEVICE_INFO_USM_DEVICE_SUPPORT || - ParamName == UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT || - ParamName == UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT || - ParamName == UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT) { - /* PI type: pi_bitfield - * UR type: ur_flags_t (uint32_t) - * No need to convert since types are compatible - */ - *ParamValueSizeRet = sizeof(pi_bitfield); - } else if (ParamName == UR_DEVICE_INFO_SINGLE_FP_CONFIG || - ParamName == UR_DEVICE_INFO_HALF_FP_CONFIG || - ParamName == UR_DEVICE_INFO_DOUBLE_FP_CONFIG) { - /* CL type: pi_device_fp_config - * UR type: ur_device_fp_capability_flags_t - * No need to convert since types are compatible - */ - *ParamValueSizeRet = sizeof(pi_device_fp_config); - } else if (ParamName == UR_DEVICE_INFO_COMPONENT_DEVICES) { - if (ParamValueSizeRet && *ParamValueSizeRet != 0) { - const uint32_t UrNumberElements = - *ParamValueSizeRet / sizeof(ur_device_handle_t); - *ParamValueSizeRet = UrNumberElements * sizeof(pi_device); - } - } else { - - // TODO: what else needs a UR-PI translation? - } - - if (ParamValueSize && ParamValueSizeRet && - ParamValueSize != *ParamValueSizeRet) { - fprintf(stderr, "UR DeviceInfoType=%d PI=%d but UR=%d\n", ParamName, - (int)ParamValueSize, (int)*ParamValueSizeRet); - die("ur2piDeviceInfoValue: size mismatch"); - } - return Error; -} - -inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - switch (ParamName) { - case UR_SAMPLER_INFO_ADDRESSING_MODE: { - auto ConvertFunc = [](ur_sampler_addressing_mode_t UrValue) { - switch (UrValue) { - case UR_SAMPLER_ADDRESSING_MODE_CLAMP: - return PI_SAMPLER_ADDRESSING_MODE_CLAMP; - case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: - return PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; - case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT: - return PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; - case UR_SAMPLER_ADDRESSING_MODE_NONE: - return PI_SAMPLER_ADDRESSING_MODE_NONE; - case UR_SAMPLER_ADDRESSING_MODE_REPEAT: - return PI_SAMPLER_ADDRESSING_MODE_REPEAT; - - default: - die("UR_SAMPLER_ADDRESSING_MODE_TYPE: unhandled value"); - } - }; - return Value - .convert( - ConvertFunc); - } - case UR_SAMPLER_INFO_FILTER_MODE: { - auto ConvertFunc = [](ur_sampler_filter_mode_t UrValue) { - switch (UrValue) { - case UR_SAMPLER_FILTER_MODE_LINEAR: - return PI_SAMPLER_FILTER_MODE_LINEAR; - case UR_SAMPLER_FILTER_MODE_NEAREST: - return PI_SAMPLER_FILTER_MODE_NEAREST; - default: - die("UR_SAMPLER_FILTER_MODE: unhandled value"); - } - }; - return Value.convert( - ConvertFunc); - } - default: - return PI_SUCCESS; - } -} - -inline pi_result ur2piVirtualMemInfoValue(ur_virtual_mem_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - switch (ParamName) { - case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { - auto ConvertFunc = [](ur_virtual_mem_access_flags_t UrValue) { - pi_virtual_access_flags PiValue = 0; - if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) - PiValue |= PI_VIRTUAL_ACCESS_FLAG_RW; - if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) - PiValue |= PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; - return PiValue; - }; - return Value - .convert( - ConvertFunc); - } - default: - return PI_SUCCESS; - } -} - -// Translate UR device info values to PI info values -inline pi_result ur2piUSMAllocInfoValue(ur_usm_alloc_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - - if (ParamName == UR_USM_ALLOC_INFO_TYPE) { - auto ConvertFunc = [](ur_usm_type_t UrValue) { - switch (UrValue) { - case UR_USM_TYPE_UNKNOWN: - return PI_MEM_TYPE_UNKNOWN; - case UR_USM_TYPE_HOST: - return PI_MEM_TYPE_HOST; - case UR_USM_TYPE_DEVICE: - return PI_MEM_TYPE_DEVICE; - case UR_USM_TYPE_SHARED: - return PI_MEM_TYPE_SHARED; - default: - die("UR_USM_ALLOC_INFO_TYPE: unhandled value"); - } - }; - return Value.convert(ConvertFunc); - } - - return PI_SUCCESS; -} - -// Translate UR program build info values to PI info values -inline pi_result ur2piProgramBuildInfoValue(ur_program_build_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - - if (ParamName == UR_PROGRAM_BUILD_INFO_BINARY_TYPE) { - auto ConvertFunc = [](ur_program_binary_type_t UrValue) { - switch (UrValue) { - case UR_PROGRAM_BINARY_TYPE_NONE: - return PI_PROGRAM_BINARY_TYPE_NONE; - case UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: - return PI_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; - case UR_PROGRAM_BINARY_TYPE_LIBRARY: - return PI_PROGRAM_BINARY_TYPE_LIBRARY; - case UR_PROGRAM_BINARY_TYPE_EXECUTABLE: - return PI_PROGRAM_BINARY_TYPE_EXECUTABLE; - default: - die("ur_program_binary_type_t: unhandled value"); - } - }; - return Value.convert( - ConvertFunc); - } - - if (ParamName == UR_PROGRAM_BUILD_INFO_STATUS) { - auto ConvertFunc = [](ur_program_build_status_t UrValue) { - switch (UrValue) { - case UR_PROGRAM_BUILD_STATUS_NONE: - return PI_PROGRAM_BUILD_STATUS_NONE; - case UR_PROGRAM_BUILD_STATUS_ERROR: - return PI_PROGRAM_BUILD_STATUS_ERROR; - case UR_PROGRAM_BUILD_STATUS_SUCCESS: - return PI_PROGRAM_BUILD_STATUS_SUCCESS; - case UR_PROGRAM_BUILD_STATUS_IN_PROGRESS: - return PI_PROGRAM_BUILD_STATUS_IN_PROGRESS; - default: - die("ur_program_build_status_t: unhandled value"); - } - }; - return Value.convert( - ConvertFunc); - } - - return PI_SUCCESS; -} - -inline ur_result_t -mapPIMetadataToUR(const pi_device_binary_property *pi_metadata, - ur_program_metadata_t *ur_metadata) { - ur_metadata->pName = (*pi_metadata)->Name; - ur_metadata->size = (*pi_metadata)->ValSize; - switch ((*pi_metadata)->Type) { - case SYCL_PROPERTY_TYPE_UINT32: - ur_metadata->type = UR_PROGRAM_METADATA_TYPE_UINT32; - ur_metadata->value.data32 = (*pi_metadata)->ValSize; - return UR_RESULT_SUCCESS; - case SYCL_PROPERTY_TYPE_BYTE_ARRAY: - ur_metadata->type = UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY; - ur_metadata->value.pData = (*pi_metadata)->ValAddr; - return UR_RESULT_SUCCESS; - case SYCL_PROPERTY_TYPE_STRING: - ur_metadata->type = UR_PROGRAM_METADATA_TYPE_STRING; - ur_metadata->value.pString = - reinterpret_cast((*pi_metadata)->ValAddr); - return UR_RESULT_SUCCESS; - default: - return UR_RESULT_ERROR_INVALID_VALUE; - } -} - -namespace pi2ur { - -inline pi_result piTearDown(void *PluginParameter) { - bool *pluginTeardown = static_cast(PluginParameter); - *pluginTeardown = true; - // Fetch the single known adapter (the one which is statically linked) so we - // can release it. Fetching it for a second time (after piPlatformsGet) - // increases the reference count, so we need to release it twice. - // pi_unified_runtime has its own implementation of piTearDown. - static std::once_flag AdapterReleaseFlag; - ur_adapter_handle_t Adapter; - ur_result_t Ret = UR_RESULT_SUCCESS; - std::call_once(AdapterReleaseFlag, [&]() { - Ret = urAdapterGet(1, &Adapter, nullptr); - if (Ret == UR_RESULT_SUCCESS) { - Ret = urAdapterRelease(Adapter); - Ret = urAdapterRelease(Adapter); - } - }); - HANDLE_ERRORS(Ret); - - return PI_SUCCESS; -} - -inline pi_result PiGetAdapter(ur_adapter_handle_t &adapter) { - // We're not going through the UR loader so we're guaranteed to have exactly - // one adapter (whichever is statically linked). The PI plugin for UR has its - // own implementation of piPlatformsGet. - static ur_adapter_handle_t Adapter; - static std::once_flag AdapterGetFlag; - ur_result_t Ret = UR_RESULT_SUCCESS; - std::call_once(AdapterGetFlag, - [&Ret]() { Ret = urAdapterGet(1, &Adapter, nullptr); }); - HANDLE_ERRORS(Ret); - - adapter = Adapter; - - return PI_SUCCESS; -} - -/////////////////////////////////////////////////////////////////////////////// -// Platform -inline pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, - pi_uint32 *NumPlatforms) { - ur_adapter_handle_t adapter = nullptr; - if (auto res = PiGetAdapter(adapter); res != PI_SUCCESS) { - return res; - } - - auto phPlatforms = reinterpret_cast(Platforms); - HANDLE_ERRORS( - urPlatformGet(&adapter, 1, NumEntries, phPlatforms, NumPlatforms)); - return PI_SUCCESS; -} - -inline pi_result piextPlatformGetNativeHandle(pi_platform Platform, - pi_native_handle *NativeHandle) { - - PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - auto UrPlatform = reinterpret_cast(Platform); - - ur_native_handle_t UrNativeHandle{}; - HANDLE_ERRORS(urPlatformGetNativeHandle(UrPlatform, &UrNativeHandle)); - - *NativeHandle = UrNativeHandle; - - return PI_SUCCESS; -} - -inline pi_result -piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform *Platform) { - - PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_adapter_handle_t adapter = nullptr; - if (auto res = PiGetAdapter(adapter); res != PI_SUCCESS) { - return res; - } - - ur_platform_handle_t UrPlatform{}; - ur_native_handle_t UrNativeHandle = NativeHandle; - ur_platform_native_properties_t UrProperties{}; - urPlatformCreateWithNativeHandle(UrNativeHandle, adapter, &UrProperties, - &UrPlatform); - - *Platform = reinterpret_cast(UrPlatform); - - return PI_SUCCESS; -} - -inline pi_result piPlatformGetInfo(pi_platform Platform, - pi_platform_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); - - ur_platform_info_t UrParamName = {}; - switch (ParamName) { - case PI_PLATFORM_INFO_EXTENSIONS: { - UrParamName = UR_PLATFORM_INFO_EXTENSIONS; - break; - } - case PI_PLATFORM_INFO_NAME: { - UrParamName = UR_PLATFORM_INFO_NAME; - break; - } - case PI_PLATFORM_INFO_PROFILE: { - UrParamName = UR_PLATFORM_INFO_PROFILE; - break; - } - case PI_PLATFORM_INFO_VENDOR: { - UrParamName = UR_PLATFORM_INFO_VENDOR_NAME; - break; - } - case PI_PLATFORM_INFO_VERSION: { - UrParamName = UR_PLATFORM_INFO_VERSION; - break; - } - case PI_EXT_PLATFORM_INFO_BACKEND: { - UrParamName = UR_PLATFORM_INFO_BACKEND; - break; - } - default: - die("urGetContextInfo: unsuppported ParamName."); - } - - size_t UrParamValueSizeRet; - auto UrPlatform = reinterpret_cast(Platform); - HANDLE_ERRORS(urPlatformGetInfo(UrPlatform, UrParamName, ParamValueSize, - ParamValue, &UrParamValueSizeRet)); - - if (ParamValueSizeRet) { - *ParamValueSizeRet = UrParamValueSizeRet; - } - ur2piPlatformInfoValue(UrParamName, ParamValueSize, &ParamValueSize, - ParamValue); - fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, - ParamValue); - - return PI_SUCCESS; -} - -inline pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - (void)opaque_data_param; - (void)opaque_data_return; - return PI_ERROR_UNKNOWN; -} - -inline pi_result piPluginGetBackendOption(pi_platform Platform, - const char *FrontendOption, - const char **PlatformOption) { - - auto UrPlatform = reinterpret_cast(Platform); - HANDLE_ERRORS( - urPlatformGetBackendOption(UrPlatform, FrontendOption, PlatformOption)); - - return PI_SUCCESS; -} - -// Platform -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Device -inline pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, - pi_uint32 NumEntries, pi_device *Devices, - pi_uint32 *NumDevices) { - ur_device_type_t Type; - switch (DeviceType) { - case PI_DEVICE_TYPE_ALL: - Type = UR_DEVICE_TYPE_ALL; - break; - case PI_DEVICE_TYPE_GPU: - Type = UR_DEVICE_TYPE_GPU; - break; - case PI_DEVICE_TYPE_CPU: - Type = UR_DEVICE_TYPE_CPU; - break; - case PI_DEVICE_TYPE_ACC: - Type = UR_DEVICE_TYPE_FPGA; - break; - default: - return PI_ERROR_UNKNOWN; - } - - PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); - - auto UrPlatform = reinterpret_cast(Platform); - auto UrDevices = reinterpret_cast(Devices); - HANDLE_ERRORS( - urDeviceGet(UrPlatform, Type, NumEntries, UrDevices, NumDevices)); - - return PI_SUCCESS; -} - -inline pi_result piDeviceRetain(pi_device Device) { - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - auto UrDevice = reinterpret_cast(Device); - HANDLE_ERRORS(urDeviceRetain(UrDevice)); - return PI_SUCCESS; -} - -inline pi_result piDeviceRelease(pi_device Device) { - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrDevice = reinterpret_cast(Device); - HANDLE_ERRORS(urDeviceRelease(UrDevice)); - return PI_SUCCESS; -} - -inline pi_result piPluginGetLastError(char **Message) { - // We're not going through the UR loader so we're guaranteed to have exactly - // one adapter (whichever is statically linked). The PI plugin for UR has its - // own implementation of piPluginGetLastError. Materialize the adapter - // reference for the urAdapterGetLastError call, then release it. - ur_adapter_handle_t Adapter; - urAdapterGet(1, &Adapter, nullptr); - // FIXME: ErrorCode should store a native error, but these are not being used - // in CUDA adapter at the moment - int32_t ErrorCode; - ur_result_t Res = urAdapterGetLastError( - Adapter, const_cast(Message), &ErrorCode); - urAdapterRelease(Adapter); - - return ur2piResult(Res); -} - -inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - ur_device_info_t InfoType; - switch (ParamName) { -#define PI_TO_UR_MAP_DEVICE_INFO(FROM, TO) \ - case FROM: { \ - InfoType = TO; \ - break; \ - } - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_TYPE, UR_DEVICE_INFO_TYPE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARENT_DEVICE, - UR_DEVICE_INFO_PARENT_DEVICE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PLATFORM, UR_DEVICE_INFO_PLATFORM) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_VENDOR_ID, UR_DEVICE_INFO_VENDOR_ID) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_UUID, UR_DEVICE_INFO_UUID) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_ATOMIC_64, UR_DEVICE_INFO_ATOMIC_64) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_EXTENSIONS, - UR_DEVICE_INFO_EXTENSIONS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NAME, UR_DEVICE_INFO_NAME) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_COMPILER_AVAILABLE, - UR_DEVICE_INFO_COMPILER_AVAILABLE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_LINKER_AVAILABLE, - UR_DEVICE_INFO_LINKER_AVAILABLE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_COMPUTE_UNITS, - UR_DEVICE_INFO_MAX_COMPUTE_UNITS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, - UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, - UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, - UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY, - UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_ADDRESS_BITS, - UR_DEVICE_INFO_ADDRESS_BITS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, - UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GLOBAL_MEM_SIZE, - UR_DEVICE_INFO_GLOBAL_MEM_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_LOCAL_MEM_SIZE, - UR_DEVICE_INFO_LOCAL_MEM_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE_SUPPORT, - UR_DEVICE_INFO_IMAGE_SUPPORTED) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_HOST_UNIFIED_MEMORY, - UR_DEVICE_INFO_HOST_UNIFIED_MEMORY) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_AVAILABLE, UR_DEVICE_INFO_AVAILABLE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_VENDOR, UR_DEVICE_INFO_VENDOR) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_DRIVER_VERSION, - UR_DEVICE_INFO_DRIVER_VERSION) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_VERSION, UR_DEVICE_INFO_VERSION) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES, - UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_REFERENCE_COUNT, - UR_DEVICE_INFO_REFERENCE_COUNT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARTITION_PROPERTIES, - UR_DEVICE_INFO_SUPPORTED_PARTITIONS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN, - UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PARTITION_TYPE, - UR_DEVICE_INFO_PARTITION_TYPE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_OPENCL_C_VERSION, - UR_EXT_DEVICE_INFO_OPENCL_C_VERSION) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC, - UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PRINTF_BUFFER_SIZE, - UR_DEVICE_INFO_PRINTF_BUFFER_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PROFILE, UR_DEVICE_INFO_PROFILE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_BUILT_IN_KERNELS, - UR_DEVICE_INFO_BUILT_IN_KERNELS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_QUEUE_PROPERTIES, - UR_DEVICE_INFO_QUEUE_PROPERTIES) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_EXECUTION_CAPABILITIES, - UR_DEVICE_INFO_EXECUTION_CAPABILITIES) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_ENDIAN_LITTLE, - UR_DEVICE_INFO_ENDIAN_LITTLE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT, - UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION, - UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_LOCAL_MEM_TYPE, - UR_DEVICE_INFO_LOCAL_MEM_TYPE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_CONSTANT_ARGS, - UR_DEVICE_INFO_MAX_CONSTANT_ARGS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE, - UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE, - UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE, - UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE, - UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_PARAMETER_SIZE, - UR_DEVICE_INFO_MAX_PARAMETER_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, - UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_SAMPLERS, - UR_DEVICE_INFO_MAX_SAMPLERS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS, - UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS, - UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_SINGLE_FP_CONFIG, - UR_DEVICE_INFO_SINGLE_FP_CONFIG) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_HALF_FP_CONFIG, - UR_DEVICE_INFO_HALF_FP_CONFIG) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_DOUBLE_FP_CONFIG, - UR_DEVICE_INFO_DOUBLE_FP_CONFIG) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH, - UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT, - UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH, - UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT, - UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH, - UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE, - UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR, - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR, - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT, - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT, - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT, - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT, - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG, - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG, - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT, - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT, - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE, - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE, - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF, - UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF, - UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS, - UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS) - PI_TO_UR_MAP_DEVICE_INFO( - PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, - UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, - UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IL_VERSION, - UR_DEVICE_INFO_IL_VERSION) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_HOST_SUPPORT, - UR_DEVICE_INFO_USM_HOST_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_DEVICE_SUPPORT, - UR_DEVICE_INFO_USM_DEVICE_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT, - UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT, - UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT, - UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_PCI_ADDRESS, - UR_DEVICE_INFO_PCI_ADDRESS) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_EU_COUNT, - UR_DEVICE_INFO_GPU_EU_COUNT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, - UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, - UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION, - UR_DEVICE_INFO_IP_VERSION) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_BUILD_ON_SUBDEVICE, - UR_DEVICE_INFO_BUILD_ON_SUBDEVICE) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, - UR_DEVICE_INFO_MAX_WORK_GROUPS_3D) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE, - UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_DEVICE_ID, UR_DEVICE_INFO_DEVICE_ID) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, - UR_DEVICE_INFO_GLOBAL_MEM_FREE) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, - UR_DEVICE_INFO_MEMORY_CLOCK_RATE) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, - UR_DEVICE_INFO_MEMORY_BUS_WIDTH) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES, - UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_SLICES, - UR_DEVICE_INFO_GPU_EU_SLICES) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, - UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, - UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_MAX_MEM_BANDWIDTH, - UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS, - UR_DEVICE_INFO_BFLOAT16) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, - UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, - UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, - UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, - UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT, - UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_IMAGE_SRGB, - UR_DEVICE_INFO_IMAGE_SRGB) - PI_TO_UR_MAP_DEVICE_INFO(PI_DEVICE_INFO_BACKEND_VERSION, - UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP, - UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT, - UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT, - UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT, - UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT, - UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_PITCH_ALIGN, - UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH, - UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT, - UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH, - UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_SUPPORT, - UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT, - UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY, - UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT, - UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT, - UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT, - UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, - UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT, - UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT, - UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT, - UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT, - UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT, - UR_DEVICE_INFO_ESIMD_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES, - UR_DEVICE_INFO_COMPONENT_DEVICES) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_COMPOSITE_DEVICE, - UR_DEVICE_INFO_COMPOSITE_DEVICE) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_SUPPORT, - UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT, - UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, - UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_CLUSTER_LAUNCH, - UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_ARRAY_SUPPORT, - UR_DEVICE_INFO_IMAGE_ARRAY_SUPPORT_EXP) - PI_TO_UR_MAP_DEVICE_INFO( - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM, - UR_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLE_1D_USM, - UR_DEVICE_INFO_BINDLESS_SAMPLE_1D_USM_EXP) - PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLE_2D_USM, - UR_DEVICE_INFO_BINDLESS_SAMPLE_2D_USM_EXP) -#undef PI_TO_UR_MAP_DEVICE_INFO - default: - return PI_ERROR_UNKNOWN; - }; - - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - size_t ParamValueSizeRetUR; - auto DeviceUR = reinterpret_cast(Device); - - HANDLE_ERRORS(urDeviceGetInfo(DeviceUR, InfoType, ParamValueSize, ParamValue, - &ParamValueSizeRetUR)); - - ur2piDeviceInfoValue(InfoType, ParamValueSize, ParamValue, - &ParamValueSizeRetUR); - - if (ParamValueSizeRet) { - *ParamValueSizeRet = ParamValueSizeRetUR; - } - - return PI_SUCCESS; -} - -inline pi_result piextDeviceGetNativeHandle(pi_device Device, - pi_native_handle *NativeHandle) { - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - auto UrDevice = reinterpret_cast(Device); - - ur_native_handle_t UrNativeHandle{}; - HANDLE_ERRORS(urDeviceGetNativeHandle(UrDevice, &UrNativeHandle)); - *NativeHandle = UrNativeHandle; - return PI_SUCCESS; -} - -inline pi_result -piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform Platform, pi_device *Device) { - - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_adapter_handle_t adapter = nullptr; - if (auto res = PiGetAdapter(adapter); res != PI_SUCCESS) { - return res; - } - (void)adapter; - - ur_native_handle_t UrNativeDevice = NativeHandle; - ur_platform_handle_t UrPlatform = - reinterpret_cast(Platform); - auto UrDevice = reinterpret_cast(Device); - ur_device_native_properties_t UrProperties{}; - HANDLE_ERRORS(urDeviceCreateWithNativeHandle(UrNativeDevice, UrPlatform, - &UrProperties, UrDevice)); - - return PI_SUCCESS; -} - -inline pi_result piDevicePartition( - pi_device Device, const pi_device_partition_property *Properties, - pi_uint32 NumEntries, pi_device *SubDevices, pi_uint32 *NumSubDevices) { - - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - if (!Properties || !Properties[0]) { - return PI_ERROR_INVALID_VALUE; - } - - ur_device_partition_t UrType; - switch (Properties[0]) { - case PI_DEVICE_PARTITION_EQUALLY: - UrType = UR_DEVICE_PARTITION_EQUALLY; - break; - case PI_DEVICE_PARTITION_BY_COUNTS: - UrType = UR_DEVICE_PARTITION_BY_COUNTS; - break; - case PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: - UrType = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - break; - case PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE: - UrType = UR_DEVICE_PARTITION_BY_CSLICE; - break; - default: - return PI_ERROR_UNKNOWN; - } - - std::vector UrProperties{}; - - // UR_DEVICE_PARTITION_BY_CSLICE doesn't have a value, so - // handle it outside the while loop below. - if (UrType == UR_DEVICE_PARTITION_BY_CSLICE) { - ur_device_partition_property_t UrProperty{}; - UrProperty.type = UrType; - UrProperties.push_back(UrProperty); - } - while (*(++Properties)) { - ur_device_partition_property_t UrProperty; - UrProperty.type = UrType; - switch (UrType) { - case UR_DEVICE_PARTITION_EQUALLY: { - UrProperty.value.equally = *Properties; - break; - } - case UR_DEVICE_PARTITION_BY_COUNTS: { - UrProperty.value.count = *Properties; - break; - } - case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { - /* No need to convert affinity domain enums from pi to ur because they - * are equivalent */ - UrProperty.value.affinity_domain = *Properties; - break; - } - default: { - die("Invalid properties for call to piDevicePartition"); - } - } - UrProperties.push_back(UrProperty); - } - - const ur_device_partition_properties_t UrPropertiesStruct{ - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, - nullptr, - UrProperties.data(), - UrProperties.size(), - }; - - auto UrDevice = reinterpret_cast(Device); - auto UrSubDevices = reinterpret_cast(SubDevices); - HANDLE_ERRORS(urDevicePartition(UrDevice, &UrPropertiesStruct, NumEntries, - UrSubDevices, NumSubDevices)); - return PI_SUCCESS; -} - -inline pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - auto UrDevice = reinterpret_cast(Device); - HANDLE_ERRORS(urDeviceGetGlobalTimestamps(UrDevice, DeviceTime, HostTime)); - return PI_SUCCESS; -} - -inline pi_result -piextDeviceSelectBinary(pi_device Device, // TODO: does this need to be context? - pi_device_binary *PIBinaries, pi_uint32 NumBinaries, - pi_uint32 *SelectedBinaryInd) { - - auto UrDevice = reinterpret_cast(Device); - std::vector UrBinaries(NumBinaries); - auto *Binaries = reinterpret_cast(PIBinaries); - - for (uint32_t BinaryCount = 0; BinaryCount < NumBinaries; BinaryCount++) { - if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_UNKNOWN) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_UNKNOWN; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_SPIRV32) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV32; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV64; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_NVPTX64; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_AMDGCN; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - "native_cpu"; // todo: define UR_DEVICE_BINARY_TARGET_NATIVE_CPU; - else - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_UNKNOWN; - } - - HANDLE_ERRORS(urDeviceSelectBinary(UrDevice, UrBinaries.data(), NumBinaries, - SelectedBinaryInd)); - return PI_SUCCESS; -} - -// Device -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Context -inline pi_result piContextCreate(const pi_context_properties *Properties, - pi_uint32 NumDevices, const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, - const void *PrivateInfo, - size_t CB, void *UserData), - void *UserData, pi_context *RetContext) { - std::ignore = Properties; - std::ignore = PFnNotify; - std::ignore = UserData; - auto UrDevices = reinterpret_cast(Devices); - - ur_context_handle_t *UrContext = - reinterpret_cast(RetContext); - // TODO: Parse PI Context Properties into UR - ur_context_properties_t UrProperties{}; - HANDLE_ERRORS( - urContextCreate(NumDevices, UrDevices, &UrProperties, UrContext)); - return PI_SUCCESS; -} - -inline pi_result piextContextSetExtendedDeleter( - pi_context Context, pi_context_extended_deleter Function, void *UserData) { - auto hContext = reinterpret_cast(Context); - - HANDLE_ERRORS(urContextSetExtendedDeleter(hContext, Function, UserData)); - - return PI_SUCCESS; -} - -inline pi_result piextContextGetNativeHandle(pi_context Context, - pi_native_handle *NativeHandle) { - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_native_handle_t UrNativeHandle{}; - HANDLE_ERRORS(urContextGetNativeHandle(UrContext, &UrNativeHandle)); - *NativeHandle = UrNativeHandle; - return PI_SUCCESS; -} - -inline pi_result piextContextCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_uint32 NumDevices, - const pi_device *Devices, bool OwnNativeHandle, pi_context *RetContext) { - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(RetContext, PI_ERROR_INVALID_VALUE); - - ur_adapter_handle_t adapter = nullptr; - if (auto res = PiGetAdapter(adapter); res != PI_SUCCESS) { - return res; - } - - ur_native_handle_t NativeContext = NativeHandle; - const ur_device_handle_t *UrDevices = - reinterpret_cast(Devices); - ur_context_handle_t *UrContext = - reinterpret_cast(RetContext); - - ur_context_native_properties_t Properties{ - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES, nullptr, OwnNativeHandle}; - - HANDLE_ERRORS(urContextCreateWithNativeHandle( - NativeContext, adapter, NumDevices, UrDevices, &Properties, UrContext)); - - return PI_SUCCESS; -} - -inline pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_context_handle_t hContext = reinterpret_cast(Context); - ur_context_info_t ContextInfoType{}; - - switch (ParamName) { - case PI_CONTEXT_INFO_DEVICES: { - ContextInfoType = UR_CONTEXT_INFO_DEVICES; - break; - } - case PI_CONTEXT_INFO_NUM_DEVICES: { - ContextInfoType = UR_CONTEXT_INFO_NUM_DEVICES; - break; - } - case PI_CONTEXT_INFO_REFERENCE_COUNT: { - ContextInfoType = UR_CONTEXT_INFO_REFERENCE_COUNT; - break; - } - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT: - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: { - ContextInfoType = UR_CONTEXT_INFO_USM_FILL2D_SUPPORT; - break; - } - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: { - ContextInfoType = UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT; - break; - } - case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // These queries should be dealt with in context_impl.cpp by calling the - // queries of each device separately and building the intersection set. - die("These queries should have never come here"); - } - default: { - die("piContextGetInfo: unsuppported ParamName."); - } - } - - size_t UrParamValueSizeRet; - HANDLE_ERRORS(urContextGetInfo(hContext, ContextInfoType, ParamValueSize, - ParamValue, &UrParamValueSizeRet)); - if (ParamValueSizeRet) { - *ParamValueSizeRet = UrParamValueSizeRet; - } - fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, - ParamValue); - return PI_SUCCESS; -} - -inline pi_result piContextRetain(pi_context Context) { - ur_context_handle_t hContext = reinterpret_cast(Context); - - HANDLE_ERRORS(urContextRetain(hContext)); - - return PI_SUCCESS; -} - -inline pi_result piContextRelease(pi_context Context) { - ur_context_handle_t UrContext = - reinterpret_cast(Context); - HANDLE_ERRORS(urContextRelease(UrContext)); - return PI_SUCCESS; -} -// Context -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Queue -inline pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, - pi_queue *Queue) { - - PI_ASSERT(Properties, PI_ERROR_INVALID_VALUE); - // Expect flags mask to be passed first. - PI_ASSERT(Properties[0] == PI_QUEUE_FLAGS, PI_ERROR_INVALID_VALUE); - - PI_ASSERT(Properties[2] == 0 || - (Properties[2] == PI_QUEUE_COMPUTE_INDEX && Properties[4] == 0), - PI_ERROR_INVALID_VALUE); - - // Check that unexpected bits are not set. - PI_ASSERT(!(Properties[1] & - ~(PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | - PI_QUEUE_FLAG_PROFILING_ENABLE | PI_QUEUE_FLAG_ON_DEVICE | - PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | - PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH | - PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE | - PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE)), - PI_ERROR_INVALID_VALUE); - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - ur_queue_properties_t UrProperties{}; - UrProperties.stype = UR_STRUCTURE_TYPE_QUEUE_PROPERTIES; - if (Properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) - UrProperties.flags |= UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; - if (Properties[1] & PI_QUEUE_FLAG_PROFILING_ENABLE) - UrProperties.flags |= UR_QUEUE_FLAG_PROFILING_ENABLE; - if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE) - UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE; - if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE_DEFAULT) - UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS) - UrProperties.flags |= UR_QUEUE_FLAG_DISCARD_EVENTS; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW) - UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_LOW; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) - UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_HIGH; - if (Properties[1] & __SYCL_PI_CUDA_SYNC_WITH_DEFAULT) - UrProperties.flags |= UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; - if (Properties[1] & __SYCL_PI_CUDA_USE_DEFAULT_STREAM) - UrProperties.flags |= UR_QUEUE_FLAG_USE_DEFAULT_STREAM; - if (Properties[1] & PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE) - UrProperties.flags |= UR_QUEUE_FLAG_SUBMISSION_BATCHED; - if (Properties[1] & PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE) - UrProperties.flags |= UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE; - - ur_queue_index_properties_t IndexProperties{}; - IndexProperties.stype = UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES; - if (Properties[2] != 0) { - IndexProperties.computeIndex = Properties[3]; - } - - UrProperties.pNext = &IndexProperties; - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - ur_queue_handle_t *UrQueue = reinterpret_cast(Queue); - HANDLE_ERRORS(urQueueCreate(UrContext, UrDevice, &UrProperties, UrQueue)); - - return PI_SUCCESS; -} - -inline pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Flags, pi_queue *Queue) { - pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; - return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); -} - -inline pi_result piextQueueCreateWithNativeHandle( - pi_native_handle NativeHandle, int32_t NativeHandleDesc, pi_context Context, - pi_device Device, bool OwnNativeHandle, pi_queue_properties *Properties, - pi_queue *Queue) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_device_handle_t UrDevice = reinterpret_cast(Device); - ur_native_handle_t UrNativeHandle = NativeHandle; - ur_queue_handle_t *UrQueue = reinterpret_cast(Queue); - ur_queue_native_properties_t UrNativeProperties{}; - UrNativeProperties.isNativeHandleOwned = OwnNativeHandle; - - ur_queue_properties_t UrProperties{}; - UrProperties.stype = UR_STRUCTURE_TYPE_QUEUE_PROPERTIES; - if (Properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) - UrProperties.flags |= UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; - if (Properties[1] & PI_QUEUE_FLAG_PROFILING_ENABLE) - UrProperties.flags |= UR_QUEUE_FLAG_PROFILING_ENABLE; - if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE) - UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE; - if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE_DEFAULT) - UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS) - UrProperties.flags |= UR_QUEUE_FLAG_DISCARD_EVENTS; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW) - UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_LOW; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) - UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_HIGH; - - ur_queue_native_desc_t UrNativeDesc{}; - UrNativeDesc.stype = UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC; - UrNativeDesc.pNativeData = &NativeHandleDesc; - - UrProperties.pNext = &UrNativeDesc; - UrNativeProperties.pNext = &UrProperties; - - HANDLE_ERRORS(urQueueCreateWithNativeHandle( - UrNativeHandle, UrContext, UrDevice, &UrNativeProperties, UrQueue)); - return PI_SUCCESS; -} - -inline pi_result piextQueueGetNativeHandle(pi_queue Queue, - pi_native_handle *NativeHandle, - int32_t *NativeHandleDesc) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_queue_native_desc_t UrNativeDesc{}; - UrNativeDesc.pNativeData = NativeHandleDesc; - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - ur_native_handle_t UrNativeQueue{}; - HANDLE_ERRORS(urQueueGetNativeHandle(UrQueue, &UrNativeDesc, &UrNativeQueue)); - - *NativeHandle = UrNativeQueue; - - return PI_SUCCESS; -} - -inline pi_result piQueueRelease(pi_queue Queue) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - HANDLE_ERRORS(urQueueRelease(UrQueue)); - - return PI_SUCCESS; -} - -inline pi_result piQueueFinish(pi_queue Queue) { - // Wait until command lists attached to the command queue are executed. - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - HANDLE_ERRORS(urQueueFinish(UrQueue)); - - return PI_SUCCESS; -} - -inline pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - ur_queue_info_t UrParamName{}; - - switch (ParamName) { - case PI_QUEUE_INFO_CONTEXT: { - UrParamName = UR_QUEUE_INFO_CONTEXT; - break; - } - case PI_QUEUE_INFO_DEVICE: { - UrParamName = UR_QUEUE_INFO_DEVICE; - break; - } - case PI_QUEUE_INFO_DEVICE_DEFAULT: { - UrParamName = UR_QUEUE_INFO_DEVICE_DEFAULT; - break; - } - case PI_QUEUE_INFO_PROPERTIES: { - UrParamName = UR_QUEUE_INFO_FLAGS; - break; - } - case PI_QUEUE_INFO_REFERENCE_COUNT: { - UrParamName = UR_QUEUE_INFO_REFERENCE_COUNT; - break; - } - case PI_QUEUE_INFO_SIZE: { - UrParamName = UR_QUEUE_INFO_SIZE; - break; - } - case PI_EXT_ONEAPI_QUEUE_INFO_EMPTY: { - UrParamName = UR_QUEUE_INFO_EMPTY; - break; - } - default: { - die("Unsupported ParamName in piQueueGetInfo"); - return PI_ERROR_INVALID_VALUE; - } - } - - HANDLE_ERRORS(urQueueGetInfo(UrQueue, UrParamName, ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piQueueRetain(pi_queue Queue) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - HANDLE_ERRORS(urQueueRetain(UrQueue)); - - return PI_SUCCESS; -} - -inline pi_result piQueueFlush(pi_queue Queue) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - HANDLE_ERRORS(urQueueFlush(UrQueue)); - - return PI_SUCCESS; -} - -// Queue -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Program - -inline pi_result piProgramCreate(pi_context Context, const void *ILBytes, - size_t Length, pi_program *Program) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(ILBytes && Length, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_program_properties_t UrProperties{}; - ur_program_handle_t *UrProgram = - reinterpret_cast(Program); - HANDLE_ERRORS(urProgramCreateWithIL(UrContext, ILBytes, Length, &UrProperties, - UrProgram)); - - return PI_SUCCESS; -} - -inline pi_result piProgramCreateWithBinary( - pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, - const size_t *Lengths, const unsigned char **Binaries, - size_t NumMetadataEntries, const pi_device_binary_property *Metadata, - pi_int32 *BinaryStatus, pi_program *Program) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(DeviceList && NumDevices, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Binaries && Lengths, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - if (!Binaries[0] || !Lengths[0]) { - if (BinaryStatus) - *BinaryStatus = PI_ERROR_INVALID_VALUE; - return PI_ERROR_INVALID_VALUE; - } - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(DeviceList[0]); - - ur_program_properties_t Properties = {}; - Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; - Properties.pNext = nullptr; - Properties.count = NumMetadataEntries; - - std::unique_ptr pMetadatas; - if (NumMetadataEntries) { - pMetadatas.reset(new ur_program_metadata_t[NumMetadataEntries]); - for (unsigned i = 0; i < NumMetadataEntries; i++) { - HANDLE_ERRORS(mapPIMetadataToUR(&Metadata[i], &pMetadatas[i])); - } - - Properties.pMetadatas = pMetadatas.get(); - } - - ur_program_handle_t *UrProgram = - reinterpret_cast(Program); - HANDLE_ERRORS(urProgramCreateWithBinary(UrContext, UrDevice, Lengths[0], - Binaries[0], &Properties, UrProgram)); - - if (BinaryStatus) - *BinaryStatus = PI_SUCCESS; - - return PI_SUCCESS; -} - -inline pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - ur_program_info_t PropName{}; - - switch (ParamName) { - case PI_PROGRAM_INFO_REFERENCE_COUNT: { - PropName = UR_PROGRAM_INFO_REFERENCE_COUNT; - break; - } - case PI_PROGRAM_INFO_CONTEXT: { - PropName = UR_PROGRAM_INFO_CONTEXT; - break; - } - case PI_PROGRAM_INFO_NUM_DEVICES: { - PropName = UR_PROGRAM_INFO_NUM_DEVICES; - break; - } - case PI_PROGRAM_INFO_DEVICES: { - PropName = UR_PROGRAM_INFO_DEVICES; - break; - } - case PI_PROGRAM_INFO_SOURCE: { - PropName = UR_PROGRAM_INFO_SOURCE; - break; - } - case PI_PROGRAM_INFO_BINARY_SIZES: { - PropName = UR_PROGRAM_INFO_BINARY_SIZES; - break; - } - case PI_PROGRAM_INFO_BINARIES: { - PropName = UR_PROGRAM_INFO_BINARIES; - break; - } - case PI_PROGRAM_INFO_NUM_KERNELS: { - PropName = UR_PROGRAM_INFO_NUM_KERNELS; - break; - } - case PI_PROGRAM_INFO_KERNEL_NAMES: { - PropName = UR_PROGRAM_INFO_KERNEL_NAMES; - break; - } - default: { - die("urProgramGetInfo: not implemented"); - } - } - - HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, ParamValueSize, - ParamValue, ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result -piProgramLink(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - pi_uint32 NumInputPrograms, const pi_program *InputPrograms, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData, pi_program *RetProgram) { - - // Validate input parameters. - PI_ASSERT(DeviceList, PI_ERROR_INVALID_DEVICE); - PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); - if (NumInputPrograms == 0 || InputPrograms == nullptr) - return PI_ERROR_INVALID_VALUE; - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - const ur_program_handle_t *UrInputPrograms = - reinterpret_cast(InputPrograms); - ur_program_handle_t *UrProgram = - reinterpret_cast(RetProgram); - - auto UrDevices = reinterpret_cast( - const_cast(DeviceList)); - - // If it fails, urProgramLinkExp will clear the pointer - ur_program_handle_t UrProgramForExp; - auto urResult = - urProgramLinkExp(UrContext, NumDevices, UrDevices, NumInputPrograms, - UrInputPrograms, Options, &UrProgramForExp); - if (urResult == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - urResult = urProgramLink(UrContext, NumInputPrograms, UrInputPrograms, - Options, UrProgram); - } else { - *UrProgram = UrProgramForExp; - } - return ur2piResult(urResult); -} - -inline pi_result piProgramCompile( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, pi_uint32 NumInputHeaders, - const pi_program *InputHeaders, const char **HeaderIncludeNames, - void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { - - std::ignore = NumInputHeaders; - std::ignore = InputHeaders; - std::ignore = HeaderIncludeNames; - - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - if ((NumDevices && !DeviceList) || (!NumDevices && DeviceList)) - return PI_ERROR_INVALID_VALUE; - - // These aren't supported. - PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - ur_program_info_t PropName = UR_PROGRAM_INFO_CONTEXT; - ur_context_handle_t UrContext{}; - HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, sizeof(&UrContext), - &UrContext, nullptr)); - - auto UrDevices = reinterpret_cast( - const_cast(DeviceList)); - - auto urResult = - urProgramCompileExp(UrProgram, NumDevices, UrDevices, Options); - if (urResult == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - urResult = urProgramCompile(UrContext, UrProgram, Options); - } - return ur2piResult(urResult); -} - -inline pi_result -piProgramBuild(pi_program Program, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - if ((NumDevices && !DeviceList) || (!NumDevices && DeviceList)) { - return PI_ERROR_INVALID_VALUE; - } - - // These aren't supported. - PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - ur_program_info_t PropName = UR_PROGRAM_INFO_CONTEXT; - ur_context_handle_t UrContext{}; - HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, sizeof(&UrContext), - &UrContext, nullptr)); - - auto UrDevices = reinterpret_cast( - const_cast(DeviceList)); - - auto urResult = urProgramBuildExp(UrProgram, NumDevices, UrDevices, Options); - if (urResult == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - urResult = urProgramBuild(UrContext, UrProgram, Options); - } - return ur2piResult(urResult); -} - -inline pi_result piextProgramSetSpecializationConstant(pi_program Program, - pi_uint32 SpecID, - size_t Size, - const void *SpecValue) { - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - uint32_t Count = 1; - ur_specialization_constant_info_t SpecConstant{}; - SpecConstant.id = SpecID; - SpecConstant.size = Size; - SpecConstant.pValue = SpecValue; - HANDLE_ERRORS( - urProgramSetSpecializationConstants(UrProgram, Count, &SpecConstant)); - - return PI_SUCCESS; -} - -inline pi_result piKernelCreate(pi_program Program, const char *KernelName, - pi_kernel *RetKernel) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - PI_ASSERT(RetKernel, PI_ERROR_INVALID_VALUE); - PI_ASSERT(KernelName, PI_ERROR_INVALID_VALUE); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - ur_kernel_handle_t *UrKernel = - reinterpret_cast(RetKernel); - - HANDLE_ERRORS(urKernelCreate(UrProgram, KernelName, UrKernel)); - - return PI_SUCCESS; -} - -inline pi_result -piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, const void *FillColor, - const size_t *Origin, const size_t *Region, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - - std::ignore = Image; - std::ignore = FillColor; - std::ignore = Origin; - std::ignore = Region; - std::ignore = NumEventsInWaitList; - std::ignore = EventsWaitList; - std::ignore = Event; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - die("piEnqueueMemImageFill: not implemented"); - return PI_SUCCESS; -} - -inline pi_result piextGetDeviceFunctionPointer(pi_device Device, - pi_program Program, - const char *FunctionName, - pi_uint64 *FunctionPointerRet) { - - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - auto UrDevice = reinterpret_cast(Device); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - void **FunctionPointer = reinterpret_cast(FunctionPointerRet); - - HANDLE_ERRORS(urProgramGetFunctionPointer(UrDevice, UrProgram, FunctionName, - FunctionPointer)); - return PI_SUCCESS; -} - -inline pi_result piextGetGlobalVariablePointer( - pi_device Device, pi_program Program, const char *GlobalVariableName, - size_t *GlobalVariableSize, void **GlobalVariablePointerRet) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - auto UrDevice = reinterpret_cast(Device); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - HANDLE_ERRORS(urProgramGetGlobalVariablePointer( - UrDevice, UrProgram, GlobalVariableName, GlobalVariableSize, - GlobalVariablePointerRet)); - return PI_SUCCESS; -} - -// Special version of piKernelSetArg to accept pi_mem. -inline pi_result -piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem_obj_property *ArgProperties, - const pi_mem *ArgValue) { - - // TODO: the better way would probably be to add a new PI API for - // extracting native PI object from PI handle, and have SYCL - // RT pass that directly to the regular piKernelSetArg (and - // then remove this piextKernelSetArgMemObj). - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_mem_handle_t UrMemory{}; - if (ArgValue) - UrMemory = reinterpret_cast(*ArgValue); - - // We don't yet know the device where this kernel will next be run on. - // Thus we can't know the actual memory allocation that needs to be used. - // Remember the memory object being used as an argument for this kernel - // to process it later when the device is known (at the kernel enqueue). - // - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - // the only applicable type, just ignore anything else - if (ArgProperties && ArgProperties->type == PI_KERNEL_ARG_MEM_OBJ_ACCESS) { - // following structure layout checks to be replaced with - // std::is_layout_compatible after move to C++20 - static_assert(sizeof(pi_mem_obj_property) == - sizeof(ur_kernel_arg_mem_obj_properties_t)); - static_assert(sizeof(pi_mem_obj_property::type) == - sizeof(ur_kernel_arg_mem_obj_properties_t::stype)); - static_assert(sizeof(pi_mem_obj_property::pNext) == - sizeof(ur_kernel_arg_mem_obj_properties_t::pNext)); - static_assert(sizeof(pi_mem_obj_property::mem_access) == - sizeof(ur_kernel_arg_mem_obj_properties_t::memoryAccess)); - - static_assert(uint32_t(PI_ACCESS_READ_WRITE) == - uint32_t(UR_MEM_FLAG_READ_WRITE)); - static_assert(uint32_t(PI_ACCESS_READ_ONLY) == - uint32_t(UR_MEM_FLAG_READ_ONLY)); - static_assert(uint32_t(PI_ACCESS_WRITE_ONLY) == - uint32_t(UR_MEM_FLAG_WRITE_ONLY)); - static_assert(uint32_t(PI_KERNEL_ARG_MEM_OBJ_ACCESS) == - uint32_t(UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES)); - - const ur_kernel_arg_mem_obj_properties_t *UrMemProperties = - reinterpret_cast( - ArgProperties); - HANDLE_ERRORS( - urKernelSetArgMemObj(UrKernel, ArgIndex, UrMemProperties, UrMemory)); - } else { - HANDLE_ERRORS(urKernelSetArgMemObj(UrKernel, ArgIndex, nullptr, UrMemory)); - } - - return PI_SUCCESS; -} - -inline pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - - if (ArgValue) { - HANDLE_ERRORS( - urKernelSetArgValue(UrKernel, ArgIndex, ArgSize, nullptr, ArgValue)); - } else { - HANDLE_ERRORS(urKernelSetArgLocal(UrKernel, ArgIndex, ArgSize, nullptr)); - } - return PI_SUCCESS; -} - -inline pi_result piKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - std::ignore = ArgSize; - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - // The old PI interface was expecting a pointer to the pointer obtained via - // usm/virtual memory, UR now consumes that pointer directly. - const void *Arg = *static_cast(ArgValue); - HANDLE_ERRORS(urKernelSetArgPointer(UrKernel, ArgIndex, nullptr, Arg)); - - return PI_SUCCESS; -} - -inline pi_result -piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, pi_program Program, - bool OwnNativeHandle, pi_kernel *Kernel) { - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_native_handle_t UrNativeKernel = NativeHandle; - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - ur_kernel_handle_t *UrKernel = reinterpret_cast(Kernel); - ur_kernel_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urKernelCreateWithNativeHandle( - UrNativeKernel, UrContext, UrProgram, &Properties, UrKernel)); - - return PI_SUCCESS; -} - -inline pi_result piProgramRetain(pi_program Program) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - HANDLE_ERRORS( - urProgramRetain(reinterpret_cast(UrProgram))); - - return PI_SUCCESS; -} - -inline pi_result piKernelSetExecInfo(pi_kernel Kernel, - pi_kernel_exec_info ParamName, - size_t ParamValueSize, - const void *ParamValue) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(ParamValue, PI_ERROR_INVALID_VALUE); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_kernel_exec_info_t PropName{}; - uint64_t PropValue{}; - switch (ParamName) { - case PI_USM_INDIRECT_ACCESS: { - PropName = UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS; - PropValue = *(static_cast(const_cast(ParamValue))); - break; - } - case PI_USM_PTRS: { - PropName = UR_KERNEL_EXEC_INFO_USM_PTRS; - break; - } - case PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG: { - PropName = UR_KERNEL_EXEC_INFO_CACHE_CONFIG; - auto Param = (*(static_cast(ParamValue))); - if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_SLM) { - PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_LARGE_SLM); - } else if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_DATA) { - PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_LARGE_DATA); - break; - } else if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT) { - PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_DEFAULT); - } else { - die("piKernelSetExecInfo: unsupported ParamValue\n"); - } - break; - } - default: - die("piKernelSetExecInfo: unsupported ParamName\n"); - } - HANDLE_ERRORS(urKernelSetExecInfo(UrKernel, PropName, ParamValueSize, nullptr, - &PropValue)); - - return PI_SUCCESS; -} - -inline pi_result piextProgramGetNativeHandle(pi_program Program, - pi_native_handle *NativeHandle) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - ur_native_handle_t NativeProgram{}; - HANDLE_ERRORS(urProgramGetNativeHandle(UrProgram, &NativeProgram)); - - *NativeHandle = NativeProgram; - - return PI_SUCCESS; -} - -inline pi_result -piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, bool OwnNativeHandle, - pi_program *Program) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_native_handle_t NativeProgram = NativeHandle; - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_program_handle_t *UrProgram = - reinterpret_cast(Program); - ur_program_native_properties_t UrProperties{}; - UrProperties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urProgramCreateWithNativeHandle(NativeProgram, UrContext, - &UrProperties, UrProgram)); - return PI_SUCCESS; -} - -inline pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_kernel_info_t UrParamName{}; - switch (ParamName) { - case PI_KERNEL_INFO_FUNCTION_NAME: { - UrParamName = UR_KERNEL_INFO_FUNCTION_NAME; - break; - } - case PI_KERNEL_INFO_NUM_ARGS: { - UrParamName = UR_KERNEL_INFO_NUM_ARGS; - break; - } - case PI_KERNEL_INFO_REFERENCE_COUNT: { - UrParamName = UR_KERNEL_INFO_REFERENCE_COUNT; - break; - } - case PI_KERNEL_INFO_CONTEXT: { - UrParamName = UR_KERNEL_INFO_CONTEXT; - break; - } - case PI_KERNEL_INFO_PROGRAM: { - UrParamName = UR_KERNEL_INFO_PROGRAM; - break; - } - case PI_KERNEL_INFO_ATTRIBUTES: { - UrParamName = UR_KERNEL_INFO_ATTRIBUTES; - break; - } - default: - return PI_ERROR_INVALID_PROPERTY; - } - - HANDLE_ERRORS(urKernelGetInfo(UrKernel, UrParamName, ParamValueSize, - ParamValue, ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_group_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - auto UrDevice = reinterpret_cast(Device); - - ur_kernel_group_info_t UrParamName{}; - switch (ParamName) { - case PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE; - break; - } - case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE; - break; - } - case PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE; - break; - } - case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE; - break; - } - case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { - UrParamName = UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; - break; - } - case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE; - break; - } - // The number of registers used by the compiled kernel (device specific) - case PI_KERNEL_GROUP_INFO_NUM_REGS: { - HANDLE_ERRORS(urKernelGetInfo(UrKernel, UR_KERNEL_INFO_NUM_REGS, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - return PI_SUCCESS; - } - default: { - die("Unknown ParamName in piKernelGetGroupInfo"); - return PI_ERROR_INVALID_VALUE; - } - } - - HANDLE_ERRORS(urKernelGetGroupInfo(UrKernel, UrDevice, UrParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piKernelRetain(pi_kernel Kernel) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - - HANDLE_ERRORS(urKernelRetain(UrKernel)); - - return PI_SUCCESS; -} - -inline pi_result piKernelRelease(pi_kernel Kernel) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - - HANDLE_ERRORS(urKernelRelease(UrKernel)); - - return PI_SUCCESS; -} - -inline pi_result piProgramRelease(pi_program Program) { - - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - HANDLE_ERRORS(urProgramRelease(UrProgram)); - - return PI_SUCCESS; -} - -inline pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t, const void *ArgValue) { - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - // The old PI interface was expecting a pointer to the pointer obtained via - // usm/virtual memory, UR now consumes that pointer directly. - const void *Arg = *static_cast(ArgValue); - HANDLE_ERRORS(urKernelSetArgPointer(UrKernel, ArgIndex, nullptr, Arg)); - - return PI_SUCCESS; -} - -inline pi_result piKernelGetSubGroupInfo( - pi_kernel Kernel, pi_device Device, pi_kernel_sub_group_info ParamName, - size_t InputValueSize, const void *InputValue, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - - std::ignore = InputValueSize; - std::ignore = InputValue; - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - auto UrDevice = reinterpret_cast(Device); - - ur_kernel_sub_group_info_t PropName{}; - switch (ParamName) { - case PI_KERNEL_MAX_SUB_GROUP_SIZE: { - PropName = UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE; - break; - } - case PI_KERNEL_MAX_NUM_SUB_GROUPS: { - PropName = UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS; - break; - } - case PI_KERNEL_COMPILE_NUM_SUB_GROUPS: { - PropName = UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS; - break; - } - case PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: { - PropName = UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL; - break; - } - } - HANDLE_ERRORS(urKernelGetSubGroupInfo(UrKernel, UrDevice, PropName, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, - pi_program_build_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - auto UrDevice = reinterpret_cast(Device); - - ur_program_build_info_t PropName{}; - switch (ParamName) { - case PI_PROGRAM_BUILD_INFO_STATUS: { - PropName = UR_PROGRAM_BUILD_INFO_STATUS; - break; - } - case PI_PROGRAM_BUILD_INFO_OPTIONS: { - PropName = UR_PROGRAM_BUILD_INFO_OPTIONS; - break; - } - case PI_PROGRAM_BUILD_INFO_LOG: { - PropName = UR_PROGRAM_BUILD_INFO_LOG; - break; - } - case PI_PROGRAM_BUILD_INFO_BINARY_TYPE: { - PropName = UR_PROGRAM_BUILD_INFO_BINARY_TYPE; - break; - } - default: { - die("piProgramGetBuildInfo: not implemented"); - } - } - - size_t SizeInOut = ParamValueSize; - HANDLE_ERRORS(urProgramGetBuildInfo(UrProgram, UrDevice, PropName, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - ur2piProgramBuildInfoValue(PropName, ParamValueSize, &SizeInOut, ParamValue); - return PI_SUCCESS; -} - -inline pi_result piextKernelGetNativeHandle(pi_kernel Kernel, - pi_native_handle *NativeHandle) { - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_native_handle_t NativeKernel{}; - HANDLE_ERRORS(urKernelGetNativeHandle(UrKernel, &NativeKernel)); - - *NativeHandle = NativeKernel; - - return PI_SUCCESS; -} - -inline pi_result piextKernelSuggestMaxCooperativeGroupCount( - pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize, - pi_uint32 *GroupCountRet) { - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(GroupCountRet, PI_ERROR_INVALID_VALUE); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - HANDLE_ERRORS(urKernelSuggestMaxCooperativeGroupCountExp( - UrKernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet)); - - return PI_SUCCESS; -} - -/// API for writing data from host to a device global variable. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device global variable -/// \param Name is the unique identifier for the device global variable -/// \param BlockingWrite is true if the write should block -/// \param Count is the number of bytes to copy -/// \param Offset is the byte offset into the device global variable to start -/// copying -/// \param Src is a pointer to where the data must be copied from -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -inline pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, - size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - HANDLE_ERRORS(urEnqueueDeviceGlobalVariableWrite( - UrQueue, UrProgram, Name, BlockingWrite, Count, Offset, Src, - NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -/// API reading data from a device global variable to host. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device global variable -/// \param Name is the unique identifier for the device global variable -/// \param BlockingRead is true if the read should block -/// \param Count is the number of bytes to copy -/// \param Offset is the byte offset into the device global variable to start -/// copying -/// \param Dst is a pointer to where the data must be copied to -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -inline pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, - size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueDeviceGlobalVariableRead( - UrQueue, UrProgram, Name, BlockingRead, Count, Offset, Dst, - NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -// Program -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Memory -inline pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, - size_t Size, void *HostPtr, pi_mem *RetMem, - const pi_mem_properties *properties) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(RetMem, PI_ERROR_INVALID_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_mem_flags_t UrBufferFlags{}; - if (Flags & PI_MEM_FLAGS_ACCESS_RW) { - UrBufferFlags |= UR_MEM_FLAG_READ_WRITE; - } - if (Flags & PI_MEM_ACCESS_READ_ONLY) { - UrBufferFlags |= UR_MEM_FLAG_READ_ONLY; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { - UrBufferFlags |= UR_MEM_FLAG_USE_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { - UrBufferFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { - UrBufferFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; - } - - ur_buffer_properties_t UrProps{}; - UrProps.stype = UR_STRUCTURE_TYPE_BUFFER_PROPERTIES; - UrProps.pHost = HostPtr; - - ur_buffer_channel_properties_t bufferChannelProperties{}; - bufferChannelProperties.stype = UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES; - ur_buffer_alloc_location_properties_t bufferLocationProperties{}; - bufferLocationProperties.stype = - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES; - if (properties != nullptr) { - bool bufferLocationPropertySet = false; - bool bufferMemChannelPropertySet = false; - uint64_t allocBufferLocation = 0; - uint32_t allocBufferMemChannel = 0; - // pi mem properties must ended by 0 - size_t I = 0; - while (properties[I] != 0) { - if (properties[I] == PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION) { - allocBufferLocation = properties[I + 1]; - bufferLocationPropertySet = true; - } else if (properties[I] == PI_MEM_PROPERTIES_CHANNEL) { - allocBufferMemChannel = properties[I + 1]; - bufferMemChannelPropertySet = true; - } - I += 2; - } - void *extensionProperties = nullptr; - if (bufferLocationPropertySet) { - bufferLocationProperties.location = allocBufferLocation; - extensionProperties = &bufferLocationProperties; - } - if (bufferMemChannelPropertySet) { - bufferChannelProperties.channel = allocBufferMemChannel; - extensionProperties = &bufferChannelProperties; - } - if (bufferLocationPropertySet && bufferMemChannelPropertySet) { - bufferLocationProperties.pNext = &bufferChannelProperties; - extensionProperties = &bufferLocationProperties; - } - UrProps.pNext = extensionProperties; - } - ur_mem_handle_t *UrBuffer = reinterpret_cast(RetMem); - HANDLE_ERRORS( - urMemBufferCreate(UrContext, UrBufferFlags, Size, &UrProps, UrBuffer)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - ur_usm_desc_t USMDesc{}; - USMDesc.align = Alignment; - - ur_usm_alloc_location_desc_t UsmLocationDesc{}; - UsmLocationDesc.stype = UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC; - - if (Properties) { - uint32_t Next = 0; - while (Properties[Next]) { - if (Properties[Next] == PI_MEM_USM_ALLOC_BUFFER_LOCATION) { - UsmLocationDesc.location = static_cast(Properties[Next + 1]); - USMDesc.pNext = &UsmLocationDesc; - } else { - return PI_ERROR_INVALID_VALUE; - } - Next += 2; - } - } - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_usm_pool_handle_t Pool{}; - HANDLE_ERRORS(urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, ResultPtr)); - return PI_SUCCESS; -} - -inline pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Mem, PI_ERROR_INVALID_VALUE); - // piMemImageGetInfo must be used for images - - ur_mem_handle_t UrMemory = reinterpret_cast(Mem); - ur_mem_info_t MemInfoType{}; - switch (ParamName) { - case PI_MEM_CONTEXT: { - MemInfoType = UR_MEM_INFO_CONTEXT; - break; - } - case PI_MEM_SIZE: { - MemInfoType = UR_MEM_INFO_SIZE; - break; - } - default: { - die("piMemGetInfo: unsuppported ParamName."); - } - } - HANDLE_ERRORS(urMemGetInfo(UrMemory, MemInfoType, ParamValueSize, ParamValue, - ParamValueSizeRet)); - return PI_SUCCESS; -} - -static void pi2urImageDesc(const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, - ur_image_format_t *UrFormat, - ur_image_desc_t *UrDesc) { - - switch (ImageFormat->image_channel_data_type) { -#define PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(FROM, TO) \ - case FROM: { \ - UrFormat->channelType = TO; \ - break; \ - } - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SNORM_INT8, - UR_IMAGE_CHANNEL_TYPE_SNORM_INT8) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SNORM_INT16, - UR_IMAGE_CHANNEL_TYPE_SNORM_INT16) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_INT8, - UR_IMAGE_CHANNEL_TYPE_UNORM_INT8) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_INT16, - UR_IMAGE_CHANNEL_TYPE_UNORM_INT16) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565, - UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555, - UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010, - UR_IMAGE_CHANNEL_TYPE_INT_101010) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8, - UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16, - UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32, - UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, - UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, - UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32, - UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT, - UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT) - PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE(PI_IMAGE_CHANNEL_TYPE_FLOAT, - UR_IMAGE_CHANNEL_TYPE_FLOAT) -#undef PI_TO_UR_MAP_IMAGE_CHANNEL_TYPE - default: { - die("piMemImageCreate: unsuppported image_channel_data_type."); - } - } - switch (ImageFormat->image_channel_order) { -#define PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(FROM, TO) \ - case FROM: { \ - UrFormat->channelOrder = TO; \ - break; \ - } - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_A, - UR_IMAGE_CHANNEL_ORDER_A) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_R, - UR_IMAGE_CHANNEL_ORDER_R) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RG, - UR_IMAGE_CHANNEL_ORDER_RG) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RA, - UR_IMAGE_CHANNEL_ORDER_RA) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RGB, - UR_IMAGE_CHANNEL_ORDER_RGB) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RGBA, - UR_IMAGE_CHANNEL_ORDER_RGBA) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_BGRA, - UR_IMAGE_CHANNEL_ORDER_BGRA) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_ARGB, - UR_IMAGE_CHANNEL_ORDER_ARGB) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_ABGR, - UR_IMAGE_CHANNEL_ORDER_ABGR) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_INTENSITY, - UR_IMAGE_CHANNEL_ORDER_INTENSITY) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_LUMINANCE, - UR_IMAGE_CHANNEL_ORDER_LUMINANCE) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_Rx, - UR_IMAGE_CHANNEL_ORDER_RX) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RGx, - UR_IMAGE_CHANNEL_ORDER_RGX) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_RGBx, - UR_IMAGE_CHANNEL_ORDER_RGBX) - PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER(PI_IMAGE_CHANNEL_ORDER_sRGBA, - UR_IMAGE_CHANNEL_ORDER_SRGBA) -#undef PI_TO_UR_MAP_IMAGE_CHANNEL_ORDER - default: { - die("piMemImageCreate: unsuppported image_channel_order."); - } - } - - UrDesc->stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc->arraySize = ImageDesc->image_array_size; - UrDesc->depth = ImageDesc->image_depth; - UrDesc->height = ImageDesc->image_height; - UrDesc->numMipLevel = ImageDesc->num_mip_levels; - UrDesc->numSamples = ImageDesc->num_samples; - UrDesc->rowPitch = ImageDesc->image_row_pitch; - UrDesc->slicePitch = ImageDesc->image_slice_pitch; - switch (ImageDesc->image_type) { -#define PI_TO_UR_MAP_IMAGE_TYPE(FROM, TO) \ - case FROM: { \ - UrDesc->type = TO; \ - break; \ - } - PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE2D, UR_MEM_TYPE_IMAGE2D) - PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE3D, UR_MEM_TYPE_IMAGE3D) - PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE2D_ARRAY, - UR_MEM_TYPE_IMAGE2D_ARRAY) - PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE1D, UR_MEM_TYPE_IMAGE1D) - PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE1D_ARRAY, - UR_MEM_TYPE_IMAGE1D_ARRAY) - PI_TO_UR_MAP_IMAGE_TYPE(PI_MEM_TYPE_IMAGE_CUBEMAP, - UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) -#undef PI_TO_UR_MAP_IMAGE_TYPE - default: { - die("piMemImageCreate: unsuppported image_type."); - } - } - UrDesc->width = ImageDesc->image_width; - UrDesc->arraySize = ImageDesc->image_array_size; - UrDesc->arraySize = ImageDesc->image_array_size; -} - -static void ur2piImageFormat(const ur_image_format_t *UrFormat, - pi_image_format *PiFormat) { - switch (UrFormat->channelOrder) { -#define UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(FROM, TO) \ - case FROM: { \ - PiFormat->image_channel_order = TO; \ - break; \ - } - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_A, - PI_IMAGE_CHANNEL_ORDER_A) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_R, - PI_IMAGE_CHANNEL_ORDER_R) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RG, - PI_IMAGE_CHANNEL_ORDER_RG) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RA, - PI_IMAGE_CHANNEL_ORDER_RA) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RGB, - PI_IMAGE_CHANNEL_ORDER_RGB) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RGBA, - PI_IMAGE_CHANNEL_ORDER_RGBA) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_BGRA, - PI_IMAGE_CHANNEL_ORDER_BGRA) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_ARGB, - PI_IMAGE_CHANNEL_ORDER_ARGB) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_ABGR, - PI_IMAGE_CHANNEL_ORDER_ABGR) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_INTENSITY, - PI_IMAGE_CHANNEL_ORDER_INTENSITY) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_LUMINANCE, - PI_IMAGE_CHANNEL_ORDER_LUMINANCE) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RX, - PI_IMAGE_CHANNEL_ORDER_Rx) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RGX, - PI_IMAGE_CHANNEL_ORDER_RGx) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_RGBX, - PI_IMAGE_CHANNEL_ORDER_RGBx) - UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER(UR_IMAGE_CHANNEL_ORDER_SRGBA, - PI_IMAGE_CHANNEL_ORDER_sRGBA) -#undef UR_TO_PI_MAP_IMAGE_CHANNEL_ORDER - default: { - die("ur2piImageFormat: unsuppported channelOrder."); - } - } - - switch (UrFormat->channelType) { -#define UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(FROM, TO) \ - case FROM: { \ - PiFormat->image_channel_data_type = TO; \ - break; \ - } - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_SNORM_INT8) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, - PI_IMAGE_CHANNEL_TYPE_SNORM_INT16) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT8) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT16) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_INT_101010, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT, - PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT) - UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE(UR_IMAGE_CHANNEL_TYPE_FLOAT, - PI_IMAGE_CHANNEL_TYPE_FLOAT) -#undef UR_TO_PI_MAP_IMAGE_CHANNEL_TYPE - default: { - die("ur2piImageFormat: unsuppported channelType."); - } - } -} - -inline pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, - const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, void *HostPtr, - pi_mem *RetImage) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(RetImage, PI_ERROR_INVALID_VALUE); - PI_ASSERT(ImageFormat, PI_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_mem_flags_t UrFlags{}; - if (Flags & PI_MEM_FLAGS_ACCESS_RW) { - UrFlags |= UR_MEM_FLAG_READ_WRITE; - } - if (Flags & PI_MEM_ACCESS_READ_ONLY) { - UrFlags |= UR_MEM_FLAG_READ_ONLY; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { - UrFlags |= UR_MEM_FLAG_USE_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { - UrFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { - UrFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; - } - - ur_image_format_t UrFormat{}; - ur_image_desc_t UrDesc{}; - pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); - - // TODO: UrDesc doesn't have something for ImageDesc->buffer - - ur_mem_handle_t *UrMem = reinterpret_cast(RetImage); - HANDLE_ERRORS( - urMemImageCreate(UrContext, UrFlags, &UrFormat, &UrDesc, HostPtr, UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *RetImage) { - - PI_ASSERT(RetImage, PI_ERROR_INVALID_VALUE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_native_handle_t UrNativeMem = NativeHandle; - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_mem_handle_t *UrMem = reinterpret_cast(RetImage); - ur_mem_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - - ur_image_format_t UrFormat{}; - ur_image_desc_t UrDesc{}; - pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); - - HANDLE_ERRORS(urMemImageCreateWithNativeHandle( - UrNativeMem, UrContext, &UrFormat, &UrDesc, &Properties, UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, - pi_buffer_create_type BufferCreateType, - void *BufferCreateInfo, pi_mem *RetMem) { - - PI_ASSERT(BufferCreateType == PI_BUFFER_CREATE_TYPE_REGION && - BufferCreateInfo && RetMem, - PI_ERROR_INVALID_VALUE); - - auto Region = (pi_buffer_region)BufferCreateInfo; - PI_ASSERT(Region->size != 0u, PI_ERROR_INVALID_BUFFER_SIZE); - PI_ASSERT(Region->origin <= (Region->origin + Region->size), - PI_ERROR_INVALID_VALUE); - - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - - ur_mem_flags_t UrFlags{}; - if (Flags & PI_MEM_FLAGS_ACCESS_RW) { - UrFlags |= UR_MEM_FLAG_READ_WRITE; - } - if (Flags & PI_MEM_ACCESS_READ_ONLY) { - UrFlags |= UR_MEM_FLAG_READ_ONLY; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { - UrFlags |= UR_MEM_FLAG_USE_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { - UrFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { - UrFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; - } - - ur_buffer_create_type_t UrBufferCreateType{}; - if (BufferCreateType == PI_BUFFER_CREATE_TYPE_REGION) { - UrBufferCreateType = UR_BUFFER_CREATE_TYPE_REGION; - } - - ur_buffer_region_t UrBufferCreateInfo{}; - UrBufferCreateInfo.origin = Region->origin; - UrBufferCreateInfo.size = Region->size; - ur_mem_handle_t *UrMem = reinterpret_cast(RetMem); - HANDLE_ERRORS(urMemBufferPartition(UrBuffer, UrFlags, UrBufferCreateType, - &UrBufferCreateInfo, UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, - pi_native_handle *NativeHandle) { - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - ur_device_handle_t UrDev = reinterpret_cast(Dev); - ur_native_handle_t NativeMem{}; - HANDLE_ERRORS(urMemGetNativeHandle(UrMem, UrDev, &NativeMem)); - - *NativeHandle = NativeMem; - - return PI_SUCCESS; -} - -inline pi_result -piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, - pi_image_offset SrcOrigin, pi_image_offset DstOrigin, - pi_image_region Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - ur_mem_handle_t UrImageSrc = reinterpret_cast(SrcImage); - ur_mem_handle_t UrImageDst = reinterpret_cast(DstImage); - - ur_rect_offset_t UrSrcOrigin{SrcOrigin->x, SrcOrigin->y, SrcOrigin->z}; - ur_rect_offset_t UrDstOrigin{DstOrigin->x, DstOrigin->y, DstOrigin->z}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth; - UrRegion.height = Region->height; - UrRegion.width = Region->width; - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemImageCopy( - UrQueue, UrImageSrc, UrImageDst, UrSrcOrigin, UrDstOrigin, UrRegion, - NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_mem *Mem) { - PI_ASSERT(Mem, PI_ERROR_INVALID_VALUE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_native_handle_t UrNativeMem = NativeHandle; - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_mem_handle_t *UrMem = reinterpret_cast(Mem); - ur_mem_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urMemBufferCreateWithNativeHandle(UrNativeMem, UrContext, - &Properties, UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - ur_context_handle_t UrContext = - reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - ur_usm_desc_t USMDesc{}; - USMDesc.align = Alignment; - - ur_usm_alloc_location_desc_t UsmLocDesc{}; - UsmLocDesc.stype = UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC; - - if (Properties) { - uint32_t Next = 0; - while (Properties[Next]) { - if (Properties[Next] == PI_MEM_USM_ALLOC_BUFFER_LOCATION) { - UsmLocDesc.location = static_cast(Properties[Next + 1]); - USMDesc.pNext = &UsmLocDesc; - } else { - return PI_ERROR_INVALID_VALUE; - } - Next += 2; - } - } - - ur_usm_pool_handle_t Pool{}; - HANDLE_ERRORS( - urUSMDeviceAlloc(UrContext, UrDevice, &USMDesc, Pool, Size, ResultPtr)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMPitchedAlloc(void **ResultPtr, size_t *ResultPitch, - pi_context Context, pi_device Device, - pi_usm_mem_properties *Properties, - size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - std::ignore = Properties; - ur_usm_desc_t USMDesc{}; - ur_usm_pool_handle_t Pool{}; - - HANDLE_ERRORS(urUSMPitchedAllocExp(UrContext, UrDevice, &USMDesc, Pool, - WidthInBytes, Height, ElementSizeBytes, - ResultPtr, ResultPitch)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - ur_context_handle_t UrContext = - reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - ur_usm_desc_t USMDesc{}; - USMDesc.align = Alignment; - ur_usm_device_desc_t UsmDeviceDesc{}; - UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC; - ur_usm_host_desc_t UsmHostDesc{}; - UsmHostDesc.stype = UR_STRUCTURE_TYPE_USM_HOST_DESC; - ur_usm_alloc_location_desc_t UsmLocationDesc{}; - UsmLocationDesc.stype = UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC; - - // One properties bitfield can correspond to a host_desc and a device_desc - // struct, since having `0` values in these is harmless we can set up this - // pNext chain in advance. - USMDesc.pNext = &UsmDeviceDesc; - UsmDeviceDesc.pNext = &UsmHostDesc; - - if (Properties) { - uint32_t Next = 0; - while (Properties[Next]) { - switch (Properties[Next]) { - case PI_MEM_ALLOC_FLAGS: { - if (Properties[Next + 1] & PI_MEM_ALLOC_WRTITE_COMBINED) { - UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED; - } - if (Properties[Next + 1] & PI_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE) { - UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT; - } - if (Properties[Next + 1] & PI_MEM_ALLOC_INITIAL_PLACEMENT_HOST) { - UsmHostDesc.flags |= UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT; - } - if (Properties[Next + 1] & PI_MEM_ALLOC_DEVICE_READ_ONLY) { - UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY; - } - break; - } - case PI_MEM_USM_ALLOC_BUFFER_LOCATION: { - UsmLocationDesc.location = static_cast(Properties[Next + 1]); - // We wait until we've seen a BUFFER_LOCATION property to tack this - // onto the end of the chain, a `0` here might be valid as far as we - // know so we must exclude it unless we've been given a value. - UsmHostDesc.pNext = &UsmLocationDesc; - break; - } - default: - return PI_ERROR_INVALID_VALUE; - } - Next += 2; - } - } - - ur_usm_pool_handle_t Pool{}; - HANDLE_ERRORS( - urUSMSharedAlloc(UrContext, UrDevice, &USMDesc, Pool, Size, ResultPtr)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMFree(pi_context Context, void *Ptr) { - ur_context_handle_t UrContext = - reinterpret_cast(Context); - HANDLE_ERRORS(urUSMFree(UrContext, Ptr)); - return PI_SUCCESS; -} - -inline pi_result piMemRetain(pi_mem Mem) { - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - - HANDLE_ERRORS(urMemRetain(UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piMemRelease(pi_mem Mem) { - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - - HANDLE_ERRORS(urMemRelease(UrMem)); - - return PI_SUCCESS; -} - -/// Hint to migrate memory to the device -/// -/// @param Queue is the queue to submit to -/// @param Ptr points to the memory to migrate -/// @param Size is the number of bytes to migrate -/// @param Flags is a bitfield used to specify memory migration options -/// @param NumEventsInWaitList is the number of events to wait on -/// @param EventsWaitList is an array of events to wait on -/// @param Event is the event that represents this operation -inline pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, - size_t Size, - pi_usm_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - // flags is currently unused so fail if set - PI_ASSERT(Flags == 0, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - // TODO: to map from pi_usm_migration_flags to - // ur_usm_migration_flags_t - // once we have those defined - ur_usm_migration_flags_t UrFlags{}; - HANDLE_ERRORS(urEnqueueUSMPrefetch(UrQueue, Ptr, Size, UrFlags, - NumEventsInWaitList, UrEventsWaitList, - UREvent)); - - return PI_SUCCESS; -} - -/// USM memadvise API to govern behavior of automatic migration mechanisms -/// -/// @param Queue is the queue to submit to -/// @param Ptr is the data to be advised -/// @param Length is the size in bytes of the meory to advise -/// @param Advice is device specific advice -/// @param Event is the event that represents this operation -/// -inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, - size_t Length, pi_mem_advice Advice, - pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - ur_usm_advice_flags_t UrAdvice{}; - if (Advice & PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_READ_MOSTLY; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY; - } - if (Advice & PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION; - } - if (Advice & PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE; - } - if (Advice & PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST; - } - if (Advice & PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY; - } - if (Advice & PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY; - } - if (Advice & PI_MEM_ADVICE_RESET) { - UrAdvice |= UR_USM_ADVICE_FLAG_DEFAULT; - } - - HANDLE_ERRORS(urEnqueueUSMAdvise(UrQueue, Ptr, Length, UrAdvice, UREvent)); - - return PI_SUCCESS; -} - -/// USM 2D Fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including -/// padding \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -inline pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, size_t Pitch, - size_t PatternSize, const void *Pattern, - size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - - auto hQueue = reinterpret_cast(Queue); - auto phEventWaitList = - reinterpret_cast(EventsWaitList); - auto phEvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urEnqueueUSMFill2D(hQueue, Ptr, Pitch, PatternSize, Pattern, - Width, Height, NumEventsWaitList, - phEventWaitList, phEvent)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, - size_t Pitch, int Value, size_t Width, - size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - std::ignore = Queue; - std::ignore = Ptr; - std::ignore = Pitch; - std::ignore = Value; - std::ignore = Width; - std::ignore = Height; - std::ignore = NumEventsWaitList; - std::ignore = EventsWaitList; - std::ignore = Event; - die("piextUSMEnqueueMemset2D: not implemented"); - return PI_SUCCESS; -} - -inline pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, - pi_mem_alloc_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_usm_alloc_info_t UrParamName{}; - switch (ParamName) { - case PI_MEM_ALLOC_TYPE: { - UrParamName = UR_USM_ALLOC_INFO_TYPE; - break; - } - case PI_MEM_ALLOC_BASE_PTR: { - UrParamName = UR_USM_ALLOC_INFO_BASE_PTR; - break; - } - case PI_MEM_ALLOC_SIZE: { - UrParamName = UR_USM_ALLOC_INFO_SIZE; - break; - } - case PI_MEM_ALLOC_DEVICE: { - UrParamName = UR_USM_ALLOC_INFO_DEVICE; - break; - } - default: { - die("piextUSMGetMemAllocInfo: unsuppported ParamName."); - } - } - - size_t SizeInOut = ParamValueSize; - HANDLE_ERRORS(urUSMGetMemAllocInfo(UrContext, Ptr, UrParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet)) - ur2piUSMAllocInfoValue(UrParamName, ParamValueSize, &SizeInOut, ParamValue); - return PI_SUCCESS; -} - -inline pi_result piextUSMImport(const void *HostPtr, size_t Size, - pi_context Context) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - HANDLE_ERRORS(urUSMImportExp(UrContext, const_cast(HostPtr), Size)); - return PI_SUCCESS; -} - -inline pi_result piextUSMRelease(const void *HostPtr, pi_context Context) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - HANDLE_ERRORS(urUSMReleaseExp(UrContext, const_cast(HostPtr))); - return PI_SUCCESS; -} - -inline pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - auto hMem = reinterpret_cast(Image); - - ur_image_info_t UrParamName{}; - switch (ParamName) { - case PI_IMAGE_INFO_FORMAT: { - UrParamName = UR_IMAGE_INFO_FORMAT; - break; - } - case PI_IMAGE_INFO_ELEMENT_SIZE: { - UrParamName = UR_IMAGE_INFO_ELEMENT_SIZE; - break; - } - case PI_IMAGE_INFO_ROW_PITCH: { - UrParamName = UR_IMAGE_INFO_ROW_PITCH; - break; - } - case PI_IMAGE_INFO_SLICE_PITCH: { - UrParamName = UR_IMAGE_INFO_SLICE_PITCH; - break; - } - case PI_IMAGE_INFO_WIDTH: { - UrParamName = UR_IMAGE_INFO_WIDTH; - break; - } - case PI_IMAGE_INFO_HEIGHT: { - UrParamName = UR_IMAGE_INFO_HEIGHT; - break; - } - case PI_IMAGE_INFO_DEPTH: { - UrParamName = UR_IMAGE_INFO_DEPTH; - break; - } - default: - return PI_ERROR_UNKNOWN; - } - - HANDLE_ERRORS(urMemImageGetInfo(hMem, UrParamName, ParamValueSize, ParamValue, - ParamValueSizeRet)); - return PI_SUCCESS; -} - -/// USM 2D Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param dst_ptr is the location the data will be copied -/// \param dst_pitch is the total width of the destination memory including -/// padding -/// \param src_ptr is the data to be copied -/// \param dst_pitch is the total width of the source memory including padding -/// \param width is width in bytes of each row to be copied -/// \param height is height the columns to be copied -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -inline pi_result piextUSMEnqueueMemcpy2D(pi_queue Queue, pi_bool Blocking, - void *DstPtr, size_t DstPitch, - const void *SrcPtr, size_t SrcPitch, - size_t Width, size_t Height, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - if (!DstPtr || !SrcPtr) - return PI_ERROR_INVALID_VALUE; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueUSMMemcpy2D( - UrQueue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, - NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -// Memory -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Enqueue - -inline pi_result -piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT((WorkDim > 0) && (WorkDim < 4), PI_ERROR_INVALID_WORK_DIMENSION); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueKernelLaunch( - UrQueue, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piextEnqueueCooperativeKernelLaunch( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT((WorkDim > 0) && (WorkDim < 4), PI_ERROR_INVALID_WORK_DIMENSION); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueCooperativeKernelLaunchExp( - UrQueue, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piextEnqueueKernelLaunchCustom( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumPropsInLaunchPropList, - const pi_launch_property *LaunchPropList, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT((WorkDim > 0) && (WorkDim < 4), PI_ERROR_INVALID_WORK_DIMENSION); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - std::vector props(NumPropsInLaunchPropList); - for (pi_uint32 i = 0; i < NumPropsInLaunchPropList; i++) { - switch (LaunchPropList[i].id) { - case PI_LAUNCH_PROPERTY_IGNORE: { - props[i].id = UR_EXP_LAUNCH_PROPERTY_ID_IGNORE; - break; - } - case PI_LAUNCH_PROPERTY_CLUSTER_DIMENSION: { - - props[i].id = UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION; - props[i].value.clusterDim[0] = LaunchPropList[i].value.cluster_dims[0]; - props[i].value.clusterDim[1] = LaunchPropList[i].value.cluster_dims[1]; - props[i].value.clusterDim[2] = LaunchPropList[i].value.cluster_dims[2]; - break; - } - case PI_LAUNCH_PROPERTY_COOPERATIVE: { - props[i].id = UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE; - props[i].value.cooperative = LaunchPropList[i].value.cooperative; - break; - } - default: { - return PI_ERROR_INVALID_VALUE; - } - } - } - - HANDLE_ERRORS(urEnqueueKernelLaunchCustomExp( - UrQueue, UrKernel, WorkDim, GlobalWorkSize, LocalWorkSize, - NumPropsInLaunchPropList, &props[0], NumEventsInWaitList, - UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result -piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, pi_bool BlockingWrite, - pi_image_offset Origin, pi_image_region Region, - size_t InputRowPitch, size_t InputSlicePitch, - const void *Ptr, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrImage = reinterpret_cast(Image); - ur_rect_offset_t UrOrigin{Origin->x, Origin->y, Origin->z}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth; - UrRegion.height = Region->height; - UrRegion.width = Region->width; - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemImageWrite( - UrQueue, UrImage, BlockingWrite, UrOrigin, UrRegion, InputRowPitch, - InputSlicePitch, const_cast(Ptr), NumEventsInWaitList, - UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result -piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, pi_bool BlockingRead, - pi_image_offset Origin, pi_image_region Region, - size_t RowPitch, size_t SlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrImage = reinterpret_cast(Image); - ur_rect_offset_t UrOrigin{Origin->x, Origin->y, Origin->z}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth; - UrRegion.height = Region->height; - UrRegion.width = Region->width; - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemImageRead( - UrQueue, UrImage, BlockingRead, UrOrigin, UrRegion, RowPitch, SlicePitch, - Ptr, NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferMap( - pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, pi_map_flags MapFlags, - size_t Offset, size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent, void **RetMap) { - // TODO: we don't implement read-only or write-only, always read-write. - // assert((map_flags & PI_MAP_READ) != 0); - // assert((map_flags & PI_MAP_WRITE) != 0); - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - - ur_map_flags_t UrMapFlags{}; - if (MapFlags & PI_MAP_READ) - UrMapFlags |= UR_MAP_FLAG_READ; - if (MapFlags & PI_MAP_WRITE) - UrMapFlags |= UR_MAP_FLAG_WRITE; - if (MapFlags & PI_MAP_WRITE_INVALIDATE_REGION) - UrMapFlags |= UR_MAP_FLAG_WRITE_INVALIDATE_REGION; - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferMap(UrQueue, UrMem, BlockingMap, UrMapFlags, - Offset, Size, NumEventsInWaitList, - UrEventsWaitList, UREvent, RetMap)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemUnmap(UrQueue, UrMem, MappedPtr, - NumEventsInWaitList, UrEventsWaitList, - UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferFill(UrQueue, UrBuffer, Pattern, PatternSize, - Offset, Size, NumEventsInWaitList, - UrEventsWaitList, UREvent)); - return PI_SUCCESS; -} - -inline pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, - const void *Pattern, size_t PatternSize, - size_t Count, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - if (!Ptr) { - return PI_ERROR_INVALID_VALUE; - } - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, Pattern, Count, - NumEventsInWaitList, UrEventsWaitList, - UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferCopyRect( - pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, - pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, - size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(SrcMem && DstMem, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBufferSrc = reinterpret_cast(SrcMem); - ur_mem_handle_t UrBufferDst = reinterpret_cast(DstMem); - ur_rect_offset_t UrSrcOrigin{SrcOrigin->x_bytes, SrcOrigin->y_scalar, - SrcOrigin->z_scalar}; - ur_rect_offset_t UrDstOrigin{DstOrigin->x_bytes, DstOrigin->y_scalar, - DstOrigin->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferCopyRect( - UrQueue, UrBufferSrc, UrBufferDst, UrSrcOrigin, UrDstOrigin, UrRegion, - SrcRowPitch, SrcSlicePitch, DstRowPitch, DstSlicePitch, - NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, - pi_mem DstMem, size_t SrcOffset, - size_t DstOffset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(SrcMem && DstMem, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBufferSrc = reinterpret_cast(SrcMem); - ur_mem_handle_t UrBufferDst = reinterpret_cast(DstMem); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferCopy( - UrQueue, UrBufferSrc, UrBufferDst, SrcOffset, DstOffset, Size, - NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, - void *DstPtr, const void *SrcPtr, - size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueUSMMemcpy(UrQueue, Blocking, DstPtr, SrcPtr, Size, - NumEventsInWaitList, UrEventsWaitList, - UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferWriteRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, - BufferOffset->z_scalar}; - ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, - HostOffset->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferWriteRect( - UrQueue, UrBuffer, BlockingWrite, UrBufferOffset, UrHostOffset, UrRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, - const_cast(Ptr), NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, - pi_bool BlockingWrite, size_t Offset, - size_t Size, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferWrite( - UrQueue, UrBuffer, BlockingWrite, Offset, Size, const_cast(Ptr), - NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferReadRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, - BufferOffset->z_scalar}; - ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, - HostOffset->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferReadRect( - UrQueue, UrBuffer, BlockingRead, UrBufferOffset, UrHostOffset, UrRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, - pi_bool BlockingRead, size_t Offset, - size_t Size, void *Dst, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - PI_ASSERT(Src, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Src); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferRead(UrQueue, UrBuffer, BlockingRead, Offset, - Size, Dst, NumEventsInWaitList, - UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueEventsWaitWithBarrier(UrQueue, NumEventsInWaitList, - UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueEventsWait(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - if (EventsWaitList) { - PI_ASSERT(NumEventsInWaitList > 0, PI_ERROR_INVALID_VALUE); - } - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueEventsWait(UrQueue, NumEventsInWaitList, - UrEventsWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result -piextEnqueueReadHostPipe(pi_queue queue, pi_program program, - const char *pipe_symbol, pi_bool blocking, void *ptr, - size_t size, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - auto hQueue = reinterpret_cast(queue); - auto hProgram = reinterpret_cast(program); - auto phEventWaitList = - reinterpret_cast(events_waitlist); - auto phEvent = reinterpret_cast(event); - - HANDLE_ERRORS(urEnqueueReadHostPipe(hQueue, hProgram, pipe_symbol, blocking, - ptr, size, num_events_in_waitlist, - phEventWaitList, phEvent)); - - return PI_SUCCESS; -} - -inline pi_result -piextEnqueueWriteHostPipe(pi_queue queue, pi_program program, - const char *pipe_symbol, pi_bool blocking, void *ptr, - size_t size, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - auto hQueue = reinterpret_cast(queue); - auto hProgram = reinterpret_cast(program); - auto phEventWaitList = - reinterpret_cast(events_waitlist); - auto phEvent = reinterpret_cast(event); - - HANDLE_ERRORS(urEnqueueWriteHostPipe(hQueue, hProgram, pipe_symbol, blocking, - ptr, size, num_events_in_waitlist, - phEventWaitList, phEvent)); - - return PI_SUCCESS; -} -// Enqueue -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Events -inline pi_result piEventsWait(pi_uint32 NumEvents, - const pi_event *EventsWaitList) { - if (NumEvents && !EventsWaitList) { - return PI_ERROR_INVALID_EVENT; - } - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - HANDLE_ERRORS(urEventWait(NumEvents, UrEventsWaitList)); - - return PI_SUCCESS; -} - -inline pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - - ur_event_handle_t UREvent = reinterpret_cast(Event); - - ur_event_info_t PropName{}; - if (ParamName == PI_EVENT_INFO_COMMAND_QUEUE) { - PropName = UR_EVENT_INFO_COMMAND_QUEUE; - } else if (ParamName == PI_EVENT_INFO_CONTEXT) { - PropName = UR_EVENT_INFO_CONTEXT; - } else if (ParamName == PI_EVENT_INFO_COMMAND_TYPE) { - PropName = UR_EVENT_INFO_COMMAND_TYPE; - } else if (ParamName == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - PropName = UR_EVENT_INFO_COMMAND_EXECUTION_STATUS; - } else if (ParamName == PI_EVENT_INFO_REFERENCE_COUNT) { - PropName = UR_EVENT_INFO_REFERENCE_COUNT; - } else { - return PI_ERROR_INVALID_VALUE; - } - - HANDLE_ERRORS(urEventGetInfo(UREvent, PropName, ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piextEventGetNativeHandle(pi_event Event, - pi_native_handle *NativeHandle) { - - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_event_handle_t UREvent = reinterpret_cast(Event); - - ur_native_handle_t *UrNativeEvent = - reinterpret_cast(NativeHandle); - HANDLE_ERRORS(urEventGetNativeHandle(UREvent, UrNativeEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEventGetProfilingInfo(pi_event Event, - pi_profiling_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - - ur_event_handle_t UREvent = reinterpret_cast(Event); - - ur_profiling_info_t PropName{}; - switch (ParamName) { - case PI_PROFILING_INFO_COMMAND_QUEUED: { - PropName = UR_PROFILING_INFO_COMMAND_QUEUED; - break; - } - case PI_PROFILING_INFO_COMMAND_SUBMIT: { - PropName = UR_PROFILING_INFO_COMMAND_SUBMIT; - break; - } - case PI_PROFILING_INFO_COMMAND_START: { - PropName = UR_PROFILING_INFO_COMMAND_START; - break; - } - case PI_PROFILING_INFO_COMMAND_END: { - PropName = UR_PROFILING_INFO_COMMAND_END; - break; - } - default: - return PI_ERROR_INVALID_PROPERTY; - } - - HANDLE_ERRORS(urEventGetProfilingInfo(UREvent, PropName, ParamValueSize, - ParamValue, ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_event_handle_t *UREvent = reinterpret_cast(RetEvent); - // pass null for the hNativeHandle to use urEventCreateWithNativeHandle - // as urEventCreate - ur_event_native_properties_t Properties{}; - HANDLE_ERRORS( - urEventCreateWithNativeHandle(NULL, UrContext, &Properties, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_event *Event) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_native_handle_t UrNativeKernel = NativeHandle; - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_event_handle_t *UREvent = reinterpret_cast(Event); - ur_event_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urEventCreateWithNativeHandle(UrNativeKernel, UrContext, - &Properties, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEventSetCallback( - pi_event Event, pi_int32 CommandExecCallbackType, - void (*PFnNotify)(pi_event Event, pi_int32 EventCommandStatus, - void *UserData), - void *UserData) { - std::ignore = Event; - std::ignore = CommandExecCallbackType; - std::ignore = PFnNotify; - std::ignore = UserData; - die("piEventSetCallback: deprecated, to be removed"); - return PI_SUCCESS; -} - -inline pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { - std::ignore = Event; - std::ignore = ExecutionStatus; - die("piEventSetStatus: deprecated, to be removed"); - return PI_SUCCESS; -} - -inline pi_result piEventRetain(pi_event Event) { - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - - ur_event_handle_t UREvent = reinterpret_cast(Event); - HANDLE_ERRORS(urEventRetain(UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEventRelease(pi_event Event) { - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - - ur_event_handle_t UREvent = reinterpret_cast(Event); - HANDLE_ERRORS(urEventRelease(UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, - pi_bool Blocking, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventWaitList = - reinterpret_cast(EventWaitList); - ur_event_handle_t *UREvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urEnqueueTimestampRecordingExp( - UrQueue, Blocking, NumEventsInWaitList, UrEventWaitList, UREvent)); - - return PI_SUCCESS; -} - -// Events -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Sampler -inline pi_result piSamplerCreate(pi_context Context, - const pi_sampler_properties *SamplerProperties, - pi_sampler *RetSampler) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(RetSampler, PI_ERROR_INVALID_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_sampler_desc_t UrProps{}; - UrProps.stype = UR_STRUCTURE_TYPE_SAMPLER_DESC; - - const pi_sampler_properties *CurProperty = SamplerProperties; - while (*CurProperty != 0) { - switch (*CurProperty) { - case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS: { - UrProps.normalizedCoords = ur_cast(*(++CurProperty)); - } break; - - case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE: { - pi_sampler_addressing_mode CurValueAddressingMode = - ur_cast( - ur_cast(*(++CurProperty))); - - if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; - else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_REPEAT) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_REPEAT; - else if (CurValueAddressingMode == - PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; - else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_CLAMP) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP; - else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_NONE) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_NONE; - } break; - - case PI_SAMPLER_PROPERTIES_FILTER_MODE: { - pi_sampler_filter_mode CurValueFilterMode = - ur_cast(ur_cast(*(++CurProperty))); - - if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_NEAREST) - UrProps.filterMode = UR_SAMPLER_FILTER_MODE_NEAREST; - else if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_LINEAR) - UrProps.filterMode = UR_SAMPLER_FILTER_MODE_LINEAR; - } break; - - default: - break; - } - CurProperty++; - } - - ur_sampler_handle_t *UrSampler = - reinterpret_cast(RetSampler); - - HANDLE_ERRORS(urSamplerCreate(UrContext, &UrProps, UrSampler)); - - return PI_SUCCESS; -} - -inline pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - ur_sampler_info_t InfoType{}; - switch (ParamName) { - case PI_SAMPLER_INFO_REFERENCE_COUNT: - InfoType = UR_SAMPLER_INFO_REFERENCE_COUNT; - break; - case PI_SAMPLER_INFO_CONTEXT: - InfoType = UR_SAMPLER_INFO_CONTEXT; - break; - case PI_SAMPLER_INFO_NORMALIZED_COORDS: - InfoType = UR_SAMPLER_INFO_NORMALIZED_COORDS; - break; - case PI_SAMPLER_INFO_ADDRESSING_MODE: - InfoType = UR_SAMPLER_INFO_ADDRESSING_MODE; - break; - case PI_SAMPLER_INFO_FILTER_MODE: - InfoType = UR_SAMPLER_INFO_FILTER_MODE; - break; - default: - return PI_ERROR_UNKNOWN; - } - - size_t UrParamValueSizeRet; - auto hSampler = reinterpret_cast(Sampler); - HANDLE_ERRORS(urSamplerGetInfo(hSampler, InfoType, ParamValueSize, ParamValue, - &UrParamValueSizeRet)); - if (ParamValueSizeRet) { - *ParamValueSizeRet = UrParamValueSizeRet; - } - ur2piSamplerInfoValue(InfoType, ParamValueSize, &ParamValueSize, ParamValue); - fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, - ParamValue); - return PI_SUCCESS; -} - -// Special version of piKernelSetArg to accept pi_sampler. -inline pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_sampler *ArgValue) { - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_sampler_handle_t UrSampler = - reinterpret_cast(*ArgValue); - - HANDLE_ERRORS(urKernelSetArgSampler(UrKernel, ArgIndex, nullptr, UrSampler)); - - return PI_SUCCESS; -} - -inline pi_result piSamplerRetain(pi_sampler Sampler) { - PI_ASSERT(Sampler, PI_ERROR_INVALID_SAMPLER); - - ur_sampler_handle_t UrSampler = - reinterpret_cast(Sampler); - - HANDLE_ERRORS(urSamplerRetain(UrSampler)); - - return PI_SUCCESS; -} - -inline pi_result piSamplerRelease(pi_sampler Sampler) { - PI_ASSERT(Sampler, PI_ERROR_INVALID_SAMPLER); - - ur_sampler_handle_t UrSampler = - reinterpret_cast(Sampler); - - HANDLE_ERRORS(urSamplerRelease(UrSampler)); - - return PI_SUCCESS; -} - -// Sampler -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Command-buffer extension - -inline pi_result -piextCommandBufferCreate(pi_context Context, pi_device Device, - const pi_ext_command_buffer_desc *Desc, - pi_ext_command_buffer *RetCommandBuffer) { - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_device_handle_t UrDevice = reinterpret_cast(Device); - ur_exp_command_buffer_desc_t UrDesc; - UrDesc.stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC; - UrDesc.isInOrder = ur_bool_t(Desc->is_in_order); - UrDesc.enableProfiling = ur_bool_t(Desc->enable_profiling); - UrDesc.isUpdatable = Desc->is_updatable; - ur_exp_command_buffer_handle_t *UrCommandBuffer = - reinterpret_cast(RetCommandBuffer); - - HANDLE_ERRORS( - urCommandBufferCreateExp(UrContext, UrDevice, &UrDesc, UrCommandBuffer)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferRetainExp(UrCommandBuffer)); - - return PI_SUCCESS; -} - -inline pi_result -piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferReleaseExp(UrCommandBuffer)); - - return PI_SUCCESS; -} - -inline pi_result -piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferFinalizeExp(UrCommandBuffer)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint, - pi_ext_command_buffer_command *Command) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_exp_command_buffer_command_handle_t *UrCommandHandle = - reinterpret_cast(Command); - HANDLE_ERRORS(urCommandBufferAppendKernelLaunchExp( - UrCommandBuffer, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint, - UrCommandHandle)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, - size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferAppendUSMMemcpyExp( - UrCommandBuffer, DstPtr, SrcPtr, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - ur_mem_handle_t UrSrcMem = reinterpret_cast(SrcMem); - ur_mem_handle_t UrDstMem = reinterpret_cast(DstMem); - - HANDLE_ERRORS(urCommandBufferAppendMemBufferCopyExp( - UrCommandBuffer, UrSrcMem, UrDstMem, SrcOffset, DstOffset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, - pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - ur_mem_handle_t UrSrcMem = reinterpret_cast(SrcMem); - ur_mem_handle_t UrDstMem = reinterpret_cast(DstMem); - - ur_rect_offset_t UrSrcOrigin{SrcOrigin->x_bytes, SrcOrigin->y_scalar, - SrcOrigin->z_scalar}; - ur_rect_offset_t UrDstOrigin{DstOrigin->x_bytes, DstOrigin->y_scalar, - DstOrigin->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - - HANDLE_ERRORS(urCommandBufferAppendMemBufferCopyRectExp( - UrCommandBuffer, UrSrcMem, UrDstMem, UrSrcOrigin, UrDstOrigin, UrRegion, - SrcRowPitch, SrcSlicePitch, DstRowPitch, DstSlicePitch, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, - BufferOffset->z_scalar}; - ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, - HostOffset->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - - HANDLE_ERRORS(urCommandBufferAppendMemBufferReadRectExp( - UrCommandBuffer, UrBuffer, UrBufferOffset, UrHostOffset, UrRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer CommandBuffer, pi_mem Src, size_t Offset, size_t Size, - void *Dst, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - PI_ASSERT(Src, PI_ERROR_INVALID_MEM_OBJECT); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Src); - - HANDLE_ERRORS(urCommandBufferAppendMemBufferReadExp( - UrCommandBuffer, UrBuffer, Offset, Size, Dst, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, - BufferOffset->z_scalar}; - ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, - HostOffset->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - - HANDLE_ERRORS(urCommandBufferAppendMemBufferWriteRectExp( - UrCommandBuffer, UrBuffer, UrBufferOffset, UrHostOffset, UrRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, - const_cast(Ptr), NumSyncPointsInWaitList, SyncPointWaitList, - SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - - HANDLE_ERRORS(urCommandBufferAppendMemBufferWriteExp( - UrCommandBuffer, UrBuffer, Offset, Size, const_cast(Ptr), - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferFill( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, - size_t PatternSize, size_t Offset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - - HANDLE_ERRORS(urCommandBufferAppendMemBufferFillExp( - UrCommandBuffer, UrBuffer, Pattern, PatternSize, Offset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferFillUSM( - pi_ext_command_buffer CommandBuffer, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferAppendUSMFillExp( - UrCommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint)); - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferPrefetchUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - - // flags is currently unused so fail if set - PI_ASSERT(Flags == 0, PI_ERROR_INVALID_VALUE); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - // TODO: to map from pi_usm_migration_flags to - // ur_usm_migration_flags_t - // once we have those defined - ur_usm_migration_flags_t UrFlags{}; - HANDLE_ERRORS(urCommandBufferAppendUSMPrefetchExp( - UrCommandBuffer, Ptr, Size, UrFlags, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint)); - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferAdviseUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Length, - pi_mem_advice Advice, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - ur_usm_advice_flags_t UrAdvice{}; - if (Advice & PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_READ_MOSTLY; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY; - } - if (Advice & PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION; - } - if (Advice & PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE; - } - if (Advice & PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST; - } - if (Advice & PI_MEM_ADVICE_RESET) { - UrAdvice |= UR_USM_ADVICE_FLAG_DEFAULT; - } - - HANDLE_ERRORS(urCommandBufferAppendUSMAdviseExp( - UrCommandBuffer, Ptr, Length, UrAdvice, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint)); - return PI_SUCCESS; -} - -inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, - pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventWaitList = - reinterpret_cast(EventWaitList); - ur_event_handle_t *UREvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urCommandBufferEnqueueExp( - UrCommandBuffer, UrQueue, NumEventsInWaitList, UrEventWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command command, - pi_ext_command_buffer_update_kernel_launch_desc *desc) { - ur_exp_command_buffer_command_handle_t UrCommand = - reinterpret_cast(command); - ur_exp_command_buffer_update_kernel_launch_desc_t UrDesc; - - UrDesc.stype = ur_structure_type_t:: - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC; - UrDesc.numNewMemObjArgs = desc->num_mem_obj_args; - UrDesc.numNewPointerArgs = desc->num_ptr_args; - UrDesc.numNewValueArgs = desc->num_value_args; - UrDesc.newWorkDim = desc->num_work_dim; - - // Convert arg descs - std::vector UrMemObjDescs; - std::vector UrPointerDescs; - std::vector UrValueDescs; - - for (size_t i = 0; i < UrDesc.numNewMemObjArgs; i++) { - auto &PiDesc = desc->mem_obj_arg_list[i]; - UrMemObjDescs.push_back( - {ur_structure_type_t:: - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, - nullptr, PiDesc.arg_index, nullptr, - reinterpret_cast(PiDesc.new_mem_obj)}); - } - UrDesc.pNewMemObjArgList = UrMemObjDescs.data(); - - for (size_t i = 0; i < UrDesc.numNewPointerArgs; i++) { - auto &PiDesc = desc->ptr_arg_list[i]; - UrPointerDescs.push_back( - {ur_structure_type_t:: - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, - nullptr, PiDesc.arg_index, nullptr, PiDesc.new_ptr}); - } - UrDesc.pNewPointerArgList = UrPointerDescs.data(); - - for (size_t i = 0; i < UrDesc.numNewValueArgs; i++) { - auto &PiDesc = desc->value_arg_list[i]; - UrValueDescs.push_back( - {ur_structure_type_t:: - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, - nullptr, PiDesc.arg_index, PiDesc.arg_size, nullptr, - PiDesc.new_value}); - } - UrDesc.pNewValueArgList = UrValueDescs.data(); - - UrDesc.pNewGlobalWorkSize = desc->global_work_size; - UrDesc.pNewGlobalWorkOffset = desc->global_work_offset; - UrDesc.pNewLocalWorkSize = desc->local_work_size; - - HANDLE_ERRORS(urCommandBufferUpdateKernelLaunchExp(UrCommand, &UrDesc)); - - return PI_SUCCESS; -} - -inline pi_result -piextCommandBufferRetainCommand(pi_ext_command_buffer_command command) { - ur_exp_command_buffer_command_handle_t UrCommand = - reinterpret_cast(command); - HANDLE_ERRORS(urCommandBufferRetainCommandExp(UrCommand)); - return PI_SUCCESS; -} - -inline pi_result -piextCommandBufferReleaseCommand(pi_ext_command_buffer_command command) { - ur_exp_command_buffer_command_handle_t UrCommand = - reinterpret_cast(command); - HANDLE_ERRORS(urCommandBufferReleaseCommandExp(UrCommand)); - return PI_SUCCESS; -} - -// Command-buffer extension -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// usm-p2p - -inline pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - auto commandDevice = reinterpret_cast(command_device); - auto peerDevice = reinterpret_cast(peer_device); - - HANDLE_ERRORS(urUsmP2PEnablePeerAccessExp(commandDevice, peerDevice)); - - return PI_SUCCESS; -} - -inline pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - auto commandDevice = reinterpret_cast(command_device); - auto peerDevice = reinterpret_cast(peer_device); - - HANDLE_ERRORS(urUsmP2PDisablePeerAccessExp(commandDevice, peerDevice)); - - return PI_SUCCESS; -} - -inline pi_result -piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, - pi_peer_attr attr, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) { - auto commandDevice = reinterpret_cast(command_device); - auto peerDevice = reinterpret_cast(peer_device); - - ur_exp_peer_info_t propName; - switch (attr) { - case PI_PEER_ACCESS_SUPPORTED: { - propName = UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED; - break; - } - case PI_PEER_ATOMICS_SUPPORTED: { - propName = UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED; - break; - } - default: { - return PI_ERROR_INVALID_VALUE; - } - } - - HANDLE_ERRORS(urUsmP2PPeerAccessGetInfoExp( - commandDevice, peerDevice, propName, param_value_size, param_value, - param_value_size_ret)); - - return PI_SUCCESS; -} - -// usm-p2p -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Bindless Images Extension - -inline pi_result piextMemImageAllocate(pi_context Context, pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_mem_handle *RetMem) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - ur_image_format_t UrFormat{}; - ur_image_desc_t UrDesc{}; - pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); - - ur_exp_image_mem_native_handle_t *UrRetMem = - reinterpret_cast(RetMem); - - HANDLE_ERRORS(urBindlessImagesImageAllocateExp(UrContext, UrDevice, &UrFormat, - &UrDesc, UrRetMem)); - - return PI_SUCCESS; -} - -inline pi_result piextMemUnsampledImageCreate(pi_context Context, - pi_device Device, - pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_handle *RetHandle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto UrImgMem = reinterpret_cast(ImgMem); - - ur_image_format_t UrFormat{}; - ur_image_desc_t UrDesc{}; - pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); - - ur_exp_image_native_handle_t *UrRetHandle = - reinterpret_cast(RetHandle); - - HANDLE_ERRORS(urBindlessImagesUnsampledImageCreateExp( - UrContext, UrDevice, UrImgMem, &UrFormat, &UrDesc, UrRetHandle)); - - return PI_SUCCESS; -} - -inline pi_result piextMemSampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, - pi_image_handle *RetHandle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - PI_ASSERT(Sampler, PI_ERROR_INVALID_SAMPLER); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto UrImgMem = reinterpret_cast(ImgMem); - - ur_image_format_t UrFormat{}; - ur_image_desc_t UrDesc{}; - pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); - - auto UrSampler = reinterpret_cast(Sampler); - ur_exp_image_native_handle_t *UrRetHandle = - reinterpret_cast(RetHandle); - - HANDLE_ERRORS(urBindlessImagesSampledImageCreateExp( - UrContext, UrDevice, UrImgMem, &UrFormat, &UrDesc, UrSampler, - UrRetHandle)); - - return PI_SUCCESS; -} - -inline pi_result piextBindlessImageSamplerCreate( - pi_context Context, const pi_sampler_properties *SamplerProperties, - float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, - pi_sampler *RetSampler) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(RetSampler, PI_ERROR_INVALID_VALUE); - - auto UrContext = reinterpret_cast(Context); - ur_sampler_desc_t UrProps{}; - UrProps.stype = UR_STRUCTURE_TYPE_SAMPLER_DESC; - - ur_exp_sampler_mip_properties_t UrMipProps{}; - UrMipProps.stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES; - UrMipProps.minMipmapLevelClamp = MinMipmapLevelClamp; - UrMipProps.maxMipmapLevelClamp = MaxMipmapLevelClamp; - UrMipProps.maxAnisotropy = MaxAnisotropy; - UrProps.pNext = &UrMipProps; - - ur_exp_sampler_addr_modes_t UrAddrModes{}; - UrAddrModes.stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES; - UrMipProps.pNext = &UrAddrModes; - int addrIndex = 0; - - ur_exp_sampler_cubemap_properties_t UrCubemapProps{}; - UrCubemapProps.stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES; - UrAddrModes.pNext = &UrCubemapProps; - - const pi_sampler_properties *CurProperty = SamplerProperties; - while (*CurProperty != 0) { - switch (*CurProperty) { - case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS: { - UrProps.normalizedCoords = ur_cast(*(++CurProperty)); - } break; - - case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE: { - pi_sampler_addressing_mode CurValueAddressingMode = - ur_cast( - ur_cast(*(++CurProperty))); - - if (CurValueAddressingMode == - PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT) { - UrAddrModes.addrModes[addrIndex] = - UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; - } else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_REPEAT) { - UrAddrModes.addrModes[addrIndex] = UR_SAMPLER_ADDRESSING_MODE_REPEAT; - } else if (CurValueAddressingMode == - PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE) { - UrAddrModes.addrModes[addrIndex] = - UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; - } else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_CLAMP) { - UrAddrModes.addrModes[addrIndex] = UR_SAMPLER_ADDRESSING_MODE_CLAMP; - } else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_NONE) { - UrAddrModes.addrModes[addrIndex] = UR_SAMPLER_ADDRESSING_MODE_NONE; - } - addrIndex++; - } break; - - case PI_SAMPLER_PROPERTIES_FILTER_MODE: { - pi_sampler_filter_mode CurValueFilterMode = - ur_cast(ur_cast(*(++CurProperty))); - - if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_NEAREST) - UrProps.filterMode = UR_SAMPLER_FILTER_MODE_NEAREST; - else if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_LINEAR) - UrProps.filterMode = UR_SAMPLER_FILTER_MODE_LINEAR; - } break; - - case PI_SAMPLER_PROPERTIES_MIP_FILTER_MODE: { - pi_sampler_filter_mode CurValueFilterMode = - ur_cast(ur_cast(*(++CurProperty))); - - if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_NEAREST) - UrMipProps.mipFilterMode = UR_SAMPLER_FILTER_MODE_NEAREST; - else if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_LINEAR) - UrMipProps.mipFilterMode = UR_SAMPLER_FILTER_MODE_LINEAR; - } break; - - case PI_SAMPLER_PROPERTIES_CUBEMAP_FILTER_MODE: { - pi_sampler_cubemap_filter_mode CurValueFilterMode = - ur_cast( - ur_cast(*(++CurProperty))); - - if (CurValueFilterMode == PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) - UrCubemapProps.cubemapFilterMode = - UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS; - else if (CurValueFilterMode == PI_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED) - UrCubemapProps.cubemapFilterMode = - UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED; - - } break; - - default: - break; - } - CurProperty++; - } - UrProps.addressingMode = UrAddrModes.addrModes[0]; - - ur_sampler_handle_t *UrSampler = - reinterpret_cast(RetSampler); - - HANDLE_ERRORS(urSamplerCreate(UrContext, &UrProps, UrSampler)); - - return PI_SUCCESS; -} - -inline pi_result piextMemMipmapGetLevel(pi_context Context, pi_device Device, - pi_image_mem_handle MipMem, - unsigned int Level, - pi_image_mem_handle *RetMem) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto UrMipMem = reinterpret_cast(MipMem); - ur_exp_image_mem_native_handle_t *UrRetMem = - reinterpret_cast(RetMem); - - HANDLE_ERRORS(urBindlessImagesMipmapGetLevelExp(UrContext, UrDevice, UrMipMem, - Level, UrRetMem)); - - return PI_SUCCESS; -} - -inline pi_result piextMemImageFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto UrMemoryHandle = - reinterpret_cast(MemoryHandle); - - HANDLE_ERRORS( - urBindlessImagesImageFreeExp(UrContext, UrDevice, UrMemoryHandle)); - - return PI_SUCCESS; -} - -inline pi_result piextMemMipmapFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto UrMemoryHandle = - reinterpret_cast(MemoryHandle); - - HANDLE_ERRORS( - urBindlessImagesMipmapFreeExp(UrContext, UrDevice, UrMemoryHandle)); - - return PI_SUCCESS; -} - -static void pi2urImageCopyFlags(const pi_image_copy_flags PiFlags, - ur_exp_image_copy_flags_t *UrFlags) { - switch (PiFlags) { - case PI_IMAGE_COPY_HOST_TO_DEVICE: - *UrFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; - break; - case PI_IMAGE_COPY_DEVICE_TO_HOST: - *UrFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; - break; - case PI_IMAGE_COPY_DEVICE_TO_DEVICE: - *UrFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE; - break; - default: - die("pi2urImageCopyFlags: Unsupported use case"); - } -} - -inline pi_result piextMemImageCopy( - pi_queue Queue, void *DstPtr, const void *SrcPtr, - const pi_image_desc *SrcImageDesc, const pi_image_desc *DstImageDesc, - const pi_image_format *SrcImageFormat, - const pi_image_format *DstImageFormat, const pi_image_copy_flags Flags, - pi_image_offset SrcOffset, pi_image_offset DstOffset, - pi_image_region CopyExtent, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - auto UrQueue = reinterpret_cast(Queue); - - ur_image_format_t UrSrcFormat{}; - ur_image_desc_t UrSrcDesc{}; - pi2urImageDesc(SrcImageFormat, SrcImageDesc, &UrSrcFormat, &UrSrcDesc); - - ur_image_format_t UrDstFormat{}; - ur_image_desc_t UrDstDesc{}; - pi2urImageDesc(DstImageFormat, DstImageDesc, &UrDstFormat, &UrDstDesc); - - ur_exp_image_copy_flags_t UrFlags; - pi2urImageCopyFlags(Flags, &UrFlags); - - ur_rect_offset_t UrSrcOffset{SrcOffset->x, SrcOffset->y, SrcOffset->z}; - ur_rect_offset_t UrDstOffset{DstOffset->x, DstOffset->y, DstOffset->z}; - ur_rect_region_t UrCopyExtent{}; - UrCopyExtent.depth = CopyExtent->depth; - UrCopyExtent.height = CopyExtent->height; - UrCopyExtent.width = CopyExtent->width; - - ur_exp_image_copy_region_t UrCopyRegion{}; - UrCopyRegion.copyExtent = UrCopyExtent; - UrCopyRegion.srcOffset = UrSrcOffset; - UrCopyRegion.dstOffset = UrDstOffset; - - const ur_event_handle_t *UrEventWaitList = - reinterpret_cast(EventWaitList); - ur_event_handle_t *UREvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urBindlessImagesImageCopyExp( - UrQueue, SrcPtr, DstPtr, &UrSrcDesc, &UrDstDesc, &UrSrcFormat, - &UrDstFormat, &UrCopyRegion, UrFlags, NumEventsInWaitList, - UrEventWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piextMemUnsampledImageHandleDestroy(pi_context Context, - pi_device Device, - pi_image_handle Handle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - HANDLE_ERRORS(urBindlessImagesUnsampledImageHandleDestroyExp( - UrContext, UrDevice, Handle)); - - return PI_SUCCESS; -} - -inline pi_result piextMemSampledImageHandleDestroy(pi_context Context, - pi_device Device, - pi_image_handle Handle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - HANDLE_ERRORS(urBindlessImagesSampledImageHandleDestroyExp(UrContext, - UrDevice, Handle)); - - return PI_SUCCESS; -} - -static void pi2urImageInfoFlags(const pi_image_info PiFlags, - ur_image_info_t *UrFlags) { - switch (PiFlags) { -#define PI_TO_UR_IMAGE_INFO(FROM, TO) \ - case FROM: { \ - *UrFlags = TO; \ - return; \ - } - PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_FORMAT, UR_IMAGE_INFO_FORMAT) - PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_ELEMENT_SIZE, UR_IMAGE_INFO_ELEMENT_SIZE) - PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_ROW_PITCH, UR_IMAGE_INFO_ROW_PITCH) - PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_SLICE_PITCH, UR_IMAGE_INFO_SLICE_PITCH) - PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_WIDTH, UR_IMAGE_INFO_WIDTH) - PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_HEIGHT, UR_IMAGE_INFO_HEIGHT) - PI_TO_UR_IMAGE_INFO(PI_IMAGE_INFO_DEPTH, UR_IMAGE_INFO_DEPTH) -#undef PI_TO_UR_IMAGE_INFO - default: - die("pi2urImageInfoFlags: Unsupported use case"); - } -} - -inline pi_result piextMemImageGetInfo(pi_context Context, - pi_image_mem_handle MemHandle, - pi_image_info ParamName, void *ParamValue, - size_t *ParamValueSizeRet) { - auto UrMemHandle = - reinterpret_cast(MemHandle); - auto UrContext = reinterpret_cast(Context); - - ur_image_info_t UrParamName{}; - pi2urImageInfoFlags(ParamName, &UrParamName); - - HANDLE_ERRORS(urBindlessImagesImageGetInfoExp( - UrContext, UrMemHandle, UrParamName, ParamValue, ParamValueSizeRet)); - - if (ParamName == pi_image_info::PI_IMAGE_INFO_FORMAT && ParamValue) { - pi_image_format PiFormat; - ur2piImageFormat(reinterpret_cast(ParamValue), - &PiFormat); - reinterpret_cast(ParamValue)->image_channel_data_type = - PiFormat.image_channel_data_type; - reinterpret_cast(ParamValue)->image_channel_order = - PiFormat.image_channel_order; - if (ParamValueSizeRet) { - *ParamValueSizeRet = sizeof(pi_image_format); - } - } - - return PI_SUCCESS; -} - -inline pi_result -piextImportExternalMemory(pi_context Context, pi_device Device, - pi_external_mem_descriptor *MemDescriptor, - pi_interop_mem_handle *RetHandle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto *UrRetHandle = - reinterpret_cast(RetHandle); - - ur_exp_interop_mem_desc_t InteropMemDesc{}; - InteropMemDesc.stype = UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC; - - ur_exp_external_mem_type_t UrExternalMemHandleType; - switch (MemDescriptor->handleType) { -#ifndef _WIN32 - case pi_external_mem_handle_type::opaque_fd: - UrExternalMemHandleType = UR_EXP_EXTERNAL_MEM_TYPE_OPAQUE_FD; - break; -#else - case pi_external_mem_handle_type::win32_nt_handle: - UrExternalMemHandleType = UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT; - break; - case pi_external_mem_handle_type::win32_nt_dx12_resource: - UrExternalMemHandleType = UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX12_RESOURCE; - break; -#endif - default: - return PI_ERROR_INVALID_VALUE; - } - -#ifndef _WIN32 - ur_exp_file_descriptor_t OpaqueFD{}; -#else - ur_exp_win32_handle_t Win32Handle{}; -#endif - switch (MemDescriptor->handleType) { -#ifndef _WIN32 - case pi_external_mem_handle_type::opaque_fd: { - OpaqueFD.stype = UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR; - OpaqueFD.fd = MemDescriptor->handle.file_descriptor; - InteropMemDesc.pNext = &OpaqueFD; - break; - } -#else - case pi_external_mem_handle_type::win32_nt_handle: - case pi_external_mem_handle_type::win32_nt_dx12_resource: { - Win32Handle.stype = UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE; - Win32Handle.handle = MemDescriptor->handle.win32_handle; - InteropMemDesc.pNext = &Win32Handle; - break; - } -#endif - default: - return PI_ERROR_INVALID_VALUE; - } - - HANDLE_ERRORS(urBindlessImagesImportExternalMemoryExp( - UrContext, UrDevice, MemDescriptor->memorySizeBytes, - UrExternalMemHandleType, &InteropMemDesc, UrRetHandle)); - - return PI_SUCCESS; -} - -inline pi_result piextMemMapExternalArray(pi_context Context, pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_interop_mem_handle MemHandle, - pi_image_mem_handle *RetMem) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - ur_image_format_t UrFormat{}; - ur_image_desc_t UrDesc{}; - pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); - - auto UrMemHandle = reinterpret_cast(MemHandle); - ur_exp_image_mem_native_handle_t *UrRetMem = - reinterpret_cast(RetMem); - - HANDLE_ERRORS(urBindlessImagesMapExternalArrayExp( - UrContext, UrDevice, &UrFormat, &UrDesc, UrMemHandle, UrRetMem)); - - return PI_SUCCESS; -} - -inline pi_result piextMemReleaseInterop(pi_context Context, pi_device Device, - pi_interop_mem_handle ExtMem) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto UrExtMem = reinterpret_cast(ExtMem); - - HANDLE_ERRORS( - urBindlessImagesReleaseInteropExp(UrContext, UrDevice, UrExtMem)); - - return PI_SUCCESS; -} - -inline pi_result -piextImportExternalSemaphore(pi_context Context, pi_device Device, - pi_external_semaphore_descriptor *SemDescriptor, - pi_interop_semaphore_handle *RetHandle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto *UrRetHandle = - reinterpret_cast(RetHandle); - - ur_exp_interop_semaphore_desc_t InteropSemDesc{}; - InteropSemDesc.stype = UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC; - - ur_exp_external_semaphore_type_t UrExternalSemHandleType; - switch (SemDescriptor->handleType) { -#ifndef _WIN32 - case pi_external_semaphore_handle_type::opaque_fd: - UrExternalSemHandleType = UR_EXP_EXTERNAL_SEMAPHORE_TYPE_OPAQUE_FD; - break; -#else - case pi_external_semaphore_handle_type::win32_nt_handle: - UrExternalSemHandleType = UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT; - break; - case pi_external_semaphore_handle_type::win32_nt_dx12_fence: - UrExternalSemHandleType = - UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX12_FENCE; - break; -#endif - default: - return PI_ERROR_INVALID_VALUE; - } - -#ifndef _WIN32 - ur_exp_file_descriptor_t OpaqueFD{}; -#else - ur_exp_win32_handle_t Win32Handle{}; -#endif - switch (SemDescriptor->handleType) { -#ifndef _WIN32 - case pi_external_semaphore_handle_type::opaque_fd: { - OpaqueFD.stype = UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR; - OpaqueFD.fd = SemDescriptor->handle.file_descriptor; - InteropSemDesc.pNext = &OpaqueFD; - break; - } -#else - case pi_external_semaphore_handle_type::win32_nt_dx12_fence: - case pi_external_semaphore_handle_type::win32_nt_handle: { - Win32Handle.stype = UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE; - Win32Handle.handle = SemDescriptor->handle.win32_handle; - InteropSemDesc.pNext = &Win32Handle; - break; - } -#endif - default: - return PI_ERROR_INVALID_VALUE; - } - - HANDLE_ERRORS(urBindlessImagesImportExternalSemaphoreExp( - UrContext, UrDevice, UrExternalSemHandleType, &InteropSemDesc, - UrRetHandle)); - - return PI_SUCCESS; -} - -inline pi_result -piextReleaseExternalSemaphore(pi_context Context, pi_device Device, - pi_interop_semaphore_handle SemHandle) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrContext = reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - auto UrSemHandle = - reinterpret_cast(SemHandle); - - HANDLE_ERRORS(urBindlessImagesReleaseExternalSemaphoreExp(UrContext, UrDevice, - UrSemHandle)); - - return PI_SUCCESS; -} - -inline pi_result piextWaitExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasWaitValue, - pi_uint64 WaitValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - auto UrQueue = reinterpret_cast(Queue); - auto UrSemHandle = - reinterpret_cast(SemHandle); - const ur_event_handle_t *UrEventWaitList = - reinterpret_cast(EventWaitList); - ur_event_handle_t *UREvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urBindlessImagesWaitExternalSemaphoreExp( - UrQueue, UrSemHandle, HasWaitValue, WaitValue, NumEventsInWaitList, - UrEventWaitList, UREvent)); - - return PI_SUCCESS; -} - -inline pi_result piextSignalExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasSignalValue, - pi_uint64 SignalValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - auto UrQueue = reinterpret_cast(Queue); - auto UrSemHandle = - reinterpret_cast(SemHandle); - const ur_event_handle_t *UrEventWaitList = - reinterpret_cast(EventWaitList); - ur_event_handle_t *UREvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urBindlessImagesSignalExternalSemaphoreExp( - UrQueue, UrSemHandle, HasSignalValue, SignalValue, NumEventsInWaitList, - UrEventWaitList, UREvent)); - - return PI_SUCCESS; -} - -// Bindless Images Extension -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Virtual Memory - -inline pi_result -piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, - pi_virtual_mem_granularity_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_device_handle_t UrDevice = reinterpret_cast(Device); - - ur_virtual_mem_granularity_info_t InfoType{}; - switch (ParamName) { - case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: - InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; - break; - case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: - InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; - break; - default: - return PI_ERROR_UNKNOWN; - } - - HANDLE_ERRORS(urVirtualMemGranularityGetInfo(UrContext, UrDevice, InfoType, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - return PI_SUCCESS; -} - -inline pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, - size_t MemSize, - pi_physical_mem *RetPhyscialMem) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_device_handle_t UrDevice = reinterpret_cast(Device); - - ur_physical_mem_handle_t *UrPhysicalMem = - reinterpret_cast(RetPhyscialMem); - - HANDLE_ERRORS(urPhysicalMemCreate(UrContext, UrDevice, MemSize, nullptr, - UrPhysicalMem)); - - return PI_SUCCESS; -} - -inline pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { - PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); - - ur_physical_mem_handle_t UrPhysicalMem = - reinterpret_cast(PhysicalMem); - - HANDLE_ERRORS(urPhysicalMemRetain(UrPhysicalMem)); - - return PI_SUCCESS; -} - -inline pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { - - ur_physical_mem_handle_t UrPhysicalMem = - reinterpret_cast(PhysicalMem); - - HANDLE_ERRORS(urPhysicalMemRelease(UrPhysicalMem)); - - return PI_SUCCESS; -} - -inline pi_result piextVirtualMemReserve(pi_context Context, const void *Start, - size_t RangeSize, void **RetPtr) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(RetPtr, PI_ERROR_INVALID_ARG_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - HANDLE_ERRORS(urVirtualMemReserve(UrContext, Start, RangeSize, RetPtr)); - - return PI_SUCCESS; -} - -inline pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, - size_t RangeSize) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - HANDLE_ERRORS(urVirtualMemFree(UrContext, Ptr, RangeSize)); - - return PI_SUCCESS; -} - -inline pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, - size_t RangeSize, - pi_virtual_access_flags Flags) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_virtual_mem_access_flags_t UrFlags = 0; - if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) - UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; - if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) - UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; - - HANDLE_ERRORS(urVirtualMemSetAccess(UrContext, Ptr, RangeSize, UrFlags)); - - return PI_SUCCESS; -} - -inline pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, - size_t RangeSize, - pi_physical_mem PhysicalMem, size_t Offset, - pi_virtual_access_flags Flags) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); - PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_physical_mem_handle_t UrPhysicalMem = - reinterpret_cast(PhysicalMem); - - ur_virtual_mem_access_flags_t UrFlags = 0; - if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) - UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; - if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) - UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; - - HANDLE_ERRORS(urVirtualMemMap(UrContext, Ptr, RangeSize, UrPhysicalMem, - Offset, UrFlags)); - - return PI_SUCCESS; -} - -inline pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, - size_t RangeSize) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - HANDLE_ERRORS(urVirtualMemUnmap(UrContext, Ptr, RangeSize)); - - return PI_SUCCESS; -} - -inline pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, - size_t RangeSize, - pi_virtual_mem_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_virtual_mem_info_t InfoType{}; - switch (ParamName) { - case PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE: - InfoType = UR_VIRTUAL_MEM_INFO_ACCESS_MODE; - break; - default: - return PI_ERROR_UNKNOWN; - } - - HANDLE_ERRORS(urVirtualMemGetInfo(UrContext, Ptr, RangeSize, InfoType, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - ur2piVirtualMemInfoValue(InfoType, ParamValueSize, &ParamValueSize, - ParamValue); - - return PI_SUCCESS; -} - -// Virtual Memory -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Enqueue Native Command Extension -inline pi_result -piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, - void *Data, pi_uint32 NumMems, const pi_mem *MemList, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - auto UrQueue = reinterpret_cast(Queue); - auto UrFn = reinterpret_cast(Fn); - const ur_mem_handle_t *UrMemList = - reinterpret_cast(MemList); - const ur_event_handle_t *UrEventWaitList = - reinterpret_cast(EventWaitList); - ur_event_handle_t *UREvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urEnqueueNativeCommandExp( - UrQueue, UrFn, Data, NumMems, UrMemList, nullptr /*pProperties*/, - NumEventsInWaitList, UrEventWaitList, UREvent)); - - return PI_SUCCESS; -} -// Enqueue Native Command Extension -/////////////////////////////////////////////////////////////////////////////// - -} // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp deleted file mode 100644 index 95f329811f643..0000000000000 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ /dev/null @@ -1,1640 +0,0 @@ -//===--- pi_unified_runtime.cpp - Unified Runtime PI Plugin ---------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -// #include "ur/adapters/level_zero/ur_level_zero_common.hpp" -#include -#include - -// Stub function to where all not yet supported PI API are bound -static void DieUnsupported() { - die("Unified Runtime: functionality is not supported"); -} - -// Adapters may be released by piTearDown being called, or the global dtors -// being called first. Handle releasing the adapters exactly once. -static void releaseAdapters(std::vector &Vec) noexcept { - static std::once_flag ReleaseFlag{}; - try { - std::call_once(ReleaseFlag, [&]() { - for (auto Adapter : Vec) { - urAdapterRelease(Adapter); - } - urLoaderTearDown(); - }); - } catch (...) { - // Ignore any potential exceptions on teardown. Worst case scenario - // this just leaks some memory on exit. - } -} - -struct AdapterHolder { - ~AdapterHolder() { releaseAdapters(Vec); } - std::vector Vec{}; -} Adapters; - -// All PI API interfaces are C interfaces -extern "C" { -__SYCL_EXPORT pi_result piPlatformsGet(pi_uint32 NumEntries, - pi_platform *Platforms, - pi_uint32 *NumPlatforms) { - // Get all the platforms from all available adapters - urPlatformGet(Adapters.Vec.data(), static_cast(Adapters.Vec.size()), - NumEntries, reinterpret_cast(Platforms), - NumPlatforms); - - return PI_SUCCESS; -} - -__SYCL_EXPORT pi_result piPlatformGetInfo(pi_platform Platform, - pi_platform_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piPlatformGetInfo(Platform, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piDevicesGet(pi_platform Platform, - pi_device_type DeviceType, - pi_uint32 NumEntries, pi_device *Devices, - pi_uint32 *NumDevices) { - return pi2ur::piDevicesGet(Platform, DeviceType, NumEntries, Devices, - NumDevices); -} - -__SYCL_EXPORT pi_result piDeviceRetain(pi_device Device) { - return pi2ur::piDeviceRetain(Device); -} - -__SYCL_EXPORT pi_result piDeviceRelease(pi_device Device) { - return pi2ur::piDeviceRelease(Device); -} - -__SYCL_EXPORT pi_result piDeviceGetInfo(pi_device Device, - pi_device_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piDeviceGetInfo(Device, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piDevicePartition( - pi_device Device, const pi_device_partition_property *Properties, - pi_uint32 NumDevices, pi_device *OutDevices, pi_uint32 *OutNumDevices) { - return pi2ur::piDevicePartition(Device, Properties, NumDevices, OutDevices, - OutNumDevices); -} - -// Stub for the not yet supported API -__SYCL_EXPORT pi_result piextDeviceSelectBinary(pi_device Device, - pi_device_binary *Binaries, - pi_uint32 NumBinaries, - pi_uint32 *SelectedBinaryInd) { - return pi2ur::piextDeviceSelectBinary(Device, Binaries, NumBinaries, - SelectedBinaryInd); -} - -__SYCL_EXPORT pi_result -piContextCreate(const pi_context_properties *Properties, pi_uint32 NumDevices, - const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, const void *PrivateInfo, - size_t CB, void *UserData), - void *UserData, pi_context *RetContext) { - return pi2ur::piContextCreate(Properties, NumDevices, Devices, PFnNotify, - UserData, RetContext); -} - -__SYCL_EXPORT pi_result piContextGetInfo(pi_context Context, - pi_context_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piContextGetInfo(Context, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piContextRelease(pi_context Context) { - return pi2ur::piContextRelease(Context); -} - -__SYCL_EXPORT pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Flags, - pi_queue *Queue) { - return pi2ur::piQueueCreate(Context, Device, Flags, Queue); -} - -__SYCL_EXPORT pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, - pi_queue *Queue) { - return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); -} - -__SYCL_EXPORT pi_result piQueueRelease(pi_queue Queue) { - return pi2ur::piQueueRelease(Queue); -} - -__SYCL_EXPORT pi_result piProgramCreate(pi_context Context, const void *ILBytes, - size_t Length, pi_program *Program) { - return pi2ur::piProgramCreate(Context, ILBytes, Length, Program); -} - -__SYCL_EXPORT pi_result piProgramBuild( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData) { - return pi2ur::piProgramBuild(Program, NumDevices, DeviceList, Options, - PFnNotify, UserData); -} - -__SYCL_EXPORT pi_result piextProgramSetSpecializationConstant( - pi_program Prog, pi_uint32 SpecID, size_t Size, const void *SpecValue) { - return pi2ur::piextProgramSetSpecializationConstant(Prog, SpecID, Size, - SpecValue); -} - -__SYCL_EXPORT pi_result -piProgramLink(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - pi_uint32 NumInputPrograms, const pi_program *InputPrograms, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData, pi_program *RetProgram) { - return pi2ur::piProgramLink(Context, NumDevices, DeviceList, Options, - NumInputPrograms, InputPrograms, PFnNotify, - UserData, RetProgram); -} - -__SYCL_EXPORT pi_result piKernelCreate(pi_program Program, - const char *KernelName, - pi_kernel *RetKernel) { - return pi2ur::piKernelCreate(Program, KernelName, RetKernel); -} - -// Special version of piKernelSetArg to accept pi_mem. -__SYCL_EXPORT pi_result piextKernelSetArgMemObj( - pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem_obj_property *ArgProperties, const pi_mem *ArgValue) { - - return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, - ArgValue); -} - -__SYCL_EXPORT pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - - return pi2ur::piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue); -} - -__SYCL_EXPORT pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_group_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetGroupInfo(Kernel, Device, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piMemBufferCreate(pi_context Context, - pi_mem_flags Flags, size_t Size, - void *HostPtr, pi_mem *RetMem, - const pi_mem_properties *properties) { - - return pi2ur::piMemBufferCreate(Context, Flags, Size, HostPtr, RetMem, - properties); -} - -__SYCL_EXPORT pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - - return pi2ur::piextUSMHostAlloc(ResultPtr, Context, Properties, Size, - Alignment); -} - -__SYCL_EXPORT pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piMemGetInfo(Mem, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, - const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, - void *HostPtr, pi_mem *RetImage) { - - return pi2ur::piMemImageCreate(Context, Flags, ImageFormat, ImageDesc, - HostPtr, RetImage); -} - -__SYCL_EXPORT pi_result piMemBufferPartition( - pi_mem Buffer, pi_mem_flags Flags, pi_buffer_create_type BufferCreateType, - void *BufferCreateInfo, pi_mem *RetMem) { - return pi2ur::piMemBufferPartition(Buffer, Flags, BufferCreateType, - BufferCreateInfo, RetMem); -} - -__SYCL_EXPORT pi_result piextMemGetNativeHandle( - pi_mem Mem, pi_device Dev, pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); -} - -__SYCL_EXPORT pi_result -piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, - pi_image_offset SrcOrigin, pi_image_offset DstOrigin, - pi_image_region Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piEnqueueMemImageCopy(Queue, SrcImage, DstImage, SrcOrigin, - DstOrigin, Region, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextMemCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool ownNativeHandle, - pi_mem *Mem) { - return pi2ur::piextMemCreateWithNativeHandle(NativeHandle, Context, - ownNativeHandle, Mem); -} - -__SYCL_EXPORT pi_result piEnqueueKernelLaunch( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - - return pi2ur::piEnqueueKernelLaunch( - Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumEventsInWaitList, EventWaitList, OutEvent); -} - -__SYCL_EXPORT pi_result piextEnqueueKernelLaunchCustom( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumPropsInLaunchPropList, - const pi_launch_property *LaunchPropList, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - return pi2ur::piextEnqueueKernelLaunchCustom( - Queue, Kernel, WorkDim, GlobalWorkSize, LocalWorkSize, - NumPropsInLaunchPropList, LaunchPropList, NumEventsInWaitList, - EventsWaitList, OutEvent); -} - -__SYCL_EXPORT pi_result piEnqueueMemImageWrite( - pi_queue Queue, pi_mem Image, pi_bool BlockingWrite, pi_image_offset Origin, - pi_image_region Region, size_t InputRowPitch, size_t InputSlicePitch, - const void *Ptr, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemImageWrite( - Queue, Image, BlockingWrite, Origin, Region, InputRowPitch, - InputSlicePitch, Ptr, NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piEnqueueMemImageRead( - pi_queue Queue, pi_mem Image, pi_bool BlockingRead, pi_image_offset Origin, - pi_image_region Region, size_t RowPitch, size_t SlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemImageRead( - Queue, Image, BlockingRead, Origin, Region, RowPitch, SlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextKernelCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, pi_program Program, - bool OwnNativeHandle, pi_kernel *Kernel) { - - return pi2ur::piextKernelCreateWithNativeHandle( - NativeHandle, Context, Program, OwnNativeHandle, Kernel); -} - -__SYCL_EXPORT pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, - void *MappedPtr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueMemUnmap(Queue, Mem, MappedPtr, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -__SYCL_EXPORT pi_result piEventsWait(pi_uint32 NumEvents, - const pi_event *EventList) { - - return pi2ur::piEventsWait(NumEvents, EventList); -} - -__SYCL_EXPORT pi_result piQueueFinish(pi_queue Queue) { - return pi2ur::piQueueFinish(Queue); -} - -__SYCL_EXPORT pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piEventGetInfo(Event, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piEnqueueMemBufferMap( - pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, pi_map_flags MapFlags, - size_t Offset, size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent, void **RetMap) { - - return pi2ur::piEnqueueMemBufferMap(Queue, Mem, BlockingMap, MapFlags, Offset, - Size, NumEventsInWaitList, EventWaitList, - OutEvent, RetMap); -} - -__SYCL_EXPORT pi_result piEnqueueMemBufferFill( - pi_queue Queue, pi_mem Buffer, const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piEnqueueMemBufferFill(Queue, Buffer, Pattern, PatternSize, - Offset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextUSMDeviceAlloc(void **ResultPtr, - pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - - return pi2ur::piextUSMDeviceAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -__SYCL_EXPORT pi_result piKernelRetain(pi_kernel Kernel) { - return pi2ur::piKernelRetain(Kernel); -} - -__SYCL_EXPORT pi_result piKernelRelease(pi_kernel Kernel) { - - return pi2ur::piKernelRelease(Kernel); -} - -__SYCL_EXPORT pi_result piProgramRelease(pi_program Program) { - return pi2ur::piProgramRelease(Program); -} - -__SYCL_EXPORT pi_result piextUSMSharedAlloc(void **ResultPtr, - pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - - return pi2ur::piextUSMSharedAlloc(ResultPtr, Context, Device, Properties, - Size, Alignment); -} - -__SYCL_EXPORT pi_result piextUSMPitchedAlloc( - void **ResultPtr, size_t *ResultPitch, pi_context Context, pi_device Device, - pi_usm_mem_properties *Properties, size_t WidthInBytes, size_t Height, - unsigned int ElementSizeBytes) { - - return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, - Properties, WidthInBytes, Height, - ElementSizeBytes); -} - -__SYCL_EXPORT pi_result piextUSMFree(pi_context Context, void *Ptr) { - return pi2ur::piextUSMFree(Context, Ptr); -} - -__SYCL_EXPORT pi_result piextUSMImport(const void *HostPtr, size_t Size, - pi_context Context) { - return pi2ur::piextUSMImport(HostPtr, Size, Context); -} - -__SYCL_EXPORT pi_result piextUSMRelease(const void *HostPtr, - pi_context Context) { - return pi2ur::piextUSMRelease(HostPtr, Context); -} - -__SYCL_EXPORT pi_result piContextRetain(pi_context Context) { - return pi2ur::piContextRetain(Context); -} - -__SYCL_EXPORT pi_result piextKernelSetArgPointer(pi_kernel Kernel, - pi_uint32 ArgIndex, - size_t ArgSize, - const void *ArgValue) { - return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); -} - -// Special version of piKernelSetArg to accept pi_sampler. -__SYCL_EXPORT pi_result piextKernelSetArgSampler(pi_kernel Kernel, - pi_uint32 ArgIndex, - const pi_sampler *ArgValue) { - - return pi2ur::piextKernelSetArgSampler(Kernel, ArgIndex, ArgValue); -} - -__SYCL_EXPORT pi_result piKernelGetSubGroupInfo( - pi_kernel Kernel, pi_device Device, pi_kernel_sub_group_info ParamName, - size_t InputValueSize, const void *InputValue, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - - return pi2ur::piKernelGetSubGroupInfo( - Kernel, Device, ParamName, InputValueSize, InputValue, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piQueueGetInfo(Queue, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -/// USM Fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pattern is the ptr with the bytes of the pattern to set -/// \param patternSize is the size in bytes of the pattern to set -/// \param count is the size in bytes to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, - const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); -} - -__SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect( - pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, - pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, - size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemBufferCopyRect( - Queue, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, - pi_mem DstMem, size_t SrcOffset, - size_t DstOffset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piEnqueueMemBufferCopy(Queue, SrcMem, DstMem, SrcOffset, - DstOffset, Size, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, - void *DstPtr, const void *SrcPtr, - size_t Size, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy(Queue, Blocking, DstPtr, SrcPtr, Size, - NumEventsInWaitlist, EventsWaitlist, - Event); -} - -__SYCL_EXPORT pi_result piEnqueueMemBufferWriteRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWriteRect( - Queue, Buffer, BlockingWrite, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piEnqueueMemBufferWrite( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemBufferWrite(Queue, Buffer, BlockingWrite, Offset, - Size, Ptr, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piEnqueueMemBufferReadRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { - - return pi2ur::piEnqueueMemBufferReadRect( - Queue, Buffer, BlockingRead, BufferOffset, HostOffset, Region, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piEnqueueMemBufferRead( - pi_queue Queue, pi_mem Src, pi_bool BlockingRead, size_t Offset, - size_t Size, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemBufferRead(Queue, Src, BlockingRead, Offset, Size, - Dst, NumEventsInWaitList, EventWaitList, - Event); -} - -__SYCL_EXPORT pi_result piEnqueueEventsWaitWithBarrier( - pi_queue Queue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWaitWithBarrier(Queue, NumEventsInWaitList, - EventWaitList, OutEvent); -} - -__SYCL_EXPORT pi_result piEnqueueEventsWait(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piEnqueueEventsWait(Queue, NumEventsInWaitList, EventWaitList, - OutEvent); -} - -__SYCL_EXPORT pi_result -piextEventGetNativeHandle(pi_event Event, pi_native_handle *NativeHandle) { - - return pi2ur::piextEventGetNativeHandle(Event, NativeHandle); -} - -__SYCL_EXPORT pi_result piEventGetProfilingInfo(pi_event Event, - pi_profiling_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piEventGetProfilingInfo(Event, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piProgramRetain(pi_program Program) { - return pi2ur::piProgramRetain(Program); -} - -__SYCL_EXPORT pi_result piKernelSetExecInfo(pi_kernel Kernel, - pi_kernel_exec_info ParamName, - size_t ParamValueSize, - const void *ParamValue) { - - return pi2ur::piKernelSetExecInfo(Kernel, ParamName, ParamValueSize, - ParamValue); -} - -__SYCL_EXPORT pi_result piKernelGetInfo(pi_kernel Kernel, - pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piKernelGetInfo(Kernel, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piQueueRetain(pi_queue Queue) { - return pi2ur::piQueueRetain(Queue); -} - -__SYCL_EXPORT pi_result piQueueFlush(pi_queue Queue) { - return pi2ur::piQueueFlush(Queue); -} - -__SYCL_EXPORT pi_result piMemRetain(pi_mem Mem) { - return pi2ur::piMemRetain(Mem); -} - -__SYCL_EXPORT pi_result piProgramCreateWithBinary( - pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, - const size_t *Lengths, const unsigned char **Binaries, - size_t NumMetadataEntries, const pi_device_binary_property *Metadata, - pi_int32 *BinaryStatus, pi_program *Program) { - - return pi2ur::piProgramCreateWithBinary(Context, NumDevices, DeviceList, - Lengths, Binaries, NumMetadataEntries, - Metadata, BinaryStatus, Program); -} - -__SYCL_EXPORT pi_result piProgramGetInfo(pi_program Program, - pi_program_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetInfo(Program, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piProgramCompile( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, pi_uint32 NumInputHeaders, - const pi_program *InputHeaders, const char **HeaderIncludeNames, - void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { - - return pi2ur::piProgramCompile(Program, NumDevices, DeviceList, Options, - NumInputHeaders, InputHeaders, - HeaderIncludeNames, PFnNotify, UserData); -} - -__SYCL_EXPORT pi_result piProgramGetBuildInfo( - pi_program Program, pi_device Device, pi_program_build_info ParamName, - size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - - return pi2ur::piProgramGetBuildInfo(Program, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - - return pi2ur::piEventCreate(Context, RetEvent); -} - -__SYCL_EXPORT pi_result piEventSetCallback( - pi_event Event, pi_int32 CommandExecCallbackType, - void (*PFnNotify)(pi_event Event, pi_int32 EventCommandStatus, - void *UserData), - void *UserData) { - return pi2ur::piEventSetCallback(Event, CommandExecCallbackType, PFnNotify, - UserData); -} - -__SYCL_EXPORT pi_result piEventSetStatus(pi_event Event, - pi_int32 ExecutionStatus) { - return pi2ur::piEventSetStatus(Event, ExecutionStatus); -} - -__SYCL_EXPORT pi_result piEventRetain(pi_event Event) { - return pi2ur::piEventRetain(Event); -} - -__SYCL_EXPORT pi_result piEventRelease(pi_event Event) { - return pi2ur::piEventRelease(Event); -} - -__SYCL_EXPORT pi_result piextEventCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - pi_event *Event) { - return pi2ur::piextEventCreateWithNativeHandle(NativeHandle, Context, - OwnNativeHandle, Event); -} - -__SYCL_EXPORT pi_result piEnqueueTimestampRecordingExp( - pi_queue Queue, pi_bool Blocking, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piEnqueueTimestampRecordingExp( - Queue, Blocking, NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piEnqueueMemImageFill( - pi_queue Queue, pi_mem Image, const void *FillColor, const size_t *Origin, - const size_t *Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - - return pi2ur::piEnqueueMemImageFill(Queue, Image, FillColor, Origin, Region, - NumEventsInWaitList, EventWaitList, - Event); -} - -__SYCL_EXPORT pi_result piextPlatformGetNativeHandle( - pi_platform Platform, pi_native_handle *NativeHandle) { - - return pi2ur::piextPlatformGetNativeHandle(Platform, NativeHandle); -} - -__SYCL_EXPORT pi_result piextPlatformCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_platform *Platform) { - - return pi2ur::piextPlatformCreateWithNativeHandle(NativeHandle, Platform); -} - -__SYCL_EXPORT pi_result -piextDeviceGetNativeHandle(pi_device Device, pi_native_handle *NativeHandle) { - - return pi2ur::piextDeviceGetNativeHandle(Device, NativeHandle); -} - -__SYCL_EXPORT pi_result piextDeviceCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_platform Platform, pi_device *Device) { - - return pi2ur::piextDeviceCreateWithNativeHandle(NativeHandle, Platform, - Device); -} - -// FIXME: Dummy implementation to prevent link fail -__SYCL_EXPORT pi_result piextContextSetExtendedDeleter( - pi_context Context, pi_context_extended_deleter Function, void *UserData) { - return pi2ur::piextContextSetExtendedDeleter(Context, Function, UserData); -} - -__SYCL_EXPORT pi_result piextContextGetNativeHandle( - pi_context Context, pi_native_handle *NativeHandle) { - - return pi2ur::piextContextGetNativeHandle(Context, NativeHandle); -} - -__SYCL_EXPORT pi_result piextContextCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_uint32 NumDevices, - const pi_device *Devices, bool OwnNativeHandle, pi_context *RetContext) { - return pi2ur::piextContextCreateWithNativeHandle( - NativeHandle, NumDevices, Devices, OwnNativeHandle, RetContext); -} - -__SYCL_EXPORT pi_result piextQueueGetNativeHandle( - pi_queue Queue, pi_native_handle *NativeHandle, int32_t *NativeHandleDesc) { - return pi2ur::piextQueueGetNativeHandle(Queue, NativeHandle, - NativeHandleDesc); -} - -__SYCL_EXPORT pi_result piextQueueCreateWithNativeHandle( - pi_native_handle NativeHandle, int32_t NativeHandleDesc, pi_context Context, - pi_device Device, bool OwnNativeHandle, pi_queue_properties *Properties, - pi_queue *Queue) { - return pi2ur::piextQueueCreateWithNativeHandle( - NativeHandle, NativeHandleDesc, Context, Device, OwnNativeHandle, - Properties, Queue); -} - -__SYCL_EXPORT pi_result piMemRelease(pi_mem Mem) { - return pi2ur::piMemRelease(Mem); -} - -__SYCL_EXPORT pi_result piextGetDeviceFunctionPointer( - pi_device Device, pi_program Program, const char *FunctionName, - pi_uint64 *FunctionPointerRet) { - - return pi2ur::piextGetDeviceFunctionPointer(Device, Program, FunctionName, - FunctionPointerRet); -} - -__SYCL_EXPORT pi_result piextGetGlobalVariablePointer( - pi_device Device, pi_program Program, const char *GlobalVariableName, - size_t *GlobalVariableSize, void **GlobalVariablePointerRet) { - - return pi2ur::piextGetGlobalVariablePointer( - Device, Program, GlobalVariableName, GlobalVariableSize, - GlobalVariablePointerRet); -} - -/// Hint to migrate memory to the device -/// -/// @param Queue is the queue to submit to -/// @param Ptr points to the memory to migrate -/// @param Size is the number of bytes to migrate -/// @param Flags is a bitfield used to specify memory migration options -/// @param NumEventsInWaitlist is the number of events to wait on -/// @param EventsWaitlist is an array of events to wait on -/// @param Event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, - size_t Size, - pi_usm_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueuePrefetch( - Queue, Ptr, Size, Flags, NumEventsInWaitList, EventWaitList, OutEvent); -} - -/// USM memadvise API to govern behavior of automatic migration mechanisms -/// -/// @param Queue is the queue to submit to -/// @param Ptr is the data to be advised -/// @param Length is the size in bytes of the meory to advise -/// @param Advice is device specific advice -/// @param Event is the event that represents this operation -/// -__SYCL_EXPORT pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, - const void *Ptr, size_t Length, - pi_mem_advice Advice, - pi_event *OutEvent) { - - return pi2ur::piextUSMEnqueueMemAdvise(Queue, Ptr, Length, Advice, OutEvent); -} - -/// USM 2D Fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, - size_t Pitch, size_t PatternSize, - const void *Pattern, size_t Width, - size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueFill2D(Queue, Ptr, Pitch, PatternSize, Pattern, - Width, Height, NumEventsWaitList, - EventsWaitList, Event); -} - -/// USM 2D Memset API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, - size_t Pitch, int Value, - size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset2D(Queue, Ptr, Pitch, Value, Width, Height, - NumEventsWaitList, EventsWaitlist, - Event); -} - -/// API to query information about USM allocated pointers. -/// Valid Queries: -/// PI_MEM_ALLOC_TYPE returns host/device/shared pi_usm_type value -/// PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if -/// the queried pointer fell inside an allocation. -/// Result must fit in void * -/// PI_MEM_ALLOC_SIZE returns how big the queried pointer's -/// allocation is in bytes. Result is a size_t. -/// PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against -/// -/// @param Context is the pi_context -/// @param Ptr is the pointer to query -/// @param ParamName is the type of query to perform -/// @param ParamValueSize is the size of the result in bytes -/// @param ParamValue is the result -/// @param ParamValueRet is how many bytes were written -__SYCL_EXPORT pi_result piextUSMGetMemAllocInfo( - pi_context Context, const void *Ptr, pi_mem_alloc_info ParamName, - size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piextUSMGetMemAllocInfo(Context, Ptr, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - return pi2ur::piextPluginGetOpaqueData(opaque_data_param, opaque_data_return); -} - -__SYCL_EXPORT pi_result piextProgramGetNativeHandle( - pi_program Program, pi_native_handle *NativeHandle) { - - return pi2ur::piextProgramGetNativeHandle(Program, NativeHandle); -} - -__SYCL_EXPORT pi_result piextProgramCreateWithNativeHandle( - pi_native_handle NativeHandle, // missing - pi_context Context, bool ownNativeHandle, pi_program *Program) { - return pi2ur::piextProgramCreateWithNativeHandle(NativeHandle, Context, - ownNativeHandle, Program); -} - -__SYCL_EXPORT pi_result piSamplerCreate( - pi_context Context, const pi_sampler_properties *SamplerProperties, - pi_sampler *RetSampler) { - return pi2ur::piSamplerCreate(Context, SamplerProperties, RetSampler); -} - -__SYCL_EXPORT pi_result piSamplerGetInfo(pi_sampler Sampler, - pi_sampler_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piSamplerGetInfo(Sampler, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piSamplerRetain(pi_sampler Sampler) { - return pi2ur::piSamplerRetain(Sampler); -} - -__SYCL_EXPORT pi_result piSamplerRelease(pi_sampler Sampler) { - return pi2ur::piSamplerRelease(Sampler); -} - -__SYCL_EXPORT pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piMemImageGetInfo(Image, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -/// USM 2D Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param dst_ptr is the location the data will be copied -/// \param dst_pitch is the total width of the destination memory including -/// padding -/// \param src_ptr is the data to be copied -/// \param dst_pitch is the total width of the source memory including padding -/// \param width is width in bytes of each row to be copied -/// \param height is height the columns to be copied -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( - pi_queue Queue, pi_bool Blocking, void *DstPtr, size_t DstPitch, - const void *SrcPtr, size_t SrcPitch, size_t Width, size_t Height, - pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, - pi_event *Event) { - - return pi2ur::piextUSMEnqueueMemcpy2D( - Queue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, - NumEventsInWaitList, EventsWaitList, Event); -} - -/// API for writing data from host to a device global variable. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device global variable -/// \param Name is the unique identifier for the device global variable -/// \param BlockingWrite is true if the write should block -/// \param Count is the number of bytes to copy -/// \param Offset is the byte offset into the device global variable to start -/// copying -/// \param Src is a pointer to where the data must be copied from -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, - size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - return pi2ur::piextEnqueueDeviceGlobalVariableWrite( - Queue, Program, Name, BlockingWrite, Count, Offset, Src, - NumEventsInWaitList, EventsWaitList, Event); -} - -/// API reading data from a device global variable to host. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device global variable -/// \param Name is the unique identifier for the device global variable -/// \param BlockingRead is true if the read should block -/// \param Count is the number of bytes to copy -/// \param Offset is the byte offset into the device global variable to start -/// copying -/// \param Dst is a pointer to where the data must be copied to -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, - size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - - return pi2ur::piextEnqueueDeviceGlobalVariableRead( - Queue, Program, Name, BlockingRead, Count, Offset, Dst, - NumEventsInWaitList, EventsWaitList, Event); -} - -pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *Img) { - return pi2ur::piextMemImageCreateWithNativeHandle( - NativeHandle, Context, OwnNativeHandle, ImageFormat, ImageDesc, Img); -} - -// Command buffer extension -pi_result piextCommandBufferCreate(pi_context Context, pi_device Device, - const pi_ext_command_buffer_desc *Desc, - pi_ext_command_buffer *RetCommandBuffer) { - return pi2ur::piextCommandBufferCreate(Context, Device, Desc, - RetCommandBuffer); -} - -pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRetain(CommandBuffer); -} - -pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferRelease(CommandBuffer); -} - -pi_result piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { - return pi2ur::piextCommandBufferFinalize(CommandBuffer); -} - -pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint, - pi_ext_command_buffer_command *Command) { - return pi2ur::piextCommandBufferNDRangeKernel( - CommandBuffer, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint, - Command); -} - -pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, - size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemcpyUSM(CommandBuffer, DstPtr, SrcPtr, Size, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopy( - CommandBuffer, SrcMem, DstMem, SrcOffset, DstOffset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, - pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferCopyRect( - CommandBuffer, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferRead( - CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferReadRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWrite( - CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferWriteRect( - CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferMemBufferFill( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, const void *Pattern, - size_t PatternSize, size_t Offset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferMemBufferFill( - CommandBuffer, Buffer, Pattern, PatternSize, Offset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferFillUSM(pi_ext_command_buffer CommandBuffer, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, - pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferFillUSM( - CommandBuffer, Ptr, Pattern, PatternSize, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferPrefetchUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Size, - pi_usm_migration_flags Flags, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferPrefetchUSM(CommandBuffer, Ptr, Size, Flags, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextCommandBufferAdviseUSM( - pi_ext_command_buffer CommandBuffer, const void *Ptr, size_t Length, - pi_mem_advice Advice, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - return pi2ur::piextCommandBufferAdviseUSM(CommandBuffer, Ptr, Length, Advice, - NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint); -} - -pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, - pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return pi2ur::piextEnqueueCommandBuffer( - CommandBuffer, Queue, NumEventsInWaitList, EventWaitList, Event); -} - -pi_result piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command Command, - pi_ext_command_buffer_update_kernel_launch_desc *Desc) { - return pi2ur::piextCommandBufferUpdateKernelLaunch(Command, Desc); -} - -pi_result -piextCommandBufferRetainCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferRetainCommand(Command); -} - -pi_result -piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { - return pi2ur::piextCommandBufferReleaseCommand(Command); -} - -__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( - pi_context Context, pi_device Device, - pi_virtual_mem_granularity_info ParamName, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result -piextPhysicalMemCreate(pi_context Context, pi_device Device, size_t MemSize, - pi_physical_mem *RetPhsycialMem) { - return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, - RetPhsycialMem); -} - -__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { - return pi2ur::piextPhysicalMemRetain(PhysicalMem); -} - -__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { - return pi2ur::piextPhysicalMemRelease(PhysicalMem); -} - -__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context Context, - const void *Start, - size_t RangeSize, - void **RetPtr) { - return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); -} - -__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, - size_t RangeSize) { - return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); -} - -__SYCL_EXPORT pi_result -piextVirtualMemSetAccess(pi_context Context, const void *Ptr, size_t RangeSize, - pi_virtual_access_flags Flags) { - return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); -} - -__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, - size_t RangeSize, - pi_physical_mem PhysicalMem, - size_t Offset, - pi_virtual_access_flags Flags) { - return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, - Flags); -} - -__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context Context, - const void *Ptr, - size_t RangeSize) { - return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); -} - -__SYCL_EXPORT pi_result -piextVirtualMemGetInfo(pi_context Context, const void *Ptr, size_t RangeSize, - pi_virtual_mem_info ParamName, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, - uint64_t *DeviceTime, - uint64_t *HostTime) { - return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime); -} - -__SYCL_EXPORT pi_result piPluginGetBackendOption(pi_platform platform, - const char *frontend_option, - const char **backend_option) { - return pi2ur::piPluginGetBackendOption(platform, frontend_option, - backend_option); -} - -__SYCL_EXPORT pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextEnablePeerAccess(command_device, peer_device); -} - -__SYCL_EXPORT pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - - return pi2ur::piextDisablePeerAccess(command_device, peer_device); -} - -__SYCL_EXPORT pi_result piextPeerAccessGetInfo( - pi_device command_device, pi_device peer_device, pi_peer_attr attr, - size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, - ParamValueSize, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piTearDown(void *) { - releaseAdapters(Adapters.Vec); - return PI_SUCCESS; -} - -__SYCL_EXPORT pi_result piextMemImageAllocate(pi_context Context, - pi_device Device, - pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, - RetMem); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, - pi_image_handle *RetHandle) { - return pi2ur::piextMemUnsampledImageCreate(Context, Device, ImgMem, - ImageFormat, ImageDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageCreate( - pi_context Context, pi_device Device, pi_image_mem_handle ImgMem, - pi_image_format *ImageFormat, pi_image_desc *ImageDesc, pi_sampler Sampler, - pi_image_handle *RetHandle) { - return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, - ImageDesc, Sampler, RetHandle); -} - -__SYCL_EXPORT pi_result piextBindlessImageSamplerCreate( - pi_context Context, const pi_sampler_properties *SamplerProperties, - float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, - pi_sampler *RetSampler) { - return pi2ur::piextBindlessImageSamplerCreate( - Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, - MaxAnisotropy, RetSampler); -} - -__SYCL_EXPORT pi_result piextMemMipmapGetLevel(pi_context Context, - pi_device Device, - pi_image_mem_handle MipMem, - unsigned int Level, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); -} - -__SYCL_EXPORT pi_result piextMemImageFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemMipmapFree(pi_context Context, pi_device Device, - pi_image_mem_handle MemoryHandle) { - return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); -} - -__SYCL_EXPORT pi_result piextMemImageCopy( - pi_queue Queue, void *DstPtr, const void *SrcPtr, - const pi_image_desc *SrcImageDesc, const pi_image_desc *DstImageDesc, - const pi_image_format *SrcImageFormat, - const pi_image_format *DstImageFormat, const pi_image_copy_flags Flags, - pi_image_offset SrcOffset, pi_image_offset DstOffset, - pi_image_region CopyExtent, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextMemImageCopy(Queue, DstPtr, SrcPtr, SrcImageDesc, - DstImageDesc, SrcImageFormat, DstImageFormat, - Flags, SrcOffset, DstOffset, CopyExtent, - NumEventsInWaitList, EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextMemUnsampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemSampledImageHandleDestroy( - pi_context Context, pi_device Device, pi_image_handle Handle) { - return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); -} - -__SYCL_EXPORT pi_result piextMemImageGetInfo(pi_context Context, - pi_image_mem_handle MemHandle, - pi_image_info ParamName, - void *ParamValue, - size_t *ParamValueSizeRet) { - return pi2ur::piextMemImageGetInfo(Context, MemHandle, ParamName, ParamValue, - ParamValueSizeRet); -} - -__SYCL_EXPORT pi_result piextImportExternalMemory( - pi_context Context, pi_device Device, pi_external_mem_descriptor *MemDesc, - pi_interop_mem_handle *RetHandle) { - return pi2ur::piextImportExternalMemory(Context, Device, MemDesc, RetHandle); -} - -__SYCL_EXPORT pi_result piextMemMapExternalArray( - pi_context Context, pi_device Device, pi_image_format *ImageFormat, - pi_image_desc *ImageDesc, pi_interop_mem_handle MemHandle, - pi_image_mem_handle *RetMem) { - return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, - ImageDesc, MemHandle, RetMem); -} - -__SYCL_EXPORT pi_result piextMemReleaseInterop(pi_context Context, - pi_device Device, - pi_interop_mem_handle ExtMem) { - return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); -} - -__SYCL_EXPORT pi_result -piextImportExternalSemaphore(pi_context Context, pi_device Device, - pi_external_semaphore_descriptor *SemDesc, - pi_interop_semaphore_handle *RetHandle) { - return pi2ur::piextImportExternalSemaphore(Context, Device, SemDesc, - RetHandle); -} - -__SYCL_EXPORT pi_result -piextReleaseExternalSemaphore(pi_context Context, pi_device Device, - pi_interop_semaphore_handle SemHandle) { - return pi2ur::piextReleaseExternalSemaphore(Context, Device, SemHandle); -} - -__SYCL_EXPORT pi_result piextWaitExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasWaitValue, - pi_uint64 WaitValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextWaitExternalSemaphore(Queue, SemHandle, HasWaitValue, - WaitValue, NumEventsInWaitList, - EventWaitList, Event); -} - -__SYCL_EXPORT pi_result piextSignalExternalSemaphore( - pi_queue Queue, pi_interop_semaphore_handle SemHandle, bool HasSignalValue, - pi_uint64 SignalValue, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextSignalExternalSemaphore(Queue, SemHandle, HasSignalValue, - SignalValue, NumEventsInWaitList, - EventWaitList, Event); -} - -pi_result -piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, - void *Data, pi_uint32 NumMems, const pi_mem *Mems, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { - return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, - NumEventsInWaitList, EventWaitList, - Event); -} - -// This interface is not in Unified Runtime currently -__SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) { - PI_ASSERT(PluginInit, PI_ERROR_INVALID_VALUE); - - const char SupportedVersion[] = _PI_UNIFIED_RUNTIME_PLUGIN_VERSION_STRING; - - // Check that the major version matches in PiVersion and SupportedVersion - _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); - - // TODO: handle versioning/targets properly. - size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); - - PI_ASSERT(strlen(_PI_UNIFIED_RUNTIME_PLUGIN_VERSION_STRING) < - PluginVersionSize, - PI_ERROR_INVALID_VALUE); - - strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); - - // Initialize UR and discover adapters - ur_loader_config_handle_t LoaderConfig{}; - HANDLE_ERRORS(urLoaderConfigCreate(&LoaderConfig)); - - if (PluginInit->SanitizeType == _PI_SANITIZE_TYPE_ADDRESS) { - auto Result = urLoaderConfigEnableLayer(LoaderConfig, "UR_LAYER_ASAN"); - if (Result != UR_RESULT_SUCCESS) { - urLoaderConfigRelease(LoaderConfig); - return ur2piResult(Result); - } - } - - HANDLE_ERRORS(urLoaderInit(0, LoaderConfig)); - HANDLE_ERRORS(urLoaderConfigRelease(LoaderConfig)); - - uint32_t NumAdapters; - HANDLE_ERRORS(urAdapterGet(0, nullptr, &NumAdapters)); - if (NumAdapters > 0) { - Adapters.Vec.resize(NumAdapters); - HANDLE_ERRORS(urAdapterGet(NumAdapters, Adapters.Vec.data(), nullptr)); - } - - // Bind interfaces that are already supported and "die" for unsupported ones -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&DieUnsupported); -#include - -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); - - _PI_API(piPlatformsGet) - _PI_API(piPlatformGetInfo) - _PI_API(piDevicesGet) - _PI_API(piDeviceRetain) - _PI_API(piDeviceRelease) - _PI_API(piDeviceGetInfo) - _PI_API(piDevicePartition) - _PI_API(piextDeviceSelectBinary) - _PI_API(piGetDeviceAndHostTimer) - _PI_API(piextPlatformGetNativeHandle) - _PI_API(piextPlatformCreateWithNativeHandle) - _PI_API(piextDeviceGetNativeHandle) - _PI_API(piextDeviceCreateWithNativeHandle) - _PI_API(piPluginGetBackendOption) - - _PI_API(piContextCreate) - _PI_API(piContextRelease) - _PI_API(piContextRetain) - _PI_API(piContextGetInfo) - _PI_API(piextContextSetExtendedDeleter) - _PI_API(piextContextGetNativeHandle) - _PI_API(piextContextCreateWithNativeHandle) - - _PI_API(piQueueCreate) - _PI_API(piQueueRelease) - _PI_API(piextQueueCreate) - _PI_API(piQueueFinish) - _PI_API(piQueueGetInfo) - _PI_API(piQueueRetain) - _PI_API(piQueueFlush) - _PI_API(piextQueueGetNativeHandle) - _PI_API(piextQueueCreateWithNativeHandle) - - _PI_API(piProgramCreate) - _PI_API(piProgramBuild) - _PI_API(piextProgramGetNativeHandle) - _PI_API(piextProgramCreateWithNativeHandle) - _PI_API(piextProgramSetSpecializationConstant) - _PI_API(piProgramLink) - _PI_API(piKernelCreate) - _PI_API(piextKernelSetArgMemObj) - _PI_API(piextKernelCreateWithNativeHandle) - _PI_API(piProgramRetain) - _PI_API(piKernelSetExecInfo) - _PI_API(piKernelGetInfo) - _PI_API(piKernelSetArg) - _PI_API(piKernelGetGroupInfo) - _PI_API(piKernelRetain) - _PI_API(piKernelRelease) - _PI_API(piProgramRelease) - _PI_API(piextKernelSetArgPointer) - _PI_API(piextKernelSetArgSampler) - _PI_API(piKernelGetSubGroupInfo) - _PI_API(piProgramCreateWithBinary) - _PI_API(piProgramGetInfo) - _PI_API(piProgramCompile) - _PI_API(piProgramGetBuildInfo) - _PI_API(piextGetDeviceFunctionPointer) - _PI_API(piextGetGlobalVariablePointer) - - _PI_API(piMemBufferCreate) - _PI_API(piMemGetInfo) - _PI_API(piMemBufferPartition) - _PI_API(piEnqueueMemImageCopy) - _PI_API(piextMemGetNativeHandle) - _PI_API(piextMemCreateWithNativeHandle) - _PI_API(piMemRetain) - _PI_API(piextUSMGetMemAllocInfo) - _PI_API(piextUSMEnqueuePrefetch) - _PI_API(piextUSMEnqueueFill2D) - _PI_API(piextUSMEnqueueMemset2D) - _PI_API(piextUSMEnqueueMemAdvise) - _PI_API(piMemRelease) - _PI_API(piMemImageCreate) - _PI_API(piMemImageGetInfo) - _PI_API(piextUSMEnqueueMemcpy2D) - _PI_API(piextEnqueueDeviceGlobalVariableWrite) - _PI_API(piextEnqueueDeviceGlobalVariableRead) - - _PI_API(piextUSMHostAlloc) - _PI_API(piextUSMDeviceAlloc) - _PI_API(piextUSMSharedAlloc) - _PI_API(piextUSMFree) - - _PI_API(piextUSMImport) - _PI_API(piextUSMRelease) - - _PI_API(piEnqueueKernelLaunch) - _PI_API(piEnqueueMemImageWrite) - _PI_API(piEnqueueMemImageRead) - _PI_API(piEnqueueMemBufferMap) - _PI_API(piEnqueueMemUnmap) - _PI_API(piEnqueueMemBufferFill) - _PI_API(piextUSMEnqueueFill) - _PI_API(piEnqueueMemBufferCopyRect) - _PI_API(piEnqueueMemBufferCopy) - _PI_API(piextUSMEnqueueMemcpy) - _PI_API(piEnqueueMemBufferWriteRect) - _PI_API(piEnqueueMemBufferWrite) - _PI_API(piEnqueueMemBufferReadRect) - _PI_API(piEnqueueMemBufferRead) - _PI_API(piEnqueueEventsWaitWithBarrier) - _PI_API(piEnqueueEventsWait) - _PI_API(piEnqueueMemImageFill) - - _PI_API(piEventSetCallback) - _PI_API(piEventSetStatus) - _PI_API(piEventRetain) - _PI_API(piEventRelease) - _PI_API(piextEventCreateWithNativeHandle) - _PI_API(piEventsWait) - _PI_API(piEventGetInfo) - _PI_API(piextEventGetNativeHandle) - _PI_API(piEventGetProfilingInfo) - _PI_API(piEventCreate) - _PI_API(piEnqueueTimestampRecordingExp) - - _PI_API(piSamplerCreate) - _PI_API(piSamplerGetInfo) - _PI_API(piSamplerRetain) - _PI_API(piSamplerRelease) - - // Peer to Peer - _PI_API(piextEnablePeerAccess) - _PI_API(piextDisablePeerAccess) - _PI_API(piextPeerAccessGetInfo) - - // Launch Properties - _PI_API(piextEnqueueKernelLaunchCustom) - - _PI_API(piextPluginGetOpaqueData) - _PI_API(piTearDown) - - return PI_SUCCESS; -} - -} // extern "C diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.hpp b/sycl/plugins/unified_runtime/pi_unified_runtime.hpp deleted file mode 100644 index ed1f0f51739c5..0000000000000 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.hpp +++ /dev/null @@ -1,15 +0,0 @@ -//===--- pi_unified_runtime.hpp - Unified Runtime PI Plugin ---------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -// This version should be incremented for any change made to this file or its -// corresponding .cpp file. -#define _PI_UNIFIED_RUNTIME_PLUGIN_VERSION 1 - -#define _PI_UNIFIED_RUNTIME_PLUGIN_VERSION_STRING \ - _PI_PLUGIN_VERSION_STRING(_PI_UNIFIED_RUNTIME_PLUGIN_VERSION) diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/README.md b/sycl/plugins/unified_runtime/ur/adapters/cuda/README.md deleted file mode 100644 index 8af65917dffff..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Cuda adapter -The source for the Cuda adapter has been moved to the -[main](https://github.com/oneapi-src/unified-runtime/tree/main) branch -of the [Unified Runtime](https://github.com/oneapi-src/unified-runtime/) repo. -Changes can be made by opening pull requests against that branch, and updating -the Unified Runtime commit in the parent -[CMakeLists.txt](../../../CMakeLists.txt). diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/README.md b/sycl/plugins/unified_runtime/ur/adapters/hip/README.md deleted file mode 100644 index b698b507a9407..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# HIP adapter -The source for the HIP adapter has been moved to the -[main](https://github.com/oneapi-src/unified-runtime/tree/main) branch -of the [Unified Runtime](https://github.com/oneapi-src/unified-runtime/) repo. -Changes can be made by opening pull requests against that branch, and updating -the Unified Runtime commit in the parent -[CMakeLists.txt](../../../CMakeLists.txt). diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/README.md b/sycl/plugins/unified_runtime/ur/adapters/level_zero/README.md deleted file mode 100644 index 38a9d128e0787..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Level Zero adapter -The source for the Level Zero adapter has been moved to the -[main](https://github.com/oneapi-src/unified-runtime/tree/main) branch -of the [Unified Runtime](https://github.com/oneapi-src/unified-runtime/) repo. -Changes can be made by opening pull requests against that branch, and updating -the Unified Runtime commit in the parent -[CMakeLists.txt](../../../CMakeLists.txt). diff --git a/sycl/plugins/unified_runtime/ur/adapters/native_cpu/README.md b/sycl/plugins/unified_runtime/ur/adapters/native_cpu/README.md deleted file mode 100644 index 03153d76340b1..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/native_cpu/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Native CPU adapter -The source for the SYCL Native CPU adapter has been moved to the -[main](https://github.com/oneapi-src/unified-runtime/tree/main) branch -of the [Unified Runtime](https://github.com/oneapi-src/unified-runtime/) repo. -Changes can be made by opening pull requests against that branch, and updating -the Unified Runtime commit in the parent -[CMakeLists.txt](../../../CMakeLists.txt). diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/README.md b/sycl/plugins/unified_runtime/ur/adapters/opencl/README.md deleted file mode 100644 index 7e006d65310a8..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# OpenCL adapter -The source for the OpenCL adapter has been moved to the -[main](https://github.com/oneapi-src/unified-runtime/tree/main) branch -of the [Unified Runtime](https://github.com/oneapi-src/unified-runtime/) repo. -Changes can be made by opening pull requests against that branch, and updating -the Unified Runtime commit in the parent -[CMakeLists.txt](../../../CMakeLists.txt). diff --git a/sycl/plugins/unified_runtime/ur_bindings.hpp b/sycl/plugins/unified_runtime/ur_bindings.hpp deleted file mode 100644 index 3c5c244602d5f..0000000000000 --- a/sycl/plugins/unified_runtime/ur_bindings.hpp +++ /dev/null @@ -1,11 +0,0 @@ -//===------ ur_bindings.hpp - Complete definitions of UR handles -----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 70b2e80c35dd0..d02dbb725637a 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -71,17 +71,17 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) - # pi_win_proxy_loader + # ur_win_proxy_loader if (WIN32) - include_directories(${LLVM_EXTERNAL_SYCL_SOURCE_DIR}/pi_win_proxy_loader) + include_directories(${LLVM_EXTERNAL_SYCL_SOURCE_DIR}/ur_win_proxy_loader) if(WIN_DUPE) - target_link_libraries(${LIB_NAME} PUBLIC pi_win_proxy_loaderd) + target_link_libraries(${LIB_NAME} PUBLIC ur_win_proxy_loaderd) set(MANIFEST_FILE_NAME "sycld.manifest") else() - target_link_libraries(${LIB_NAME} PUBLIC pi_win_proxy_loader) + target_link_libraries(${LIB_NAME} PUBLIC ur_win_proxy_loader) set(MANIFEST_FILE_NAME "sycl.manifest") endif() - # Embed manifest into the sycl.dll where pi_win_proxy_loader.dll is described as sycl.dll's private dll and will always be loaded from the same directory. + # Embed manifest into the sycl.dll where ur_win_proxy_loader.dll is described as sycl.dll's private dll and will always be loaded from the same directory. # 0x2000: LOAD_LIBRARY_SAFE_CURRENT_DIRS flag. Using this flag means that loading dependency DLLs (of sycl.dll) # from the current directory is only allowed if it is under a directory in the Safe load list. target_link_options(${LIB_NAME} PRIVATE /DEPENDENTLOADFLAG:0x2000 /MANIFEST:NO /MANIFEST:EMBED /MANIFESTINPUT:${CMAKE_CURRENT_SOURCE_DIR}/${MANIFEST_FILE_NAME}) @@ -163,6 +163,37 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) ${CMAKE_THREAD_LIBS_INIT} ) + # Link and include UR + target_link_libraries(${LIB_OBJ_NAME} + PRIVATE + UnifiedRuntimeLoader + UnifiedRuntime-Headers + UnifiedRuntimeCommon + ) + + target_include_directories(${LIB_OBJ_NAME} + PRIVATE + "${UNIFIED_RUNTIME_SRC_INCLUDE_DIR}" + "${UNIFIED_RUNTIME_COMMON_INCLUDE_DIR}" + ) + + add_dependencies(${LIB_OBJ_NAME} UnifiedRuntimeAdapters ur_umf) + + target_link_libraries(${LIB_NAME} + PRIVATE + UnifiedRuntimeLoader + UnifiedRuntime-Headers + UnifiedRuntimeCommon + ) + + target_include_directories(${LIB_NAME} + PRIVATE + "${UNIFIED_RUNTIME_SRC_INCLUDE_DIR}" + "${UNIFIED_RUNTIME_COMMON_INCLUDE_DIR}" + ) + + add_dependencies(${LIB_NAME} UnifiedRuntimeAdapters ur_umf) + add_common_options(${LIB_NAME} ${LIB_OBJ_NAME}) set_target_properties(${LIB_NAME} PROPERTIES @@ -192,7 +223,6 @@ set(SYCL_COMMON_SOURCES "detail/allowlist.cpp" "detail/bindless_images.cpp" "detail/buffer_impl.cpp" - "detail/pi.cpp" "detail/common.cpp" "detail/composite_device/composite_device.cpp" "detail/config.cpp" @@ -241,6 +271,7 @@ set(SYCL_COMMON_SOURCES "detail/spec_constant_impl.cpp" "detail/sycl_mem_obj_t.cpp" "detail/usm/usm_impl.cpp" + "detail/ur.cpp" "detail/util.cpp" "detail/xpti_registry.cpp" "accessor.cpp" @@ -264,8 +295,8 @@ set(SYCL_COMMON_SOURCES "stream.cpp" "spirv_ops.cpp" "virtual_mem.cpp" - "$<$:detail/windows_pi.cpp>" - "$<$,$>:detail/posix_pi.cpp>" + "$<$:detail/windows_ur.cpp>" + "$<$,$>:detail/posix_ur.cpp>" ) set(SYCL_NON_PREVIEW_SOURCES "${SYCL_COMMON_SOURCES}" @@ -310,7 +341,7 @@ if (MSVC) endif() check_cxx_compiler_flag(-fsemantic-interposition HAS_SEMANTIC_INTERPOSITION_FLAG) -if (HAS_SEMANTIC_INTERPOSITION_FLAG) +if (HAS_SEMANTIC_INTERPOSITION_FLAG AND NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) # See https://github.com/llvm/llvm-project/issues/58295. set_source_files_properties(device_selector.cpp PROPERTIES COMPILE_FLAGS -fsemantic-interposition) endif() diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index ad00aafe22ac2..24e0386c72f45 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -13,11 +13,11 @@ #include "detail/platform_impl.hpp" #include "detail/plugin.hpp" #include "detail/queue_impl.hpp" +#include "sycl/detail/impl_utils.hpp" #include #include #include -#include -#include +#include #include #include #include @@ -32,192 +32,244 @@ namespace detail { static const PluginPtr &getPlugin(backend Backend) { switch (Backend) { case backend::opencl: - return pi::getPlugin(); + return ur::getPlugin(); case backend::ext_oneapi_level_zero: - return pi::getPlugin(); + return ur::getPlugin(); case backend::ext_oneapi_cuda: - return pi::getPlugin(); + return ur::getPlugin(); case backend::ext_oneapi_hip: - return pi::getPlugin(); + return ur::getPlugin(); default: - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "getPlugin: Unsupported backend " + - detail::codeToString(PI_ERROR_INVALID_OPERATION)); + throw sycl::exception( + sycl::make_error_code(sycl::errc::runtime), + "getPlugin: Unsupported backend " + + detail::codeToString(UR_RESULT_ERROR_INVALID_OPERATION)); } } -backend convertBackend(pi_platform_backend PiBackend) { - switch (PiBackend) { - case PI_EXT_PLATFORM_BACKEND_UNKNOWN: +backend convertUrBackend(ur_platform_backend_t UrBackend) { + switch (UrBackend) { + case UR_PLATFORM_BACKEND_UNKNOWN: return backend::all; // No specific backend - case PI_EXT_PLATFORM_BACKEND_LEVEL_ZERO: + case UR_PLATFORM_BACKEND_LEVEL_ZERO: return backend::ext_oneapi_level_zero; - case PI_EXT_PLATFORM_BACKEND_OPENCL: + case UR_PLATFORM_BACKEND_OPENCL: return backend::opencl; - case PI_EXT_PLATFORM_BACKEND_CUDA: + case UR_PLATFORM_BACKEND_CUDA: return backend::ext_oneapi_cuda; - case PI_EXT_PLATFORM_BACKEND_HIP: + case UR_PLATFORM_BACKEND_HIP: return backend::ext_oneapi_hip; - case PI_EXT_PLATFORM_BACKEND_NATIVE_CPU: + case UR_PLATFORM_BACKEND_NATIVE_CPU: return backend::ext_oneapi_native_cpu; + default: + throw exception(make_error_code(errc::runtime), + "convertBackend: Unsupported backend"); } - throw exception(make_error_code(errc::runtime), - "convertBackend: Unsupported backend"); } -platform make_platform(pi_native_handle NativeHandle, backend Backend) { +platform make_platform(ur_native_handle_t NativeHandle, backend Backend) { const auto &Plugin = getPlugin(Backend); - // Create PI platform first. - pi::PiPlatform PiPlatform = nullptr; - Plugin->call(NativeHandle, - &PiPlatform); + // Create UR platform first. + ur_platform_handle_t UrPlatform = nullptr; + Plugin->call(urPlatformCreateWithNativeHandle, NativeHandle, + Plugin->getUrAdapter(), nullptr, &UrPlatform); return detail::createSyclObjFromImpl( - platform_impl::getOrMakePlatformImpl(PiPlatform, Plugin)); + platform_impl::getOrMakePlatformImpl(UrPlatform, Plugin)); } -__SYCL_EXPORT device make_device(pi_native_handle NativeHandle, +__SYCL_EXPORT device make_device(ur_native_handle_t NativeHandle, backend Backend) { const auto &Plugin = getPlugin(Backend); - pi::PiDevice PiDevice = nullptr; - Plugin->call( - NativeHandle, nullptr, &PiDevice); - // Construct the SYCL device from PI device. + ur_device_handle_t UrDevice = nullptr; + Plugin->call(urDeviceCreateWithNativeHandle, NativeHandle, nullptr, nullptr, + &UrDevice); + // Construct the SYCL device from UR device. return detail::createSyclObjFromImpl( - std::make_shared(PiDevice, Plugin)); + std::make_shared(UrDevice, Plugin)); } -__SYCL_EXPORT context make_context(pi_native_handle NativeHandle, +__SYCL_EXPORT context make_context(ur_native_handle_t NativeHandle, const async_handler &Handler, backend Backend, bool KeepOwnership, const std::vector &DeviceList) { const auto &Plugin = getPlugin(Backend); - pi::PiContext PiContext = nullptr; - std::vector DeviceHandles; + ur_context_handle_t UrContext = nullptr; + ur_context_native_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES; + Properties.isNativeHandleOwned = false; + std::vector DeviceHandles; for (auto Dev : DeviceList) { DeviceHandles.push_back(detail::getSyclObjImpl(Dev)->getHandleRef()); } - Plugin->call( - NativeHandle, DeviceHandles.size(), DeviceHandles.data(), false, - &PiContext); - // Construct the SYCL context from PI context. + Plugin->call(urContextCreateWithNativeHandle, NativeHandle, + Plugin->getUrAdapter(), DeviceHandles.size(), + DeviceHandles.data(), &Properties, &UrContext); + // Construct the SYCL context from UR context. return detail::createSyclObjFromImpl(std::make_shared( - PiContext, Handler, Plugin, DeviceList, !KeepOwnership)); + UrContext, Handler, Plugin, DeviceList, !KeepOwnership)); } -__SYCL_EXPORT queue make_queue(pi_native_handle NativeHandle, +__SYCL_EXPORT queue make_queue(ur_native_handle_t NativeHandle, int32_t NativeHandleDesc, const context &Context, const device *Device, bool KeepOwnership, const property_list &PropList, const async_handler &Handler, backend Backend) { - sycl::detail::pi::PiDevice PiDevice = + ur_device_handle_t UrDevice = Device ? getSyclObjImpl(*Device)->getHandleRef() : nullptr; const auto &Plugin = getPlugin(Backend); const auto &ContextImpl = getSyclObjImpl(Context); - // Create PI properties from SYCL properties. - sycl::detail::pi::PiQueueProperties Properties[] = { - PI_QUEUE_FLAGS, - queue_impl::createPiQueueProperties( - PropList, PropList.has_property() - ? QueueOrder::Ordered - : QueueOrder::OOO), - 0, 0, 0}; if (PropList.has_property()) { throw sycl::exception( make_error_code(errc::invalid), "Queue create using make_queue cannot have compute_index property."); } - // Create PI queue first. - pi::PiQueue PiQueue = nullptr; - Plugin->call( - NativeHandle, NativeHandleDesc, ContextImpl->getHandleRef(), PiDevice, - !KeepOwnership, Properties, &PiQueue); - // Construct the SYCL queue from PI queue. + ur_queue_native_desc_t Desc{}; + Desc.stype = UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC; + Desc.pNativeData = &NativeHandleDesc; + + ur_queue_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_QUEUE_PROPERTIES; + Properties.flags = queue_impl::createUrQueueFlags( + PropList, PropList.has_property() + ? QueueOrder::Ordered + : QueueOrder::OOO); + + ur_queue_native_properties_t NativeProperties{}; + NativeProperties.stype = UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES; + NativeProperties.isNativeHandleOwned = !KeepOwnership; + + Properties.pNext = &Desc; + NativeProperties.pNext = &Properties; + + // Create UR queue first. + ur_queue_handle_t UrQueue = nullptr; + + Plugin->call(urQueueCreateWithNativeHandle, NativeHandle, + ContextImpl->getHandleRef(), UrDevice, &NativeProperties, + &UrQueue); + // Construct the SYCL queue from UR queue. return detail::createSyclObjFromImpl( - std::make_shared(PiQueue, ContextImpl, Handler, PropList)); + std::make_shared(UrQueue, ContextImpl, Handler, PropList)); } -__SYCL_EXPORT event make_event(pi_native_handle NativeHandle, +__SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, const context &Context, backend Backend) { return make_event(NativeHandle, Context, false, Backend); } -__SYCL_EXPORT event make_event(pi_native_handle NativeHandle, +__SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, const context &Context, bool KeepOwnership, backend Backend) { const auto &Plugin = getPlugin(Backend); const auto &ContextImpl = getSyclObjImpl(Context); - pi::PiEvent PiEvent = nullptr; - Plugin->call( - NativeHandle, ContextImpl->getHandleRef(), !KeepOwnership, &PiEvent); + ur_event_handle_t UrEvent = nullptr; + ur_event_native_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES; + Properties.isNativeHandleOwned = !KeepOwnership; + Plugin->call(urEventCreateWithNativeHandle, NativeHandle, + ContextImpl->getHandleRef(), &Properties, &UrEvent); event Event = detail::createSyclObjFromImpl( - std::make_shared(PiEvent, Context)); + std::make_shared(UrEvent, Context)); if (Backend == backend::opencl) - Plugin->call(PiEvent); + Plugin->call(urEventRetain, UrEvent); return Event; } std::shared_ptr -make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, - bool KeepOwnership, bundle_state State, backend Backend) { +make_kernel_bundle(ur_native_handle_t NativeHandle, + const context &TargetContext, bool KeepOwnership, + bundle_state State, backend Backend) { const auto &Plugin = getPlugin(Backend); const auto &ContextImpl = getSyclObjImpl(TargetContext); - pi::PiProgram PiProgram = nullptr; - Plugin->call( - NativeHandle, ContextImpl->getHandleRef(), !KeepOwnership, &PiProgram); + ur_program_handle_t UrProgram = nullptr; + ur_program_native_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES; + Properties.isNativeHandleOwned = !KeepOwnership; + + Plugin->call(urProgramCreateWithNativeHandle, NativeHandle, + ContextImpl->getHandleRef(), &Properties, &UrProgram); if (ContextImpl->getBackend() == backend::opencl) - Plugin->call(PiProgram); + Plugin->call(urProgramRetain, UrProgram); - std::vector ProgramDevices; + std::vector ProgramDevices; uint32_t NumDevices = 0; - Plugin->call( - PiProgram, PI_PROGRAM_INFO_NUM_DEVICES, sizeof(NumDevices), &NumDevices, - nullptr); + Plugin->call(urProgramGetInfo, UrProgram, UR_PROGRAM_INFO_NUM_DEVICES, + sizeof(NumDevices), &NumDevices, nullptr); ProgramDevices.resize(NumDevices); - Plugin->call(PiProgram, PI_PROGRAM_INFO_DEVICES, - sizeof(pi::PiDevice) * NumDevices, - ProgramDevices.data(), nullptr); - - for (const auto &Dev : ProgramDevices) { - size_t BinaryType = 0; - Plugin->call( - PiProgram, Dev, PI_PROGRAM_BUILD_INFO_BINARY_TYPE, sizeof(size_t), - &BinaryType, nullptr); + Plugin->call(urProgramGetInfo, UrProgram, UR_PROGRAM_INFO_DEVICES, + sizeof(ur_device_handle_t) * NumDevices, ProgramDevices.data(), + nullptr); + + for (auto &Dev : ProgramDevices) { + ur_program_binary_type_t BinaryType; + Plugin->call(urProgramGetBuildInfo, UrProgram, Dev, + UR_PROGRAM_BUILD_INFO_BINARY_TYPE, + sizeof(ur_program_binary_type_t), &BinaryType, nullptr); switch (BinaryType) { - case (PI_PROGRAM_BINARY_TYPE_NONE): - if (State == bundle_state::object) - Plugin->call( - PiProgram, 1, &Dev, nullptr, 0, nullptr, nullptr, nullptr, nullptr); - else if (State == bundle_state::executable) - Plugin->call( - PiProgram, 1, &Dev, nullptr, nullptr, nullptr); + case (UR_PROGRAM_BINARY_TYPE_NONE): + if (State == bundle_state::object) { + auto Res = Plugin->call_nocheck(urProgramCompileExp, UrProgram, 1, &Dev, + nullptr); + if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Res = Plugin->call_nocheck(urProgramCompile, + ContextImpl->getHandleRef(), UrProgram, + nullptr); + } + Plugin->checkUrResult(Res); + } + + else if (State == bundle_state::executable) { + auto Res = Plugin->call_nocheck(urProgramBuildExp, UrProgram, 1, &Dev, + nullptr); + if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Res = Plugin->call_nocheck( + urProgramBuild, ContextImpl->getHandleRef(), UrProgram, nullptr); + } + Plugin->checkUrResult(Res); + } + break; - case (PI_PROGRAM_BINARY_TYPE_COMPILED_OBJECT): - case (PI_PROGRAM_BINARY_TYPE_LIBRARY): + case (UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT): + case (UR_PROGRAM_BINARY_TYPE_LIBRARY): if (State == bundle_state::input) - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Program and kernel_bundle state mismatch " + - detail::codeToString(PI_ERROR_INVALID_VALUE)); - if (State == bundle_state::executable) - Plugin->call( - ContextImpl->getHandleRef(), 1, &Dev, nullptr, 1, &PiProgram, - nullptr, nullptr, &PiProgram); + throw sycl::exception( + sycl::make_error_code(sycl::errc::runtime), + "Program and kernel_bundle state mismatch " + + detail::codeToString(UR_RESULT_ERROR_INVALID_VALUE)); + if (State == bundle_state::executable) { + ur_program_handle_t UrLinkedProgram = nullptr; + auto Res = + Plugin->call_nocheck(urProgramLinkExp, ContextImpl->getHandleRef(), + 1, &Dev, 1, &UrProgram, nullptr, &UrLinkedProgram); + if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Res = Plugin->call_nocheck(urProgramLink, ContextImpl->getHandleRef(), + 1, &UrProgram, nullptr, &UrLinkedProgram); + } + Plugin->checkUrResult(Res); + if (UrLinkedProgram != nullptr) { + UrProgram = UrLinkedProgram; + } + } break; - case (PI_PROGRAM_BINARY_TYPE_EXECUTABLE): + case (UR_PROGRAM_BINARY_TYPE_EXECUTABLE): if (State == bundle_state::input || State == bundle_state::object) - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Program and kernel_bundle state mismatch " + - detail::codeToString(PI_ERROR_INVALID_VALUE)); + throw sycl::exception( + sycl::make_error_code(sycl::errc::runtime), + "Program and kernel_bundle state mismatch " + + detail::codeToString(UR_RESULT_ERROR_INVALID_VALUE)); + break; + default: break; } } @@ -228,7 +280,7 @@ make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, ProgramDevices.begin(), ProgramDevices.end(), std::back_inserter(Devices), [&Plugin](const auto &Dev) { auto Platform = - detail::platform_impl::getPlatformFromPiDevice(Dev, Plugin); + detail::platform_impl::getPlatformFromUrDevice(Dev, Plugin); auto DeviceImpl = Platform->getOrMakeDeviceImpl(Dev, Platform); return createSyclObjFromImpl(DeviceImpl); }); @@ -240,7 +292,7 @@ make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, // symbols (e.g. when kernel_bundle is supposed to be joined with another). auto KernelIDs = std::make_shared>(); auto DevImgImpl = std::make_shared( - nullptr, TargetContext, Devices, State, KernelIDs, PiProgram); + nullptr, TargetContext, Devices, State, KernelIDs, UrProgram); device_image_plain DevImg{DevImgImpl}; return std::make_shared(TargetContext, Devices, DevImg); @@ -248,14 +300,15 @@ make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, // TODO: Unused. Remove when allowed. std::shared_ptr -make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, - bundle_state State, backend Backend) { +make_kernel_bundle(ur_native_handle_t NativeHandle, + const context &TargetContext, bundle_state State, + backend Backend) { return make_kernel_bundle(NativeHandle, TargetContext, false, State, Backend); } kernel make_kernel(const context &TargetContext, const kernel_bundle &KernelBundle, - pi_native_handle NativeHandle, bool KeepOwnership, + ur_native_handle_t NativeHandle, bool KeepOwnership, backend Backend) { const auto &Plugin = getPlugin(Backend); const auto &ContextImpl = getSyclObjImpl(TargetContext); @@ -266,38 +319,40 @@ kernel make_kernel(const context &TargetContext, // program/module. This way we don't need to search the exact device image for // the kernel, which may not be trivial. // - // Other backends don't need PI program. + // Other backends don't need UR program. // - pi::PiProgram PiProgram = nullptr; + ur_program_handle_t UrProgram = nullptr; if (Backend == backend::ext_oneapi_level_zero) { if (KernelBundleImpl->size() != 1) throw sycl::exception( sycl::make_error_code(sycl::errc::runtime), "make_kernel: kernel_bundle must have single program image " + - detail::codeToString(PI_ERROR_INVALID_PROGRAM)); + detail::codeToString(UR_RESULT_ERROR_INVALID_PROGRAM)); const device_image &DeviceImage = *KernelBundle.begin(); const auto &DeviceImageImpl = getSyclObjImpl(DeviceImage); - PiProgram = DeviceImageImpl->get_program_ref(); + UrProgram = DeviceImageImpl->get_ur_program_ref(); } - // Create PI kernel first. - pi::PiKernel PiKernel = nullptr; - Plugin->call( - NativeHandle, ContextImpl->getHandleRef(), PiProgram, !KeepOwnership, - &PiKernel); + // Create UR kernel first. + ur_kernel_handle_t UrKernel = nullptr; + ur_kernel_native_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES; + Properties.isNativeHandleOwned = !KeepOwnership; + Plugin->call(urKernelCreateWithNativeHandle, NativeHandle, + ContextImpl->getHandleRef(), UrProgram, &Properties, &UrKernel); if (Backend == backend::opencl) - Plugin->call(PiKernel); + Plugin->call(urKernelRetain, UrKernel); - // Construct the SYCL queue from PI queue. + // Construct the SYCL queue from UR queue. return detail::createSyclObjFromImpl( - std::make_shared(PiKernel, ContextImpl, KernelBundleImpl)); + std::make_shared(UrKernel, ContextImpl, KernelBundleImpl)); } -kernel make_kernel(pi_native_handle NativeHandle, const context &TargetContext, - backend Backend) { +kernel make_kernel(ur_native_handle_t NativeHandle, + const context &TargetContext, backend Backend) { return make_kernel( TargetContext, get_empty_interop_kernel_bundle(TargetContext), diff --git a/sycl/source/backend/level_zero.cpp b/sycl/source/backend/level_zero.cpp index 19ce9281f289d..26f5b3eba0fd6 100644 --- a/sycl/source/backend/level_zero.cpp +++ b/sycl/source/backend/level_zero.cpp @@ -18,16 +18,16 @@ namespace ext::oneapi::level_zero::detail { using namespace sycl::detail; __SYCL_EXPORT device make_device(const platform &Platform, - pi_native_handle NativeHandle) { - const auto &Plugin = pi::getPlugin(); + ur_native_handle_t NativeHandle) { + const auto &Plugin = ur::getPlugin(); const auto &PlatformImpl = getSyclObjImpl(Platform); - // Create PI device first. - pi::PiDevice PiDevice; - Plugin->call( - NativeHandle, PlatformImpl->getHandleRef(), &PiDevice); + // Create UR device first. + ur_device_handle_t UrDevice; + Plugin->call(urDeviceCreateWithNativeHandle, NativeHandle, + PlatformImpl->getHandleRef(), nullptr, &UrDevice); return detail::createSyclObjFromImpl( - PlatformImpl->getOrMakeDeviceImpl(PiDevice, PlatformImpl)); + PlatformImpl->getOrMakeDeviceImpl(UrDevice, PlatformImpl)); } } // namespace ext::oneapi::level_zero::detail diff --git a/sycl/source/backend/opencl.cpp b/sycl/source/backend/opencl.cpp index 07aec7ba0549c..ba6c2576adac6 100644 --- a/sycl/source/backend/opencl.cpp +++ b/sycl/source/backend/opencl.cpp @@ -32,22 +32,21 @@ __SYCL_EXPORT bool has_extension(const sycl::platform &SyclPlatform, std::shared_ptr PlatformImpl = getSyclObjImpl(SyclPlatform); - sycl::detail::pi::PiPlatform PluginPlatform = PlatformImpl->getHandleRef(); + ur_platform_handle_t PluginPlatform = PlatformImpl->getHandleRef(); const PluginPtr &Plugin = PlatformImpl->getPlugin(); // Manual invocation of plugin API to avoid using deprecated // info::platform::extensions call. size_t ResultSize = 0; - Plugin->call( - PluginPlatform, PI_PLATFORM_INFO_EXTENSIONS, /*param_value_size=*/0, - /*param_value_size=*/nullptr, &ResultSize); + Plugin->call(urPlatformGetInfo, PluginPlatform, UR_PLATFORM_INFO_EXTENSIONS, + /*propSize=*/0, + /*pPropValue=*/nullptr, &ResultSize); if (ResultSize == 0) return false; std::unique_ptr Result(new char[ResultSize]); - Plugin->call(PluginPlatform, - PI_PLATFORM_INFO_EXTENSIONS, - ResultSize, Result.get(), nullptr); + Plugin->call(urPlatformGetInfo, PluginPlatform, UR_PLATFORM_INFO_EXTENSIONS, + ResultSize, Result.get(), nullptr); std::string_view ExtensionsString(Result.get()); return ExtensionsString.find(Extension) != std::string::npos; @@ -63,22 +62,21 @@ __SYCL_EXPORT bool has_extension(const sycl::device &SyclDevice, std::shared_ptr DeviceImpl = getSyclObjImpl(SyclDevice); - sycl::detail::pi::PiDevice PluginDevice = DeviceImpl->getHandleRef(); + ur_device_handle_t PluginDevice = DeviceImpl->getHandleRef(); const PluginPtr &Plugin = DeviceImpl->getPlugin(); // Manual invocation of plugin API to avoid using deprecated // info::device::extensions call. size_t ResultSize = 0; - Plugin->call( - PluginDevice, PI_DEVICE_INFO_EXTENSIONS, /*param_value_size=*/0, - /*param_value_size=*/nullptr, &ResultSize); + Plugin->call(urDeviceGetInfo, PluginDevice, UR_DEVICE_INFO_EXTENSIONS, + /*propSize=*/0, + /*pPropValue=*/nullptr, &ResultSize); if (ResultSize == 0) return false; std::unique_ptr Result(new char[ResultSize]); - Plugin->call(PluginDevice, - PI_DEVICE_INFO_EXTENSIONS, - ResultSize, Result.get(), nullptr); + Plugin->call(urDeviceGetInfo, PluginDevice, UR_DEVICE_INFO_EXTENSIONS, + ResultSize, Result.get(), nullptr); std::string_view ExtensionsString(Result.get()); return ExtensionsString.find(Extension) != std::string::npos; diff --git a/sycl/source/buffer.cpp b/sycl/source/buffer.cpp index 5c437bfae7516..f62da9f968ccb 100644 --- a/sycl/source/buffer.cpp +++ b/sycl/source/buffer.cpp @@ -54,7 +54,7 @@ buffer_plain::buffer_plain( } buffer_plain::buffer_plain( - pi_native_handle MemObject, const context &SyclContext, + ur_native_handle_t MemObject, const context &SyclContext, std::unique_ptr Allocator, bool OwnNativeHandle, const event &AvailableEvent) { impl = std::make_shared(MemObject, SyclContext, @@ -82,7 +82,7 @@ void buffer_plain::set_write_back(bool NeedWriteBack) { impl->set_write_back(NeedWriteBack); } -std::vector +std::vector buffer_plain::getNativeVector(backend BackendName) const { return impl->getNativeVector(BackendName); } diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 6d8087910c165..3e88e9d80cb44 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -10,10 +10,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -70,10 +72,17 @@ context::context(const std::vector &DeviceList, PropList); } context::context(cl_context ClContext, async_handler AsyncHandler) { - const auto &Plugin = sycl::detail::pi::getPlugin(); + const auto &Plugin = sycl::detail::ur::getPlugin(); + + ur_context_handle_t hContext = nullptr; + ur_native_handle_t nativeHandle = + reinterpret_cast(ClContext); + Plugin->call(urContextCreateWithNativeHandle, nativeHandle, Plugin->getUrAdapter(), + 0, nullptr, nullptr, + &hContext); + impl = std::make_shared( - detail::pi::cast(ClContext), AsyncHandler, - Plugin); + hContext, AsyncHandler, Plugin); } template @@ -118,7 +127,7 @@ std::vector context::get_devices() const { context::context(std::shared_ptr Impl) : impl(Impl) {} -pi_native_handle context::getNative() const { return impl->getNative(); } +ur_native_handle_t context::getNative() const { return impl->getNative(); } const property_list &context::getPropList() const { return impl->getPropList(); diff --git a/sycl/source/detail/allowlist.cpp b/sycl/source/detail/allowlist.cpp index d607090b185d3..2c487e49e4709 100644 --- a/sycl/source/detail/allowlist.cpp +++ b/sycl/source/detail/allowlist.cpp @@ -77,7 +77,7 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw) { "details, please refer to " "https://github.com/intel/llvm/blob/sycl/sycl/" "doc/EnvironmentVariables.md " + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); const std::string &DeprecatedKeyNameDeviceName = DeviceNameKeyName; const std::string &DeprecatedKeyNamePlatformName = PlatformNameKeyName; @@ -102,7 +102,7 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw) { "refer to " "https://github.com/intel/llvm/blob/sycl/sycl/doc/" "EnvironmentVariables.md " + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); } if (Key == DeprecatedKeyNameDeviceName) { @@ -158,7 +158,7 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw) { "SYCL_DEVICE_ALLOWLIST. For details, please refer to " "https://github.com/intel/llvm/blob/sycl/sycl/doc/" "EnvironmentVariables.md " + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); } }; @@ -180,7 +180,7 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw) { "details, please refer to " "https://github.com/intel/llvm/blob/sycl/sycl/doc/" "EnvironmentVariables.md " + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); } } } @@ -197,7 +197,8 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw) { "Key " + Key + " of SYCL_DEVICE_ALLOWLIST should have " "value which starts with " + - Prefix + " " + detail::codeToString(PI_ERROR_INVALID_VALUE)); + Prefix + " " + + detail::codeToString(UR_RESULT_ERROR_INVALID_VALUE)); } // cut off prefix from the value ValueStart += Prefix.length(); @@ -217,7 +218,7 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw) { " of SYCL_DEVICE_ALLOWLIST should have " "value which ends with " + Postfix + " " + - detail::codeToString(PI_ERROR_INVALID_VALUE)); + detail::codeToString(UR_RESULT_ERROR_INVALID_VALUE)); } size_t NextExpectedDelimiterPos = ValueEnd + Postfix.length(); // if it is not the end of the string, check that symbol next to a @@ -233,7 +234,7 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw) { AllowListRaw[NextExpectedDelimiterPos] + ". Should be either " + DelimiterBtwItemsInDeviceDesc + " or " + DelimiterBtwDeviceDescs + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); if (AllowListRaw[NextExpectedDelimiterPos] == DelimiterBtwDeviceDescs) ShouldAllocateNewDeviceDescMap = true; @@ -253,7 +254,7 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw) { "Re-definition of key " + Key + " is not allowed in " "SYCL_DEVICE_ALLOWLIST " + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); KeyStart = ValueEnd; if (KeyStart != std::string::npos) @@ -343,9 +344,8 @@ bool deviceIsAllowed(const DeviceDescT &DeviceDesc, return ShouldDeviceBeAllowed; } -void applyAllowList(std::vector &PiDevices, - sycl::detail::pi::PiPlatform PiPlatform, - const PluginPtr &Plugin) { +void applyAllowList(std::vector &UrDevices, + ur_platform_handle_t UrPlatform, const PluginPtr &Plugin) { AllowListParsedT AllowListParsed = parseAllowList(SYCLConfig::get()); @@ -354,7 +354,7 @@ void applyAllowList(std::vector &PiDevices, // Get platform's backend and put it to DeviceDesc DeviceDescT DeviceDesc; - auto PlatformImpl = platform_impl::getOrMakePlatformImpl(PiPlatform, Plugin); + auto PlatformImpl = platform_impl::getOrMakePlatformImpl(UrPlatform, Plugin); backend Backend = PlatformImpl->getBackend(); for (const auto &SyclBe : getSyclBeMap()) { @@ -366,21 +366,37 @@ void applyAllowList(std::vector &PiDevices, // get PlatformVersion value and put it to DeviceDesc DeviceDesc.emplace(PlatformVersionKeyName, sycl::detail::get_platform_info( - PiPlatform, Plugin)); + UrPlatform, Plugin)); // get PlatformName value and put it to DeviceDesc DeviceDesc.emplace(PlatformNameKeyName, sycl::detail::get_platform_info( - PiPlatform, Plugin)); + UrPlatform, Plugin)); int InsertIDx = 0; - for (sycl::detail::pi::PiDevice Device : PiDevices) { + for (ur_device_handle_t Device : UrDevices) { auto DeviceImpl = PlatformImpl->getOrMakeDeviceImpl(Device, PlatformImpl); // get DeviceType value and put it to DeviceDesc - sycl::detail::pi::PiDeviceType PiDevType; - Plugin->call( - Device, PI_DEVICE_INFO_TYPE, sizeof(sycl::detail::pi::PiDeviceType), - &PiDevType, nullptr); - sycl::info::device_type DeviceType = pi::cast(PiDevType); + ur_device_type_t UrDevType = UR_DEVICE_TYPE_ALL; + Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_TYPE, + sizeof(UrDevType), &UrDevType, nullptr); + // TODO need mechanism to do these casts, there's a bunch of this sort of + // thing + sycl::info::device_type DeviceType = info::device_type::all; + switch (UrDevType) { + default: + case UR_DEVICE_TYPE_ALL: + DeviceType = info::device_type::all; + break; + case UR_DEVICE_TYPE_GPU: + DeviceType = info::device_type::gpu; + break; + case UR_DEVICE_TYPE_CPU: + DeviceType = info::device_type::cpu; + break; + case UR_DEVICE_TYPE_FPGA: + DeviceType = info::device_type::accelerator; + break; + } for (const auto &SyclDeviceType : getSyclDeviceTypeMap()) { if (SyclDeviceType.second == DeviceType) { @@ -407,10 +423,10 @@ void applyAllowList(std::vector &PiDevices, // check if we can allow device with such device description DeviceDesc if (deviceIsAllowed(DeviceDesc, AllowListParsed)) { - PiDevices[InsertIDx++] = Device; + UrDevices[InsertIDx++] = Device; } } - PiDevices.resize(InsertIDx); + UrDevices.resize(InsertIDx); } } // namespace detail diff --git a/sycl/source/detail/allowlist.hpp b/sycl/source/detail/allowlist.hpp index 28b194f842068..aa29bd59e4551 100644 --- a/sycl/source/detail/allowlist.hpp +++ b/sycl/source/detail/allowlist.hpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include @@ -27,9 +27,8 @@ AllowListParsedT parseAllowList(const std::string &AllowListRaw); bool deviceIsAllowed(const DeviceDescT &DeviceDesc, const AllowListParsedT &AllowListParsed); -void applyAllowList(std::vector &PiDevices, - sycl::detail::pi::PiPlatform PiPlatform, - const PluginPtr &Plugin); +void applyAllowList(std::vector &UrDevices, + ur_platform_handle_t UrPlatform, const PluginPtr &Plugin); } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/bindless_images.cpp b/sycl/source/detail/bindless_images.cpp index 39f85df580181..3f551d9d84ab6 100644 --- a/sycl/source/detail/bindless_images.cpp +++ b/sycl/source/detail/bindless_images.cpp @@ -7,13 +7,12 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include #include #include -#include #include #include @@ -22,37 +21,35 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { -void populate_pi_structs(const image_descriptor &desc, pi_image_desc &piDesc, - pi_image_format &piFormat, size_t pitch = 0) { - piDesc = {}; - piDesc.image_width = desc.width; - piDesc.image_height = desc.height; - piDesc.image_depth = desc.depth; +void populate_ur_structs(const image_descriptor &desc, ur_image_desc_t &urDesc, + ur_image_format_t &urFormat, size_t pitch = 0) { + urDesc = {}; + urDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + urDesc.width = desc.width; + urDesc.height = desc.height; + urDesc.depth = desc.depth; if (desc.array_size > 1) { // Image array or cubemap - piDesc.image_type = desc.type == image_type::cubemap - ? PI_MEM_TYPE_IMAGE_CUBEMAP - : desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY - : PI_MEM_TYPE_IMAGE1D_ARRAY; + urDesc.type = desc.type == image_type::cubemap + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY + : UR_MEM_TYPE_IMAGE1D_ARRAY; } else { - piDesc.image_type = - desc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + urDesc.type = desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D + : (desc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); } - piDesc.image_row_pitch = pitch; - piDesc.image_array_size = desc.array_size; - piDesc.image_slice_pitch = 0; - piDesc.num_mip_levels = desc.num_levels; - piDesc.num_samples = 0; - piDesc.buffer = nullptr; - - piFormat = {}; - piFormat.image_channel_data_type = - sycl::detail::convertChannelType(desc.channel_type); - piFormat.image_channel_order = sycl::detail::convertChannelOrder( + urDesc.rowPitch = pitch; + urDesc.arraySize = desc.array_size; + urDesc.slicePitch = 0; + urDesc.numMipLevel = desc.num_levels; + urDesc.numSamples = 0; + + urFormat = {}; + urFormat.channelType = sycl::detail::convertChannelType(desc.channel_type); + urFormat.channelOrder = sycl::detail::convertChannelOrder( sycl::ext::oneapi::experimental::detail::get_image_default_channel_order( desc.num_channels)); } @@ -116,16 +113,15 @@ __SYCL_EXPORT void destroy_image_handle(unsampled_image_handle &imageHandle, const sycl::context &syclContext) { std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_image_handle piImageHandle = imageHandle.raw_handle; - Plugin->call( - C, Device, piImageHandle); + Plugin->call( + urBindlessImagesUnsampledImageHandleDestroyExp, C, Device, + imageHandle.raw_handle); } __SYCL_EXPORT void destroy_image_handle(unsampled_image_handle &imageHandle, @@ -139,16 +135,15 @@ __SYCL_EXPORT void destroy_image_handle(sampled_image_handle &imageHandle, const sycl::context &syclContext) { std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_image_handle piImageHandle = imageHandle.raw_handle; - Plugin->call( - C, Device, piImageHandle); + Plugin->call( + urBindlessImagesSampledImageHandleDestroyExp, C, Device, + imageHandle.raw_handle); } __SYCL_EXPORT void destroy_image_handle(sampled_image_handle &imageHandle, @@ -164,22 +159,22 @@ alloc_image_mem(const image_descriptor &desc, const sycl::device &syclDevice, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_image_desc piDesc; - pi_image_format piFormat; - populate_pi_structs(desc, piDesc, piFormat); + ur_image_desc_t urDesc; + ur_image_format_t urFormat; + populate_ur_structs(desc, urDesc, urFormat); image_mem_handle retHandle; // Call impl. - Plugin->call( - C, Device, &piFormat, &piDesc, &retHandle.raw_handle); + Plugin->call( + urBindlessImagesImageAllocateExp, C, Device, &urFormat, &urDesc, + reinterpret_cast(&retHandle.raw_handle)); return retHandle; } @@ -195,17 +190,17 @@ __SYCL_EXPORT image_mem_handle get_mip_level_mem_handle( std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); // Call impl. image_mem_handle individual_image; - Plugin->call( - C, Device, mipMem.raw_handle, level, &individual_image.raw_handle); + Plugin->call(urBindlessImagesMipmapGetLevelExp, C, + Device, mipMem.raw_handle, level, + &individual_image.raw_handle); return individual_image; } @@ -223,23 +218,21 @@ __SYCL_EXPORT void free_image_mem(image_mem_handle memHandle, const sycl::context &syclContext) { std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - if (memHandle.raw_handle != nullptr) { + if (memHandle.raw_handle != 0) { if (imageType == image_type::mipmap) { - Plugin->call( - C, Device, memHandle.raw_handle); + Plugin->call( + urBindlessImagesMipmapFreeExp, C, Device, memHandle.raw_handle); } else if (imageType == image_type::standard || imageType == image_type::array || imageType == image_type::cubemap) { - Plugin->call( - C, Device, memHandle.raw_handle); + Plugin->call( + urBindlessImagesImageFreeExp, C, Device, memHandle.raw_handle); } else { throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), "Invalid image type to free"); @@ -274,23 +267,23 @@ create_image(image_mem_handle memHandle, const image_descriptor &desc, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_image_desc piDesc; - pi_image_format piFormat; - populate_pi_structs(desc, piDesc, piFormat); + ur_image_desc_t urDesc; + ur_image_format_t urFormat; + populate_ur_structs(desc, urDesc, urFormat); // Call impl. - pi_image_handle piImageHandle; - Plugin->call( - C, Device, memHandle.raw_handle, &piFormat, &piDesc, &piImageHandle); + ur_exp_image_mem_native_handle_t urImageHandle; + Plugin->call(urBindlessImagesUnsampledImageCreateExp, C, + Device, memHandle.raw_handle, &urFormat, + &urDesc, &urImageHandle); - return unsampled_image_handle{piImageHandle}; + return unsampled_image_handle{urImageHandle}; } __SYCL_EXPORT unsampled_image_handle @@ -304,8 +297,8 @@ __SYCL_EXPORT sampled_image_handle create_image(image_mem_handle memHandle, const bindless_image_sampler &sampler, const image_descriptor &desc, const sycl::device &syclDevice, const sycl::context &syclContext) { - return create_image(memHandle.raw_handle, 0 /*pitch*/, sampler, desc, - syclDevice, syclContext); + return create_image(reinterpret_cast(memHandle.raw_handle), + 0 /*pitch*/, sampler, desc, syclDevice, syclContext); } __SYCL_EXPORT sampled_image_handle @@ -319,15 +312,55 @@ __SYCL_EXPORT sampled_image_handle create_image(image_mem &imgMem, const bindless_image_sampler &sampler, const image_descriptor &desc, const sycl::device &syclDevice, const sycl::context &syclContext) { - return create_image(imgMem.get_handle().raw_handle, 0 /*pitch*/, sampler, - desc, syclDevice, syclContext); + return create_image(reinterpret_cast(imgMem.get_handle().raw_handle), + 0 /*pitch*/, sampler, desc, syclDevice, syclContext); } __SYCL_EXPORT sampled_image_handle create_image(image_mem &imgMem, const bindless_image_sampler &sampler, const image_descriptor &desc, const sycl::queue &syclQueue) { - return create_image(imgMem.get_handle().raw_handle, 0 /*pitch*/, sampler, - desc, syclQueue.get_device(), syclQueue.get_context()); + return create_image(reinterpret_cast(imgMem.get_handle().raw_handle), + 0 /*pitch*/, sampler, desc, syclQueue.get_device(), + syclQueue.get_context()); +} + +inline ur_sampler_addressing_mode_t +translate_addressing_mode(sycl::addressing_mode Mode) { + switch (Mode) { + case sycl::addressing_mode::mirrored_repeat: + return UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; + case sycl::addressing_mode::repeat: + return UR_SAMPLER_ADDRESSING_MODE_REPEAT; + case sycl::addressing_mode::clamp_to_edge: + return UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + case sycl::addressing_mode::clamp: + return UR_SAMPLER_ADDRESSING_MODE_CLAMP; + case sycl::addressing_mode::none: + default: + return UR_SAMPLER_ADDRESSING_MODE_NONE; + } +} + +inline ur_sampler_filter_mode_t +translate_filter_mode(sycl::filtering_mode Mode) { + switch (Mode) { + case sycl::filtering_mode::linear: + return UR_SAMPLER_FILTER_MODE_LINEAR; + case sycl::filtering_mode::nearest: + return UR_SAMPLER_FILTER_MODE_NEAREST; + } + return UR_SAMPLER_FILTER_MODE_FORCE_UINT32; +} + +inline ur_exp_sampler_cubemap_filter_mode_t +translate_cubemap_filter_mode(cubemap_filtering_mode Mode) { + switch (Mode) { + case cubemap_filtering_mode::disjointed: + return UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED; + case cubemap_filtering_mode::seamless: + return UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS; + } + return UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_FORCE_UINT32; } __SYCL_EXPORT sampled_image_handle @@ -338,46 +371,56 @@ create_image(void *devPtr, size_t pitch, const bindless_image_sampler &sampler, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - const pi_sampler_properties sProps[] = { - PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS, - static_cast(sampler.coordinate), - PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, - static_cast(sampler.addressing[0]), - PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, - static_cast(sampler.addressing[1]), - PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, - static_cast(sampler.addressing[2]), - PI_SAMPLER_PROPERTIES_FILTER_MODE, - static_cast(sampler.filtering), - PI_SAMPLER_PROPERTIES_MIP_FILTER_MODE, - static_cast(sampler.mipmap_filtering), - PI_SAMPLER_PROPERTIES_CUBEMAP_FILTER_MODE, - static_cast(sampler.cubemap_filtering), - 0}; - - pi_sampler piSampler = {}; - Plugin->call( - C, sProps, sampler.min_mipmap_level_clamp, sampler.max_mipmap_level_clamp, - sampler.max_anisotropy, &piSampler); - - pi_image_desc piDesc; - pi_image_format piFormat; - populate_pi_structs(desc, piDesc, piFormat, pitch); + ur_sampler_desc_t UrSamplerProps{ + UR_STRUCTURE_TYPE_SAMPLER_DESC, nullptr, + sampler.coordinate == coordinate_normalization_mode::normalized, + translate_addressing_mode(sampler.addressing[0]), + translate_filter_mode(sampler.filtering)}; + + ur_exp_sampler_mip_properties_t UrMipProps{ + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES, + nullptr, + sampler.min_mipmap_level_clamp, + sampler.max_mipmap_level_clamp, + sampler.max_anisotropy, + translate_filter_mode(sampler.mipmap_filtering)}; + UrSamplerProps.pNext = &UrMipProps; + + ur_exp_sampler_addr_modes_t UrAddrModes{ + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES, + nullptr, + {translate_addressing_mode(sampler.addressing[0]), + translate_addressing_mode(sampler.addressing[1]), + translate_addressing_mode(sampler.addressing[2])}}; + UrMipProps.pNext = &UrAddrModes; + + ur_exp_sampler_cubemap_properties_t UrCubemapProps{ + UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES, nullptr, + translate_cubemap_filter_mode(sampler.cubemap_filtering)}; + UrAddrModes.pNext = &UrCubemapProps; + + ur_sampler_handle_t urSampler = nullptr; + Plugin->call(urSamplerCreate, C, &UrSamplerProps, + &urSampler); + + ur_image_desc_t urDesc; + ur_image_format_t urFormat; + populate_ur_structs(desc, urDesc, urFormat, pitch); // Call impl. - pi_image_handle piImageHandle; - Plugin->call( - C, Device, devPtr, &piFormat, &piDesc, piSampler, &piImageHandle); + ur_exp_image_mem_native_handle_t urImageHandle; + Plugin->call( + urBindlessImagesSampledImageCreateExp, C, Device, + reinterpret_cast(devPtr), &urFormat, &urDesc, + urSampler, &urImageHandle); - return sampled_image_handle{piImageHandle}; + return sampled_image_handle{urImageHandle}; } __SYCL_EXPORT sampled_image_handle @@ -393,28 +436,29 @@ __SYCL_EXPORT interop_mem_handle import_external_memory( const sycl::device &syclDevice, const sycl::context &syclContext) { std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_interop_mem_handle piInteropMem; - pi_external_mem_descriptor piExternalMemDescriptor; + ur_exp_interop_mem_handle_t urInteropMem = nullptr; + ur_exp_file_descriptor_t urFileDescriptor = {}; + urFileDescriptor.stype = UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR; + urFileDescriptor.fd = externalMem.external_resource.file_descriptor; + ur_exp_interop_mem_desc_t urExternalMemDescriptor = {}; + urExternalMemDescriptor.stype = UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC; + urExternalMemDescriptor.pNext = &urFileDescriptor; - piExternalMemDescriptor.memorySizeBytes = externalMem.size_in_bytes; - piExternalMemDescriptor.handle.file_descriptor = - externalMem.external_resource.file_descriptor; // For `resource_fd` external memory type, the handle type is always - // `opaque_fd`. No need for a switch statement like we have for win32 + // `OPAQUE_FD`. No need for a switch statement like we have for win32 // resources. - piExternalMemDescriptor.handleType = pi_external_mem_handle_type::opaque_fd; - - Plugin->call( - C, Device, &piExternalMemDescriptor, &piInteropMem); + Plugin->call(urBindlessImagesImportExternalMemoryExp, C, + Device, externalMem.size_in_bytes, + UR_EXP_EXTERNAL_MEM_TYPE_OPAQUE_FD, + &urExternalMemDescriptor, &urInteropMem); - return interop_mem_handle{piInteropMem}; + return interop_mem_handle{urInteropMem}; } template <> @@ -431,39 +475,40 @@ __SYCL_EXPORT interop_mem_handle import_external_memory( const sycl::device &syclDevice, const sycl::context &syclContext) { std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_interop_mem_handle piInteropMem; - pi_external_mem_descriptor piExternalMemDescriptor; - - piExternalMemDescriptor.memorySizeBytes = externalMem.size_in_bytes; - piExternalMemDescriptor.handle.win32_handle = - externalMem.external_resource.handle; + ur_exp_interop_mem_handle_t urInteropMem = nullptr; + ur_exp_win32_handle_t urWin32Handle = {}; + urWin32Handle.stype = UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE; + urWin32Handle.handle = externalMem.external_resource.handle; + ur_exp_interop_mem_desc_t urExternalMemDescriptor{}; + urExternalMemDescriptor.stype = UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC; + urExternalMemDescriptor.pNext = &urWin32Handle; // Select appropriate memory handle type. + ur_exp_external_mem_type_t urHandleType; switch (externalMem.handle_type) { case external_mem_handle_type::win32_nt_handle: - piExternalMemDescriptor.handleType = - pi_external_mem_handle_type::win32_nt_handle; + urHandleType = UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT; break; case external_mem_handle_type::win32_nt_dx12_resource: - piExternalMemDescriptor.handleType = - pi_external_mem_handle_type::win32_nt_dx12_resource; + urHandleType = UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX12_RESOURCE; break; default: throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), "Invalid memory handle type"); } - Plugin->call( - C, Device, &piExternalMemDescriptor, &piInteropMem); + Plugin->call(urBindlessImagesImportExternalMemoryExp, C, + Device, externalMem.size_in_bytes, + urHandleType, &urExternalMemDescriptor, + &urInteropMem); - return interop_mem_handle{piInteropMem}; + return interop_mem_handle{urInteropMem}; } template <> @@ -483,22 +528,22 @@ image_mem_handle map_external_image_memory(interop_mem_handle memHandle, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_image_desc piDesc; - pi_image_format piFormat; - populate_pi_structs(desc, piDesc, piFormat); + ur_image_desc_t urDesc; + ur_image_format_t urFormat; + populate_ur_structs(desc, urDesc, urFormat); - pi_interop_mem_handle piInteropMem{memHandle.raw_handle}; + ur_exp_interop_mem_handle_t urInteropMem{memHandle.raw_handle}; image_mem_handle retHandle; - Plugin->call( - C, Device, &piFormat, &piDesc, piInteropMem, &retHandle.raw_handle); + Plugin->call(urBindlessImagesMapExternalArrayExp, C, + Device, &urFormat, &urDesc, urInteropMem, + &retHandle.raw_handle); return image_mem_handle{retHandle}; } @@ -516,15 +561,14 @@ __SYCL_EXPORT void release_external_memory(interop_mem_handle interopMem, const sycl::context &syclContext) { std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - Plugin->call( - C, Device, (pi_interop_mem_handle)interopMem.raw_handle); + Plugin->call(urBindlessImagesReleaseInteropExp, C, + Device, interopMem.raw_handle); } __SYCL_EXPORT void release_external_memory(interop_mem_handle interopMem, @@ -540,25 +584,27 @@ __SYCL_EXPORT interop_semaphore_handle import_external_semaphore( std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); - pi_interop_semaphore_handle piInteropSemaphore; - pi_external_semaphore_descriptor piInteropSemDesc; + ur_exp_interop_semaphore_handle_t urInteropSemaphore; + ur_exp_file_descriptor_t urFileDescriptor = {}; + urFileDescriptor.stype = UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR; + urFileDescriptor.fd = externalSemaphoreDesc.external_resource.file_descriptor; + ur_exp_interop_semaphore_desc_t urInteropSemDesc = {}; + urInteropSemDesc.stype = UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC; + urInteropSemDesc.pNext = &urFileDescriptor; // For this specialization of `import_external_semaphore` the handleType is - // always `opaque_fd`. - piInteropSemDesc.handleType = pi_external_semaphore_handle_type::opaque_fd; - piInteropSemDesc.handle.file_descriptor = - externalSemaphoreDesc.external_resource.file_descriptor; - - Plugin->call( - C, Device, &piInteropSemDesc, &piInteropSemaphore); + // always `OPAQUE_FD`. + Plugin->call(urBindlessImagesImportExternalSemaphoreExp, + C, Device, + UR_EXP_EXTERNAL_SEMAPHORE_TYPE_OPAQUE_FD, + &urInteropSemDesc, &urInteropSemaphore); - return interop_semaphore_handle{piInteropSemaphore, + return interop_semaphore_handle{urInteropSemaphore, external_semaphore_handle_type::opaque_fd}; } @@ -577,37 +623,38 @@ __SYCL_EXPORT interop_semaphore_handle import_external_semaphore( std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); - pi_interop_semaphore_handle piInteropSemaphore; - pi_external_semaphore_descriptor piInteropSemDesc; + ur_exp_interop_semaphore_handle_t urInteropSemaphore; + ur_exp_win32_handle_t urWin32Handle = {}; + urWin32Handle.stype = UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE; + urWin32Handle.handle = externalSemaphoreDesc.external_resource.handle; + ur_exp_interop_semaphore_desc_t urInteropSemDesc = {}; + urInteropSemDesc.stype = UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC; + urInteropSemDesc.pNext = &urWin32Handle; // Select appropriate semaphore handle type. + ur_exp_external_semaphore_type_t urHandleType; switch (externalSemaphoreDesc.handle_type) { case external_semaphore_handle_type::win32_nt_handle: - piInteropSemDesc.handleType = - pi_external_semaphore_handle_type::win32_nt_handle; + urHandleType = UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT; break; case external_semaphore_handle_type::win32_nt_dx12_fence: - piInteropSemDesc.handleType = - pi_external_semaphore_handle_type::win32_nt_dx12_fence; + urHandleType = UR_EXP_EXTERNAL_SEMAPHORE_TYPE_WIN32_NT_DX12_FENCE; break; default: throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), "Invalid semaphore handle type"); } - piInteropSemDesc.handle.win32_handle = - externalSemaphoreDesc.external_resource.handle; + Plugin->call(urBindlessImagesImportExternalSemaphoreExp, + C, Device, urHandleType, &urInteropSemDesc, + &urInteropSemaphore); - Plugin->call( - C, Device, &piInteropSemDesc, &piInteropSemaphore); - - return interop_semaphore_handle{piInteropSemaphore, + return interop_semaphore_handle{urInteropSemaphore, externalSemaphoreDesc.handle_type}; } @@ -626,14 +673,13 @@ release_external_semaphore(interop_semaphore_handle semaphoreHandle, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - pi_device Device = DevImpl->getHandleRef(); + ur_device_handle_t Device = DevImpl->getHandleRef(); - Plugin->call( - C, Device, (pi_interop_semaphore_handle)semaphoreHandle.raw_handle); + Plugin->call(urBindlessImagesReleaseExternalSemaphoreExp, + C, Device, semaphoreHandle.raw_handle); } __SYCL_EXPORT void @@ -651,22 +697,22 @@ __SYCL_EXPORT sycl::range<3> get_image_range(const image_mem_handle memHandle, sycl::detail::getSyclObjImpl(syclContext); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - size_t Width, Height, Depth; + size_t Width = 0, Height = 0, Depth = 0; - Plugin->call( - CtxImpl->getHandleRef(), memHandle.raw_handle, PI_IMAGE_INFO_WIDTH, - &Width, nullptr); + Plugin->call(urBindlessImagesImageGetInfoExp, + CtxImpl->getHandleRef(), + memHandle.raw_handle, UR_IMAGE_INFO_WIDTH, + &Width, nullptr); - Plugin->call( - CtxImpl->getHandleRef(), memHandle.raw_handle, PI_IMAGE_INFO_HEIGHT, - &Height, nullptr); + Plugin->call(urBindlessImagesImageGetInfoExp, + CtxImpl->getHandleRef(), + memHandle.raw_handle, UR_IMAGE_INFO_HEIGHT, + &Height, nullptr); - Plugin->call( - CtxImpl->getHandleRef(), memHandle.raw_handle, PI_IMAGE_INFO_DEPTH, - &Depth, nullptr); + Plugin->call(urBindlessImagesImageGetInfoExp, + CtxImpl->getHandleRef(), + memHandle.raw_handle, UR_IMAGE_INFO_DEPTH, + &Depth, nullptr); return {Width, Height, Depth}; } @@ -686,15 +732,14 @@ get_image_channel_type(const image_mem_handle memHandle, sycl::detail::getSyclObjImpl(syclContext); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_image_format PIFormat; + ur_image_format_t URFormat; - Plugin->call( - CtxImpl->getHandleRef(), memHandle.raw_handle, PI_IMAGE_INFO_FORMAT, - &PIFormat, nullptr); + Plugin->call(urBindlessImagesImageGetInfoExp, CtxImpl->getHandleRef(), + memHandle.raw_handle, UR_IMAGE_INFO_FORMAT, + &URFormat, nullptr); image_channel_type ChannelType = - sycl::detail::convertChannelType(PIFormat.image_channel_data_type); + sycl::detail::convertChannelType(URFormat.channelType); return ChannelType; } @@ -720,16 +765,14 @@ __SYCL_EXPORT void *pitched_alloc_device(size_t *resultPitch, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - pi_context PiContext = CtxImpl->getHandleRef(); + ur_context_handle_t UrContext = CtxImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_device PiDevice; - - PiDevice = sycl::detail::getSyclObjImpl(syclDevice)->getHandleRef(); + ur_device_handle_t UrDevice = + sycl::detail::getSyclObjImpl(syclDevice)->getHandleRef(); - Plugin->call( - &RetVal, resultPitch, PiContext, PiDevice, nullptr, widthInBytes, height, - elementSizeBytes); + Plugin->call( + urUSMPitchedAllocExp, UrContext, UrDevice, nullptr, nullptr, widthInBytes, + height, elementSizeBytes, &RetVal, resultPitch); return RetVal; } @@ -773,15 +816,14 @@ get_image_num_channels(const image_mem_handle memHandle, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_image_format PIFormat; + ur_image_format_t URFormat = {}; - Plugin->call( - CtxImpl->getHandleRef(), memHandle.raw_handle, PI_IMAGE_INFO_FORMAT, - &PIFormat, nullptr); + Plugin->call(urBindlessImagesImageGetInfoExp, CtxImpl->getHandleRef(), + memHandle.raw_handle, UR_IMAGE_INFO_FORMAT, + &URFormat, nullptr); image_channel_order Order = - sycl::detail::convertChannelOrder(PIFormat.image_channel_order); + sycl::detail::convertChannelOrder(URFormat.channelOrder); return static_cast(sycl::detail::getImageNumberChannels(Order)); } diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 7716d0931fbdd..cdde8d9ae7d38 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace sycl { inline namespace _V1 { @@ -21,7 +22,7 @@ uint8_t GBufferStreamID; #endif void *buffer_impl::allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, - sycl::detail::pi::PiEvent &OutEventToWait) { + ur_event_handle_t &OutEventToWait) { bool HostPtrReadOnly = false; BaseT::determineHostPtr(InitFromUserData, HostPtr, HostPtrReadOnly); assert(!(nullptr == HostPtr && BaseT::useHostPtr() && !Context) && @@ -45,30 +46,32 @@ void buffer_impl::destructorNotification(void *UserObj) { } void buffer_impl::addInteropObject( - std::vector &Handles) const { + std::vector &Handles) const { if (MOpenCLInterop) { if (std::find(Handles.begin(), Handles.end(), - pi::cast(MInteropMemObject)) == + ur::cast(MInteropMemObject)) == Handles.end()) { const PluginPtr &Plugin = getPlugin(); - Plugin->call( - pi::cast(MInteropMemObject)); - Handles.push_back(pi::cast(MInteropMemObject)); + Plugin->call(urMemRetain, ur::cast(MInteropMemObject)); + ur_native_handle_t NativeHandle = 0; + Plugin->call(urMemGetNativeHandle, MInteropMemObject, nullptr, + &NativeHandle); + Handles.push_back(NativeHandle); } } } -std::vector +std::vector buffer_impl::getNativeVector(backend BackendName) const { - std::vector Handles{}; + std::vector Handles{}; if (!MRecord) { addInteropObject(Handles); return Handles; } for (auto &Cmd : MRecord->MAllocaCommands) { - sycl::detail::pi::PiMem NativeMem = - pi::cast(Cmd->getMemAllocation()); + ur_mem_handle_t NativeMem = + ur::cast(Cmd->getMemAllocation()); auto Ctx = Cmd->getWorkerContext(); // If Host Shared Memory is not supported then there is alloca for host that // doesn't have context and platform @@ -82,15 +85,14 @@ buffer_impl::getNativeVector(backend BackendName) const { auto Plugin = Platform->getPlugin(); if (Platform->getBackend() == backend::opencl) { - Plugin->call(NativeMem); + Plugin->call(urMemRetain, NativeMem); } - pi_native_handle Handle; + ur_native_handle_t Handle = 0; // When doing buffer interop we don't know what device the memory should be // resident on, so pass nullptr for Device param. Buffer interop may not be // supported by all backends. - Plugin->call(NativeMem, /*Dev*/ nullptr, - &Handle); + Plugin->call(urMemGetNativeHandle, NativeMem, /*Dev*/ nullptr, &Handle); Handles.push_back(Handle); } diff --git a/sycl/source/detail/buffer_impl.hpp b/sycl/source/detail/buffer_impl.hpp index 85a370439cf49..3c81af88be1db 100644 --- a/sycl/source/detail/buffer_impl.hpp +++ b/sycl/source/detail/buffer_impl.hpp @@ -8,7 +8,6 @@ #pragma once -#include "sycl/detail/pi.h" #include #include #include @@ -16,6 +15,7 @@ #include #include #include // for iterator_to_const_type_t +#include #include #include @@ -119,19 +119,18 @@ class buffer_impl final : public SYCLMemObjT { buffer_impl(cl_mem MemObject, const context &SyclContext, std::unique_ptr Allocator, event AvailableEvent) - : buffer_impl(pi::cast(MemObject), SyclContext, + : buffer_impl(ur::cast(MemObject), SyclContext, std::move(Allocator), /*OwnNativeHandle*/ true, std::move(AvailableEvent)) {} - buffer_impl(pi_native_handle MemObject, const context &SyclContext, + buffer_impl(ur_native_handle_t MemObject, const context &SyclContext, std::unique_ptr Allocator, bool OwnNativeHandle, event AvailableEvent) : BaseT(MemObject, SyclContext, OwnNativeHandle, std::move(AvailableEvent), std::move(Allocator)) {} void *allocateMem(ContextImplPtr Context, bool InitFromUserData, - void *HostPtr, - sycl::detail::pi::PiEvent &OutEventToWait) override; + void *HostPtr, ur_event_handle_t &OutEventToWait) override; void constructorNotification(const detail::code_location &CodeLoc, void *UserObj, const void *HostObj, const void *Type, uint32_t Dim, @@ -150,9 +149,9 @@ class buffer_impl final : public SYCLMemObjT { void resize(size_t size) { BaseT::MSizeInBytes = size; } - void addInteropObject(std::vector &Handles) const; + void addInteropObject(std::vector &Handles) const; - std::vector getNativeVector(backend BackendName) const; + std::vector getNativeVector(backend BackendName) const; }; } // namespace detail diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index ba634f074e7f8..f11b67c8a5be4 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -12,8 +12,7 @@ #include // for ArgDesc, HostTask, HostKernelBase #include // for code_location #include // for context_impl -#include // for pi_mem_advice, _pi_ext_command_b... -#include // for PiImageOffset, PiImageRegion +#include // for ur_rect_region_t, ur_rect_offset_t #include // for event_impl #include // for queue_impl #include // for kernel_impl @@ -255,7 +254,7 @@ class CGExecKernel : public CG { std::string MKernelName; std::vector> MStreams; std::vector> MAuxiliaryResources; - sycl::detail::pi::PiKernelCacheConfig MKernelCacheConfig; + ur_kernel_cache_config_t MKernelCacheConfig; bool MKernelIsCooperative = false; bool MKernelUsesClusterLaunch = false; @@ -266,8 +265,7 @@ class CGExecKernel : public CG { std::string KernelName, std::vector> Streams, std::vector> AuxiliaryResources, - CGType Type, - sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, + CGType Type, ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative, bool MKernelUsesClusterLaunch, detail::code_location loc = {}) : CG(Type, std::move(CGData), std::move(loc)), @@ -403,17 +401,17 @@ class CGPrefetchUSM : public CG { class CGAdviseUSM : public CG { void *MDst; size_t MLength; - pi_mem_advice MAdvice; + ur_usm_advice_flags_t MAdvice; public: - CGAdviseUSM(void *DstPtr, size_t Length, pi_mem_advice Advice, + CGAdviseUSM(void *DstPtr, size_t Length, ur_usm_advice_flags_t Advice, CG::StorageInitHelper CGData, CGType Type, detail::code_location loc = {}) : CG(Type, std::move(CGData), std::move(loc)), MDst(DstPtr), MLength(Length), MAdvice(Advice) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } - pi_mem_advice getAdvice() { return MAdvice; } + ur_usm_advice_flags_t getAdvice() { return MAdvice; } }; class CGBarrier : public CG { @@ -578,26 +576,23 @@ class CGCopyFromDeviceGlobal : public CG { class CGCopyImage : public CG { void *MSrc; void *MDst; - sycl::detail::pi::PiMemImageDesc MSrcImageDesc; - sycl::detail::pi::PiMemImageDesc MDstImageDesc; - sycl::detail::pi::PiMemImageFormat MSrcImageFormat; - sycl::detail::pi::PiMemImageFormat MDstImageFormat; - sycl::detail::pi::PiImageCopyFlags MImageCopyFlags; - sycl::detail::pi::PiImageOffset MSrcOffset; - sycl::detail::pi::PiImageOffset MDstOffset; - sycl::detail::pi::PiImageRegion MCopyExtent; + ur_image_desc_t MSrcImageDesc; + ur_image_desc_t MDstImageDesc; + ur_image_format_t MSrcImageFormat; + ur_image_format_t MDstImageFormat; + ur_exp_image_copy_flags_t MImageCopyFlags; + ur_rect_offset_t MSrcOffset; + ur_rect_offset_t MDstOffset; + ur_rect_region_t MCopyExtent; public: - CGCopyImage(void *Src, void *Dst, - sycl::detail::pi::PiMemImageDesc SrcImageDesc, - sycl::detail::pi::PiMemImageDesc DstImageDesc, - sycl::detail::pi::PiMemImageFormat SrcImageFormat, - sycl::detail::pi::PiMemImageFormat DstImageFormat, - sycl::detail::pi::PiImageCopyFlags ImageCopyFlags, - sycl::detail::pi::PiImageOffset SrcOffset, - sycl::detail::pi::PiImageOffset DstOffset, - sycl::detail::pi::PiImageRegion CopyExtent, - CG::StorageInitHelper CGData, detail::code_location loc = {}) + CGCopyImage(void *Src, void *Dst, ur_image_desc_t SrcImageDesc, + ur_image_desc_t DstImageDesc, ur_image_format_t SrcImageFormat, + ur_image_format_t DstImageFormat, + ur_exp_image_copy_flags_t ImageCopyFlags, + ur_rect_offset_t SrcOffset, ur_rect_offset_t DstOffset, + ur_rect_region_t CopyExtent, CG::StorageInitHelper CGData, + detail::code_location loc = {}) : CG(CGType::CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MSrcImageDesc(SrcImageDesc), MDstImageDesc(DstImageDesc), MSrcImageFormat(SrcImageFormat), MDstImageFormat(DstImageFormat), @@ -606,37 +601,30 @@ class CGCopyImage : public CG { void *getSrc() const { return MSrc; } void *getDst() const { return MDst; } - sycl::detail::pi::PiMemImageDesc getSrcDesc() const { return MSrcImageDesc; } - sycl::detail::pi::PiMemImageDesc getDstDesc() const { return MDstImageDesc; } - sycl::detail::pi::PiMemImageFormat getSrcFormat() const { - return MSrcImageFormat; - } - sycl::detail::pi::PiMemImageFormat getDstFormat() const { - return MDstImageFormat; - } - sycl::detail::pi::PiImageCopyFlags getCopyFlags() const { - return MImageCopyFlags; - } - sycl::detail::pi::PiImageOffset getSrcOffset() const { return MSrcOffset; } - sycl::detail::pi::PiImageOffset getDstOffset() const { return MDstOffset; } - sycl::detail::pi::PiImageRegion getCopyExtent() const { return MCopyExtent; } + ur_image_desc_t getSrcDesc() const { return MSrcImageDesc; } + ur_image_desc_t getDstDesc() const { return MDstImageDesc; } + ur_image_format_t getSrcFormat() const { return MSrcImageFormat; } + ur_image_format_t getDstFormat() const { return MDstImageFormat; } + ur_exp_image_copy_flags_t getCopyFlags() const { return MImageCopyFlags; } + ur_rect_offset_t getSrcOffset() const { return MSrcOffset; } + ur_rect_offset_t getDstOffset() const { return MDstOffset; } + ur_rect_region_t getCopyExtent() const { return MCopyExtent; } }; /// "Semaphore Wait" command group class. class CGSemaphoreWait : public CG { - sycl::detail::pi::PiInteropSemaphoreHandle MInteropSemaphoreHandle; + ur_exp_interop_semaphore_handle_t MInteropSemaphoreHandle; std::optional MWaitValue; public: - CGSemaphoreWait( - sycl::detail::pi::PiInteropSemaphoreHandle InteropSemaphoreHandle, - std::optional WaitValue, CG::StorageInitHelper CGData, - detail::code_location loc = {}) + CGSemaphoreWait(ur_exp_interop_semaphore_handle_t InteropSemaphoreHandle, + std::optional WaitValue, + CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(CGType::SemaphoreWait, std::move(CGData), std::move(loc)), MInteropSemaphoreHandle(InteropSemaphoreHandle), MWaitValue(WaitValue) { } - sycl::detail::pi::PiInteropSemaphoreHandle getInteropSemaphoreHandle() const { + ur_exp_interop_semaphore_handle_t getInteropSemaphoreHandle() const { return MInteropSemaphoreHandle; } std::optional getWaitValue() const { return MWaitValue; } @@ -644,19 +632,19 @@ class CGSemaphoreWait : public CG { /// "Semaphore Signal" command group class. class CGSemaphoreSignal : public CG { - sycl::detail::pi::PiInteropSemaphoreHandle MInteropSemaphoreHandle; + ur_exp_interop_semaphore_handle_t MInteropSemaphoreHandle; std::optional MSignalValue; public: - CGSemaphoreSignal( - sycl::detail::pi::PiInteropSemaphoreHandle InteropSemaphoreHandle, - std::optional SignalValue, CG::StorageInitHelper CGData, - detail::code_location loc = {}) + CGSemaphoreSignal(ur_exp_interop_semaphore_handle_t InteropSemaphoreHandle, + std::optional SignalValue, + CG::StorageInitHelper CGData, + detail::code_location loc = {}) : CG(CGType::SemaphoreSignal, std::move(CGData), std::move(loc)), MInteropSemaphoreHandle(InteropSemaphoreHandle), MSignalValue(SignalValue) {} - sycl::detail::pi::PiInteropSemaphoreHandle getInteropSemaphoreHandle() const { + ur_exp_interop_semaphore_handle_t getInteropSemaphoreHandle() const { return MInteropSemaphoreHandle; } std::optional getSignalValue() const { return MSignalValue; } @@ -665,12 +653,12 @@ class CGSemaphoreSignal : public CG { /// "Execute command-buffer" command group class. class CGExecCommandBuffer : public CG { public: - sycl::detail::pi::PiExtCommandBuffer MCommandBuffer; + ur_exp_command_buffer_handle_t MCommandBuffer; std::shared_ptr MExecGraph; CGExecCommandBuffer( - const sycl::detail::pi::PiExtCommandBuffer &CommandBuffer, + const ur_exp_command_buffer_handle_t &CommandBuffer, const std::shared_ptr< sycl::ext::oneapi::experimental::detail::exec_graph_impl> &ExecGraph, CG::StorageInitHelper CGData) diff --git a/sycl/source/detail/common.cpp b/sycl/source/detail/common.cpp index 7bc85e026dc71..dded4d4955f3a 100644 --- a/sycl/source/detail/common.cpp +++ b/sycl/source/detail/common.cpp @@ -8,6 +8,8 @@ #include +#include + namespace sycl { inline namespace _V1 { namespace detail { @@ -27,6 +29,16 @@ tls_code_loc_t::tls_code_loc_t() { MLocalScope = GCodeLocTLS.fileName() && GCodeLocTLS.functionName(); } +ur_code_location_t codeLocationCallback(void *) { + ur_code_location_t codeloc; + codeloc.columnNumber = GCodeLocTLS.columnNumber(); + codeloc.lineNumber = GCodeLocTLS.lineNumber(); + codeloc.functionName = GCodeLocTLS.functionName(); + codeloc.sourceFile = GCodeLocTLS.fileName(); + + return codeloc; +} + /// @brief Constructor to use at the top level of the calling stack /// @details This is usually a SYCL entry point used by the end user in their /// application code. In this case, we still check to see if another code diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 94424312c14d2..fe727bf17aad4 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -15,6 +15,7 @@ CONFIG(SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP, 1, __SYCL_DISABLE_EXECUTION_GRAPH_C CONFIG(SYCL_DISABLE_POST_ENQUEUE_CLEANUP, 1, __SYCL_DISABLE_POST_ENQUEUE_CLEANUP) CONFIG(SYCL_DEVICE_ALLOWLIST, 1024, __SYCL_DEVICE_ALLOWLIST) CONFIG(SYCL_PI_TRACE, 16, __SYCL_PI_TRACE) +CONFIG(SYCL_UR_TRACE, 1, __SYCL_UR_TRACE) CONFIG(SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE, 16, __SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE) CONFIG(SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING, 16, __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING) CONFIG(SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS, 64, __SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS) diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index c8a079f94ae66..f6d37c4af60db 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -127,28 +127,6 @@ template class SYCLConfig { } }; -template <> class SYCLConfig { - using BaseT = SYCLConfigBase; - -public: - static int get() { - static bool Initialized = false; - // We don't use TraceLevel enum here because user can provide any bitmask - // which can correspond to several enum values. - static int Level = 0; // No tracing by default - - // Configuration parameters are processed only once, like reading a string - // from environment and converting it into a typed object. - if (Initialized) - return Level; - - const char *ValStr = BaseT::getRawValue(); - Level = (ValStr ? std::atoi(ValStr) : 0); - Initialized = true; - return Level; - } -}; - template <> class SYCLConfig { using BaseT = SYCLConfigBase; diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index db40ee96d5e54..d738053302e54 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -12,8 +12,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -43,7 +42,7 @@ context_impl::context_impl(const std::vector Devices, MContext(nullptr), MPlatform(), MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); - std::vector DeviceIds; + std::vector DeviceIds; for (const auto &D : MDevices) { if (D.has(aspect::ext_oneapi_is_composite)) { // Component devices are considered to be descendent devices from a @@ -60,34 +59,32 @@ context_impl::context_impl(const std::vector Devices, DeviceIds.push_back(getSyclObjImpl(D)->getHandleRef()); } - getPlugin()->call( - nullptr, DeviceIds.size(), DeviceIds.data(), nullptr, nullptr, &MContext); + getPlugin()->call(urContextCreate, DeviceIds.size(), DeviceIds.data(), + nullptr, &MContext); MKernelProgramCache.setContextPtr(this); } -context_impl::context_impl(sycl::detail::pi::PiContext PiContext, +context_impl::context_impl(ur_context_handle_t UrContext, async_handler AsyncHandler, const PluginPtr &Plugin, const std::vector &DeviceList, bool OwnedByRuntime) : MOwnedByRuntime(OwnedByRuntime), MAsyncHandler(AsyncHandler), - MDevices(DeviceList), MContext(PiContext), MPlatform(), + MDevices(DeviceList), MContext(UrContext), MPlatform(), MSupportBufferLocationByDevices(NotChecked) { if (!MDevices.empty()) { MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); } else { - std::vector DeviceIds; + std::vector DeviceIds; uint32_t DevicesNum = 0; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call( - MContext, PI_CONTEXT_INFO_NUM_DEVICES, sizeof(DevicesNum), &DevicesNum, - nullptr); + Plugin->call(urContextGetInfo, MContext, UR_CONTEXT_INFO_NUM_DEVICES, + sizeof(DevicesNum), &DevicesNum, nullptr); DeviceIds.resize(DevicesNum); // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call( - MContext, PI_CONTEXT_INFO_DEVICES, - sizeof(sycl::detail::pi::PiDevice) * DevicesNum, &DeviceIds[0], - nullptr); + Plugin->call(urContextGetInfo, MContext, UR_CONTEXT_INFO_DEVICES, + sizeof(ur_device_handle_t) * DevicesNum, &DeviceIds[0], + nullptr); if (DeviceIds.empty()) throw exception( @@ -95,8 +92,8 @@ context_impl::context_impl(sycl::detail::pi::PiContext PiContext, "No devices in the provided device list and native context."); std::shared_ptr Platform = - platform_impl::getPlatformFromPiDevice(DeviceIds[0], Plugin); - for (sycl::detail::pi::PiDevice Dev : DeviceIds) { + platform_impl::getPlatformFromUrDevice(DeviceIds[0], Plugin); + for (ur_device_handle_t Dev : DeviceIds) { MDevices.emplace_back(createSyclObjFromImpl( Platform->getOrMakeDeviceImpl(Dev, Platform))); } @@ -109,15 +106,17 @@ context_impl::context_impl(sycl::detail::pi::PiContext PiContext, // TODO: Move this backend-specific retain of the context to SYCL-2020 style // make_context interop, when that is created. if (getBackend() == sycl::backend::opencl) { - getPlugin()->call(MContext); + getPlugin()->call(urContextRetain, MContext); } MKernelProgramCache.setContextPtr(this); } cl_context context_impl::get() const { // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call(MContext); - return pi::cast(MContext); + getPlugin()->call(urContextRetain, MContext); + ur_native_handle_t nativeHandle = 0; + getPlugin()->call(urContextGetNativeHandle, MContext, &nativeHandle); + return ur::cast(nativeHandle); } context_impl::~context_impl() { @@ -134,10 +133,10 @@ context_impl::~context_impl() { } for (auto LibProg : MCachedLibPrograms) { assert(LibProg.second && "Null program must not be kept in the cache"); - getPlugin()->call(LibProg.second); + getPlugin()->call(urProgramRelease, LibProg.second); } // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call(MContext); + getPlugin()->call_nocheck(urContextRelease, MContext); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~context_impl", e); } @@ -264,8 +263,8 @@ context_impl::get_backend_info() const { // empty string as per specification. } -sycl::detail::pi::PiContext &context_impl::getHandleRef() { return MContext; } -const sycl::detail::pi::PiContext &context_impl::getHandleRef() const { +ur_context_handle_t &context_impl::getHandleRef() { return MContext; } +const ur_context_handle_t &context_impl::getHandleRef() const { return MContext; } @@ -281,21 +280,21 @@ bool context_impl::hasDevice( return false; } -DeviceImplPtr context_impl::findMatchingDeviceImpl( - sycl::detail::pi::PiDevice &DevicePI) const { +DeviceImplPtr +context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const { for (device D : MDevices) - if (getSyclObjImpl(D)->getHandleRef() == DevicePI) + if (getSyclObjImpl(D)->getHandleRef() == DeviceUR) return getSyclObjImpl(D); return nullptr; } -pi_native_handle context_impl::getNative() const { +ur_native_handle_t context_impl::getNative() const { const auto &Plugin = getPlugin(); if (getBackend() == backend::opencl) - Plugin->call(getHandleRef()); - pi_native_handle Handle; - Plugin->call(getHandleRef(), &Handle); + Plugin->call(urContextRetain, getHandleRef()); + ur_native_handle_t Handle; + Plugin->call(urContextGetNativeHandle, getHandleRef(), &Handle); return Handle; } @@ -319,7 +318,7 @@ void context_impl::addAssociatedDeviceGlobal(const void *DeviceGlobalPtr) { } void context_impl::addDeviceGlobalInitializer( - sycl::detail::pi::PiProgram Program, const std::vector &Devs, + ur_program_handle_t Program, const std::vector &Devs, const RTDeviceBinaryImage *BinImage) { std::lock_guard Lock(MDeviceGlobalInitializersMutex); for (const device &Dev : Devs) { @@ -328,8 +327,9 @@ void context_impl::addDeviceGlobalInitializer( } } -std::vector context_impl::initializeDeviceGlobals( - pi::PiProgram NativePrg, const std::shared_ptr &QueueImpl) { +std::vector context_impl::initializeDeviceGlobals( + ur_program_handle_t NativePrg, + const std::shared_ptr &QueueImpl) { const PluginPtr &Plugin = getPlugin(); const DeviceImplPtr &DeviceImpl = QueueImpl->getDeviceImplPtr(); std::lock_guard NativeProgramLock(MDeviceGlobalInitializersMutex); @@ -342,19 +342,20 @@ std::vector context_impl::initializeDeviceGlobals( DeviceGlobalInitializer &InitRef = ImgIt->second; { std::lock_guard InitLock(InitRef.MDeviceGlobalInitMutex); - std::vector &InitEventsRef = + std::vector &InitEventsRef = InitRef.MDeviceGlobalInitEvents; if (!InitEventsRef.empty()) { // Initialization has begun but we do not know if the events are done. auto NewEnd = std::remove_if( InitEventsRef.begin(), InitEventsRef.end(), - [&Plugin](const sycl::detail::pi::PiEvent &Event) { + [&Plugin](const ur_event_handle_t &Event) { return get_event_info( Event, Plugin) == info::event_command_status::complete; + return false; }); // Release the removed events. for (auto EventIt = NewEnd; EventIt != InitEventsRef.end(); ++EventIt) - Plugin->call(*EventIt); + Plugin->call(urEventRelease, *EventIt); // Remove them from the collection. InitEventsRef.erase(NewEnd, InitEventsRef.end()); // If there are no more events, we can mark it as fully initialized. @@ -404,17 +405,17 @@ std::vector context_impl::initializeDeviceGlobals( // are cleaned up separately from cleaning up the device global USM memory // this must retain the event. { - if (OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Plugin)) + if (OwnedUrEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Plugin)) InitEventsRef.push_back(ZIEvent.TransferOwnership()); } // Write the pointer to the device global and store the event in the // initialize events list. - sycl::detail::pi::PiEvent InitEvent; + ur_event_handle_t InitEvent; void *const &USMPtr = DeviceGlobalUSM.getPtr(); - Plugin->call( - QueueImpl->getHandleRef(), NativePrg, - DeviceGlobalEntry->MUniqueId.c_str(), false, sizeof(void *), 0, - &USMPtr, 0, nullptr, &InitEvent); + Plugin->call(urEnqueueDeviceGlobalVariableWrite, + QueueImpl->getHandleRef(), NativePrg, + DeviceGlobalEntry->MUniqueId.c_str(), false, sizeof(void *), + 0, &USMPtr, 0, nullptr, &InitEvent); InitEventsRef.push_back(InitEvent); } @@ -424,8 +425,8 @@ std::vector context_impl::initializeDeviceGlobals( void context_impl::DeviceGlobalInitializer::ClearEvents( const PluginPtr &Plugin) { - for (const sycl::detail::pi::PiEvent &Event : MDeviceGlobalInitEvents) - Plugin->call(Event); + for (const ur_event_handle_t &Event : MDeviceGlobalInitEvents) + Plugin->call(urEventRelease, Event); MDeviceGlobalInitEvents.clear(); } @@ -433,7 +434,7 @@ void context_impl::memcpyToHostOnlyDeviceGlobal( const std::shared_ptr &DeviceImpl, const void *DeviceGlobalPtr, const void *Src, size_t DeviceGlobalTSize, bool IsDeviceImageScoped, size_t NumBytes, size_t Offset) { - std::optional KeyDevice = std::nullopt; + std::optional KeyDevice = std::nullopt; if (IsDeviceImageScoped) KeyDevice = DeviceImpl->getHandleRef(); auto Key = std::make_pair(DeviceGlobalPtr, KeyDevice); @@ -456,7 +457,7 @@ void context_impl::memcpyFromHostOnlyDeviceGlobal( const void *DeviceGlobalPtr, bool IsDeviceImageScoped, size_t NumBytes, size_t Offset) { - std::optional KeyDevice = std::nullopt; + std::optional KeyDevice = std::nullopt; if (IsDeviceImageScoped) KeyDevice = DeviceImpl->getHandleRef(); auto Key = std::make_pair(DeviceGlobalPtr, KeyDevice); @@ -475,7 +476,7 @@ void context_impl::memcpyFromHostOnlyDeviceGlobal( std::memcpy(Dest, ValuePtr + Offset, NumBytes); } -std::optional context_impl::getProgramForDevImgs( +std::optional context_impl::getProgramForDevImgs( const device &Device, const std::set &ImgIdentifiers, const std::string &ObjectTypeName) { @@ -484,8 +485,7 @@ std::optional context_impl::getProgramForDevImgs( auto LockedCache = MKernelProgramCache.acquireCachedPrograms(); auto &KeyMap = LockedCache.get().KeyMap; auto &Cache = LockedCache.get().Cache; - sycl::detail::pi::PiDevice &DevHandle = - getSyclObjImpl(Device)->getHandleRef(); + ur_device_handle_t &DevHandle = getSyclObjImpl(Device)->getHandleRef(); for (std::uintptr_t ImageIDs : ImgIdentifiers) { auto OuterKey = std::make_pair(ImageIDs, DevHandle); size_t NProgs = KeyMap.count(OuterKey); @@ -511,7 +511,7 @@ std::optional context_impl::getProgramForDevImgs( using BuildState = KernelProgramCache::BuildState; BuildState NewState = BuildRes->waitUntilTransition(); if (NewState == BuildState::BS_Failed) - throw detail::set_pi_error( + throw detail::set_ur_error( exception(make_error_code(errc::build), BuildRes->Error.Msg), BuildRes->Error.Code); @@ -519,14 +519,13 @@ std::optional context_impl::getProgramForDevImgs( return BuildRes->Val; } -std::optional -context_impl::getProgramForDeviceGlobal( +std::optional context_impl::getProgramForDeviceGlobal( const device &Device, DeviceGlobalMapEntry *DeviceGlobalEntry) { return getProgramForDevImgs(Device, DeviceGlobalEntry->MImageIdentifiers, "device_global"); } /// Gets a program associated with a HostPipe Entry from the cache. -std::optional +std::optional context_impl::getProgramForHostPipe(const device &Device, HostPipeMapEntry *HostPipeEntry) { // One HostPipe entry belongs to one Img diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 278d842b1efc8..48fb83f5807d9 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -63,13 +63,13 @@ class context_impl { /// The constructed context_impl will use the AsyncHandler parameter to /// handle exceptions. /// - /// \param PiContext is an instance of a valid plug-in context handle. + /// \param UrContext is an instance of a valid plug-in context handle. /// \param AsyncHandler is an instance of async_handler. /// \param Plugin is the reference to the underlying Plugin that this /// \param OwnedByRuntime is the flag if ownership is kept by user or /// transferred to runtime - context_impl(sycl::detail::pi::PiContext PiContext, - async_handler AsyncHandler, const PluginPtr &Plugin, + context_impl(ur_context_handle_t UrContext, async_handler AsyncHandler, + const PluginPtr &Plugin, const std::vector &DeviceList = {}, bool OwnedByRuntime = true); @@ -110,7 +110,7 @@ class context_impl { /// reference will be invalid if context_impl was destroyed. /// /// \return an instance of raw plug-in context handle. - sycl::detail::pi::PiContext &getHandleRef(); + ur_context_handle_t &getHandleRef(); /// Gets the underlying context object (if any) without reference count /// modification. @@ -120,22 +120,22 @@ class context_impl { /// reference will be invalid if context_impl was destroyed. /// /// \return an instance of raw plug-in context handle. - const sycl::detail::pi::PiContext &getHandleRef() const; + const ur_context_handle_t &getHandleRef() const; /// Unlike `get_info', this function returns a /// reference. const std::vector &getDevices() const { return MDevices; } using CachedLibProgramsT = - std::map, - sycl::detail::pi::PiProgram>; + std::map, + ur_program_handle_t>; /// In contrast to user programs, which are compiled from user code, library /// programs come from the SYCL runtime. They are identified by the /// corresponding extension: /// - /// cl_intel_devicelib_assert -> # - /// cl_intel_devicelib_complex -> # + /// cl_intel_devicelib_assert -> # + /// cl_intel_devicelib_complex -> # /// etc. /// /// See `doc/design/DeviceLibExtensions.rst' for @@ -188,15 +188,14 @@ class context_impl { return MPlatform->getBackend(); } - /// Given a PiDevice, returns the matching shared_ptr + /// Given a UR device, returns the matching shared_ptr /// within this context. May return nullptr if no match discovered. - DeviceImplPtr - findMatchingDeviceImpl(sycl::detail::pi::PiDevice &DevicePI) const; + DeviceImplPtr findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const; /// Gets the native handle of the SYCL context. /// /// \return a native handle. - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; // Returns true if buffer_location property is supported by devices bool isBufferLocationSupported() const; @@ -205,13 +204,13 @@ class context_impl { void addAssociatedDeviceGlobal(const void *DeviceGlobalPtr); /// Adds a device global initializer. - void addDeviceGlobalInitializer(sycl::detail::pi::PiProgram Program, + void addDeviceGlobalInitializer(ur_program_handle_t Program, const std::vector &Devs, const RTDeviceBinaryImage *BinImage); /// Initializes device globals for a program on the associated queue. - std::vector - initializeDeviceGlobals(pi::PiProgram NativePrg, + std::vector + initializeDeviceGlobals(ur_program_handle_t NativePrg, const std::shared_ptr &QueueImpl); void memcpyToHostOnlyDeviceGlobal( @@ -226,15 +225,15 @@ class context_impl { size_t Offset); /// Gets a program associated with a device global from the cache. - std::optional + std::optional getProgramForDeviceGlobal(const device &Device, DeviceGlobalMapEntry *DeviceGlobalEntry); /// Gets a program associated with a HostPipe Entry from the cache. - std::optional + std::optional getProgramForHostPipe(const device &Device, HostPipeMapEntry *HostPipeEntry); /// Gets a program associated with Dev / Images pairs. - std::optional + std::optional getProgramForDevImgs(const device &Device, const std::set &ImgIdentifiers, const std::string &ObjectTypeName); @@ -249,7 +248,7 @@ class context_impl { bool MOwnedByRuntime; async_handler MAsyncHandler; std::vector MDevices; - sycl::detail::pi::PiContext MContext; + ur_context_handle_t MContext; PlatformImplPtr MPlatform; property_list MPropList; CachedLibProgramsT MCachedLibPrograms; @@ -289,10 +288,10 @@ class context_impl { /// A vector of events associated with the initialization of device globals. /// MDeviceGlobalInitMutex must be held when accessing this. - std::vector MDeviceGlobalInitEvents; + std::vector MDeviceGlobalInitEvents; }; - std::map, + std::map, DeviceGlobalInitializer> MDeviceGlobalInitializers; std::mutex MDeviceGlobalInitializersMutex; @@ -302,7 +301,7 @@ class context_impl { // associated writes. // The key to this map is a combination of a the pointer to the device_global // and optionally a device if the device_global has device image scope. - std::map>, + std::map>, std::unique_ptr> MDeviceGlobalUnregisteredData; std::mutex MDeviceGlobalUnregisteredDataMutex; diff --git a/sycl/source/detail/context_info.hpp b/sycl/source/detail/context_info.hpp index ccb66811b8124..b4ba1f1ec775d 100644 --- a/sycl/source/detail/context_info.hpp +++ b/sycl/source/detail/context_info.hpp @@ -18,14 +18,14 @@ inline namespace _V1 { namespace detail { template -typename Param::return_type get_context_info(sycl::detail::pi::PiContext Ctx, +typename Param::return_type get_context_info(ur_context_handle_t Ctx, const PluginPtr &Plugin) { static_assert(is_context_info_desc::value, "Invalid context information descriptor"); typename Param::return_type Result = 0; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call(Ctx, PiInfoCode::value, - sizeof(Result), &Result, nullptr); + Plugin->call(urContextGetInfo, Ctx, UrInfoCode::value, sizeof(Result), + &Result, nullptr); return Result; } diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 686c55ad03b9e..a1d89ed845da2 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include @@ -58,7 +58,7 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) { return Out; } -pi_uint32 DeviceBinaryProperty::asUint32() const { +uint32_t DeviceBinaryProperty::asUint32() const { assert(Prop->Type == SYCL_PROPERTY_TYPE_UINT32 && "property type mismatch"); // if type fits into the ValSize - it is used to store the property value assert(Prop->ValAddr == nullptr && "primitive types must be stored inline"); @@ -70,7 +70,7 @@ ByteArray DeviceBinaryProperty::asByteArray() const { assert(Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY && "property type mismatch"); assert(Prop->ValSize > 0 && "property size mismatch"); - const auto *Data = pi::cast(Prop->ValAddr); + const auto *Data = ur::cast(Prop->ValAddr); return {Data, Prop->ValSize}; } @@ -81,7 +81,7 @@ const char *DeviceBinaryProperty::asCString() const { assert(Prop->ValSize > 0 && "property size mismatch"); // Byte array stores its size in first 8 bytes size_t Shift = Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY ? 8 : 0; - return pi::cast(Prop->ValAddr) + Shift; + return ur::cast(Prop->ValAddr) + Shift; } void RTDeviceBinaryImage::PropertyRange::init(sycl_device_binary Bin, @@ -165,11 +165,11 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { // which can't be modified (easily). // TODO clang driver + ClangOffloadWrapper can figure out the format and set // it when invoking the offload wrapper job - Format = static_cast(Bin->Format); + Format = static_cast(Bin->Format); if (Format == SYCL_DEVICE_BINARY_TYPE_NONE) // try to determine the format; may remain "NONE" - Format = pi::getBinaryImageFormat(Bin->BinaryStart, getSize()); + Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); SpecConstIDMap.init(Bin, __SYCL_PROPERTY_SET_SPEC_CONST_MAP); SpecConstDefaultValuesMap.init( @@ -205,7 +205,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( Bin->BinaryEnd = Bin->BinaryStart + DataSize; Bin->EntriesBegin = nullptr; Bin->EntriesEnd = nullptr; - Bin->Format = pi::getBinaryImageFormat(Bin->BinaryStart, DataSize); + Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, DataSize); switch (Bin->Format) { case SYCL_DEVICE_BINARY_TYPE_SPIRV: Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_SPIRV64; diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index d46d05e1afff1..49047a04ae77c 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -10,9 +10,9 @@ #include #include #include -#include - -#include "pi_utils.hpp" +#include +#include +#include "ur_utils.hpp" #include @@ -71,7 +71,7 @@ class DeviceBinaryProperty { DeviceBinaryProperty(const _sycl_device_binary_property_struct *Prop) : Prop(Prop) {} - pi_uint32 asUint32() const; + uint32_t asUint32() const; ByteArray asByteArray() const; const char *asCString() const; @@ -83,7 +83,7 @@ class DeviceBinaryProperty { std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P); -// SYCL RT wrapper over PI binary image. +// SYCL RT wrapper over UR binary image. class RTDeviceBinaryImage { public: // Represents a range of properties to enable iteration over them. @@ -174,7 +174,7 @@ class RTDeviceBinaryImage { } /// Returns the format of the binary image - pi::PiDeviceBinaryType getFormat() const { + ur::DeviceBinaryType getFormat() const { assert(Bin && "binary image data not set"); return Format; } @@ -216,6 +216,9 @@ class RTDeviceBinaryImage { } const PropertyRange &getAssertUsed() const { return AssertUsed; } const PropertyRange &getProgramMetadata() const { return ProgramMetadata; } + const std::vector &getProgramMetadataUR() const { + return ProgramMetadataUR; + } const PropertyRange &getExportedSymbols() const { return ExportedSymbols; } const PropertyRange &getImportedSymbols() const { return ImportedSymbols; } const PropertyRange &getDeviceGlobals() const { return DeviceGlobals; } @@ -236,7 +239,7 @@ class RTDeviceBinaryImage { sycl_device_binary Bin; - pi::PiDeviceBinaryType Format = SYCL_DEVICE_BINARY_TYPE_NONE; + ur::DeviceBinaryType Format = SYCL_DEVICE_BINARY_TYPE_NONE; RTDeviceBinaryImage::PropertyRange SpecConstIDMap; RTDeviceBinaryImage::PropertyRange SpecConstDefaultValuesMap; RTDeviceBinaryImage::PropertyRange DeviceLibReqMask; @@ -250,6 +253,8 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange HostPipes; RTDeviceBinaryImage::PropertyRange VirtualFunctions; + std::vector ProgramMetadataUR; + private: static std::atomic ImageCounter; uintptr_t ImageId = 0; diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 9400037a86adf..d12c1de36d024 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -25,20 +25,20 @@ DeviceGlobalUSMMem::~DeviceGlobalUSMMem() { assert(!MInitEvent.has_value() && "MInitEvent has not been cleaned up."); } -OwnedPiEvent DeviceGlobalUSMMem::getInitEvent(const PluginPtr &Plugin) { +OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(const PluginPtr &Plugin) { std::lock_guard Lock(MInitEventMutex); // If there is a init event we can remove it if it is done. if (MInitEvent.has_value()) { if (get_event_info( *MInitEvent, Plugin) == info::event_command_status::complete) { - Plugin->call(*MInitEvent); + Plugin->call(urEventRelease, *MInitEvent); MInitEvent = {}; - return OwnedPiEvent(Plugin); + return OwnedUrEvent(Plugin); } else { - return OwnedPiEvent(*MInitEvent, Plugin); + return OwnedUrEvent(*MInitEvent, Plugin); } } - return OwnedPiEvent(Plugin); + return OwnedUrEvent(Plugin); } DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM( @@ -69,19 +69,20 @@ DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM( // Initialize here and save the event. { std::lock_guard Lock(NewAlloc.MInitEventMutex); - sycl::detail::pi::PiEvent InitEvent; + ur_event_handle_t InitEvent; // C++ guarantees members appear in memory in the order they are declared, // so since the member variable that contains the initial contents of the // device_global is right after the usm_ptr member variable we can do // some pointer arithmetic to memcopy over this value to the usm_ptr. This // value inside of the device_global will be zero-initialized if it was not // given a value on construction. + MemoryManager::copy_usm(reinterpret_cast( reinterpret_cast(MDeviceGlobalPtr) + sizeof(MDeviceGlobalPtr)), QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr, - std::vector{}, - &InitEvent, nullptr); + std::vector{}, &InitEvent, + nullptr); NewAlloc.MInitEvent = InitEvent; } @@ -99,8 +100,7 @@ void DeviceGlobalMapEntry::removeAssociatedResources( DeviceGlobalUSMMem &USMMem = USMPtrIt->second; detail::usm::freeInternal(USMMem.MPtr, CtxImpl); if (USMMem.MInitEvent.has_value()) - CtxImpl->getPlugin()->call( - *USMMem.MInitEvent); + CtxImpl->getPlugin()->call(urEventRelease, *USMMem.MInitEvent); #ifndef NDEBUG // For debugging we set the event and memory to some recognizable values // to allow us to check that this cleanup happens before erasure. diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index 081ae8705228d..f507b2d9b8060 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -16,7 +16,7 @@ #include #include -#include +#include #include namespace sycl { @@ -37,14 +37,14 @@ struct DeviceGlobalUSMMem { void *const &getPtr() const noexcept { return MPtr; } - // Gets the initialization event if it exists. If not the OwnedPiEvent + // Gets the initialization event if it exists. If not the OwnedUrEvent // will contain no event. - OwnedPiEvent getInitEvent(const PluginPtr &Plugin); + OwnedUrEvent getInitEvent(const PluginPtr &Plugin); private: void *MPtr; std::mutex MInitEventMutex; - std::optional MInitEvent; + std::optional MInitEvent; friend struct DeviceGlobalMapEntry; }; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index e79e9c78b1ad0..e54fc0b2976c0 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -17,8 +17,7 @@ #include #include #include -#include -#include +#include #include #include @@ -61,7 +60,7 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector Devices, bundle_state State, std::shared_ptr> KernelIDs, - sycl::detail::pi::PiProgram Program) + ur_program_handle_t Program) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)), @@ -72,7 +71,7 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector Devices, bundle_state State, std::shared_ptr> KernelIDs, - sycl::detail::pi::PiProgram Program, + ur_program_handle_t Program, const SpecConstMapT &SpecConstMap, const std::vector &SpecConstsBlob) : MBinImage(BinImage), MContext(std::move(Context)), @@ -244,7 +243,7 @@ class device_image_impl { [&Dev](const device &DevCand) { return Dev == DevCand; }); } - const sycl::detail::pi::PiProgram &get_program_ref() const noexcept { + const ur_program_handle_t &get_ur_program_ref() const noexcept { return MProgram; } @@ -260,20 +259,21 @@ class device_image_impl { return MSpecConstsBlob; } - sycl::detail::pi::PiMem &get_spec_const_buffer_ref() noexcept { + ur_mem_handle_t &get_spec_const_buffer_ref() noexcept { std::lock_guard Lock{MSpecConstAccessMtx}; if (nullptr == MSpecConstsBuffer && !MSpecConstsBlob.empty()) { const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); - // Uses PI_MEM_FLAGS_HOST_PTR_COPY instead of PI_MEM_FLAGS_HOST_PTR_USE - // since post-enqueue cleanup might trigger destruction of - // device_image_impl and, as a result, destruction of MSpecConstsBlob - // while MSpecConstsBuffer is still in use. - // TODO consider changing the lifetime of device_image_impl instead - memBufferCreateHelper(Plugin, - detail::getSyclObjImpl(MContext)->getHandleRef(), - PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_COPY, - MSpecConstsBlob.size(), MSpecConstsBlob.data(), - &MSpecConstsBuffer, nullptr); + // Uses UR_MEM_FLAGS_HOST_PTR_COPY instead of UR_MEM_FLAGS_HOST_PTR_USE + // since post-enqueue cleanup might trigger destruction of + // device_image_impl and, as a result, destruction of MSpecConstsBlob + // while MSpecConstsBuffer is still in use. + // TODO consider changing the lifetime of device_image_impl instead + ur_buffer_properties_t Properties = {UR_STRUCTURE_TYPE_BUFFER_PROPERTIES, + nullptr, MSpecConstsBlob.data()}; + memBufferCreateHelper( + Plugin, detail::getSyclObjImpl(MContext)->getHandleRef(), + UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER, + MSpecConstsBlob.size(), &MSpecConstsBuffer, &Properties); } return MSpecConstsBuffer; } @@ -286,16 +286,15 @@ class device_image_impl { return MSpecConstAccessMtx; } - pi_native_handle getNative() const { + ur_native_handle_t getNative() const { assert(MProgram); const auto &ContextImplPtr = detail::getSyclObjImpl(MContext); const PluginPtr &Plugin = ContextImplPtr->getPlugin(); if (ContextImplPtr->getBackend() == backend::opencl) - Plugin->call(MProgram); - pi_native_handle NativeProgram = 0; - Plugin->call(MProgram, - &NativeProgram); + Plugin->call(urProgramRetain, MProgram); + ur_native_handle_t NativeProgram = 0; + Plugin->call(urProgramGetNativeHandle, MProgram, &NativeProgram); return NativeProgram; } @@ -304,7 +303,7 @@ class device_image_impl { try { if (MProgram) { const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); - Plugin->call(MProgram); + Plugin->call(urProgramRelease, MProgram); } if (MSpecConstsBuffer) { std::lock_guard Lock{MSpecConstAccessMtx}; @@ -396,7 +395,8 @@ class device_image_impl { std::vector MDevices; bundle_state MState; // Native program handler which this device image represents - sycl::detail::pi::PiProgram MProgram = nullptr; + ur_program_handle_t MProgram = nullptr; + // List of kernel ids available in this image, elements should be sorted // according to LessByNameComp std::shared_ptr> MKernelIDs; @@ -413,7 +413,7 @@ class device_image_impl { // Buffer containing binary blob which can have values of all specialization // constants in the image, it is using for storing non-native specialization // constants - sycl::detail::pi::PiMem MSpecConstsBuffer = nullptr; + ur_mem_handle_t MSpecConstsBuffer = nullptr; // Contains map of spec const names to their descriptions + offsets in // the MSpecConstsBlob std::map> MSpecConstSymMap; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index df87da17564e1..86dc21e6a15d0 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -17,71 +18,67 @@ namespace sycl { inline namespace _V1 { namespace detail { -device_impl::device_impl(pi_native_handle InteropDeviceHandle, +device_impl::device_impl(ur_native_handle_t InteropDeviceHandle, const PluginPtr &Plugin) : device_impl(InteropDeviceHandle, nullptr, nullptr, Plugin) {} -device_impl::device_impl(sycl::detail::pi::PiDevice Device, - PlatformImplPtr Platform) - : device_impl(reinterpret_cast(nullptr), Device, Platform, - Platform->getPlugin()) {} +/// Constructs a SYCL device instance using the provided +/// UR device instance. +device_impl::device_impl(ur_device_handle_t Device, PlatformImplPtr Platform) + : device_impl(0, Device, Platform, Platform->getPlugin()) {} -device_impl::device_impl(sycl::detail::pi::PiDevice Device, - const PluginPtr &Plugin) - : device_impl(reinterpret_cast(nullptr), Device, nullptr, - Plugin) {} +/// Constructs a SYCL device instance using the provided +/// UR device instance. +device_impl::device_impl(ur_device_handle_t Device, const PluginPtr &Plugin) + : device_impl(0, Device, nullptr, Plugin) {} -device_impl::device_impl(pi_native_handle InteropDeviceHandle, - sycl::detail::pi::PiDevice Device, - PlatformImplPtr Platform, const PluginPtr &Plugin) +device_impl::device_impl(ur_native_handle_t InteropDeviceHandle, + ur_device_handle_t Device, PlatformImplPtr Platform, + const PluginPtr &Plugin) : MDevice(Device), MDeviceHostBaseTime(std::make_pair(0, 0)) { - bool InteroperabilityConstructor = false; if (Device == nullptr) { assert(InteropDeviceHandle); - // Get PI device from the raw device handle. + // Get UR device from the raw device handle. // NOTE: this is for OpenCL interop only (and should go away). // With SYCL-2020 BE generalization "make" functions are used instead. - Plugin->call( - InteropDeviceHandle, nullptr, &MDevice); + Plugin->call(urDeviceCreateWithNativeHandle, InteropDeviceHandle, nullptr, + nullptr, &MDevice); InteroperabilityConstructor = true; } // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call( - MDevice, PI_DEVICE_INFO_TYPE, sizeof(sycl::detail::pi::PiDeviceType), - &MType, nullptr); + Plugin->call(urDeviceGetInfo, MDevice, UR_DEVICE_INFO_TYPE, + sizeof(ur_device_type_t), &MType, nullptr); // No need to set MRootDevice when MAlwaysRootDevice is true if ((Platform == nullptr) || !Platform->MAlwaysRootDevice) { // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call( - MDevice, PI_DEVICE_INFO_PARENT_DEVICE, - sizeof(sycl::detail::pi::PiDevice), &MRootDevice, nullptr); + Plugin->call(urDeviceGetInfo, MDevice, UR_DEVICE_INFO_PARENT_DEVICE, + sizeof(ur_device_handle_t), &MRootDevice, nullptr); } if (!InteroperabilityConstructor) { // TODO catch an exception and put it to list of asynchronous exceptions // Interoperability Constructor already calls DeviceRetain in - // piextDeviceFromNative. - Plugin->call(MDevice); + // urDeviceCreateWithNativeHandle. + Plugin->call(urDeviceRetain, MDevice); } // set MPlatform if (!Platform) { - Platform = platform_impl::getPlatformFromPiDevice(MDevice, Plugin); + Platform = platform_impl::getPlatformFromUrDevice(MDevice, Plugin); } MPlatform = Platform; MIsAssertFailSupported = - has_extension(PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT); + has_extension(UR_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT); } device_impl::~device_impl() { // TODO catch an exception and put it to list of asynchronous exceptions const PluginPtr &Plugin = getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck(MDevice); + ur_result_t Err = Plugin->call_nocheck(urDeviceRelease, MDevice); __SYCL_CHECK_OCL_CODE_NO_EXC(Err); } @@ -94,8 +91,8 @@ bool device_impl::is_affinity_supported( cl_device_id device_impl::get() const { // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call(MDevice); - return pi::cast(getNative()); + getPlugin()->call(urDeviceRetain, MDevice); + return ur::cast(getNative()); } platform device_impl::get_platform() const { @@ -165,7 +162,7 @@ device_impl::get_backend_info() const { bool device_impl::has_extension(const std::string &ExtensionName) const { std::string AllExtensionNames = - get_device_info_string(PiInfoCode::value); + get_device_info_string(UR_DEVICE_INFO_EXTENSIONS); return (AllExtensionNames.find(ExtensionName) != std::string::npos); } @@ -175,16 +172,15 @@ bool device_impl::is_partition_supported(info::partition_property Prop) const { Prop) != SupportedProperties.end(); } -std::vector -device_impl::create_sub_devices(const cl_device_partition_property *Properties, - size_t SubDevicesCount) const { - - std::vector SubDevices(SubDevicesCount); - pi_uint32 ReturnedSubDevices = 0; +std::vector device_impl::create_sub_devices( + const ur_device_partition_properties_t *Properties, + size_t SubDevicesCount) const { + std::vector SubDevices(SubDevicesCount); + uint32_t ReturnedSubDevices = 0; const PluginPtr &Plugin = getPlugin(); - Plugin->call( - MDevice, Properties, SubDevicesCount, SubDevices.data(), - &ReturnedSubDevices); + Plugin->call(urDevicePartition, MDevice, Properties, + SubDevicesCount, SubDevices.data(), + &ReturnedSubDevices); if (ReturnedSubDevices != SubDevicesCount) { throw sycl::exception( errc::invalid, @@ -196,9 +192,9 @@ device_impl::create_sub_devices(const cl_device_partition_property *Properties, // std::vector res; std::for_each(SubDevices.begin(), SubDevices.end(), - [&res, this](const sycl::detail::pi::PiDevice &a_pi_device) { + [&res, this](const ur_device_handle_t &a_ur_device) { device sycl_device = detail::createSyclObjFromImpl( - MPlatform->getOrMakeDeviceImpl(a_pi_device, MPlatform)); + MPlatform->getOrMakeDeviceImpl(a_ur_device, MPlatform)); res.push_back(sycl_device); }); return res; @@ -218,10 +214,17 @@ std::vector device_impl::create_sub_devices(size_t ComputeUnits) const { "Total counts exceed max compute units"); size_t SubDevicesCount = MaxComputeUnits / ComputeUnits; - const pi_device_partition_property Properties[3] = { - PI_DEVICE_PARTITION_EQUALLY, (pi_device_partition_property)ComputeUnits, - 0}; - return create_sub_devices(Properties, SubDevicesCount); + + ur_device_partition_property_t Prop{}; + Prop.type = UR_DEVICE_PARTITION_EQUALLY; + Prop.value.count = static_cast(ComputeUnits); + + ur_device_partition_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES; + Properties.PropCount = 1; + Properties.pProperties = &Prop; + + return create_sub_devices(&Properties, SubDevicesCount); } std::vector @@ -232,20 +235,24 @@ device_impl::create_sub_devices(const std::vector &Counts) const { "Device does not support " "sycl::info::partition_property::partition_by_counts."); } - static const pi_device_partition_property P[] = { - PI_DEVICE_PARTITION_BY_COUNTS, PI_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0}; - std::vector Properties(P, P + 3); + + std::vector Props{}; // Fill the properties vector with counts and validate it - auto It = Properties.begin() + 1; size_t TotalCounts = 0; size_t NonZeroCounts = 0; for (auto Count : Counts) { TotalCounts += Count; NonZeroCounts += (Count != 0) ? 1 : 0; - It = Properties.insert(It, Count); + Props.push_back(ur_device_partition_property_t{ + UR_DEVICE_PARTITION_BY_COUNTS, {static_cast(Count)}}); } + ur_device_partition_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES; + Properties.pProperties = Props.data(); + Properties.PropCount = Props.size(); + // If the number of non-zero values in counts exceeds the device’s maximum // number of sub devices (as returned by info::device:: // partition_max_sub_devices) an exception with the errc::invalid @@ -262,7 +269,7 @@ device_impl::create_sub_devices(const std::vector &Counts) const { throw sycl::exception(errc::invalid, "Total counts exceed max compute units"); - return create_sub_devices(Properties.data(), Counts.size()); + return create_sub_devices(&Properties, Counts.size()); } std::vector device_impl::create_sub_devices( @@ -279,16 +286,23 @@ std::vector device_impl::create_sub_devices( "Device does not support " + affinityDomainToString(AffinityDomain) + "."); } - const pi_device_partition_property Properties[3] = { - PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - (pi_device_partition_property)AffinityDomain, 0}; - pi_uint32 SubDevicesCount = 0; + ur_device_partition_property_t Prop; + Prop.type = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + Prop.value.affinity_domain = + static_cast(AffinityDomain); + + ur_device_partition_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES; + Properties.PropCount = 1; + Properties.pProperties = &Prop; + + uint32_t SubDevicesCount = 0; const PluginPtr &Plugin = getPlugin(); - Plugin->call( - MDevice, Properties, 0, nullptr, &SubDevicesCount); + Plugin->call(urDevicePartition, MDevice, &Properties, 0, + nullptr, &SubDevicesCount); - return create_sub_devices(Properties, SubDevicesCount); + return create_sub_devices(&Properties, SubDevicesCount); } std::vector device_impl::create_sub_devices() const { @@ -300,23 +314,28 @@ std::vector device_impl::create_sub_devices() const { "sycl::info::partition_property::ext_intel_partition_by_cslice."); } - const pi_device_partition_property Properties[2] = { - PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE, 0}; + ur_device_partition_property_t Prop; + Prop.type = UR_DEVICE_PARTITION_BY_CSLICE; - pi_uint32 SubDevicesCount = 0; + ur_device_partition_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES; + Properties.pProperties = &Prop; + Properties.PropCount = 1; + + uint32_t SubDevicesCount = 0; const PluginPtr &Plugin = getPlugin(); - Plugin->call( - MDevice, Properties, 0, nullptr, &SubDevicesCount); + Plugin->call(urDevicePartition, MDevice, &Properties, 0, nullptr, + &SubDevicesCount); - return create_sub_devices(Properties, SubDevicesCount); + return create_sub_devices(&Properties, SubDevicesCount); } -pi_native_handle device_impl::getNative() const { +ur_native_handle_t device_impl::getNative() const { auto Plugin = getPlugin(); if (getBackend() == backend::opencl) - Plugin->call(getHandleRef()); - pi_native_handle Handle; - Plugin->call(getHandleRef(), &Handle); + Plugin->call(urDeviceRetain, getHandleRef()); + ur_native_handle_t Handle; + Plugin->call(urDeviceGetNativeHandle, getHandleRef(), &Handle); return Handle; } @@ -367,78 +386,80 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_oneapi_cuda_cluster_group: return get_info(); case aspect::usm_atomic_host_allocations: - return (get_device_info_impl:: get(MPlatform->getDeviceImpl(MDevice)) & - PI_USM_CONCURRENT_ATOMIC_ACCESS); + UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS); case aspect::usm_shared_allocations: return get_info(); case aspect::usm_atomic_shared_allocations: - return (get_device_info_impl:: get(MPlatform->getDeviceImpl(MDevice)) & - PI_USM_CONCURRENT_ATOMIC_ACCESS); + UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS); case aspect::usm_restricted_shared_allocations: return get_info(); case aspect::usm_system_allocations: return get_info(); case aspect::ext_intel_device_id: - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_DEVICE_ID, 0, nullptr, &return_size) == - PI_SUCCESS; + return getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_DEVICE_ID, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_pci_address: - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_PCI_ADDRESS, 0, nullptr, &return_size) == - PI_SUCCESS; + return getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_PCI_ADDRESS, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_gpu_eu_count: - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_GPU_EU_COUNT, 0, nullptr, - &return_size) == PI_SUCCESS; + return getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_GPU_EU_COUNT, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_gpu_eu_simd_width: - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, 0, nullptr, - &return_size) == PI_SUCCESS; + return getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH, 0, + nullptr, &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_gpu_slices: - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_GPU_SLICES, 0, nullptr, &return_size) == - PI_SUCCESS; + return getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_GPU_EU_SLICES, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_gpu_subslices_per_slice: - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, 0, nullptr, - &return_size) == PI_SUCCESS; + return getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, + 0, nullptr, &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_gpu_eu_count_per_subslice: - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, 0, nullptr, - &return_size) == PI_SUCCESS; + return getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, + 0, nullptr, + &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_gpu_hw_threads_per_eu: - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, 0, nullptr, - &return_size) == PI_SUCCESS; + return getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU, + 0, nullptr, &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_free_memory: - return getPlugin()->call_nocheck( - MDevice, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, 0, nullptr, - &return_size) == PI_SUCCESS; + return getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_GLOBAL_MEM_FREE, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_memory_clock_rate: - return getPlugin()->call_nocheck( - MDevice, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, 0, nullptr, - &return_size) == PI_SUCCESS; + return getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_MEMORY_CLOCK_RATE, 0, + nullptr, &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_memory_bus_width: - return getPlugin()->call_nocheck( - MDevice, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, 0, nullptr, - &return_size) == PI_SUCCESS; + return getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_MEMORY_BUS_WIDTH, 0, + nullptr, &return_size) == UR_RESULT_SUCCESS; case aspect::ext_intel_device_info_uuid: { - auto Result = getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_UUID, 0, nullptr, &return_size); - if (Result != PI_SUCCESS) { + auto Result = + getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, UR_DEVICE_INFO_UUID, + 0, nullptr, &return_size); + if (Result != UR_RESULT_SUCCESS) { return false; } assert(return_size <= 16); unsigned char UUID[16]; - return getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_UUID, 16 * sizeof(unsigned char), UUID, - nullptr) == PI_SUCCESS; + return getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_UUID, + 16 * sizeof(unsigned char), UUID, nullptr) == UR_RESULT_SUCCESS; } case aspect::ext_intel_max_mem_bandwidth: // currently not supported @@ -450,190 +471,203 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_oneapi_cuda_async_barrier: { int async_barrier_supported; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER, sizeof(int), - &async_barrier_supported, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_ASYNC_BARRIER, sizeof(int), + &async_barrier_supported, nullptr) == UR_RESULT_SUCCESS; return call_successful && async_barrier_supported; } case aspect::ext_intel_legacy_image: { - pi_bool legacy_image_support = PI_FALSE; + ur_bool_t legacy_image_support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_DEVICE_INFO_IMAGE_SUPPORT, sizeof(pi_bool), - &legacy_image_support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_IMAGE_SUPPORTED, + sizeof(ur_bool_t), &legacy_image_support, + nullptr) == UR_RESULT_SUCCESS; return call_successful && legacy_image_support; } case aspect::ext_oneapi_bindless_images: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP, + sizeof(ur_bool_t), &support, + nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_images_shared_usm: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_images_1d_usm: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_images_2d_usm: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_interop_memory_import: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP, sizeof(ur_bool_t), + &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_interop_semaphore_import: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_mipmap: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_SUPPORT, sizeof(pi_bool), - &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_mipmap_anisotropy: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck(urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP, + sizeof(ur_bool_t), &support, + nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_mipmap_level_reference: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_cubemap: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SUPPORT, sizeof(pi_bool), - &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_cubemap_seamless_filtering: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, - PI_EXT_ONEAPI_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_image_array: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_IMAGE_ARRAY_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_IMAGE_ARRAY_SUPPORT_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_unique_addressing_per_dim: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_images_sample_1d_usm: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLE_1D_USM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_bindless_images_sample_2d_usm: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLE_2D_USM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_intel_esimd: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT, sizeof(pi_bool), - &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_ESIMD_SUPPORT, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_ballot_group: @@ -673,37 +707,39 @@ bool device_impl::has(aspect Aspect) const { return components.size() >= 2; } case aspect::ext_oneapi_is_component: { - typename sycl_to_pi::type Result = nullptr; - bool CallSuccessful = getPlugin()->call_nocheck( - getHandleRef(), - PiInfoCode::value, - sizeof(Result), &Result, nullptr) == PI_SUCCESS; + typename sycl_to_ur::type Result; + bool CallSuccessful = + getPlugin()->call_nocheck( + urDeviceGetInfo, getHandleRef(), + UrInfoCode::value, + sizeof(Result), &Result, nullptr) == UR_RESULT_SUCCESS; return CallSuccessful && Result != nullptr; } case aspect::ext_oneapi_graph: { - pi_bool SupportsCommandBufferUpdate = false; + bool SupportsCommandBufferUpdate = false; bool CallSuccessful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT, + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate, - nullptr) == PI_SUCCESS; + nullptr) == UR_RESULT_SUCCESS; if (!CallSuccessful) { - return PI_FALSE; + return false; } return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate; } case aspect::ext_oneapi_limited_graph: { - pi_bool SupportsCommandBuffers = false; + bool SupportsCommandBuffers = false; bool CallSuccessful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_SUPPORT, + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP, sizeof(SupportsCommandBuffers), &SupportsCommandBuffers, - nullptr) == PI_SUCCESS; + nullptr) == UR_RESULT_SUCCESS; if (!CallSuccessful) { - return PI_FALSE; + return false; } return SupportsCommandBuffers; @@ -715,19 +751,20 @@ bool device_impl::has(aspect Aspect) const { be == sycl::backend::opencl; } case aspect::ext_oneapi_queue_profiling_tag: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, + UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP, sizeof(ur_bool_t), + &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } case aspect::ext_oneapi_virtual_mem: { - pi_bool support = PI_FALSE; + ur_bool_t support = false; bool call_successful = - getPlugin()->call_nocheck( - MDevice, PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, - sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + getPlugin()->call_nocheck( + urDeviceGetInfo, MDevice, UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT, + sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; } } @@ -781,34 +818,33 @@ uint64_t device_impl::getCurrentDeviceTime() { // If getCurrentDeviceTime is called for the first time or we have to refresh. if (!MDeviceHostBaseTime.second || Diff > TimeTillRefresh) { const auto &Plugin = getPlugin(); - auto Result = - Plugin->call_nocheck( - MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); - // We have to remember base host timestamp right after PI call and it is + auto Result = Plugin->call_nocheck(urDeviceGetGlobalTimestamps, MDevice, + &MDeviceHostBaseTime.first, + &MDeviceHostBaseTime.second); + // We have to remember base host timestamp right after UR call and it is // going to be used for calculation of the device timestamp at the next // getCurrentDeviceTime() call. We need to do it here because getPlugin() - // and piGetDeviceAndHostTimer calls may take significant amount of time, + // and urDeviceGetGlobalTimestamps calls may take significant amount of time, // for example on the first call to getPlugin plugins may need to be // initialized. If we use timestamp from the beginning of the function then // the difference between host timestamps of the current // getCurrentDeviceTime and the next getCurrentDeviceTime will be incorrect // because it will include execution time of the code before we get device - // timestamp from piGetDeviceAndHostTimer. + // timestamp from urDeviceGetGlobalTimestamps. HostTime = duration_cast(steady_clock::now().time_since_epoch()) .count(); - if (Result == PI_ERROR_INVALID_OPERATION) { - char *p = nullptr; - Plugin->call_nocheck(&p); - std::string errorMsg(p ? p : ""); - throw detail::set_pi_error( + if (Result == UR_RESULT_ERROR_INVALID_OPERATION) { + // NOTE(UR port): Removed the call to GetLastError because we shouldn't + // be calling it after ERROR_INVALID_OPERATION: there is no + // adapter-specific error. + throw detail::set_ur_error( sycl::exception( make_error_code(errc::feature_not_supported), - "Device and/or backend does not support querying timestamp: " + - errorMsg), - PI_ERROR_INVALID_OPERATION); + "Device and/or backend does not support querying timestamp."), + UR_RESULT_ERROR_INVALID_OPERATION); } else { - Plugin->checkPiResult(Result); + Plugin->checkUrResult(Result); } // Until next sync we will compute device time based on the host time // returned in HostTime, so make this our base host time. @@ -821,10 +857,9 @@ uint64_t device_impl::getCurrentDeviceTime() { bool device_impl::isGetDeviceAndHostTimerSupported() { const auto &Plugin = getPlugin(); uint64_t DeviceTime = 0, HostTime = 0; - auto Result = - Plugin->call_nocheck( - MDevice, &DeviceTime, &HostTime); - return Result != PI_ERROR_INVALID_OPERATION; + auto Result = Plugin->call_nocheck(urDeviceGetGlobalTimestamps, MDevice, + &DeviceTime, &HostTime); + return Result != UR_RESULT_ERROR_INVALID_OPERATION; } bool device_impl::extOneapiCanCompile( diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index a3344ecdd3870..92c55a30b41b9 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -39,17 +39,15 @@ class device_impl { device_impl(); /// Constructs a SYCL device instance using the provided raw device handle. - explicit device_impl(pi_native_handle, const PluginPtr &Plugin); + explicit device_impl(ur_native_handle_t, const PluginPtr &Plugin); /// Constructs a SYCL device instance using the provided - /// PI device instance. - explicit device_impl(sycl::detail::pi::PiDevice Device, - PlatformImplPtr Platform); + /// UR device instance. + explicit device_impl(ur_device_handle_t Device, PlatformImplPtr Platform); /// Constructs a SYCL device instance using the provided - /// PI device instance. - explicit device_impl(sycl::detail::pi::PiDevice Device, - const PluginPtr &Plugin); + /// UR device instance. + explicit device_impl(ur_device_handle_t Device, const PluginPtr &Plugin); ~device_impl(); @@ -59,39 +57,39 @@ class device_impl { /// requirements described in 4.3.1. cl_device_id get() const; - /// Get reference to PI device + /// Get reference to UR device /// /// For host device an exception is thrown /// - /// \return non-constant reference to PI device - sycl::detail::pi::PiDevice &getHandleRef() { return MDevice; } + /// \return non-constant reference to UR device + ur_device_handle_t &getHandleRef() { return MDevice; } - /// Get constant reference to PI device + /// Get constant reference to UR device /// /// For host device an exception is thrown /// - /// \return constant reference to PI device - const sycl::detail::pi::PiDevice &getHandleRef() const { return MDevice; } + /// \return constant reference to UR device + const ur_device_handle_t &getHandleRef() const { return MDevice; } /// Check if device is a CPU device /// /// \return true if SYCL device is a CPU device - bool is_cpu() const { return MType == PI_DEVICE_TYPE_CPU; } + bool is_cpu() const { return MType == UR_DEVICE_TYPE_CPU; } /// Check if device is a GPU device /// /// \return true if SYCL device is a GPU device - bool is_gpu() const { return MType == PI_DEVICE_TYPE_GPU; } + bool is_gpu() const { return MType == UR_DEVICE_TYPE_GPU; } /// Check if device is an accelerator device /// /// \return true if SYCL device is an accelerator device - bool is_accelerator() const { return MType == PI_DEVICE_TYPE_ACC; } + bool is_accelerator() const { return MType == UR_DEVICE_TYPE_FPGA; } /// Return device type /// /// \return the type of the device - sycl::detail::pi::PiDeviceType get_device_type() const { return MType; } + ur_device_type_t get_device_type() const { return MType; } /// Get associated SYCL platform /// @@ -115,7 +113,7 @@ class device_impl { bool has_extension(const std::string &ExtensionName) const; std::vector - create_sub_devices(const cl_device_partition_property *Properties, + create_sub_devices(const ur_device_partition_properties_t *Properties, size_t SubDevicesCount) const; /// Partition device into sub devices @@ -201,7 +199,7 @@ class device_impl { /// Gets the native handle of the SYCL device. /// /// \return a native handle. - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; /// Indicates if the SYCL device has the given feature. /// @@ -290,19 +288,19 @@ class device_impl { PlatformImplPtr getPlatformImpl() const { return MPlatform; } /// Get device info string - std::string - get_device_info_string(sycl::detail::pi::PiDeviceInfo InfoCode) const; + std::string get_device_info_string(ur_device_info_t InfoCode) const; /// Get device architecture ext::oneapi::experimental::architecture getDeviceArch() const; private: - explicit device_impl(pi_native_handle InteropDevice, - sycl::detail::pi::PiDevice Device, - PlatformImplPtr Platform, const PluginPtr &Plugin); - sycl::detail::pi::PiDevice MDevice = 0; - sycl::detail::pi::PiDeviceType MType; - sycl::detail::pi::PiDevice MRootDevice = nullptr; + explicit device_impl(ur_native_handle_t InteropDevice, + ur_device_handle_t Device, PlatformImplPtr Platform, + const PluginPtr &Plugin); + + ur_device_handle_t MDevice = 0; + ur_device_type_t MType; + ur_device_handle_t MRootDevice = nullptr; PlatformImplPtr MPlatform; bool MIsAssertFailSupported = false; mutable std::string MDeviceName; diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index abe8b12786e65..899b879fa2a42 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -34,51 +34,52 @@ namespace sycl { inline namespace _V1 { namespace detail { -inline std::vector read_fp_bitfield(pi_device_fp_config bits) { +inline std::vector +read_fp_bitfield(ur_device_fp_capability_flags_t bits) { std::vector result; - if (bits & PI_FP_DENORM) + if (bits & UR_DEVICE_FP_CAPABILITY_FLAG_DENORM) result.push_back(info::fp_config::denorm); - if (bits & PI_FP_INF_NAN) + if (bits & UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN) result.push_back(info::fp_config::inf_nan); - if (bits & PI_FP_ROUND_TO_NEAREST) + if (bits & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST) result.push_back(info::fp_config::round_to_nearest); - if (bits & PI_FP_ROUND_TO_ZERO) + if (bits & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO) result.push_back(info::fp_config::round_to_zero); - if (bits & PI_FP_ROUND_TO_INF) + if (bits & UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF) result.push_back(info::fp_config::round_to_inf); - if (bits & PI_FP_FMA) + if (bits & UR_DEVICE_FP_CAPABILITY_FLAG_FMA) result.push_back(info::fp_config::fma); - if (bits & PI_FP_SOFT_FLOAT) + if (bits & UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT) result.push_back(info::fp_config::soft_float); - if (bits & PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) + if (bits & UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT) result.push_back(info::fp_config::correctly_rounded_divide_sqrt); return result; } inline std::vector -read_domain_bitfield(pi_device_affinity_domain bits) { +read_domain_bitfield(ur_device_affinity_domain_flags_t bits) { std::vector result; - if (bits & PI_DEVICE_AFFINITY_DOMAIN_NUMA) + if (bits & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) result.push_back(info::partition_affinity_domain::numa); - if (bits & PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE) + if (bits & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) result.push_back(info::partition_affinity_domain::L4_cache); - if (bits & PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE) + if (bits & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) result.push_back(info::partition_affinity_domain::L3_cache); - if (bits & PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE) + if (bits & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) result.push_back(info::partition_affinity_domain::L2_cache); - if (bits & PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE) + if (bits & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) result.push_back(info::partition_affinity_domain::L1_cache); - if (bits & PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE) + if (bits & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) result.push_back(info::partition_affinity_domain::next_partitionable); return result; } inline std::vector -read_execution_bitfield(pi_device_exec_capabilities bits) { +read_execution_bitfield(ur_device_exec_capability_flag_t bits) { std::vector result; - if (bits & PI_EXEC_KERNEL) + if (bits & UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL) result.push_back(info::execution_capability::exec_kernel); - if (bits & PI_EXEC_NATIVE_KERNEL) + if (bits & UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL) result.push_back(info::execution_capability::exec_native_kernel); return result; } @@ -109,18 +110,18 @@ affinityDomainToString(info::partition_affinity_domain AffinityDomain) { } } -// Mapping expected SYCL return types to those returned by PI calls -template struct sycl_to_pi { +// Mapping expected SYCL return types to those returned by UR calls +template struct sycl_to_ur { using type = T; }; -template <> struct sycl_to_pi { - using type = pi_bool; +template <> struct sycl_to_ur { + using type = ur_bool_t; }; -template <> struct sycl_to_pi { - using type = sycl::detail::pi::PiDevice; +template <> struct sycl_to_ur { + using type = ur_device_handle_t; }; -template <> struct sycl_to_pi { - using type = sycl::detail::pi::PiPlatform; +template <> struct sycl_to_ur { + using type = ur_platform_handle_t; }; // Mapping fp_config device info types to the values used to check fp support @@ -137,13 +138,13 @@ template <> struct check_fp_support { // Structs for emulating function template partial specialization // Default template for the general case // TODO: get rid of remaining uses of OpenCL directly -// + template struct get_device_info_impl { static ReturnT get(const DeviceImplPtr &Dev) { - typename sycl_to_pi::type result; - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, sizeof(result), &result, - nullptr); + typename sycl_to_ur::type result; + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, sizeof(result), &result, + nullptr); return ReturnT(result); } }; @@ -151,11 +152,11 @@ template struct get_device_info_impl { // Specialization for platform template struct get_device_info_impl { static platform get(const DeviceImplPtr &Dev) { - typename sycl_to_pi::type result; - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, sizeof(result), &result, - nullptr); - // TODO: Change PiDevice to device_impl. + typename sycl_to_ur::type result; + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, sizeof(result), &result, + nullptr); + // TODO: Change UrDevice to device_impl. // Use the Plugin from the device_impl class after plugin details // are added to the class. return createSyclObjFromImpl( @@ -165,17 +166,17 @@ template struct get_device_info_impl { // Helper function to allow using the specialization of get_device_info_impl // for string return type in other specializations. -inline std::string device_impl::get_device_info_string( - sycl::detail::pi::PiDeviceInfo InfoCode) const { +inline std::string +device_impl::get_device_info_string(ur_device_info_t InfoCode) const { size_t resultSize = 0; - getPlugin()->call(getHandleRef(), InfoCode, 0, - nullptr, &resultSize); + getPlugin()->call(urDeviceGetInfo, getHandleRef(), InfoCode, 0, nullptr, + &resultSize); if (resultSize == 0) { return std::string(); } std::unique_ptr result(new char[resultSize]); - getPlugin()->call( - getHandleRef(), InfoCode, resultSize, result.get(), nullptr); + getPlugin()->call(urDeviceGetInfo, getHandleRef(), InfoCode, resultSize, + result.get(), nullptr); return std::string(result.get()); } @@ -183,7 +184,7 @@ inline std::string device_impl::get_device_info_string( // Specialization for string return type, variable return size template struct get_device_info_impl { static std::string get(const DeviceImplPtr &Dev) { - return Dev->get_device_info_string(PiInfoCode::value); + return Dev->get_device_info_string(UrInfoCode::value); } }; @@ -203,10 +204,10 @@ struct get_device_info_impl, Param> { typename check_fp_support::type>::get(Dev)) { return {}; } - cl_device_fp_config result; - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, sizeof(result), &result, - nullptr); + ur_device_fp_capability_flags_t result; + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, sizeof(result), &result, + nullptr); return read_fp_bitfield(result); } }; @@ -215,7 +216,7 @@ struct get_device_info_impl, Param> { template <> struct get_device_info_impl { static std::string get(const DeviceImplPtr &Dev) { return Dev->get_device_info_string( - PiInfoCode::value); + UrInfoCode::value); } }; @@ -224,79 +225,79 @@ template <> struct get_device_info_impl, info::device::single_fp_config> { static std::vector get(const DeviceImplPtr &Dev) { - pi_device_fp_config result; - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, - sizeof(result), &result, nullptr); + ur_device_fp_capability_flags_t result; + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(result), &result, nullptr); return read_fp_bitfield(result); } }; -// Specialization for queue_profiling. In addition to pi_queue level profiling, -// piGetDeviceAndHostTimer is not supported, command_submit, command_start, +// Specialization for queue_profiling. In addition to ur_queue level profiling, +// urDeviceGetGlobalTimestamps is not supported, command_submit, command_start, // command_end will be calculated. See MFallbackProfiling template <> struct get_device_info_impl { static bool get(const DeviceImplPtr &Dev) { - pi_queue_properties Properties; - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, - sizeof(Properties), &Properties, nullptr); - return Properties & PI_QUEUE_FLAG_PROFILING_ENABLE; + ur_queue_flags_t Properties; + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(Properties), &Properties, nullptr); + return Properties & UR_QUEUE_FLAG_PROFILING_ENABLE; } }; -// Specialization for atomic_memory_order_capabilities, PI returns a bitfield +// Specialization for atomic_memory_order_capabilities, UR returns a bitfield template <> struct get_device_info_impl, info::device::atomic_memory_order_capabilities> { static std::vector get(const DeviceImplPtr &Dev) { - pi_memory_order_capabilities result; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_memory_order_capabilities), &result, nullptr); + ur_memory_order_capability_flag_t result; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(result), &result, nullptr); return readMemoryOrderBitfield(result); } }; -// Specialization for atomic_fence_order_capabilities, PI returns a bitfield +// Specialization for atomic_fence_order_capabilities, UR returns a bitfield template <> struct get_device_info_impl, info::device::atomic_fence_order_capabilities> { static std::vector get(const DeviceImplPtr &Dev) { - pi_memory_order_capabilities result; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_memory_order_capabilities), &result, nullptr); + ur_memory_order_capability_flag_t result; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(result), &result, nullptr); return readMemoryOrderBitfield(result); } }; -// Specialization for atomic_memory_scope_capabilities, PI returns a bitfield +// Specialization for atomic_memory_scope_capabilities, UR returns a bitfield template <> struct get_device_info_impl, info::device::atomic_memory_scope_capabilities> { static std::vector get(const DeviceImplPtr &Dev) { - pi_memory_scope_capabilities result; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_memory_scope_capabilities), &result, nullptr); + size_t result; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(result), &result, nullptr); return readMemoryScopeBitfield(result); } }; -// Specialization for atomic_fence_scope_capabilities, PI returns a bitfield +// Specialization for atomic_fence_scope_capabilities, UR returns a bitfield template <> struct get_device_info_impl, info::device::atomic_fence_scope_capabilities> { static std::vector get(const DeviceImplPtr &Dev) { - pi_memory_scope_capabilities result; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_memory_scope_capabilities), &result, nullptr); + size_t result; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(result), &result, nullptr); return readMemoryScopeBitfield(result); } }; @@ -307,12 +308,11 @@ struct get_device_info_impl { static bool get(const DeviceImplPtr &Dev) { bool result = false; if (Dev->getBackend() == backend::ext_oneapi_cuda) { - sycl::detail::pi::PiResult Err = - Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(result), &result, nullptr); - if (Err != PI_SUCCESS) { + auto Err = Dev->getPlugin()->call_nocheck( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(result), &result, nullptr); + if (Err != UR_RESULT_SUCCESS) { return false; } } @@ -325,10 +325,10 @@ template <> struct get_device_info_impl, info::device::execution_capabilities> { static std::vector get(const DeviceImplPtr &Dev) { - pi_device_exec_capabilities result; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode::value, sizeof(result), + ur_device_exec_capability_flag_t result; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, sizeof(result), &result, nullptr); return read_execution_bitfield(result); } @@ -340,7 +340,7 @@ struct get_device_info_impl, info::device::built_in_kernel_ids> { static std::vector get(const DeviceImplPtr &Dev) { std::string result = Dev->get_device_info_string( - PiInfoCode::value); + UrInfoCode::value); auto names = split_string(result, ';'); std::vector ids; @@ -358,7 +358,7 @@ struct get_device_info_impl, info::device::built_in_kernels> { static std::vector get(const DeviceImplPtr &Dev) { std::string result = Dev->get_device_info_string( - PiInfoCode::value); + UrInfoCode::value); return split_string(result, ';'); } }; @@ -391,29 +391,28 @@ template <> struct get_device_info_impl, info::device::partition_properties> { static std::vector get(const DeviceImplPtr &Dev) { - auto info_partition = PiInfoCode::value; + auto info_partition = UrInfoCode::value; const auto &Plugin = Dev->getPlugin(); size_t resultSize; - Plugin->call( - Dev->getHandleRef(), info_partition, 0, nullptr, &resultSize); + Plugin->call(urDeviceGetInfo, Dev->getHandleRef(), info_partition, 0, + nullptr, &resultSize); - size_t arrayLength = resultSize / sizeof(cl_device_partition_property); + size_t arrayLength = resultSize / sizeof(ur_device_partition_t); if (arrayLength == 0) { return {}; } - std::unique_ptr arrayResult( - new cl_device_partition_property[arrayLength]); - Plugin->call(Dev->getHandleRef(), - info_partition, resultSize, - arrayResult.get(), nullptr); + std::unique_ptr arrayResult( + new ur_device_partition_t[arrayLength]); + Plugin->call(urDeviceGetInfo, Dev->getHandleRef(), info_partition, + resultSize, arrayResult.get(), nullptr); std::vector result; for (size_t i = 0; i < arrayLength; ++i) { // OpenCL extensions may have partition_properties that // are not yet defined for SYCL (eg. CL_DEVICE_PARTITION_BY_NAMES_INTEL) info::partition_property pp( - static_cast(arrayResult[i])); + info::ConvertPartitionProperty(arrayResult[i])); if (is_sycl_partition_property(pp)) result.push_back(pp); } @@ -427,40 +426,42 @@ struct get_device_info_impl, info::device::partition_affinity_domains> { static std::vector get(const DeviceImplPtr &Dev) { - pi_device_affinity_domain result; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode::value, + ur_device_affinity_domain_flags_t result; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, sizeof(result), &result, nullptr); return read_domain_bitfield(result); } }; -// Specialization for partition type affinity domain, OpenCL can return other +// Specialization for partition type affinity domain, UR can return other // partition properties instead template <> struct get_device_info_impl { static info::partition_affinity_domain get(const DeviceImplPtr &Dev) { - size_t resultSize; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode::value, 0, - nullptr, &resultSize); - if (resultSize != 1) { + std::vector PartitionProperties; + size_t PropertiesSize = 0; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, 0, + nullptr, &PropertiesSize); + if (PropertiesSize == 0) return info::partition_affinity_domain::not_applicable; - } - cl_device_partition_property result; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(result), &result, nullptr); - if (result == PI_DEVICE_AFFINITY_DOMAIN_NUMA || - result == PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE || - result == PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE || - result == PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE || - result == PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE) { - return info::partition_affinity_domain(result); + + PartitionProperties.resize(PropertiesSize / + sizeof(ur_device_partition_property_t)); + + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + PropertiesSize, PartitionProperties.data(), nullptr); + + for (const auto &PartitionProp : PartitionProperties) { + if (PartitionProp.type != UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) + continue; + return info::ConvertAffinityDomain(PartitionProp.value.affinity_domain); } return info::partition_affinity_domain::not_applicable; @@ -472,39 +473,42 @@ template <> struct get_device_info_impl { static info::partition_property get(const DeviceImplPtr &Dev) { - size_t resultSize; - Dev->getPlugin()->call( - Dev->getHandleRef(), PI_DEVICE_INFO_PARTITION_TYPE, 0, nullptr, - &resultSize); - if (!resultSize) + std::vector PartitionProperties; + size_t PropertiesSize = 0; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, 0, + nullptr, &PropertiesSize); + if (PropertiesSize == 0) return info::partition_property::no_partition; - size_t arrayLength = resultSize / sizeof(cl_device_partition_property); + PartitionProperties.resize(PropertiesSize / + sizeof(ur_device_partition_property_t)); - std::unique_ptr arrayResult( - new cl_device_partition_property[arrayLength]); - Dev->getPlugin()->call( - Dev->getHandleRef(), PI_DEVICE_INFO_PARTITION_TYPE, resultSize, - arrayResult.get(), nullptr); - if (!arrayResult[0]) - return info::partition_property::no_partition; - return info::partition_property(arrayResult[0]); + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + PropertiesSize, PartitionProperties.data(), nullptr); + // The old UR implementation also just checked the first element, is that + // correct? + return info::ConvertPartitionProperty(PartitionProperties[0].type); } }; + // Specialization for supported subgroup sizes template <> struct get_device_info_impl, info::device::sub_group_sizes> { static std::vector get(const DeviceImplPtr &Dev) { size_t resultSize = 0; - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, - 0, nullptr, &resultSize); + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, 0, + nullptr, &resultSize); std::vector result32(resultSize / sizeof(uint32_t)); - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, - resultSize, result32.data(), nullptr); + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + resultSize, result32.data(), nullptr); std::vector result; result.reserve(result32.size()); @@ -558,9 +562,9 @@ struct get_device_info_impl, info::device::max_work_item_sizes> { static range get(const DeviceImplPtr &Dev) { size_t result[3]; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode>::value, + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode>::value, sizeof(result), &result, nullptr); return construct_range(result); } @@ -694,9 +698,9 @@ struct get_device_info_impl< return ext::oneapi::experimental::architecture::unknown; }; uint32_t DeviceIp; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode< + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::architecture>::value, sizeof(DeviceIp), &DeviceIp, nullptr); return MapArchIDToArchName(DeviceIp); @@ -710,13 +714,13 @@ struct get_device_info_impl< return ext::oneapi::experimental::architecture::unknown; }; size_t ResultSize = 0; - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, 0, - nullptr, &ResultSize); + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, 0, + nullptr, &ResultSize); std::unique_ptr DeviceArch(new char[ResultSize]); - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, - ResultSize, DeviceArch.get(), nullptr); + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + ResultSize, DeviceArch.get(), nullptr); std::string DeviceArchCopy(DeviceArch.get()); std::string DeviceArchSubstr = DeviceArchCopy.substr(0, DeviceArchCopy.find(":")); @@ -730,9 +734,9 @@ struct get_device_info_impl< return sycl::ext::oneapi::experimental::architecture::x86_64; }; uint32_t DeviceIp; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode< + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::architecture>::value, sizeof(DeviceIp), &DeviceIp, nullptr); return MapArchIDToArchName(DeviceIp); @@ -967,9 +971,9 @@ struct get_device_info_impl< size_t Limit = get_device_info_impl::get(Dev); - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode< + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::max_work_groups<3>>::value, sizeof(result), &result, nullptr); return id<1>(std::min(Limit, result[0])); @@ -984,9 +988,9 @@ struct get_device_info_impl< size_t Limit = get_device_info_impl::get(Dev); - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode< + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::max_work_groups<3>>::value, sizeof(result), &result, nullptr); return id<2>(std::min(Limit, result[1]), std::min(Limit, result[0])); @@ -1001,9 +1005,9 @@ struct get_device_info_impl< size_t Limit = get_device_info_impl::get(Dev); - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode< + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::max_work_groups<3>>::value, sizeof(result), &result, nullptr); return id<3>(std::min(Limit, result[2]), std::min(Limit, result[1]), @@ -1062,10 +1066,10 @@ struct get_device_info_impl, // Specialization for parent device template <> struct get_device_info_impl { static device get(const DeviceImplPtr &Dev) { - typename sycl_to_pi::type result; - Dev->getPlugin()->call( - Dev->getHandleRef(), PiInfoCode::value, - sizeof(result), &result, nullptr); + typename sycl_to_ur::type result; + Dev->getPlugin()->call(urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(result), &result, nullptr); if (result == nullptr) throw exception(make_error_code(errc::invalid), "No parent for device because it is not a subdevice"); @@ -1087,30 +1091,36 @@ template <> struct get_device_info_impl { // USM // Specialization for device usm query. + template <> struct get_device_info_impl { static bool get(const DeviceImplPtr &Dev) { - pi_usm_capabilities caps; - pi_result Err = Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_usm_capabilities), &caps, nullptr); + ur_device_usm_access_capability_flags_t caps; + ur_result_t Err = Dev->getPlugin()->call_nocheck( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(ur_device_usm_access_capability_flags_t), &caps, nullptr); - return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS); + return (Err != UR_RESULT_SUCCESS) + ? false + : (caps & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS); } }; // Specialization for host usm query. + template <> struct get_device_info_impl { static bool get(const DeviceImplPtr &Dev) { - pi_usm_capabilities caps; - pi_result Err = Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_usm_capabilities), &caps, nullptr); + ur_device_usm_access_capability_flags_t caps; + ur_result_t Err = Dev->getPlugin()->call_nocheck( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(ur_device_usm_access_capability_flags_t), &caps, nullptr); - return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS); + return (Err != UR_RESULT_SUCCESS) + ? false + : (caps & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS); } }; @@ -1118,12 +1128,14 @@ struct get_device_info_impl { template <> struct get_device_info_impl { static bool get(const DeviceImplPtr &Dev) { - pi_usm_capabilities caps; - pi_result Err = Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_usm_capabilities), &caps, nullptr); - return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS); + ur_device_usm_access_capability_flags_t caps; + ur_result_t Err = Dev->getPlugin()->call_nocheck( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(ur_device_usm_access_capability_flags_t), &caps, nullptr); + return (Err != UR_RESULT_SUCCESS) + ? false + : (caps & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS); } }; @@ -1132,15 +1144,17 @@ template <> struct get_device_info_impl { static bool get(const DeviceImplPtr &Dev) { - pi_usm_capabilities caps; - pi_result Err = Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_usm_capabilities), &caps, nullptr); + ur_device_usm_access_capability_flags_t caps; + ur_result_t Err = Dev->getPlugin()->call_nocheck( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(ur_device_usm_access_capability_flags_t), &caps, nullptr); // Check that we don't support any cross device sharing - return (Err != PI_SUCCESS) + return (Err != UR_RESULT_SUCCESS) ? false - : !(caps & (PI_USM_ACCESS | PI_USM_CONCURRENT_ACCESS)); + : !(caps & + (UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | + UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS)); } }; @@ -1148,12 +1162,14 @@ struct get_device_info_impl struct get_device_info_impl { static bool get(const DeviceImplPtr &Dev) { - pi_usm_capabilities caps; - pi_result Err = Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode::value, - sizeof(pi_usm_capabilities), &caps, nullptr); - return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS); + ur_device_usm_access_capability_flags_t caps; + ur_result_t Err = Dev->getPlugin()->call_nocheck( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, + sizeof(ur_device_usm_access_capability_flags_t), &caps, nullptr); + return (Err != UR_RESULT_SUCCESS) + ? false + : (caps & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS); } }; @@ -1193,9 +1209,9 @@ struct get_device_info_impl< ext::codeplay::experimental::info::device::max_registers_per_work_group> { static uint32_t get(const DeviceImplPtr &Dev) { uint32_t maxRegsPerWG; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCodegetPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode::value, sizeof(maxRegsPerWG), &maxRegsPerWG, nullptr); return maxRegsPerWG; @@ -1210,27 +1226,29 @@ struct get_device_info_impl< static std::vector get(const DeviceImplPtr &Dev) { size_t ResultSize = 0; // First call to get DevCount. - pi_result Err = Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode< + ur_result_t Err = Dev->getPlugin()->call_nocheck( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::component_devices>::value, 0, nullptr, &ResultSize); // If the feature is unsupported or if the result was empty, return an empty // list of devices. - if (Err == PI_ERROR_INVALID_VALUE || (Err == PI_SUCCESS && ResultSize == 0)) + if (Err == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION || + (Err == UR_RESULT_SUCCESS && ResultSize == 0)) return {}; - // Otherwise, if there was an error from PI it is unexpected and we should + // Otherwise, if there was an error from UR it is unexpected and we should // handle it accordingly. - Dev->getPlugin()->checkPiResult(Err); + Dev->getPlugin()->checkUrResult(Err); + + size_t DevCount = ResultSize / sizeof(ur_device_handle_t); - size_t DevCount = ResultSize / sizeof(pi_device); // Second call to get the list. - std::vector Devs(DevCount); - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode< + std::vector Devs(DevCount); + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::component_devices>::value, ResultSize, Devs.data(), nullptr); std::vector Result; @@ -1242,6 +1260,7 @@ struct get_device_info_impl< return Result; } }; + template <> struct get_device_info_impl< sycl::device, ext::oneapi::experimental::info::device::composite_device> { @@ -1251,10 +1270,10 @@ struct get_device_info_impl< "Only devices with aspect::ext_oneapi_is_component " "can call this function."); - typename sycl_to_pi::type Result; - Dev->getPlugin()->call( - Dev->getHandleRef(), - PiInfoCode< + typename sycl_to_ur::type Result; + Dev->getPlugin()->call( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::composite_device>::value, sizeof(Result), &Result, nullptr); diff --git a/sycl/source/detail/error_handling/error_handling.cpp b/sycl/source/detail/error_handling/error_handling.cpp index 6657620f5ed61..3780b526b5fc3 100644 --- a/sycl/source/detail/error_handling/error_handling.cpp +++ b/sycl/source/detail/error_handling/error_handling.cpp @@ -14,38 +14,36 @@ #include #include -#include +#include namespace sycl { inline namespace _V1 { namespace detail::enqueue_kernel_launch { -void handleOutOfResources(const device_impl &DeviceImpl, pi_kernel Kernel, - const NDRDescT &NDRDesc) { +void handleOutOfResources(const device_impl &DeviceImpl, + ur_kernel_handle_t Kernel, const NDRDescT &NDRDesc) { sycl::platform Platform = DeviceImpl.get_platform(); sycl::backend Backend = Platform.get_backend(); if (Backend == sycl::backend::ext_oneapi_cuda) { - // PI_ERROR_OUT_OF_RESOURCES is returned when the kernel registers - // required for the launch config exceeds the maximum number of registers - // per block (PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP). + // CUDA: + // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE is returned when the kernel + // registers required for the launch config exceeds the maximum number of + // registers per block (UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP). // This is if local_work_size[0] * ... * local_work_size[work_dim - 1] - // multiplied by PI_KERNEL_GROUP_INFO_NUM_REGS is greater than the value - // of PI_KERNEL_MAX_NUM_REGISTERS_PER_BLOCK. See Table 15: Technical + // multiplied by UR_KERNEL_INFO_NUM_REGS is greater than the value of + // UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP. See Table 15: Technical // Specifications per Compute Capability, for limitations. const size_t TotalNumberOfWIs = NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; - const uint32_t MaxRegistersPerBlock = - DeviceImpl.get_info(); - const PluginPtr &Plugin = DeviceImpl.getPlugin(); - sycl::detail::pi::PiDevice Device = DeviceImpl.getHandleRef(); - uint32_t NumRegisters = 0; - Plugin->call( - Kernel, Device, PI_KERNEL_GROUP_INFO_NUM_REGS, sizeof(NumRegisters), - &NumRegisters, nullptr); + Plugin->call(urKernelGetInfo, Kernel, UR_KERNEL_INFO_NUM_REGS, + sizeof(NumRegisters), &NumRegisters, nullptr); + + uint32_t MaxRegistersPerBlock = + DeviceImpl.get_info(); const bool HasExceededAvailableRegisters = TotalNumberOfWIs * NumRegisters > MaxRegistersPerBlock; @@ -68,13 +66,14 @@ void handleOutOfResources(const device_impl &DeviceImpl, pi_kernel Kernel, } } // Fallback - constexpr pi_result Error = PI_ERROR_OUT_OF_RESOURCES; + constexpr ur_result_t Error = UR_RESULT_ERROR_OUT_OF_RESOURCES; throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "PI backend failed. PI backend returns:" + + "UR backend failed. UR backend returns:" + codeToString(Error)); } -void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, +void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, + ur_kernel_handle_t Kernel, const NDRDescT &NDRDesc) { sycl::platform Platform = DeviceImpl.get_platform(); @@ -97,17 +96,16 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, } const PluginPtr &Plugin = DeviceImpl.getPlugin(); - sycl::detail::pi::PiDevice Device = DeviceImpl.getHandleRef(); + ur_device_handle_t Device = DeviceImpl.getHandleRef(); size_t CompileWGSize[3] = {0}; - Plugin->call( - Kernel, Device, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, - sizeof(size_t) * 3, CompileWGSize, nullptr); + Plugin->call(urKernelGetGroupInfo, Kernel, Device, + UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(size_t) * 3, + CompileWGSize, nullptr); size_t MaxWGSize = 0; - Plugin->call(Device, - PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, - sizeof(size_t), &MaxWGSize, nullptr); + Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE, + sizeof(size_t), &MaxWGSize, nullptr); const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); @@ -120,8 +118,8 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, "to a device that does not support this work group size is invalid."); // OpenCL 1.x && 2.0: - // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and the - // reqd_work_group_size attribute is used to declare the work-group size + // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and + // the reqd_work_group_size attribute is used to declare the work-group size // for kernel in the program source. if (!HasLocalSize && (IsOpenCLV1x || IsOpenCLVGE20)) { throw sycl::exception( @@ -129,8 +127,9 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, "OpenCL 1.x and 2.0 requires to pass local size argument even if " "required work-group size was specified in the program source"); } - // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does - // not match the required work-group size for kernel in the program source. + // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified + // and does not match the required work-group size for kernel in the program + // source. if (NDRDesc.LocalSize[0] != CompileWGSize[0] || NDRDesc.LocalSize[1] != CompileWGSize[1] || NDRDesc.LocalSize[2] != CompileWGSize[2]) @@ -148,9 +147,8 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, if (HasLocalSize) { size_t MaxThreadsPerBlock[3] = {}; - Plugin->call( - Device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxThreadsPerBlock), - MaxThreadsPerBlock, nullptr); + Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, + sizeof(MaxThreadsPerBlock), MaxThreadsPerBlock, nullptr); for (size_t I = 0; I < 3; ++I) { if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) { @@ -167,10 +165,10 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, if (IsOpenCLV1x) { // OpenCL 1.x: - // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and - // the total number of work-items in the work-group computed as + // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified + // and the total number of work-items in the work-group computed as // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater - // than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in + // than the value specified by UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE in // table 4.3 const size_t TotalNumberOfWIs = NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; @@ -181,15 +179,15 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, std::to_string(MaxWGSize)); } else if (IsOpenCLVGE20 || IsLevelZero) { // OpenCL 2.x or OneAPI Level Zero: - // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and - // the total number of work-items in the work-group computed as + // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified + // and the total number of work-items in the work-group computed as // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater - // than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in + // than the value specified by UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in // table 5.21. size_t KernelWGSize = 0; - Plugin->call( - Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t), - &KernelWGSize, nullptr); + Plugin->call(urKernelGetGroupInfo, Kernel, Device, + UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t), + &KernelWGSize, nullptr); const size_t TotalNumberOfWIs = NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; if (TotalNumberOfWIs > KernelWGSize) @@ -221,9 +219,9 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, if (NonUniformWGs) { if (IsOpenCLV1x) { // OpenCL 1.x: - // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified - // and number of workitems specified by global_work_size is not evenly - // divisible by size of work-group given by local_work_size + // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is + // specified and number of workitems specified by global_work_size is + // not evenly divisible by size of work-group given by local_work_size if (LocalExceedsGlobal) throw sycl::exception(make_error_code(errc::nd_range), "Local workgroup size cannot be greater than " @@ -235,23 +233,21 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, "are not supported by the target device"); } else { // OpenCL 2.x: - // PI_ERROR_INVALID_WORK_GROUP_SIZE if the program was compiled with - // –cl-uniform-work-group-size and the number of work-items specified - // by global_work_size is not evenly divisible by size of work-group - // given by local_work_size - - pi_program Program = nullptr; - Plugin->call( - Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(pi_program), &Program, - nullptr); + // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if the program was compiled + // with –cl-uniform-work-group-size and the number of work-items + // specified by global_work_size is not evenly divisible by size of + // work-group given by local_work_size + + ur_program_handle_t Program = nullptr; + Plugin->call(urKernelGetInfo, Kernel, UR_KERNEL_INFO_PROGRAM, + sizeof(ur_program_handle_t), &Program, nullptr); size_t OptsSize = 0; - Plugin->call( - Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, 0, nullptr, - &OptsSize); + Plugin->call(urProgramGetBuildInfo, Program, Device, + UR_PROGRAM_BUILD_INFO_OPTIONS, 0, nullptr, &OptsSize); std::string Opts(OptsSize, '\0'); - Plugin->call( - Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, OptsSize, - &Opts.front(), nullptr); + Plugin->call(urProgramGetBuildInfo, Program, Device, + UR_PROGRAM_BUILD_INFO_OPTIONS, OptsSize, &Opts.front(), + nullptr); const bool HasStd20 = Opts.find("-cl-std=CL2.0") != std::string::npos; const bool RequiresUniformWGSize = Opts.find("-cl-uniform-work-group-size") != std::string::npos; @@ -297,8 +293,8 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, "Non-uniform work-groups are not supported by the target device"); } // TODO: required number of sub-groups, OpenCL 2.1: - // PI_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not - // consistent with the required number of sub-groups for kernel in the + // UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // is not consistent with the required number of sub-groups for kernel in the // program source. throw exception(make_error_code(errc::nd_range), @@ -309,13 +305,12 @@ void handleInvalidWorkItemSize(const device_impl &DeviceImpl, const NDRDescT &NDRDesc) { const PluginPtr &Plugin = DeviceImpl.getPlugin(); - sycl::detail::pi::PiDevice Device = DeviceImpl.getHandleRef(); + ur_device_handle_t Device = DeviceImpl.getHandleRef(); size_t MaxWISize[] = {0, 0, 0}; - Plugin->call( - Device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxWISize), &MaxWISize, - nullptr); + Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, + sizeof(MaxWISize), &MaxWISize, nullptr); for (unsigned I = 0; I < NDRDesc.Dims; I++) { if (NDRDesc.LocalSize[I] > MaxWISize[I]) throw sycl::exception( @@ -329,12 +324,11 @@ void handleInvalidWorkItemSize(const device_impl &DeviceImpl, void handleInvalidValue(const device_impl &DeviceImpl, const NDRDescT &NDRDesc) { const PluginPtr &Plugin = DeviceImpl.getPlugin(); - sycl::detail::pi::PiDevice Device = DeviceImpl.getHandleRef(); + ur_device_handle_t Device = DeviceImpl.getHandleRef(); size_t MaxNWGs[] = {0, 0, 0}; - Plugin->call( - Device, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, sizeof(MaxNWGs), - &MaxNWGs, nullptr); + Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_MAX_WORK_GROUPS_3D, + sizeof(MaxNWGs), &MaxNWGs, nullptr); for (unsigned int I = 0; I < NDRDesc.Dims; I++) { size_t NWgs = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; if (NWgs > MaxNWGs[I]) @@ -349,92 +343,94 @@ void handleInvalidValue(const device_impl &DeviceImpl, throw exception(make_error_code(errc::nd_range), "unknown internal error"); } -void handleErrorOrWarning(pi_result Error, const device_impl &DeviceImpl, - pi_kernel Kernel, const NDRDescT &NDRDesc) { - assert(Error != PI_SUCCESS && +void handleErrorOrWarning(ur_result_t Error, const device_impl &DeviceImpl, + ur_kernel_handle_t Kernel, const NDRDescT &NDRDesc) { + assert(Error != UR_RESULT_SUCCESS && "Success is expected to be handled on caller side"); switch (Error) { - case PI_ERROR_OUT_OF_RESOURCES: + case UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY: + case UR_RESULT_ERROR_OUT_OF_RESOURCES: return handleOutOfResources(DeviceImpl, Kernel, NDRDesc); - case PI_ERROR_INVALID_WORK_GROUP_SIZE: + case UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE: return handleInvalidWorkGroupSize(DeviceImpl, Kernel, NDRDesc); - case PI_ERROR_INVALID_KERNEL_ARGS: - throw detail::set_pi_error( + case UR_RESULT_ERROR_INVALID_KERNEL_ARGS: + throw detail::set_ur_error( sycl::exception( make_error_code(errc::kernel_argument), "The kernel argument values have not been specified OR a kernel " "argument declared to be a pointer to a type."), - PI_ERROR_INVALID_KERNEL_ARGS); + UR_RESULT_ERROR_INVALID_KERNEL_ARGS); - case PI_ERROR_INVALID_WORK_ITEM_SIZE: + case UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE: return handleInvalidWorkItemSize(DeviceImpl, NDRDesc); - case PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED: - throw detail::set_pi_error( + case UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT: + throw detail::set_ur_error( sycl::exception( make_error_code(errc::feature_not_supported), "image object is specified as an argument value and the image " "format is not supported by device associated with queue"), - PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED); + UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT); - case PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET: - throw detail::set_pi_error( + case UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET: + throw detail::set_ur_error( sycl::exception(make_error_code(errc::invalid), "a sub-buffer object is specified as the value for an " "argument that is a buffer object and the offset " "specified when the sub-buffer object is created is " "not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value " "for device associated with queue"), - PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET); + UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET); - case PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE: - throw detail::set_pi_error( + case UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE: + throw detail::set_ur_error( sycl::exception( make_error_code(errc::memory_allocation), "failure to allocate memory for data store associated with image " "or buffer objects specified as arguments to kernel"), - PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE); + UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE); - case PI_ERROR_INVALID_IMAGE_SIZE: - throw detail::set_pi_error( + case UR_RESULT_ERROR_INVALID_IMAGE_SIZE: + throw detail::set_ur_error( sycl::exception( make_error_code(errc::invalid), "image object is specified as an argument value and the image " "dimensions (image width, height, specified or compute row and/or " "slice pitch) are not supported by device associated with queue"), - PI_ERROR_INVALID_IMAGE_SIZE); + UR_RESULT_ERROR_INVALID_IMAGE_SIZE); - case PI_ERROR_INVALID_VALUE: + case UR_RESULT_ERROR_INVALID_VALUE: return handleInvalidValue(DeviceImpl, NDRDesc); - case PI_ERROR_PLUGIN_SPECIFIC_ERROR: - // checkPiResult does all the necessary handling for - // PI_ERROR_PLUGIN_SPECIFIC_ERROR, making sure an error is thrown or not, - // depending on whether PI_ERROR_PLUGIN_SPECIFIC_ERROR contains an error or - // a warning. It also ensures that the contents of the error message buffer - // (used only by PI_ERROR_PLUGIN_SPECIFIC_ERROR) get handled correctly. - return DeviceImpl.getPlugin()->checkPiResult(Error); + case UR_RESULT_ERROR_ADAPTER_SPECIFIC: + // checkUrResult does all the necessary handling for + // UR_RESULT_ERROR_ADAPTER_SPECIFIC_ERROR, making sure an error is thrown or + // not, depending on whether UR_RESULT_ERROR_ADAPTER_SPECIFIC_ERROR contains + // an error or a warning. It also ensures that the contents of the error + // message buffer (used only by UR_RESULT_ERROR_ADAPTER_SPECIFIC_ERROR) get + // handled correctly. + return DeviceImpl.getPlugin()->checkUrResult(Error); // TODO: Handle other error codes default: - throw detail::set_pi_error( - exception(make_error_code(errc::runtime), "PI error"), Error); + throw detail::set_ur_error( + exception(make_error_code(errc::runtime), "UR error"), Error); } } } // namespace detail::enqueue_kernel_launch namespace detail::kernel_get_group_info { -void handleErrorOrWarning(pi_result Error, pi_kernel_group_info Descriptor, +void handleErrorOrWarning(ur_result_t Error, ur_kernel_group_info_t Descriptor, const PluginPtr &Plugin) { - assert(Error != PI_SUCCESS && + assert(Error != UR_RESULT_SUCCESS && "Success is expected to be handled on caller side"); switch (Error) { - case PI_ERROR_INVALID_VALUE: - if (Descriptor == CL_KERNEL_GLOBAL_WORK_SIZE) + case UR_RESULT_ERROR_INVALID_VALUE: + if (Descriptor == UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) throw sycl::exception( sycl::make_error_code(errc::invalid), "info::kernel_device_specific::global_work_size descriptor may only " @@ -443,7 +439,7 @@ void handleErrorOrWarning(pi_result Error, pi_kernel_group_info Descriptor, break; // TODO: Handle other error codes default: - Plugin->checkPiResult(Error); + Plugin->checkUrResult(Error); break; } } diff --git a/sycl/source/detail/error_handling/error_handling.hpp b/sycl/source/detail/error_handling/error_handling.hpp index 879a26905088b..3583eb773e764 100644 --- a/sycl/source/detail/error_handling/error_handling.hpp +++ b/sycl/source/detail/error_handling/error_handling.hpp @@ -10,28 +10,29 @@ #include #include -#include +#include namespace sycl { inline namespace _V1 { namespace detail { namespace enqueue_kernel_launch { -/// Analyzes error code and arguments of piEnqueueKernelLaunch to emit +/// Analyzes error code and arguments of urEnqueueKernelLaunch to emit /// user-friendly exception describing the problem. /// /// This function is expected to be called only for non-success error codes, -/// i.e. the first argument must not be equal to PI_SUCCESS. +/// i.e. the first argument must not be equal to UR_RESULT_SUCCESS. /// /// This function actually never returns and always throws an exception with /// error description. -void handleErrorOrWarning(pi_result, const device_impl &, pi_kernel, +void handleErrorOrWarning(ur_result_t, const device_impl &, ur_kernel_handle_t, const NDRDescT &); } // namespace enqueue_kernel_launch namespace kernel_get_group_info { -/// Analyzes error code of piKernelGetGroupInfo. -void handleErrorOrWarning(pi_result, pi_kernel_group_info, const PluginPtr &); +/// Analyzes error code of urKernelGetGroupInfo. +void handleErrorOrWarning(ur_result_t, ur_kernel_group_info_t, + const PluginPtr &); } // namespace kernel_get_group_info } // namespace detail diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 097cef03b4d66..2cc3c01260fba 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -45,7 +45,7 @@ void event_impl::initContextIfNeeded() { event_impl::~event_impl() { try { if (MEvent) - getPlugin()->call(MEvent); + getPlugin()->call(urEventRelease, MEvent); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~event_impl", e); } @@ -54,17 +54,16 @@ event_impl::~event_impl() { void event_impl::waitInternal(bool *Success) { if (!MIsHostEvent && MEvent) { // Wait for the native event - sycl::detail::pi::PiResult Err = - getPlugin()->call_nocheck(1, &MEvent); - // TODO drop the PI_ERROR_UKNOWN from here once the UR counterpart to - // PI_ERROR_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST is added: - // https://github.com/oneapi-src/unified-runtime/issues/1459 + ur_result_t Err = getPlugin()->call_nocheck(urEventWait, 1, &MEvent); + // TODO drop the UR_RESULT_ERROR_UKNOWN from here (this was waiting for + // https://github.com/oneapi-src/unified-runtime/issues/1459 which is now + // closed). if (Success != nullptr && - (Err == PI_ERROR_UNKNOWN || - Err == PI_ERROR_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)) + (Err == UR_RESULT_ERROR_UNKNOWN || + Err == UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS)) *Success = false; else { - getPlugin()->checkPiResult(Err); + getPlugin()->checkUrResult(Err); if (Success != nullptr) *Success = true; } @@ -112,10 +111,8 @@ static uint64_t inline getTimestamp() { .count(); } -const sycl::detail::pi::PiEvent &event_impl::getHandleRef() const { - return MEvent; -} -sycl::detail::pi::PiEvent &event_impl::getHandleRef() { return MEvent; } +const ur_event_handle_t &event_impl::getHandleRef() const { return MEvent; } +ur_event_handle_t &event_impl::getHandleRef() { return MEvent; } const ContextImplPtr &event_impl::getContextImpl() { initContextIfNeeded(); @@ -134,20 +131,19 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MContext = Context; } -event_impl::event_impl(sycl::detail::pi::PiEvent Event, - const context &SyclContext) +event_impl::event_impl(ur_event_handle_t Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), MState(HES_Complete) { - sycl::detail::pi::PiContext TempContext; - getPlugin()->call( - MEvent, PI_EVENT_INFO_CONTEXT, sizeof(sycl::detail::pi::PiContext), - &TempContext, nullptr); + ur_context_handle_t TempContext; + getPlugin()->call(urEventGetInfo, MEvent, UR_EVENT_INFO_CONTEXT, + sizeof(ur_context_handle_t), &TempContext, nullptr); + if (MContext->getHandleRef() != TempContext) { throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), "The syclContext must match the OpenCL context " "associated with the clEvent. " + - codeToString(PI_ERROR_INVALID_CONTEXT)); + codeToString(UR_RESULT_ERROR_INVALID_CONTEXT)); } } @@ -161,9 +157,10 @@ event_impl::event_impl(const QueueImplPtr &Queue) MState.store(HES_NotComplete); MHostProfilingInfo.reset(new HostProfilingInfo()); if (!MHostProfilingInfo) - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Out of host memory " + - codeToString(PI_ERROR_OUT_OF_HOST_MEMORY)); + throw sycl::exception( + sycl::make_error_code(sycl::errc::runtime), + "Out of host memory " + + codeToString(UR_RESULT_ERROR_OUT_OF_HOST_MEMORY)); return; } MState.store(HES_Complete); @@ -184,7 +181,7 @@ void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID, // Create a string with the event address so it // can be associated with other debug data xpti::utils::StringHelper SH; - Name = SH.nameWithAddress("event.wait", MEvent); + Name = SH.nameWithAddress("event.wait", MEvent); // We can emit the wait associated with the graph if the // event does not have a command object or associated with @@ -337,7 +334,7 @@ event_impl::get_profiling_info() { throw sycl::exception( sycl::make_error_code(sycl::errc::invalid), "Profiling info is not available. " + - codeToString(PI_ERROR_PROFILING_INFO_NOT_AVAILABLE)); + codeToString(UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE)); return MHostProfilingInfo->getStartTime(); } @@ -364,7 +361,7 @@ uint64_t event_impl::get_profiling_info() { throw sycl::exception( sycl::make_error_code(sycl::errc::invalid), "Profiling info is not available. " + - codeToString(PI_ERROR_PROFILING_INFO_NOT_AVAILABLE)); + codeToString(UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE)); return MHostProfilingInfo->getEndTime(); } @@ -383,11 +380,11 @@ event_impl::get_info() { return info::event_command_status::ext_oneapi_unknown; if (!MIsHostEvent) { - // Command is enqueued and PiEvent is ready + // Command is enqueued and UrEvent is ready if (MEvent) return get_event_info( this->getHandleRef(), this->getPlugin()); - // Command is blocked and not enqueued, PiEvent is not assigned yet + // Command is blocked and not enqueued, UrEvent is not assigned yet else if (MCommand) return sycl::info::event_command_status::submitted; } @@ -457,7 +454,7 @@ void HostProfilingInfo::start() { StartTime = getTimestamp(); } void HostProfilingInfo::end() { EndTime = getTimestamp(); } -pi_native_handle event_impl::getNative() { +ur_native_handle_t event_impl::getNative() { if (isHost()) return {}; initContextIfNeeded(); @@ -465,12 +462,14 @@ pi_native_handle event_impl::getNative() { auto Plugin = getPlugin(); if (MIsDefaultConstructed && !MEvent) { auto TempContext = MContext.get()->getHandleRef(); - Plugin->call(TempContext, &MEvent); + ur_event_native_properties_t NativeProperties{}; + Plugin->call(urEventCreateWithNativeHandle, 0, TempContext, + &NativeProperties, &MEvent); } if (MContext->getBackend() == backend::opencl) - Plugin->call(getHandleRef()); - pi_native_handle Handle; - Plugin->call(getHandleRef(), &Handle); + Plugin->call(urEventRetain, getHandleRef()); + ur_native_handle_t Handle; + Plugin->call(urEventGetNativeHandle, getHandleRef(), &Handle); return Handle; } @@ -494,13 +493,13 @@ std::vector event_impl::getWaitList() { void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) { // Some events might not have a native handle underneath even at this point, - // e.g. those produced by memset with 0 size (no PI call is made). + // e.g. those produced by memset with 0 size (no UR call is made). if (MIsFlushed || !MEvent) return; QueueImplPtr Queue = MQueue.lock(); // If the queue has been released, all of the commands have already been - // implicitly flushed by piQueueRelease. + // implicitly flushed by urQueueRelease. if (!Queue) { MIsFlushed = true; return; @@ -509,12 +508,12 @@ void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) { return; // Check if the task for this event has already been submitted. - pi_event_status Status = PI_EVENT_QUEUED; - getPlugin()->call( - MEvent, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(pi_int32), &Status, - nullptr); - if (Status == PI_EVENT_QUEUED) { - getPlugin()->call(Queue->getHandleRef()); + ur_event_status_t Status = UR_EVENT_STATUS_QUEUED; + getPlugin()->call(urEventGetInfo, MEvent, + UR_EVENT_INFO_COMMAND_EXECUTION_STATUS, + sizeof(ur_event_status_t), &Status, nullptr); + if (Status == UR_EVENT_STATUS_QUEUED) { + getPlugin()->call(urQueueFlush, Queue->getHandleRef()); } MIsFlushed = true; } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 88ba1248771be..f44e5b51bf470 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -65,7 +65,7 @@ class event_impl { /// /// \param Event is a valid instance of plug-in event. /// \param SyclContext is an instance of SYCL context. - event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext); + event_impl(ur_event_handle_t Event, const context &SyclContext); event_impl(const QueueImplPtr &Queue); /// Waits for the event. @@ -126,16 +126,16 @@ class event_impl { /// Marks this event as completed. void setComplete(); - /// Returns raw interoperability event handle. Returned reference will be] + /// Returns raw interoperability event handle. Returned reference will be /// invalid if event_impl was destroyed. /// /// \return a reference to an instance of plug-in event handle. - sycl::detail::pi::PiEvent &getHandleRef(); - /// Returns raw interoperability event handle. Returned reference will be] + ur_event_handle_t &getHandleRef(); + /// Returns raw interoperability event handle. Returned reference will be /// invalid if event_impl was destroyed. /// /// \return a const reference to an instance of plug-in event handle. - const sycl::detail::pi::PiEvent &getHandleRef() const; + const ur_event_handle_t &getHandleRef() const; /// Returns context that is associated with this event. /// @@ -148,8 +148,8 @@ class event_impl { /// Associate event with the context. /// - /// Provided PiContext inside ContextImplPtr must be associated - /// with the PiEvent object stored in this class + /// Provided UrContext inside ContextImplPtr must be associated + /// with the UrEvent object stored in this class /// /// @param Context is a shared pointer to an instance of valid context_impl. void setContextImpl(const ContextImplPtr &Context); @@ -179,7 +179,7 @@ class event_impl { /// Gets the native handle of the SYCL event. /// /// \return a native handle. - pi_native_handle getNative(); + ur_native_handle_t getNative(); /// Returns vector of event dependencies. /// @@ -280,12 +280,12 @@ class event_impl { // Sets a sync point which is used when this event represents an enqueue to a // Command Buffer. - void setSyncPoint(sycl::detail::pi::PiExtSyncPoint SyncPoint) { + void setSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint) { MSyncPoint = SyncPoint; } // Get the sync point associated with this event. - sycl::detail::pi::PiExtSyncPoint getSyncPoint() const { return MSyncPoint; } + ur_exp_command_buffer_sync_point_t getSyncPoint() const { return MSyncPoint; } void setCommandGraph( std::shared_ptr Graph) { @@ -309,12 +309,11 @@ class event_impl { // Sets a command-buffer command when this event represents an enqueue to a // Command Buffer. - void - setCommandBufferCommand(sycl::detail::pi::PiExtCommandBufferCommand Command) { + void setCommandBufferCommand(ur_exp_command_buffer_command_handle_t Command) { MCommandBufferCommand = Command; } - sycl::detail::pi::PiExtCommandBufferCommand getCommandBufferCommand() const { + ur_exp_command_buffer_command_handle_t getCommandBufferCommand() const { return MCommandBufferCommand; } @@ -340,7 +339,7 @@ class event_impl { int32_t StreamID, uint64_t IId) const; void checkProfilingPreconditions() const; - sycl::detail::pi::PiEvent MEvent = nullptr; + ur_event_handle_t MEvent = nullptr; // Stores submission time of command associated with event uint64_t MSubmitTime = 0; uint64_t MHostBaseTime = 0; @@ -379,14 +378,14 @@ class event_impl { bool MEventFromSubmittedExecCommandBuffer = false; // If this event represents a submission to a - // sycl::detail::pi::PiExtCommandBuffer the sync point for that submission is + // ur_exp_command_buffer_sync_point_t the sync point for that submission is // stored here. - sycl::detail::pi::PiExtSyncPoint MSyncPoint = 0; + ur_exp_command_buffer_sync_point_t MSyncPoint; // If this event represents a submission to a - // sycl::detail::pi::PiExtCommandBuffer the command-buffer command + // ur_exp_command_buffer_command_handle_t the command-buffer command // (if any) associated with that submission is stored here. - sycl::detail::pi::PiExtCommandBufferCommand MCommandBufferCommand = nullptr; + ur_exp_command_buffer_command_handle_t MCommandBufferCommand = nullptr; // Signifies whether this event is the result of a profiling tag command. This // allows for profiling, even if the queue does not have profiling enabled. @@ -398,13 +397,13 @@ class event_impl { // when needed. void initContextIfNeeded(); // Event class represents 3 different kinds of operations: - // | type | has PI event | MContext | MIsHostTask | MIsDefaultConstructed | + // | type | has UR event | MContext | MIsHostTask | MIsDefaultConstructed | // | dev | true | !nullptr | false | false | // | host | false | nullptr | true | false | // |default| * | * | false | true | // Default constructed event is created with empty ctor in host code, MContext // is lazily initialized with default device context on first context query. - // MEvent is lazily created in first pi handle query. + // MEvent is lazily created in first ur handle query. bool MIsDefaultConstructed = false; bool MIsHostEvent = false; }; diff --git a/sycl/source/detail/event_info.hpp b/sycl/source/detail/event_info.hpp index 9c60a226e4798..ba8cf469d5c26 100644 --- a/sycl/source/detail/event_info.hpp +++ b/sycl/source/detail/event_info.hpp @@ -19,33 +19,32 @@ inline namespace _V1 { namespace detail { template -typename Param::return_type -get_event_profiling_info(sycl::detail::pi::PiEvent Event, - const PluginPtr &Plugin) { +typename Param::return_type get_event_profiling_info(ur_event_handle_t Event, + const PluginPtr &Plugin) { static_assert(is_event_profiling_info_desc::value, "Unexpected event profiling info descriptor"); typename Param::return_type Result{0}; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call( - Event, PiInfoCode::value, sizeof(Result), &Result, nullptr); + Plugin->call(urEventGetProfilingInfo, Event, UrInfoCode::value, + sizeof(Result), &Result, nullptr); return Result; } template -typename Param::return_type get_event_info(sycl::detail::pi::PiEvent Event, +typename Param::return_type get_event_info(ur_event_handle_t Event, const PluginPtr &Plugin) { static_assert(is_event_info_desc::value, "Unexpected event info descriptor"); typename Param::return_type Result{0}; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call(Event, PiInfoCode::value, - sizeof(Result), &Result, nullptr); + Plugin->call(urEventGetInfo, Event, UrInfoCode::value, sizeof(Result), + &Result, nullptr); - // If the status is PI_EVENT_QUEUED We need to change it since QUEUE is + // If the status is UR_EVENT_STATUS_QUEUED We need to change it since QUEUE is // not a valid status in sycl. if constexpr (std::is_same::value) { - Result = static_cast(Result) == PI_EVENT_QUEUED + Result = static_cast(Result) == UR_EVENT_STATUS_QUEUED ? sycl::info::event_command_status::submitted : Result; } diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 301125d9b5c93..121f47dd43266 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -20,8 +20,8 @@ #include #include #include -#include #include +#include #ifdef _WIN32 #include @@ -207,6 +207,7 @@ std::mutex &GlobalHandler::getPlatformMapMutex() { std::mutex &GlobalHandler::getFilterMutex() { return getOrCreate(MFilterMutex); } + std::vector &GlobalHandler::getPlugins() { enableOnCrashStackPrinting(); return getOrCreate(MPlugins); @@ -231,7 +232,7 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() { void GlobalHandler::releaseDefaultContexts() { // Release shared-pointers to SYCL objects. // Note that on Windows the destruction of the default context - // races with the detaching of the DLL object that calls piTearDown. + // races with the detaching of the DLL object that calls urLoaderTearDown. MPlatformToDefaultContextCache.Inst.reset(nullptr); } @@ -267,15 +268,13 @@ void GlobalHandler::unloadPlugins() { // user application has loaded SYCL runtime, and never called any APIs, // there's no need to load and unload plugins. if (MPlugins.Inst) { - for (const PluginPtr &Plugin : getPlugins()) { - // PluginParameter for Teardown is the boolean tracking if a - // given plugin has been teardown successfully. - // This tracking prevents usage of this plugin after teardown - // has been completed to avoid invalid resource access. - Plugin->call(&Plugin->pluginReleased); - Plugin->unload(); + for (const auto &Plugin : getPlugins()) { + Plugin->release(); } } + + urLoaderTearDown(); + // Clear after unload to avoid uses after unload. getPlugins().clear(); } @@ -356,17 +355,18 @@ void shutdown_late() { extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { - bool PrintPiTrace = false; - static const char *PiTrace = std::getenv("SYCL_PI_TRACE"); - static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0; - if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces - PrintPiTrace = true; + // TODO: Remove from public header files and implementation during the next + // ABI Breaking window. + if (std::getenv("SYCL_PI_TRACE")) { + std::cerr << "SYCL_PI_TRACE has been removed use SYCL_UR_TRACE instead\n"; + std::exit(1); } + bool PrintUrTrace = sycl::detail::ur::trace(); // Perform actions based on the reason for calling. switch (fdwReason) { case DLL_PROCESS_DETACH: - if (PrintPiTrace) + if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -378,7 +378,7 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, shutdown_win(); break; case DLL_PROCESS_ATTACH: - if (PrintPiTrace) + if (PrintUrTrace) std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; break; case DLL_THREAD_ATTACH: diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 4b904890751ce..1c27ad2b455bd 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -624,7 +624,7 @@ std::vector graph_impl::getExitNodesEvents( // Check if nodes are empty and if so loop back through predecessors until we // find the real dependency. void exec_graph_impl::findRealDeps( - std::vector &Deps, + std::vector &Deps, std::shared_ptr CurrentNode, int ReferencePartitionNum) { if (CurrentNode->isEmpty()) { for (auto &N : CurrentNode->MPredecessors) { @@ -635,8 +635,8 @@ void exec_graph_impl::findRealDeps( // Verify if CurrentNode belong the the same partition if (MPartitionNodes[CurrentNode] == ReferencePartitionNum) { // Verify that the sync point has actually been set for this node. - auto SyncPoint = MPiSyncPoints.find(CurrentNode); - assert(SyncPoint != MPiSyncPoints.end() && + auto SyncPoint = MSyncPoints.find(CurrentNode); + assert(SyncPoint != MSyncPoints.end() && "No sync point has been set for node dependency."); // Check if the dependency has already been added. if (std::find(Deps.begin(), Deps.end(), SyncPoint->second) == @@ -647,34 +647,35 @@ void exec_graph_impl::findRealDeps( } } -sycl::detail::pi::PiExtSyncPoint exec_graph_impl::enqueueNodeDirect( - sycl::context Ctx, sycl::detail::DeviceImplPtr DeviceImpl, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - std::shared_ptr Node) { - std::vector Deps; +ur_exp_command_buffer_sync_point_t +exec_graph_impl::enqueueNodeDirect(sycl::context Ctx, + sycl::detail::DeviceImplPtr DeviceImpl, + ur_exp_command_buffer_handle_t CommandBuffer, + std::shared_ptr Node) { + std::vector Deps; for (auto &N : Node->MPredecessors) { findRealDeps(Deps, N.lock(), MPartitionNodes[Node]); } - sycl::detail::pi::PiExtSyncPoint NewSyncPoint; - sycl::detail::pi::PiExtCommandBufferCommand NewCommand = 0; - pi_int32 Res = sycl::detail::enqueueImpCommandBufferKernel( + ur_exp_command_buffer_sync_point_t NewSyncPoint; + ur_exp_command_buffer_command_handle_t NewCommand = 0; + ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel( Ctx, DeviceImpl, CommandBuffer, *static_cast((Node->MCommandGroup.get())), Deps, &NewSyncPoint, &NewCommand, nullptr); MCommandMap[Node] = NewCommand; - if (Res != pi_result::PI_SUCCESS) { + if (Res != UR_RESULT_SUCCESS) { throw sycl::exception(errc::invalid, - "Failed to add kernel to PI command-buffer"); + "Failed to add kernel to UR command-buffer"); } return NewSyncPoint; } -sycl::detail::pi::PiExtSyncPoint exec_graph_impl::enqueueNode( +ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNode( sycl::context Ctx, std::shared_ptr DeviceImpl, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + ur_exp_command_buffer_handle_t CommandBuffer, std::shared_ptr Node) { // Queue which will be used for allocation operations for accessors. @@ -682,7 +683,7 @@ sycl::detail::pi::PiExtSyncPoint exec_graph_impl::enqueueNode( DeviceImpl, sycl::detail::getSyclObjImpl(Ctx), sycl::async_handler{}, sycl::property_list{}); - std::vector Deps; + std::vector Deps; for (auto &N : Node->MPredecessors) { findRealDeps(Deps, N.lock(), MPartitionNodes[Node]); } @@ -697,24 +698,21 @@ sycl::detail::pi::PiExtSyncPoint exec_graph_impl::enqueueNode( } void exec_graph_impl::createCommandBuffers( sycl::device Device, std::shared_ptr &Partition) { - sycl::detail::pi::PiExtCommandBuffer OutCommandBuffer; - sycl::detail::pi::PiExtCommandBufferDesc Desc{ - pi_ext_structure_type::PI_EXT_STRUCTURE_TYPE_COMMAND_BUFFER_DESC, nullptr, - pi_bool(Partition->MIsInOrderGraph && !MEnableProfiling), - pi_bool(MEnableProfiling), pi_bool(MIsUpdatable)}; - + ur_exp_command_buffer_handle_t OutCommandBuffer; + ur_exp_command_buffer_desc_t Desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, MIsUpdatable, + Partition->MIsInOrderGraph && !MEnableProfiling, MEnableProfiling}; auto ContextImpl = sycl::detail::getSyclObjImpl(MContext); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); auto DeviceImpl = sycl::detail::getSyclObjImpl(Device); - pi_result Res = - Plugin->call_nocheck( - ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), &Desc, - &OutCommandBuffer); - if (Res != pi_result::PI_SUCCESS) { - throw sycl::exception(errc::invalid, "Failed to create PI command-buffer"); + ur_result_t Res = Plugin->call_nocheck( + urCommandBufferCreateExp, ContextImpl->getHandleRef(), + DeviceImpl->getHandleRef(), &Desc, &OutCommandBuffer); + if (Res != UR_RESULT_SUCCESS) { + throw sycl::exception(errc::invalid, "Failed to create UR command-buffer"); } - Partition->MPiCommandBuffers[Device] = OutCommandBuffer; + Partition->MCommandBuffers[Device] = OutCommandBuffer; for (const auto &Node : Partition->MSchedule) { // Empty nodes are not processed as other nodes, but only their @@ -731,10 +729,10 @@ void exec_graph_impl::createCommandBuffers( Node->MCommandGroup.get()) ->MStreams.size() == 0) { - MPiSyncPoints[Node] = + MSyncPoints[Node] = enqueueNodeDirect(MContext, DeviceImpl, OutCommandBuffer, Node); } else { - MPiSyncPoints[Node] = + MSyncPoints[Node] = enqueueNode(MContext, DeviceImpl, OutCommandBuffer, Node); } @@ -749,19 +747,17 @@ void exec_graph_impl::createCommandBuffers( Node->MCommandGroup->getAccStorage().end()); } - Res = - Plugin->call_nocheck( - OutCommandBuffer); - if (Res != pi_result::PI_SUCCESS) { + Res = Plugin->call_nocheck(urCommandBufferFinalizeExp, OutCommandBuffer); + if (Res != UR_RESULT_SUCCESS) { throw sycl::exception(errc::invalid, - "Failed to finalize PI command-buffer"); + "Failed to finalize UR command-buffer"); } } exec_graph_impl::exec_graph_impl(sycl::context Context, const std::shared_ptr &GraphImpl, const property_list &PropList) - : MSchedule(), MGraphImpl(GraphImpl), MPiSyncPoints(), + : MSchedule(), MGraphImpl(GraphImpl), MSyncPoints(), MDevice(GraphImpl->getDevice()), MContext(Context), MRequirements(), MExecutionEvents(), MIsUpdatable(PropList.has_property()), @@ -795,22 +791,22 @@ exec_graph_impl::~exec_graph_impl() { for (const auto &Partition : MPartitions) { Partition->MSchedule.clear(); - for (const auto &Iter : Partition->MPiCommandBuffers) { + for (const auto &Iter : Partition->MCommandBuffers) { if (auto CmdBuf = Iter.second; CmdBuf) { - pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferRelease>(CmdBuf); + ur_result_t Res = + Plugin->call_nocheck(urCommandBufferReleaseExp, CmdBuf); (void)Res; - assert(Res == pi_result::PI_SUCCESS); + assert(Res == UR_RESULT_SUCCESS); } } } for (auto &Iter : MCommandMap) { if (auto Command = Iter.second; Command) { - pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferReleaseCommand>(Command); + ur_result_t Res = + Plugin->call_nocheck(urCommandBufferReleaseCommandExp, Command); (void)Res; - assert(Res == pi_result::PI_SUCCESS); + assert(Res == UR_RESULT_SUCCESS); } } } catch (std::exception &e) { @@ -851,8 +847,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, CGData.MEvents.push_back(PartitionsExecutionEvents[DepPartition]); } - auto CommandBuffer = - CurrentPartition->MPiCommandBuffers[Queue->get_device()]; + auto CommandBuffer = CurrentPartition->MCommandBuffers[Queue->get_device()]; if (CommandBuffer) { // if previous submissions are incompleted, we automatically @@ -896,7 +891,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, } NewEvent = CreateNewEvent(); - sycl::detail::pi::PiEvent *OutEvent = &NewEvent->getHandleRef(); + ur_event_handle_t *OutEvent = &NewEvent->getHandleRef(); // Merge requirements from the nodes into requirements (if any) from the // handler. CGData.MRequirements.insert(CGData.MRequirements.end(), @@ -911,19 +906,17 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, NewEvent->setSubmissionTime(); NewEvent->setHostEnqueueTime(); } - pi_result Res = - Queue->getPlugin() - ->call_nocheck< - sycl::detail::PiApiKind::piextEnqueueCommandBuffer>( - CommandBuffer, Queue->getHandleRef(), 0, nullptr, OutEvent); - if (Res == pi_result::PI_ERROR_INVALID_QUEUE_PROPERTIES) { + ur_result_t Res = Queue->getPlugin()->call_nocheck( + urCommandBufferEnqueueExp, CommandBuffer, Queue->getHandleRef(), 0, + nullptr, OutEvent); + if (Res == UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES) { throw sycl::exception( make_error_code(errc::invalid), "Graphs cannot be submitted to a queue which uses " "immediate command lists. Use " "sycl::ext::intel::property::queue::no_immediate_" "command_list to disable them."); - } else if (Res != pi_result::PI_SUCCESS) { + } else if (Res != UR_RESULT_SUCCESS) { throw sycl::exception( errc::event, "Failed to enqueue event for command buffer submission"); @@ -956,7 +949,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, } else { std::vector> ScheduledEvents; for (auto &NodeImpl : CurrentPartition->MSchedule) { - std::vector RawEvents; + std::vector RawEvents; // If the node has no requirements for accessors etc. then we skip the // scheduler and enqueue directly. @@ -976,7 +969,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, // TODO: Pass accessor mem allocations nullptr, // TODO: Extract from handler - PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT, CG->MKernelIsCooperative, + UR_KERNEL_CACHE_CONFIG_DEFAULT, CG->MKernelIsCooperative, CG->MKernelUsesClusterLaunch); ScheduledEvents.push_back(NewEvent); } else if (!NodeImpl->isEmpty()) { @@ -1336,8 +1329,8 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { // Copy NDR desc since we need to modify it auto NDRDesc = ExecCG.MNDRDesc; - pi_kernel PiKernel = nullptr; - pi_program PiProgram = nullptr; + ur_program_handle_t UrProgram = nullptr; + ur_kernel_handle_t UrKernel = nullptr; auto Kernel = ExecCG.MSyclKernel; auto KernelBundleImplPtr = ExecCG.MKernelBundle; std::shared_ptr SyclKernelImpl = nullptr; @@ -1355,13 +1348,13 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { kernel SyclKernel = KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); SyclKernelImpl = sycl::detail::getSyclObjImpl(SyclKernel); - PiKernel = SyclKernelImpl->getHandleRef(); + UrKernel = SyclKernelImpl->getHandleRef(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else if (Kernel != nullptr) { - PiKernel = Kernel->getHandleRef(); + UrKernel = Kernel->getHandleRef(); EliminatedArgMask = Kernel->getKernelArgMask(); } else { - std::tie(PiKernel, std::ignore, EliminatedArgMask, PiProgram) = + std::tie(UrKernel, std::ignore, EliminatedArgMask, UrProgram) = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, ExecCG.MKernelName); } @@ -1385,11 +1378,10 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { if (NDRDesc.LocalSize[0] != 0) LocalSize = &NDRDesc.LocalSize[0]; else { - Plugin->call( - PiKernel, DeviceImpl->getHandleRef(), - PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize), - RequiredWGSize, - /* param_value_size_ret = */ nullptr); + Plugin->call(urKernelGetGroupInfo, UrKernel, DeviceImpl->getHandleRef(), + UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(RequiredWGSize), RequiredWGSize, + /* param_value_size_ret = */ nullptr); const bool EnforcedLocalSize = (RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 || @@ -1400,14 +1392,14 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { // Create update descriptor // Storage for individual arg descriptors - std::vector MemobjDescs; - std::vector PtrDescs; - std::vector ValueDescs; + std::vector MemobjDescs; + std::vector PtrDescs; + std::vector ValueDescs; MemobjDescs.reserve(MaskedArgs.size()); PtrDescs.reserve(MaskedArgs.size()); ValueDescs.reserve(MaskedArgs.size()); - pi_ext_command_buffer_update_kernel_launch_desc UpdateDesc; + ur_exp_command_buffer_update_kernel_launch_desc_t UpdateDesc; // Collect arg descriptors and fill kernel launch descriptor using sycl::detail::kernel_param_kind_t; @@ -1415,38 +1407,43 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { auto &NodeArg = MaskedArgs[i]; switch (NodeArg.MType) { case kernel_param_kind_t::kind_pointer: { - PtrDescs.push_back({static_cast(NodeArg.MIndex), NodeArg.MPtr}); + PtrDescs.push_back( + {UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, + nullptr, static_cast(NodeArg.MIndex), nullptr, + NodeArg.MPtr}); } break; case kernel_param_kind_t::kind_std_layout: { - ValueDescs.push_back({static_cast(NodeArg.MIndex), - static_cast(NodeArg.MSize), - NodeArg.MPtr}); + ValueDescs.push_back( + {UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, nullptr, + static_cast(NodeArg.MIndex), + static_cast(NodeArg.MSize), nullptr, NodeArg.MPtr}); } break; case kernel_param_kind_t::kind_accessor: { sycl::detail::Requirement *Req = static_cast(NodeArg.MPtr); - pi_mem_obj_property MemObjData{}; - + ur_kernel_arg_mem_obj_properties_t MemObjProps; + MemObjProps.stype = UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES; + MemObjProps.pNext = nullptr; switch (Req->MAccessMode) { case access::mode::read: { - MemObjData.mem_access = PI_ACCESS_READ_ONLY; + MemObjProps.memoryAccess = UR_MEM_FLAG_READ_ONLY; break; } case access::mode::write: case access::mode::discard_write: { - MemObjData.mem_access = PI_ACCESS_WRITE_ONLY; + MemObjProps.memoryAccess = UR_MEM_FLAG_WRITE_ONLY; break; } default: { - MemObjData.mem_access = PI_ACCESS_READ_WRITE; + MemObjProps.memoryAccess = UR_MEM_FLAG_READ_WRITE; break; } } - MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS; - MemobjDescs.push_back(pi_ext_command_buffer_update_memobj_arg_desc_t{ - static_cast(NodeArg.MIndex), &MemObjData, - static_cast(Req->MData)}); + MemobjDescs.push_back( + {UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, nullptr, + static_cast(NodeArg.MIndex), &MemObjProps, + static_cast(Req->MData)}); } break; @@ -1455,17 +1452,17 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { } } - UpdateDesc.num_mem_obj_args = MemobjDescs.size(); - UpdateDesc.mem_obj_arg_list = MemobjDescs.data(); - UpdateDesc.num_ptr_args = PtrDescs.size(); - UpdateDesc.ptr_arg_list = PtrDescs.data(); - UpdateDesc.num_value_args = ValueDescs.size(); - UpdateDesc.value_arg_list = ValueDescs.data(); + UpdateDesc.numNewMemObjArgs = MemobjDescs.size(); + UpdateDesc.pNewMemObjArgList = MemobjDescs.data(); + UpdateDesc.numNewPointerArgs = PtrDescs.size(); + UpdateDesc.pNewPointerArgList = PtrDescs.data(); + UpdateDesc.numNewValueArgs = ValueDescs.size(); + UpdateDesc.pNewValueArgList = ValueDescs.data(); - UpdateDesc.global_work_offset = &NDRDesc.GlobalOffset[0]; - UpdateDesc.global_work_size = &NDRDesc.GlobalSize[0]; - UpdateDesc.local_work_size = LocalSize; - UpdateDesc.num_work_dim = NDRDesc.Dims; + UpdateDesc.pNewGlobalWorkOffset = &NDRDesc.GlobalOffset[0]; + UpdateDesc.pNewGlobalWorkSize = &NDRDesc.GlobalSize[0]; + UpdateDesc.pNewLocalWorkSize = LocalSize; + UpdateDesc.newWorkDim = NDRDesc.Dims; // Query the ID cache to find the equivalent exec node for the node passed to // this function. @@ -1479,19 +1476,18 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { // rebuild the command buffers ExecNode->second->updateFromOtherNode(Node); - sycl::detail::pi::PiExtCommandBufferCommand Command = + ur_exp_command_buffer_command_handle_t Command = MCommandMap[ExecNode->second]; - pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferUpdateKernelLaunch>( - Command, &UpdateDesc); + ur_result_t Res = Plugin->call_nocheck(urCommandBufferUpdateKernelLaunchExp, + Command, &UpdateDesc); - if (PiProgram) { + if (UrProgram) { // We retained these objects by calling getOrCreateKernel() - Plugin->call(PiKernel); - Plugin->call(PiProgram); + Plugin->call(urKernelRelease, UrKernel); + Plugin->call(urProgramRelease, UrProgram); } - if (Res != PI_SUCCESS) { + if (Res != UR_RESULT_SUCCESS) { throw sycl::exception(errc::invalid, "Error updating command_graph"); } } diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index b79b96ea59093..454a43fe99533 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -805,7 +805,7 @@ class node_impl { class partition { public: /// Constructor. - partition() : MSchedule(), MPiCommandBuffers() {} + partition() : MSchedule(), MCommandBuffers() {} /// List of root nodes. std::set, std::owner_less>> @@ -813,8 +813,8 @@ class partition { /// Execution schedule of nodes in the graph. std::list> MSchedule; /// Map of devices to command buffers. - std::unordered_map - MPiCommandBuffers; + std::unordered_map + MCommandBuffers; /// List of predecessors to this partition. std::vector> MPredecessors; /// True if the graph of this partition is a single path graph @@ -1315,7 +1315,7 @@ class exec_graph_impl { /// Destructor. /// - /// Releases any PI command-buffers the object has created. + /// Releases any UR command-buffers the object has created. ~exec_graph_impl(); /// Partition the graph nodes and put the partition in MPartitions. @@ -1396,10 +1396,10 @@ class exec_graph_impl { /// @param DeviceImpl Device associated with the enqueue. /// @param CommandBuffer Command-buffer to add node to as a command. /// @param Node The node being enqueued. - /// @return PI sync point created for this node in the command-buffer. - sycl::detail::pi::PiExtSyncPoint + /// @return UR sync point created for this node in the command-buffer. + ur_exp_command_buffer_sync_point_t enqueueNode(sycl::context Ctx, sycl::detail::DeviceImplPtr DeviceImpl, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + ur_exp_command_buffer_handle_t CommandBuffer, std::shared_ptr Node); /// Enqueue a node directly to the command-buffer without going through the @@ -1408,10 +1408,10 @@ class exec_graph_impl { /// @param DeviceImpl Device associated with the enqueue. /// @param CommandBuffer Command-buffer to add node to as a command. /// @param Node The node being enqueued. - /// @return PI sync point created for this node in the command-buffer. - sycl::detail::pi::PiExtSyncPoint + /// @return UR sync point created for this node in the command-buffer. + ur_exp_command_buffer_sync_point_t enqueueNodeDirect(sycl::context Ctx, sycl::detail::DeviceImplPtr DeviceImpl, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + ur_exp_command_buffer_handle_t CommandBuffer, std::shared_ptr Node); /// Iterates back through predecessors to find the real dependency. @@ -1420,7 +1420,7 @@ class exec_graph_impl { /// @param[in] ReferencePartitionNum Number of the partition containing the /// SyncPoint for CurrentNode, otherwise we need to /// synchronize on the host with the completion of previous partitions. - void findRealDeps(std::vector &Deps, + void findRealDeps(std::vector &Deps, std::shared_ptr CurrentNode, int ReferencePartitionNum); @@ -1467,8 +1467,8 @@ class exec_graph_impl { /// Map of nodes in the exec graph to the sync point representing their /// execution in the command graph. std::unordered_map, - sycl::detail::pi::PiExtSyncPoint> - MPiSyncPoints; + ur_exp_command_buffer_sync_point_t> + MSyncPoints; /// Map of nodes in the exec graph to the partition number to which they /// belong. std::unordered_map, int> MPartitionNodes; @@ -1488,9 +1488,9 @@ class exec_graph_impl { std::vector> MPartitions; /// Storage for copies of nodes from the original modifiable graph. std::vector> MNodeStorage; - /// Map of nodes to their associated PI command handles. + /// Map of nodes to their associated UR command handles. std::unordered_map, - sycl::detail::pi::PiExtCommandBufferCommand> + ur_exp_command_buffer_command_handle_t> MCommandMap; /// True if this graph can be updated (set with property::updatable) bool MIsUpdatable; diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index c9dc103ac0bdc..0da70c4f86483 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -90,7 +90,7 @@ class handler_impl { std::shared_ptr MKernelBundle; - pi_mem_advice MAdvice; + ur_usm_advice_flags_t MAdvice; // 2D memory operation information. size_t MSrcPitch; @@ -117,25 +117,24 @@ class handler_impl { // If the pipe operation is read or write, 1 for read 0 for write. bool HostPipeRead = true; - sycl::detail::pi::PiKernelCacheConfig MKernelCacheConfig = - PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT; + ur_kernel_cache_config_t MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; bool MKernelIsCooperative = false; bool MKernelUsesClusterLaunch = false; // Extra information for bindless image copy - sycl::detail::pi::PiMemImageDesc MSrcImageDesc; - sycl::detail::pi::PiMemImageDesc MDestImageDesc; - sycl::detail::pi::PiMemImageFormat MSrcImageFormat; - sycl::detail::pi::PiMemImageFormat MDestImageFormat; - sycl::detail::pi::PiImageCopyFlags MImageCopyFlags; + ur_image_desc_t MSrcImageDesc; + ur_image_desc_t MDstImageDesc; + ur_image_format_t MSrcImageFormat; + ur_image_format_t MDstImageFormat; + ur_exp_image_copy_flags_t MImageCopyFlags; - sycl::detail::pi::PiImageOffset MSrcOffset; - sycl::detail::pi::PiImageOffset MDestOffset; - sycl::detail::pi::PiImageRegion MCopyExtent; + ur_rect_offset_t MSrcOffset; + ur_rect_offset_t MDestOffset; + ur_rect_region_t MCopyExtent; // Extra information for semaphore interoperability - sycl::detail::pi::PiInteropSemaphoreHandle MInteropSemaphoreHandle; + ur_exp_interop_semaphore_handle_t MInteropSemaphoreHandle; std::optional MWaitValue; std::optional MSignalValue; diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 9d77a8fb8fc6f..59f4c42b66040 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -34,7 +34,7 @@ void markBufferAsInternal(const std::shared_ptr &BufImpl) { BufImpl->markAsInternal(); } -std::tuple +std::tuple retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, CGExecKernel *KernelCG) { bool isNvidia = @@ -61,15 +61,14 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, auto Context = detail::createSyclObjFromImpl(ContextImpl); auto DeviceImpl = Queue->getDeviceImplPtr(); auto Device = detail::createSyclObjFromImpl(DeviceImpl); - sycl::detail::pi::PiProgram Program = - detail::ProgramManager::getInstance().createPIProgram(**DeviceImage, + ur_program_handle_t Program = + detail::ProgramManager::getInstance().createURProgram(**DeviceImage, Context, Device); return {*DeviceImage, Program}; } - assert(KernelCG && "CGExecKernel must be provided."); const RTDeviceBinaryImage *DeviceImage = nullptr; - sycl::detail::pi::PiProgram Program = nullptr; + ur_program_handle_t Program = nullptr; if (KernelCG->getKernelBundle() != nullptr) { // Retrieve the device image from the kernel bundle. auto KernelBundle = KernelCG->getKernelBundle(); @@ -80,10 +79,10 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, KernelBundle->get_kernel(KernelID, KernelBundle)); DeviceImage = SyclKernel->getDeviceImage()->get_bin_image_ref(); - Program = SyclKernel->getDeviceImage()->get_program_ref(); + Program = SyclKernel->getDeviceImage()->get_ur_program_ref(); } else if (KernelCG->MSyclKernel != nullptr) { DeviceImage = KernelCG->MSyclKernel->getDeviceImage()->get_bin_image_ref(); - Program = KernelCG->MSyclKernel->getDeviceImage()->get_program_ref(); + Program = KernelCG->MSyclKernel->getDeviceImage()->get_ur_program_ref(); } else { auto ContextImpl = Queue->getContextImplPtr(); auto Context = detail::createSyclObjFromImpl(ContextImpl); @@ -91,7 +90,7 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, auto Device = detail::createSyclObjFromImpl(DeviceImpl); DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage( KernelName, Context, Device); - Program = detail::ProgramManager::getInstance().createPIProgram( + Program = detail::ProgramManager::getInstance().createURProgram( *DeviceImage, Context, Device); } return {DeviceImage, Program}; diff --git a/sycl/source/detail/image_impl.cpp b/sycl/source/detail/image_impl.cpp index 71574991745a1..7b2e0b559feeb 100644 --- a/sycl/source/detail/image_impl.cpp +++ b/sycl/source/detail/image_impl.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -107,167 +108,162 @@ uint8_t getImageElementSize(uint8_t NumChannels, image_channel_type Type) { return Retval; } -sycl::detail::pi::PiMemImageChannelOrder -convertChannelOrder(image_channel_order Order) { +ur_image_channel_order_t convertChannelOrder(image_channel_order Order) { switch (Order) { case image_channel_order::a: - return PI_IMAGE_CHANNEL_ORDER_A; + return UR_IMAGE_CHANNEL_ORDER_A; case image_channel_order::r: - return PI_IMAGE_CHANNEL_ORDER_R; + return UR_IMAGE_CHANNEL_ORDER_R; case image_channel_order::rx: - return PI_IMAGE_CHANNEL_ORDER_Rx; + return UR_IMAGE_CHANNEL_ORDER_RX; case image_channel_order::rg: - return PI_IMAGE_CHANNEL_ORDER_RG; + return UR_IMAGE_CHANNEL_ORDER_RG; case image_channel_order::rgx: - return PI_IMAGE_CHANNEL_ORDER_RGx; + return UR_IMAGE_CHANNEL_ORDER_RGX; case image_channel_order::ra: - return PI_IMAGE_CHANNEL_ORDER_RA; + return UR_IMAGE_CHANNEL_ORDER_RA; case image_channel_order::rgb: - return PI_IMAGE_CHANNEL_ORDER_RGB; + return UR_IMAGE_CHANNEL_ORDER_RGB; case image_channel_order::rgbx: - return PI_IMAGE_CHANNEL_ORDER_RGBx; + return UR_IMAGE_CHANNEL_ORDER_RGBX; case image_channel_order::rgba: - return PI_IMAGE_CHANNEL_ORDER_RGBA; + return UR_IMAGE_CHANNEL_ORDER_RGBA; case image_channel_order::argb: - return PI_IMAGE_CHANNEL_ORDER_ARGB; + return UR_IMAGE_CHANNEL_ORDER_ARGB; case image_channel_order::bgra: - return PI_IMAGE_CHANNEL_ORDER_BGRA; + return UR_IMAGE_CHANNEL_ORDER_BGRA; case image_channel_order::intensity: - return PI_IMAGE_CHANNEL_ORDER_INTENSITY; + return UR_IMAGE_CHANNEL_ORDER_INTENSITY; case image_channel_order::luminance: - return PI_IMAGE_CHANNEL_ORDER_LUMINANCE; + return UR_IMAGE_CHANNEL_ORDER_LUMINANCE; case image_channel_order::abgr: - return PI_IMAGE_CHANNEL_ORDER_ABGR; + return UR_IMAGE_CHANNEL_ORDER_ABGR; case image_channel_order::ext_oneapi_srgba: - return PI_IMAGE_CHANNEL_ORDER_sRGBA; + return UR_IMAGE_CHANNEL_ORDER_SRGBA; } assert(false && "Unhandled image_channel_order"); - return static_cast(0); + return static_cast(0); } -image_channel_order -convertChannelOrder(sycl::detail::pi::PiMemImageChannelOrder Order) { +image_channel_order convertChannelOrder(ur_image_channel_order_t Order) { switch (Order) { - case PI_IMAGE_CHANNEL_ORDER_A: + case UR_IMAGE_CHANNEL_ORDER_A: return image_channel_order::a; - case PI_IMAGE_CHANNEL_ORDER_R: + case UR_IMAGE_CHANNEL_ORDER_R: return image_channel_order::r; - case PI_IMAGE_CHANNEL_ORDER_Rx: + case UR_IMAGE_CHANNEL_ORDER_RX: return image_channel_order::rx; - case PI_IMAGE_CHANNEL_ORDER_RG: + case UR_IMAGE_CHANNEL_ORDER_RG: return image_channel_order::rg; - case PI_IMAGE_CHANNEL_ORDER_RGx: + case UR_IMAGE_CHANNEL_ORDER_RGX: return image_channel_order::rgx; - case PI_IMAGE_CHANNEL_ORDER_RA: + case UR_IMAGE_CHANNEL_ORDER_RA: return image_channel_order::ra; - case PI_IMAGE_CHANNEL_ORDER_RGB: + case UR_IMAGE_CHANNEL_ORDER_RGB: return image_channel_order::rgb; - case PI_IMAGE_CHANNEL_ORDER_RGBx: + case UR_IMAGE_CHANNEL_ORDER_RGBX: return image_channel_order::rgbx; - case PI_IMAGE_CHANNEL_ORDER_RGBA: + case UR_IMAGE_CHANNEL_ORDER_RGBA: return image_channel_order::rgba; - case PI_IMAGE_CHANNEL_ORDER_ARGB: + case UR_IMAGE_CHANNEL_ORDER_ARGB: return image_channel_order::argb; - case PI_IMAGE_CHANNEL_ORDER_BGRA: + case UR_IMAGE_CHANNEL_ORDER_BGRA: return image_channel_order::bgra; - case PI_IMAGE_CHANNEL_ORDER_INTENSITY: + case UR_IMAGE_CHANNEL_ORDER_INTENSITY: return image_channel_order::intensity; - case PI_IMAGE_CHANNEL_ORDER_LUMINANCE: + case UR_IMAGE_CHANNEL_ORDER_LUMINANCE: return image_channel_order::luminance; - case PI_IMAGE_CHANNEL_ORDER_ABGR: + case UR_IMAGE_CHANNEL_ORDER_ABGR: return image_channel_order::abgr; - case PI_IMAGE_CHANNEL_ORDER_sRGBA: + case UR_IMAGE_CHANNEL_ORDER_SRGBA: return image_channel_order::ext_oneapi_srgba; + default: + assert(false && "Unhandled image_channel_order"); } - assert(false && "Unhandled image_channel_order"); return static_cast(0); } -sycl::detail::pi::PiMemImageChannelType -convertChannelType(image_channel_type Type) { +ur_image_channel_type_t convertChannelType(image_channel_type Type) { switch (Type) { case image_channel_type::snorm_int8: - return PI_IMAGE_CHANNEL_TYPE_SNORM_INT8; + return UR_IMAGE_CHANNEL_TYPE_SNORM_INT8; case image_channel_type::snorm_int16: - return PI_IMAGE_CHANNEL_TYPE_SNORM_INT16; + return UR_IMAGE_CHANNEL_TYPE_SNORM_INT16; case image_channel_type::unorm_int8: - return PI_IMAGE_CHANNEL_TYPE_UNORM_INT8; + return UR_IMAGE_CHANNEL_TYPE_UNORM_INT8; case image_channel_type::unorm_int16: - return PI_IMAGE_CHANNEL_TYPE_UNORM_INT16; + return UR_IMAGE_CHANNEL_TYPE_UNORM_INT16; case image_channel_type::unorm_short_565: - return PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565; + return UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565; case image_channel_type::unorm_short_555: - return PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555; + return UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555; case image_channel_type::unorm_int_101010: - return PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010; + return UR_IMAGE_CHANNEL_TYPE_INT_101010; case image_channel_type::signed_int8: - return PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8; + return UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8; case image_channel_type::signed_int16: - return PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16; + return UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16; case image_channel_type::signed_int32: - return PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32; + return UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32; case image_channel_type::unsigned_int8: - return PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8; + return UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8; case image_channel_type::unsigned_int16: - return PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16; + return UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16; case image_channel_type::unsigned_int32: - return PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; + return UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; case image_channel_type::fp16: - return PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT; + return UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT; case image_channel_type::fp32: - return PI_IMAGE_CHANNEL_TYPE_FLOAT; + return UR_IMAGE_CHANNEL_TYPE_FLOAT; } assert(false && "Unhandled image_channel_order"); - return static_cast(0); + return static_cast(0); } -image_channel_type -convertChannelType(sycl::detail::pi::PiMemImageChannelType Type) { +image_channel_type convertChannelType(ur_image_channel_type_t Type) { switch (Type) { - case PI_IMAGE_CHANNEL_TYPE_SNORM_INT8: + case UR_IMAGE_CHANNEL_TYPE_SNORM_INT8: return image_channel_type::snorm_int8; - case PI_IMAGE_CHANNEL_TYPE_SNORM_INT16: + case UR_IMAGE_CHANNEL_TYPE_SNORM_INT16: return image_channel_type::snorm_int16; - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT8: + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: return image_channel_type::unorm_int8; - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT16: + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: return image_channel_type::unorm_int16; - case PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: + case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: return image_channel_type::unorm_short_565; - case PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: + case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: return image_channel_type::unorm_short_555; - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010: + case UR_IMAGE_CHANNEL_TYPE_INT_101010: return image_channel_type::unorm_int_101010; - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8: + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: return image_channel_type::signed_int8; - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16: + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: return image_channel_type::signed_int16; - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32: + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: return image_channel_type::signed_int32; - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: return image_channel_type::unsigned_int8; - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: return image_channel_type::unsigned_int16; - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: return image_channel_type::unsigned_int32; - case PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT: + case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: return image_channel_type::fp16; - case PI_IMAGE_CHANNEL_TYPE_FLOAT: + case UR_IMAGE_CHANNEL_TYPE_FLOAT: return image_channel_type::fp32; + default: + assert(false && "Unhandled image_channel_order"); } - assert(false && "Unhandled image_channel_order"); return static_cast(0); } template -static void getImageInfo(const ContextImplPtr Context, - sycl::detail::pi::PiMemImageInfo Info, T &Dest, - sycl::detail::pi::PiMem InteropMemObject) { +static void getImageInfo(const ContextImplPtr Context, ur_image_info_t Info, + T &Dest, ur_mem_handle_t InteropMemObject) { const PluginPtr &Plugin = Context->getPlugin(); - sycl::detail::pi::PiMem Mem = - pi::cast(InteropMemObject); - Plugin->call(Mem, Info, sizeof(T), &Dest, - nullptr); + Plugin->call(urMemImageGetInfo, InteropMemObject, Info, sizeof(T), &Dest, + nullptr); } image_impl::image_impl(cl_mem MemObject, const context &SyclContext, @@ -277,46 +273,47 @@ image_impl::image_impl(cl_mem MemObject, const context &SyclContext, : BaseT(MemObject, SyclContext, std::move(AvailableEvent), std::move(Allocator)), MDimensions(Dimensions), MRange({0, 0, 0}) { - sycl::detail::pi::PiMem Mem = - pi::cast(BaseT::MInteropMemObject); + ur_mem_handle_t Mem = ur::cast(BaseT::MInteropMemObject); const ContextImplPtr Context = getSyclObjImpl(SyclContext); const PluginPtr &Plugin = Context->getPlugin(); - Plugin->call(Mem, PI_MEM_SIZE, sizeof(size_t), - &(BaseT::MSizeInBytes), nullptr); + Plugin->call(urMemGetInfo, Mem, UR_MEM_INFO_SIZE, sizeof(size_t), + &(BaseT::MSizeInBytes), nullptr); - sycl::detail::pi::PiMemImageFormat Format; - getImageInfo(Context, PI_IMAGE_INFO_FORMAT, Format, Mem); - MOrder = detail::convertChannelOrder(Format.image_channel_order); - MType = detail::convertChannelType(Format.image_channel_data_type); + ur_image_format_t Format; + getImageInfo(Context, UR_IMAGE_INFO_FORMAT, Format, Mem); + MOrder = detail::convertChannelOrder(Format.channelOrder); + MType = detail::convertChannelType(Format.channelType); MNumChannels = getImageNumberChannels(MOrder); - getImageInfo(Context, PI_IMAGE_INFO_ELEMENT_SIZE, MElementSize, Mem); + getImageInfo(Context, UR_IMAGE_INFO_ELEMENT_SIZE, MElementSize, Mem); assert(getImageElementSize(MNumChannels, MType) == MElementSize); - getImageInfo(Context, PI_IMAGE_INFO_ROW_PITCH, MRowPitch, Mem); - getImageInfo(Context, PI_IMAGE_INFO_SLICE_PITCH, MSlicePitch, Mem); + getImageInfo(Context, UR_IMAGE_INFO_ROW_PITCH, MRowPitch, Mem); + getImageInfo(Context, UR_IMAGE_INFO_SLICE_PITCH, MSlicePitch, Mem); switch (MDimensions) { case 3: - getImageInfo(Context, PI_IMAGE_INFO_DEPTH, MRange[2], Mem); + getImageInfo(Context, UR_IMAGE_INFO_DEPTH, MRange[2], Mem); [[fallthrough]]; case 2: - getImageInfo(Context, PI_IMAGE_INFO_HEIGHT, MRange[1], Mem); + getImageInfo(Context, UR_IMAGE_INFO_HEIGHT, MRange[1], Mem); [[fallthrough]]; case 1: - getImageInfo(Context, PI_IMAGE_INFO_WIDTH, MRange[0], Mem); + getImageInfo(Context, UR_IMAGE_INFO_WIDTH, MRange[0], Mem); } } -image_impl::image_impl(pi_native_handle MemObject, const context &SyclContext, +image_impl::image_impl(ur_native_handle_t MemObject, const context &SyclContext, event AvailableEvent, std::unique_ptr Allocator, uint8_t Dimensions, image_channel_order Order, image_channel_type Type, bool OwnNativeHandle, range<3> Range3WithOnes) : BaseT(MemObject, SyclContext, OwnNativeHandle, std::move(AvailableEvent), - std::move(Allocator), detail::convertChannelOrder(Order), - detail::convertChannelType(Type), Range3WithOnes, Dimensions, + std::move(Allocator), + ur_image_format_t{detail::convertChannelOrder(Order), + detail::convertChannelType(Type)}, + Range3WithOnes, Dimensions, getImageElementSize(getImageNumberChannels(Order), Type)), MDimensions(Dimensions), MRange(Range3WithOnes) { MOrder = Order; @@ -328,15 +325,15 @@ image_impl::image_impl(pi_native_handle MemObject, const context &SyclContext, void *image_impl::allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, - sycl::detail::pi::PiEvent &OutEventToWait) { + ur_event_handle_t &OutEventToWait) { bool HostPtrReadOnly = false; BaseT::determineHostPtr(InitFromUserData, HostPtr, HostPtrReadOnly); - sycl::detail::pi::PiMemImageDesc Desc = getImageDesc(HostPtr != nullptr); + ur_image_desc_t Desc = getImageDesc(HostPtr != nullptr); assert(checkImageDesc(Desc, Context, HostPtr) && "The check an image desc failed."); - sycl::detail::pi::PiMemImageFormat Format = getImageFormat(); + ur_image_format_t Format = getImageFormat(); assert(checkImageFormat(Format, Context) && "The check an image format failed."); @@ -346,110 +343,105 @@ void *image_impl::allocateMem(ContextImplPtr Context, bool InitFromUserData, BaseT::MInteropContext, MProps, OutEventToWait); } -bool image_impl::checkImageDesc(const sycl::detail::pi::PiMemImageDesc &Desc, +bool image_impl::checkImageDesc(const ur_image_desc_t &Desc, ContextImplPtr Context, void *UserPtr) { - if (checkAny(Desc.image_type, PI_MEM_TYPE_IMAGE1D, PI_MEM_TYPE_IMAGE1D_ARRAY, - PI_MEM_TYPE_IMAGE2D_ARRAY, PI_MEM_TYPE_IMAGE2D) && + if (checkAny(Desc.type, UR_MEM_TYPE_IMAGE1D, UR_MEM_TYPE_IMAGE1D_ARRAY, + UR_MEM_TYPE_IMAGE2D_ARRAY, UR_MEM_TYPE_IMAGE2D) && !checkImageValueRange( - getDevices(Context), Desc.image_width)) + getDevices(Context), Desc.width)) throw exception(make_error_code(errc::invalid), "For a 1D/2D image/image array, the width must be a Value " ">= 1 and <= info::device::image2d_max_width"); - if (checkAny(Desc.image_type, PI_MEM_TYPE_IMAGE3D) && + if (checkAny(Desc.type, UR_MEM_TYPE_IMAGE3D) && !checkImageValueRange( - getDevices(Context), Desc.image_width)) + getDevices(Context), Desc.width)) throw exception(make_error_code(errc::invalid), "For a 3D image, the width must be a Value >= 1 and <= " "info::device::image3d_max_width"); - if (checkAny(Desc.image_type, PI_MEM_TYPE_IMAGE2D, - PI_MEM_TYPE_IMAGE2D_ARRAY) && + if (checkAny(Desc.type, UR_MEM_TYPE_IMAGE2D, UR_MEM_TYPE_IMAGE2D_ARRAY) && !checkImageValueRange( - getDevices(Context), Desc.image_height)) + getDevices(Context), Desc.height)) throw exception(make_error_code(errc::invalid), "For a 2D image or image array, the height must be a Value " ">= 1 and <= info::device::image2d_max_height"); - if (checkAny(Desc.image_type, PI_MEM_TYPE_IMAGE3D) && + if (checkAny(Desc.type, UR_MEM_TYPE_IMAGE3D) && !checkImageValueRange( - getDevices(Context), Desc.image_height)) + getDevices(Context), Desc.height)) throw exception(make_error_code(errc::invalid), "For a 3D image, the heightmust be a Value >= 1 and <= " "info::device::image3d_max_height"); - if (checkAny(Desc.image_type, PI_MEM_TYPE_IMAGE3D) && + if (checkAny(Desc.type, UR_MEM_TYPE_IMAGE3D) && !checkImageValueRange( - getDevices(Context), Desc.image_depth)) + getDevices(Context), Desc.depth)) throw exception(make_error_code(errc::invalid), "For a 3D image, the depth must be a Value >= 1 and <= " "info::device::image2d_max_depth"); - if (checkAny(Desc.image_type, PI_MEM_TYPE_IMAGE1D_ARRAY, - PI_MEM_TYPE_IMAGE2D_ARRAY) && + if (checkAny(Desc.type, UR_MEM_TYPE_IMAGE1D_ARRAY, + UR_MEM_TYPE_IMAGE2D_ARRAY) && !checkImageValueRange( - getDevices(Context), Desc.image_array_size)) + getDevices(Context), Desc.arraySize)) throw exception(make_error_code(errc::invalid), "For a 1D and 2D image array, the array_size must be a " "Value >= 1 and <= info::device::image_max_array_size."); - if ((nullptr == UserPtr) && (0 != Desc.image_row_pitch)) + if ((nullptr == UserPtr) && (0 != Desc.rowPitch)) throw exception(make_error_code(errc::invalid), "The row_pitch must be 0 if host_ptr is nullptr."); - if ((nullptr == UserPtr) && (0 != Desc.image_slice_pitch)) + if ((nullptr == UserPtr) && (0 != Desc.slicePitch)) throw exception(make_error_code(errc::invalid), "The slice_pitch must be 0 if host_ptr is nullptr."); - if (0 != Desc.num_mip_levels) + if (0 != Desc.numMipLevel) throw exception(make_error_code(errc::invalid), "The mip_levels must be 0."); - if (0 != Desc.num_samples) + if (0 != Desc.numSamples) throw exception(make_error_code(errc::invalid), "The num_samples must be 0."); - if (nullptr != Desc.buffer) - throw exception(make_error_code(errc::invalid), - "The buffer must be nullptr, because SYCL does not support " - "image creation from memory objects."); - return true; } -bool image_impl::checkImageFormat( - const sycl::detail::pi::PiMemImageFormat &Format, ContextImplPtr Context) { +bool image_impl::checkImageFormat(const ur_image_format_t &Format, + ContextImplPtr Context) { (void)Context; - if (checkAny(Format.image_channel_order, PI_IMAGE_CHANNEL_ORDER_INTENSITY, - PI_IMAGE_CHANNEL_ORDER_LUMINANCE) && - !checkAny( - Format.image_channel_data_type, PI_IMAGE_CHANNEL_TYPE_UNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT16, PI_IMAGE_CHANNEL_TYPE_SNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_SNORM_INT16, PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT, - PI_IMAGE_CHANNEL_TYPE_FLOAT)) + if (checkAny(Format.channelOrder, UR_IMAGE_CHANNEL_ORDER_INTENSITY, + UR_IMAGE_CHANNEL_ORDER_LUMINANCE) && + !checkAny(Format.channelType, UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, + UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, + UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, + UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, + UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT, UR_IMAGE_CHANNEL_TYPE_FLOAT)) throw exception( make_error_code(errc::invalid), - "CL_INTENSITY or CL_LUMINANCE format can only be used if channel data " - "type = CL_UNORM_INT8, CL_UNORM_INT16, CL_SNORM_INT8, CL_SNORM_INT16, " - "CL_HALF_FLOAT, or CL_FLOAT."); - - if (checkAny(Format.image_channel_data_type, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010) && - !checkAny(Format.image_channel_order, PI_IMAGE_CHANNEL_ORDER_RGB, - PI_IMAGE_CHANNEL_ORDER_RGBx)) - throw exception(make_error_code(errc::invalid), - "type = CL_UNORM_SHORT_565, CL_UNORM_SHORT_555 or " - "CL_UNORM_INT_101010. These channel types can only be used " - "with CL_RGB or CL_RGBx channel order."); - - if (checkAny(Format.image_channel_order, PI_IMAGE_CHANNEL_ORDER_ARGB, - PI_IMAGE_CHANNEL_ORDER_BGRA, PI_IMAGE_CHANNEL_ORDER_ABGR) && - !checkAny( - Format.image_channel_data_type, PI_IMAGE_CHANNEL_TYPE_UNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_SNORM_INT8, PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8)) + "CL_INTENSITY or CL_LUMINANCE format can only be used if channel " + "data type = CL_UNORM_INT8, CL_UNORM_INT16, CL_SNORM_INT8, " + "CL_SNORM_INT16, CL_HALF_FLOAT, or CL_FLOAT."); + + if (checkAny(Format.channelType, UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565, + UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555, + UR_IMAGE_CHANNEL_TYPE_INT_101010) && + !checkAny(Format.channelOrder, UR_IMAGE_CHANNEL_ORDER_RGB, + UR_IMAGE_CHANNEL_ORDER_RGBX)) + throw exception( + make_error_code(errc::invalid), + "type = CL_UNORM_SHORT_565, CL_UNORM_SHORT_555 or " + "CL_UNORM_INT_101010." + "These channel types can only be used with CL_RGB or CL_RGBx channel " + "order."); + + if (checkAny(Format.channelOrder, UR_IMAGE_CHANNEL_ORDER_ARGB, + UR_IMAGE_CHANNEL_ORDER_BGRA, UR_IMAGE_CHANNEL_ORDER_ABGR) && + !checkAny(Format.channelType, UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, + UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, + UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8, + UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8)) throw exception( make_error_code(errc::invalid), "CL_ARGB, CL_BGRA, CL_ABGR These formats can only be used if " diff --git a/sycl/source/detail/image_impl.hpp b/sycl/source/detail/image_impl.hpp index d526ce19281a4..008d86df2799e 100644 --- a/sycl/source/detail/image_impl.hpp +++ b/sycl/source/detail/image_impl.hpp @@ -45,17 +45,13 @@ uint8_t getImageNumberChannels(image_channel_order Order); // utility function: Returns the number of bytes per image element uint8_t getImageElementSize(uint8_t NumChannels, image_channel_type Type); -sycl::detail::pi::PiMemImageChannelOrder -convertChannelOrder(image_channel_order Order); +ur_image_channel_order_t convertChannelOrder(image_channel_order Order); -image_channel_order -convertChannelOrder(sycl::detail::pi::PiMemImageChannelOrder Order); +image_channel_order convertChannelOrder(ur_image_channel_order_t Order); -sycl::detail::pi::PiMemImageChannelType -convertChannelType(image_channel_type Type); +ur_image_channel_type_t convertChannelType(image_channel_type Type); -image_channel_type -convertChannelType(sycl::detail::pi::PiMemImageChannelType Type); +image_channel_type convertChannelType(ur_image_channel_type_t Type); class image_impl final : public SYCLMemObjT { using BaseT = SYCLMemObjT; @@ -227,7 +223,7 @@ class image_impl final : public SYCLMemObjT { std::unique_ptr Allocator, uint8_t Dimensions); - image_impl(pi_native_handle MemObject, const context &SyclContext, + image_impl(ur_native_handle_t MemObject, const context &SyclContext, event AvailableEvent, std::unique_ptr Allocator, uint8_t Dimensions, image_channel_order Order, image_channel_type Type, @@ -245,8 +241,7 @@ class image_impl final : public SYCLMemObjT { size_t size() const noexcept { return MRange.size(); } void *allocateMem(ContextImplPtr Context, bool InitFromUserData, - void *HostPtr, - sycl::detail::pi::PiEvent &OutEventToWait) override; + void *HostPtr, ur_event_handle_t &OutEventToWait) override; MemObjType getType() const override { return MemObjType::Image; } @@ -291,47 +286,47 @@ class image_impl final : public SYCLMemObjT { private: std::vector getDevices(const ContextImplPtr Context); - sycl::detail::pi::PiMemObjectType getImageType() { + ur_mem_type_t getImageType() { if (MDimensions == 1) - return (MIsArrayImage ? PI_MEM_TYPE_IMAGE1D_ARRAY : PI_MEM_TYPE_IMAGE1D); + return (MIsArrayImage ? UR_MEM_TYPE_IMAGE1D_ARRAY : UR_MEM_TYPE_IMAGE1D); if (MDimensions == 2) - return (MIsArrayImage ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE2D); - return PI_MEM_TYPE_IMAGE3D; + return (MIsArrayImage ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE2D); + return UR_MEM_TYPE_IMAGE3D; } - sycl::detail::pi::PiMemImageDesc getImageDesc(bool InitFromHostPtr) { - sycl::detail::pi::PiMemImageDesc Desc; - Desc.image_type = getImageType(); + ur_image_desc_t getImageDesc(bool InitFromHostPtr) { + ur_image_desc_t Desc = {}; + Desc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + Desc.type = getImageType(); // MRange<> is [width], [width,height], or [width,height,depth] (which // is different than MAccessRange, etc in bufffers) constexpr int XTermPos = 0, YTermPos = 1, ZTermPos = 2; - Desc.image_width = MRange[XTermPos]; - Desc.image_height = MDimensions > 1 ? MRange[YTermPos] : 1; - Desc.image_depth = MDimensions > 2 ? MRange[ZTermPos] : 1; + Desc.width = MRange[XTermPos]; + Desc.height = MDimensions > 1 ? MRange[YTermPos] : 1; + Desc.depth = MDimensions > 2 ? MRange[ZTermPos] : 1; // TODO handle cases with IMAGE1D_ARRAY and IMAGE2D_ARRAY - Desc.image_array_size = 0; + Desc.arraySize = 0; // Pitches must be 0 if host ptr is not provided. - Desc.image_row_pitch = InitFromHostPtr ? MRowPitch : 0; - Desc.image_slice_pitch = InitFromHostPtr ? MSlicePitch : 0; - Desc.num_mip_levels = 0; - Desc.num_samples = 0; - Desc.buffer = nullptr; + Desc.rowPitch = InitFromHostPtr ? MRowPitch : 0; + Desc.slicePitch = InitFromHostPtr ? MSlicePitch : 0; + Desc.numMipLevel = 0; + Desc.numSamples = 0; return Desc; } - bool checkImageDesc(const sycl::detail::pi::PiMemImageDesc &Desc, - ContextImplPtr Context, void *UserPtr); + bool checkImageDesc(const ur_image_desc_t &Desc, ContextImplPtr Context, + void *UserPtr); - sycl::detail::pi::PiMemImageFormat getImageFormat() { - sycl::detail::pi::PiMemImageFormat Format; - Format.image_channel_order = detail::convertChannelOrder(MOrder); - Format.image_channel_data_type = detail::convertChannelType(MType); + ur_image_format_t getImageFormat() { + ur_image_format_t Format = {}; + Format.channelOrder = detail::convertChannelOrder(MOrder); + Format.channelType = detail::convertChannelType(MType); return Format; } - bool checkImageFormat(const sycl::detail::pi::PiMemImageFormat &Format, + bool checkImageFormat(const ur_image_format_t &Format, ContextImplPtr Context); uint8_t MDimensions = 0; diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index a101c203e34c0..909fc751772dc 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -32,14 +32,14 @@ jit_compiler::jit_compiler() { auto checkJITLibrary = [this]() -> bool { static const std::string JITLibraryName = "libsycl-fusion.so"; - void *LibraryPtr = sycl::detail::pi::loadOsLibrary(JITLibraryName); + void *LibraryPtr = sycl::detail::ur::loadOsLibrary(JITLibraryName); if (LibraryPtr == nullptr) { printPerformanceWarning("Could not find JIT library " + JITLibraryName); return false; } this->AddToConfigHandle = reinterpret_cast( - sycl::detail::pi::getOsLibraryFuncAddress(LibraryPtr, + sycl::detail::ur::getOsLibraryFuncAddress(LibraryPtr, "addToJITConfiguration")); if (!this->AddToConfigHandle) { printPerformanceWarning( @@ -48,7 +48,7 @@ jit_compiler::jit_compiler() { } this->ResetConfigHandle = reinterpret_cast( - sycl::detail::pi::getOsLibraryFuncAddress(LibraryPtr, + sycl::detail::ur::getOsLibraryFuncAddress(LibraryPtr, "resetJITConfiguration")); if (!this->ResetConfigHandle) { printPerformanceWarning( @@ -57,7 +57,7 @@ jit_compiler::jit_compiler() { } this->FuseKernelsHandle = reinterpret_cast( - sycl::detail::pi::getOsLibraryFuncAddress(LibraryPtr, "fuseKernels")); + sycl::detail::ur::getOsLibraryFuncAddress(LibraryPtr, "fuseKernels")); if (!this->FuseKernelsHandle) { printPerformanceWarning( "Cannot resolve JIT library function entry point"); @@ -66,7 +66,7 @@ jit_compiler::jit_compiler() { this->MaterializeSpecConstHandle = reinterpret_cast( - sycl::detail::pi::getOsLibraryFuncAddress( + sycl::detail::ur::getOsLibraryFuncAddress( LibraryPtr, "materializeSpecConstants")); if (!this->MaterializeSpecConstHandle) { printPerformanceWarning( @@ -80,7 +80,7 @@ jit_compiler::jit_compiler() { } static ::jit_compiler::BinaryFormat -translateBinaryImageFormat(pi::PiDeviceBinaryType Type) { +translateBinaryImageFormat(ur::DeviceBinaryType Type) { switch (Type) { case SYCL_DEVICE_BINARY_TYPE_SPIRV: return ::jit_compiler::BinaryFormat::SPIRV; @@ -625,7 +625,7 @@ updatePromotedArgs(const ::jit_compiler::SYCLKernelInfo &FusedKernelInfo, } } -sycl::detail::pi::PiKernel jit_compiler::materializeSpecConstants( +ur_kernel_handle_t jit_compiler::materializeSpecConstants( QueueImplPtr Queue, const RTDeviceBinaryImage *BinImage, const std::string &KernelName, const std::vector &SpecConstBlob) { @@ -742,8 +742,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue, std::vector &Requirements = CGData.MRequirements; std::vector &Events = CGData.MEvents; std::vector<::jit_compiler::NDRange> Ranges; - sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig = - PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT; + ur_kernel_cache_config_t KernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; unsigned KernelIndex = 0; ParamList FusedParams; PromotionMap PromotedAccs; @@ -906,7 +905,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue, if (KernelIndex == 0) { KernelCacheConfig = KernelCG->MKernelCacheConfig; } else if (KernelCG->MKernelCacheConfig != KernelCacheConfig) { - KernelCacheConfig = PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT; + KernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; } ++KernelIndex; @@ -918,8 +917,8 @@ jit_compiler::fuseKernels(QueueImplPtr Queue, std::vector<::jit_compiler::JITConstant> JITConstants; std::vector<::jit_compiler::ParameterIdentity> ParamIdentities; ParamList NonIdenticalParameters; - for (auto PI = FusedParams.begin(); PI != FusedParams.end();) { - PI = preProcessArguments(ArgsStorage, PI, PromotedAccs, InternalizeParams, + for (auto UR = FusedParams.begin(); UR != FusedParams.end();) { + UR = preProcessArguments(ArgsStorage, UR, PromotedAccs, InternalizeParams, JITConstants, NonIdenticalParameters, ParamIdentities); } @@ -1070,7 +1069,7 @@ sycl_device_binaries jit_compiler::createPIDeviceBinary( if (Format == ::jit_compiler::BinaryFormat::PTX || Format == ::jit_compiler::BinaryFormat::AMDGCN) { // Add a program metadata property with the reqd_work_group_size attribute. - // See CUDA PI (pi_cuda.cpp) _pi_program::set_metadata for reference. + // See CUDA UR (ur_cuda.cpp) _ur_program::set_metadata for reference. auto ReqdWGS = std::find_if( FusedKernelInfo.Attributes.begin(), FusedKernelInfo.Attributes.end(), [](const ::jit_compiler::SYCLKernelAttribute &Attr) { @@ -1146,7 +1145,7 @@ std::vector jit_compiler::encodeReqdWorkGroupSize( std::vector Encoded(NumBytes, 0u); uint8_t *Ptr = Encoded.data(); // Skip 64-bit wide size argument with value 0 at the start of the data. - // See CUDA PI (pi_cuda.cpp) _pi_program::set_metadata for reference. + // See CUDA UR (ur_cuda.cpp) _ur_program::set_metadata for reference. Ptr += sizeof(uint64_t); for (const auto &Val : Attr.Values) { auto UVal = static_cast(Val); diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index 764fdabae0245..205f4f3521e2a 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -37,7 +37,7 @@ class jit_compiler { std::unique_ptr fuseKernels(QueueImplPtr Queue, std::vector &InputKernels, const property_list &); - sycl::detail::pi::PiKernel + ur_kernel_handle_t materializeSpecConstants(QueueImplPtr Queue, const RTDeviceBinaryImage *BinImage, const std::string &KernelName, @@ -71,7 +71,7 @@ class jit_compiler { // Indicate availability of the JIT compiler bool Available; - // Manages the lifetime of the PI structs for device binaries. + // Manages the lifetime of the UR structs for device binaries. std::vector JITDeviceBinaries; #if SYCL_EXT_CODEPLAY_KERNEL_FUSION diff --git a/sycl/source/detail/jit_device_binaries.cpp b/sycl/source/detail/jit_device_binaries.cpp index 71803013cfcef..b5f3b67c08ecd 100644 --- a/sycl/source/detail/jit_device_binaries.cpp +++ b/sycl/source/detail/jit_device_binaries.cpp @@ -1,4 +1,4 @@ -//==- jit_device_binaries.cpp - Runtime construction of PI device binaries -==// +//==- jit_device_binaries.cpp - Runtime construction of UR device binaries -==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -53,10 +53,10 @@ PropertySetContainer::PropertySetContainer(const std::string &Name) void PropertySetContainer::addProperty(PropertyContainer &&Prop) { // Adding to the vectors might trigger reallocation, which would invalidate - // the pointers used for PI structs if a PI struct has already been created + // the pointers used for UR structs if a UR struct has already been created // via getPIPropertySet(). Forbid calls to this method after the first PI // struct has been created. - assert(Fused && "Adding to container would invalidate existing PI structs"); + assert(Fused && "Adding to container would invalidate existing UR structs"); PIProperties.push_back(Prop.getPIProperty()); Properties.push_back(std::move(Prop)); } @@ -71,20 +71,20 @@ PropertySetContainer::getPIPropertySet() { void DeviceBinaryContainer::addOffloadEntry(OffloadEntryContainer &&Cont) { // Adding to the vectors might trigger reallocation, which would invalidate - // the pointers used for PI structs if a PI struct has already been created + // the pointers used for UR structs if a UR struct has already been created // via getPIDeviceBinary(). Forbid calls to this method after the first PI // struct has been created. - assert(Fused && "Adding to container would invalidate existing PI structs"); + assert(Fused && "Adding to container would invalidate existing UR structs"); PIOffloadEntries.push_back(Cont.getPIOffloadEntry()); OffloadEntries.push_back(std::move(Cont)); } void DeviceBinaryContainer::addProperty(PropertySetContainer &&Cont) { // Adding to the vectors might trigger reallocation, which would invalidate - // the pointers used for PI structs if a PI struct has already been created + // the pointers used for UR structs if a UR struct has already been created // via getPIDeviceBinary(). Forbid calls to this method after the first PI // struct has been created. - assert(Fused && "Adding to container would invalidate existing PI structs"); + assert(Fused && "Adding to container would invalidate existing UR structs"); PIPropertySets.push_back(Cont.getPIPropertySet()); PropertySets.push_back(std::move(Cont)); } @@ -119,10 +119,10 @@ void DeviceBinariesCollection::addDeviceBinary(DeviceBinaryContainer &&Cont, const char *TargetSpec, sycl_device_binary_type Format) { // Adding to the vectors might trigger reallocation, which would invalidate - // the pointers used for PI structs if a PI struct has already been created + // the pointers used for UR structs if a UR struct has already been created // via getPIDeviceStruct(). Forbid calls to this method after the first PI // struct has been created. - assert(Fused && "Adding to container would invalidate existing PI structs"); + assert(Fused && "Adding to container would invalidate existing UR structs"); PIBinaries.push_back( Cont.getPIDeviceBinary(BinaryStart, BinarySize, TargetSpec, Format)); Binaries.push_back(std::move(Cont)); @@ -134,7 +134,7 @@ sycl_device_binaries DeviceBinariesCollection::getPIDeviceStruct() { PIStruct->Version = SYCL_DEVICE_BINARIES_VERSION; PIStruct->NumDeviceBinaries = PIBinaries.size(); PIStruct->DeviceBinaries = PIBinaries.data(); - // According to documentation in pi.h, the HostEntries are not used and + // According to documentation in ur.hpp, the HostEntries are not used and // can therefore be null. PIStruct->HostEntriesBegin = nullptr; PIStruct->HostEntriesEnd = nullptr; diff --git a/sycl/source/detail/jit_device_binaries.hpp b/sycl/source/detail/jit_device_binaries.hpp index ee29a2ae61cb4..49445e07920e7 100644 --- a/sycl/source/detail/jit_device_binaries.hpp +++ b/sycl/source/detail/jit_device_binaries.hpp @@ -1,4 +1,4 @@ -//==- jit_device_binaries.hpp - Runtime construction of PI device binaries -==// +//==- jit_device_binaries.hpp - Runtime construction of UR device binaries -==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include #include @@ -20,7 +20,7 @@ namespace detail { /// Representation of _sycl_offload_entry_struct for creation of JIT device /// binaries at runtime. Owns the necessary data and provides raw pointers for -/// the PI struct. +/// the UR struct. class OffloadEntryContainer { public: OffloadEntryContainer(const std::string &Name, void *Addr, size_t Size, @@ -46,13 +46,13 @@ class OffloadEntryContainer { /// Representation of _sycl_device_binary_property_struct for creation of JIT /// device binaries at runtime. -/// Owns the necessary data and provides raw pointers for the PI struct. +/// Owns the necessary data and provides raw pointers for the UR struct. class PropertyContainer { public: PropertyContainer(const std::string &Name, void *Data, size_t Size, uint32_t Type); - // Set a PI_PROPERTY_TYPE_UINT32 property + // Set a UR_PROPERTY_TYPE_UINT32 property PropertyContainer(const std::string &Name, uint32_t Data); PropertyContainer(PropertyContainer &&) = default; @@ -73,7 +73,7 @@ class PropertyContainer { /// Representation of _sycl_device_binary_property_set_struct for creation of /// JIT device binaries at runtime. Owns the necessary data and provides raw -/// pointers for the PI struct. +/// pointers for the UR struct. class PropertySetContainer { public: PropertySetContainer(const std::string &Name); @@ -81,7 +81,7 @@ class PropertySetContainer { PropertySetContainer(PropertySetContainer &&) = default; PropertySetContainer &operator=(PropertySetContainer &&) = default; ~PropertySetContainer() = default; - // Copying of the container is not allowed, as it would invalidate PI structs. + // Copying of the container is not allowed, as it would invalidate UR structs. PropertySetContainer(const PropertySetContainer &) = delete; PropertySetContainer &operator=(const PropertySetContainer &) = delete; @@ -98,14 +98,14 @@ class PropertySetContainer { /// Representation of sycl_device_binary_struct for creation of JIT device /// binaries at runtime. -/// Owns the necessary data and provides raw pointers for the PI struct. +/// Owns the necessary data and provides raw pointers for the UR struct. class DeviceBinaryContainer { public: DeviceBinaryContainer() = default; DeviceBinaryContainer(DeviceBinaryContainer &&) = default; DeviceBinaryContainer &operator=(DeviceBinaryContainer &&) = default; ~DeviceBinaryContainer() = default; - // Copying of the container is not allowed, as it would invalidate PI structs. + // Copying of the container is not allowed, as it would invalidate UR structs. DeviceBinaryContainer(const DeviceBinaryContainer &) = delete; DeviceBinaryContainer &operator=(const DeviceBinaryContainer &) = delete; @@ -128,7 +128,7 @@ class DeviceBinaryContainer { /// Representation of sycl_device_binaries_struct for creation of JIT device /// binaries at runtime. -/// Owns the necessary data and provides raw pointers for the PI struct. +/// Owns the necessary data and provides raw pointers for the UR struct. class DeviceBinariesCollection { public: diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index c0e54c4c92754..480bb1934e00d 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -409,10 +408,10 @@ class kernel_bundle_impl { ContextImplPtr ContextImpl = getSyclObjImpl(MContext); const PluginPtr &Plugin = ContextImpl->getPlugin(); - std::vector DeviceVec; + std::vector DeviceVec; DeviceVec.reserve(Devices.size()); for (const auto &SyclDev : Devices) { - pi::PiDevice Dev = getSyclObjImpl(SyclDev)->getHandleRef(); + ur_device_handle_t Dev = getSyclObjImpl(SyclDev)->getHandleRef(); DeviceVec.push_back(Dev); } @@ -423,11 +422,11 @@ class kernel_bundle_impl { const auto &SourceStr = std::get(this->Source); std::vector IPVersionVec(Devices.size()); std::transform(DeviceVec.begin(), DeviceVec.end(), IPVersionVec.begin(), - [&](pi::PiDevice d) { + [&](ur_device_handle_t d) { uint32_t ipVersion = 0; - Plugin->call( - d, PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION, - sizeof(uint32_t), &ipVersion, nullptr); + Plugin->call(urDeviceGetInfo, d, + UR_DEVICE_INFO_IP_VERSION, + sizeof(uint32_t), &ipVersion, nullptr); return ipVersion; }); return syclex::detail::OpenCLC_to_SPIRV(SourceStr, IPVersionVec, @@ -452,32 +451,35 @@ class kernel_bundle_impl { "OpenCL C and SPIR-V are the only supported languages at this time"); }(); - sycl::detail::pi::PiProgram PiProgram = nullptr; - Plugin->call( - ContextImpl->getHandleRef(), spirv.data(), spirv.size(), &PiProgram); - // program created by piProgramCreate is implicitly retained. + ur_program_handle_t UrProgram = nullptr; + Plugin->call(urProgramCreateWithIL, ContextImpl->getHandleRef(), + spirv.data(), spirv.size(), nullptr, &UrProgram); + // program created by urProgramCreateWithIL is implicitly retained. std::string XsFlags = extractXsFlags(BuildOptions); - Plugin->call( - PiProgram, DeviceVec.size(), DeviceVec.data(), XsFlags.c_str(), nullptr, - nullptr); + auto Res = + Plugin->call_nocheck(urProgramBuildExp, UrProgram, DeviceVec.size(), + DeviceVec.data(), XsFlags.c_str()); + if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Res = Plugin->call_nocheck(urProgramBuild, ContextImpl->getHandleRef(), + UrProgram, XsFlags.c_str()); + } + Plugin->checkUrResult(Res); // Get the number of kernels in the program. size_t NumKernels; - Plugin->call( - PiProgram, PI_PROGRAM_INFO_NUM_KERNELS, sizeof(size_t), &NumKernels, - nullptr); + Plugin->call(urProgramGetInfo, UrProgram, UR_PROGRAM_INFO_NUM_KERNELS, + sizeof(size_t), &NumKernels, nullptr); // Get the kernel names. size_t KernelNamesSize; - Plugin->call( - PiProgram, PI_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); + Plugin->call(urProgramGetInfo, UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, + nullptr, &KernelNamesSize); // semi-colon delimited list of kernel names. std::string KernelNamesStr(KernelNamesSize, ' '); - Plugin->call( - PiProgram, PI_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), - &KernelNamesStr[0], nullptr); + Plugin->call(urProgramGetInfo, UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, + KernelNamesStr.size(), &KernelNamesStr[0], nullptr); std::vector KernelNames = detail::split_string(KernelNamesStr, ';'); @@ -485,7 +487,7 @@ class kernel_bundle_impl { auto KernelIDs = std::make_shared>(); auto DevImgImpl = std::make_shared( nullptr, MContext, MDevices, bundle_state::executable, KernelIDs, - PiProgram); + UrProgram); device_image_plain DevImg{DevImgImpl}; return std::make_shared(MContext, MDevices, DevImg, KernelNames, Language); @@ -525,16 +527,15 @@ class kernel_bundle_impl { assert(MDeviceImages.size() > 0); const std::shared_ptr &DeviceImageImpl = detail::getSyclObjImpl(MDeviceImages[0]); - sycl::detail::pi::PiProgram PiProgram = DeviceImageImpl->get_program_ref(); + ur_program_handle_t UrProgram = DeviceImageImpl->get_ur_program_ref(); ContextImplPtr ContextImpl = getSyclObjImpl(MContext); const PluginPtr &Plugin = ContextImpl->getPlugin(); - sycl::detail::pi::PiKernel PiKernel = nullptr; - Plugin->call(PiProgram, AdjustedName.c_str(), - &PiKernel); - // Kernel created by piKernelCreate is implicitly retained. + ur_kernel_handle_t UrKernel = nullptr; + Plugin->call(urKernelCreate, UrProgram, AdjustedName.c_str(), &UrKernel); + // Kernel created by urKernelCreate is implicitly retained. std::shared_ptr KernelImpl = std::make_shared( - PiKernel, detail::getSyclObjImpl(MContext), Self); + UrKernel, detail::getSyclObjImpl(MContext), Self); return detail::createSyclObjFromImpl(KernelImpl); } @@ -627,11 +628,11 @@ class kernel_bundle_impl { auto [Kernel, CacheMutex, ArgMask] = detail::ProgramManager::getInstance().getOrCreateKernel( MContext, KernelID.get_name(), /*PropList=*/{}, - SelectedImage->get_program_ref()); + SelectedImage->get_ur_program_ref()); std::shared_ptr KernelImpl = std::make_shared( Kernel, detail::getSyclObjImpl(MContext), SelectedImage, Self, ArgMask, - SelectedImage->get_program_ref(), CacheMutex); + SelectedImage->get_ur_program_ref(), CacheMutex); return detail::createSyclObjFromImpl(KernelImpl); } diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp index 5ff14e926755f..3f796f5c647ab 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include // getOsLibraryFuncAddress +#include // getOsLibraryFuncAddress #include // make_error_code #include "kernel_compiler_opencl.hpp" @@ -26,7 +26,7 @@ namespace detail { // ensures the OclocLibrary has the right version, etc. void checkOclocLibrary(void *OclocLibrary) { void *OclocVersionHandle = - sycl::detail::pi::getOsLibraryFuncAddress(OclocLibrary, "oclocVersion"); + sycl::detail::ur::getOsLibraryFuncAddress(OclocLibrary, "oclocVersion"); // The initial versions of ocloc library did not have the oclocVersion() // function. Those versions had the same API as the first version of ocloc // library having that oclocVersion() function. @@ -66,7 +66,7 @@ void *loadOclocLibrary() { #endif void *tempPtr = OclocLibrary; if (tempPtr == nullptr) { - tempPtr = sycl::detail::pi::loadOsLibrary(OclocLibraryName); + tempPtr = sycl::detail::ur::loadOsLibrary(OclocLibraryName); if (tempPtr == nullptr) throw sycl::exception(make_error_code(errc::build), @@ -103,11 +103,11 @@ void SetupLibrary(voidPtr &oclocInvokeHandle, voidPtr &oclocFreeOutputHandle, loadOclocLibrary(); oclocInvokeHandle = - sycl::detail::pi::getOsLibraryFuncAddress(OclocLibrary, "oclocInvoke"); + sycl::detail::ur::getOsLibraryFuncAddress(OclocLibrary, "oclocInvoke"); if (!oclocInvokeHandle) throw sycl::exception(the_errc, "Cannot load oclocInvoke() function"); - oclocFreeOutputHandle = sycl::detail::pi::getOsLibraryFuncAddress( + oclocFreeOutputHandle = sycl::detail::ur::getOsLibraryFuncAddress( OclocLibrary, "oclocFreeOutput"); if (!oclocFreeOutputHandle) throw sycl::exception(the_errc, "Cannot load oclocFreeOutput() function"); diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 2eaea5958ca2d..c458e6b3d47f9 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -16,39 +16,40 @@ namespace sycl { inline namespace _V1 { namespace detail { -kernel_impl::kernel_impl(sycl::detail::pi::PiKernel Kernel, - ContextImplPtr ContextImpl, +kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, KernelBundleImplPtr KernelBundleImpl, const KernelArgMask *ArgMask) - : MKernel(Kernel), MContext(ContextImpl), - MProgram(ProgramManager::getInstance().getPiProgramFromPiKernel( - Kernel, ContextImpl)), + : MKernel(Kernel), MContext(Context), + MProgram(ProgramManager::getInstance().getUrProgramFromUrKernel(Kernel, + Context)), MCreatedFromSource(true), MKernelBundleImpl(std::move(KernelBundleImpl)), MIsInterop(true), MKernelArgMaskPtr{ArgMask} { - sycl::detail::pi::PiContext Context = nullptr; + ur_context_handle_t UrContext = nullptr; // Using the plugin from the passed ContextImpl - getPlugin()->call( - MKernel, PI_KERNEL_INFO_CONTEXT, sizeof(Context), &Context, nullptr); - if (ContextImpl->getHandleRef() != Context) + getPlugin()->call(urKernelGetInfo, MKernel, UR_KERNEL_INFO_CONTEXT, + sizeof(UrContext), &UrContext, nullptr); + if (Context->getHandleRef() != UrContext) throw sycl::exception( make_error_code(errc::invalid), "Input context must be the same as the context of cl_kernel"); // Enable USM indirect access for interoperability kernels. - // Some PI Plugins (like OpenCL) require this call to enable USM - // For others, PI will turn this into a NOP. - if (ContextImpl->getPlatformImpl()->supports_usm()) - getPlugin()->call( - MKernel, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE); + // Some UR Plugins (like OpenCL) require this call to enable USM + // For others, UR will turn this into a NOP. + if (Context->getPlatformImpl()->supports_usm()) { + bool EnableAccess = true; + getPlugin()->call(urKernelSetExecInfo, MKernel, + UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, + sizeof(ur_bool_t), nullptr, &EnableAccess); + } } -kernel_impl::kernel_impl(sycl::detail::pi::PiKernel Kernel, - ContextImplPtr ContextImpl, +kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, DeviceImageImplPtr DeviceImageImpl, KernelBundleImplPtr KernelBundleImpl, - const KernelArgMask *ArgMask, PiProgram ProgramPI, - std::mutex *CacheMutex) - : MKernel(Kernel), MContext(std::move(ContextImpl)), MProgram(ProgramPI), + const KernelArgMask *ArgMask, + ur_program_handle_t Program, std::mutex *CacheMutex) + : MKernel(Kernel), MContext(std::move(ContextImpl)), MProgram(Program), MCreatedFromSource(false), MDeviceImageImpl(std::move(DeviceImageImpl)), MKernelBundleImpl(std::move(KernelBundleImpl)), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} { @@ -58,7 +59,7 @@ kernel_impl::kernel_impl(sycl::detail::pi::PiKernel Kernel, kernel_impl::~kernel_impl() { try { // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call(MKernel); + getPlugin()->call(urKernelRelease, MKernel); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~kernel_impl", e); } diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 13627d62ba88e..e69ddaa2e3f48 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -13,8 +13,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -30,36 +29,35 @@ class kernel_bundle_impl; using ContextImplPtr = std::shared_ptr; using KernelBundleImplPtr = std::shared_ptr; -using sycl::detail::pi::PiProgram; class kernel_impl { public: - /// Constructs a SYCL kernel instance from a PiKernel + /// Constructs a SYCL kernel instance from a UrKernel /// /// This constructor is used for plug-in interoperability. It always marks /// kernel as being created from source. /// - /// \param Kernel is a valid PiKernel instance + /// \param Kernel is a valid UrKernel instance /// \param Context is a valid SYCL context /// \param KernelBundleImpl is a valid instance of kernel_bundle_impl - kernel_impl(sycl::detail::pi::PiKernel Kernel, ContextImplPtr Context, + kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, KernelBundleImplPtr KernelBundleImpl, const KernelArgMask *ArgMask = nullptr); /// Constructs a SYCL kernel_impl instance from a SYCL device_image, - /// kernel_bundle and / PiKernel. + /// kernel_bundle and / UrKernel. /// - /// \param Kernel is a valid PiKernel instance + /// \param Kernel is a valid UrKernel instance /// \param ContextImpl is a valid SYCL context /// \param KernelBundleImpl is a valid instance of kernel_bundle_impl - kernel_impl(sycl::detail::pi::PiKernel Kernel, ContextImplPtr ContextImpl, + kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, DeviceImageImplPtr DeviceImageImpl, KernelBundleImplPtr KernelBundleImpl, - const KernelArgMask *ArgMask, PiProgram ProgramPI, + const KernelArgMask *ArgMask, ur_program_handle_t Program, std::mutex *CacheMutex); // This section means the object is non-movable and non-copyable // There is no need of move and copy constructors in kernel_impl. - // If they need to be added, piKernelRetain method for MKernel + // If they need to be added, urKernelRetain method for MKernel // should be present. kernel_impl(const kernel_impl &) = delete; kernel_impl(kernel_impl &&) = delete; @@ -76,8 +74,10 @@ class kernel_impl { /// /// \return a valid cl_kernel instance cl_kernel get() const { - getPlugin()->call(MKernel); - return pi::cast(MKernel); + getPlugin()->call(urKernelRetain, MKernel); + ur_native_handle_t nativeHandle = 0; + getPlugin()->call(urKernelGetNativeHandle, MKernel, &nativeHandle); + return ur::cast(nativeHandle); } const PluginPtr &getPlugin() const { return MContext->getPlugin(); } @@ -116,15 +116,11 @@ class kernel_impl { template typename Param::return_type ext_oneapi_get_info(const queue &q) const; - /// Get a reference to a raw kernel object. - /// - /// \return a reference to a valid PiKernel instance with raw kernel object. - sycl::detail::pi::PiKernel &getHandleRef() { return MKernel; } /// Get a constant reference to a raw kernel object. /// - /// \return a constant reference to a valid PiKernel instance with raw + /// \return a constant reference to a valid UrKernel instance with raw /// kernel object. - const sycl::detail::pi::PiKernel &getHandleRef() const { return MKernel; } + const ur_kernel_handle_t &getHandleRef() const { return MKernel; } /// Check if kernel was created from a program that had been created from /// source. @@ -134,14 +130,14 @@ class kernel_impl { const DeviceImageImplPtr &getDeviceImage() const { return MDeviceImageImpl; } - pi_native_handle getNative() const { + ur_native_handle_t getNative() const { const PluginPtr &Plugin = MContext->getPlugin(); if (MContext->getBackend() == backend::opencl) - Plugin->call(MKernel); + Plugin->call(urKernelRetain, MKernel); - pi_native_handle NativeKernel = 0; - Plugin->call(MKernel, &NativeKernel); + ur_native_handle_t NativeKernel = 0; + Plugin->call(urKernelGetNativeHandle, MKernel, &NativeKernel); return NativeKernel; } @@ -150,7 +146,7 @@ class kernel_impl { bool isInterop() const { return MIsInterop; } - PiProgram getProgramRef() const { return MProgram; } + ur_program_handle_t getProgramRef() const { return MProgram; } ContextImplPtr getContextImplPtr() const { return MContext; } std::mutex &getNoncacheableEnqueueMutex() { @@ -161,9 +157,9 @@ class kernel_impl { std::mutex *getCacheMutex() const { return MCacheMutex; } private: - sycl::detail::pi::PiKernel MKernel; + ur_kernel_handle_t MKernel = nullptr; const ContextImplPtr MContext; - const PiProgram MProgram = nullptr; + const ur_program_handle_t MProgram = nullptr; bool MCreatedFromSource = true; const DeviceImageImplPtr MDeviceImageImpl; const KernelBundleImplPtr MKernelBundleImpl; @@ -230,9 +226,9 @@ inline typename ext::oneapi::experimental::info::kernel_queue_specific:: const auto &Handle = getHandleRef(); const auto MaxWorkGroupSize = Queue.get_device().get_info(); - pi_uint32 GroupCount = 0; - Plugin->call( - Handle, MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0, &GroupCount); + uint32_t GroupCount = 0; + Plugin->call(urKernelSuggestMaxCooperativeGroupCountExp, Handle, + MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0, &GroupCount); return GroupCount; } diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index 79c0f73c952de..397aa44f36e53 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include @@ -25,59 +25,68 @@ template typename std::enable_if< std::is_same::value, std::string>::type -get_kernel_info(sycl::detail::pi::PiKernel Kernel, const PluginPtr &Plugin) { +get_kernel_info(ur_kernel_handle_t Kernel, const PluginPtr &Plugin) { static_assert(detail::is_kernel_info_desc::value, "Invalid kernel information descriptor"); size_t ResultSize = 0; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call(Kernel, PiInfoCode::value, 0, - nullptr, &ResultSize); + Plugin->call(urKernelGetInfo, Kernel, UrInfoCode::value, 0, nullptr, + &ResultSize); if (ResultSize == 0) { return ""; } std::vector Result(ResultSize); // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call(Kernel, PiInfoCode::value, - ResultSize, Result.data(), nullptr); + Plugin->call(urKernelGetInfo, Kernel, UrInfoCode::value, ResultSize, + Result.data(), nullptr); return std::string(Result.data()); } template typename std::enable_if< std::is_same::value, uint32_t>::type -get_kernel_info(sycl::detail::pi::PiKernel Kernel, const PluginPtr &Plugin) { - uint32_t Result = 0; +get_kernel_info(ur_kernel_handle_t Kernel, const PluginPtr &Plugin) { + ur_result_t Result = UR_RESULT_SUCCESS; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call(Kernel, PiInfoCode::value, - sizeof(uint32_t), &Result, nullptr); + Plugin->call(urKernelGetInfo, Kernel, UrInfoCode::value, + sizeof(uint32_t), &Result, nullptr); return Result; } // Device-specific methods template typename std::enable_if::value>::type -get_kernel_device_specific_info_helper(sycl::detail::pi::PiKernel Kernel, - sycl::detail::pi::PiDevice Device, +get_kernel_device_specific_info_helper(ur_kernel_handle_t Kernel, + ur_device_handle_t Device, const PluginPtr &Plugin, void *Result, size_t Size) { - Plugin->call( - Kernel, Device, PiInfoCode::value, 0, nullptr, Size, Result, - nullptr); + Plugin->call(urKernelGetSubGroupInfo, Kernel, Device, + UrInfoCode::value, Size, Result, nullptr); +} + +template +typename std::enable_if::value>::type +get_kernel_device_specific_info_helper( + ur_kernel_handle_t Kernel, [[maybe_unused]] ur_device_handle_t Device, + const PluginPtr &Plugin, void *Result, size_t Size) { + Plugin->call(urKernelGetInfo, Kernel, UrInfoCode::value, Size, Result, + nullptr); } template -typename std::enable_if::value>::type -get_kernel_device_specific_info_helper(sycl::detail::pi::PiKernel Kernel, - sycl::detail::pi::PiDevice Device, +typename std::enable_if::value && + !IsKernelInfo::value>::type +get_kernel_device_specific_info_helper(ur_kernel_handle_t Kernel, + ur_device_handle_t Device, const PluginPtr &Plugin, void *Result, size_t Size) { - sycl::detail::pi::PiResult Error = - Plugin->call_nocheck( - Kernel, Device, PiInfoCode::value, Size, Result, nullptr); - if (Error != PI_SUCCESS) - kernel_get_group_info::handleErrorOrWarning(Error, PiInfoCode::value, + ur_result_t Error = + Plugin->call_nocheck(urKernelGetGroupInfo, Kernel, Device, + UrInfoCode::value, Size, Result, nullptr); + if (Error != UR_RESULT_SUCCESS) + kernel_get_group_info::handleErrorOrWarning(Error, UrInfoCode::value, Plugin); } @@ -85,8 +94,8 @@ template typename std::enable_if< !std::is_same>::value, typename Param::return_type>::type -get_kernel_device_specific_info(sycl::detail::pi::PiKernel Kernel, - sycl::detail::pi::PiDevice Device, +get_kernel_device_specific_info(ur_kernel_handle_t Kernel, + ur_device_handle_t Device, const PluginPtr &Plugin) { static_assert(is_kernel_device_specific_info_desc::value, "Unexpected kernel_device_specific information descriptor"); @@ -101,8 +110,8 @@ template typename std::enable_if< std::is_same>::value, sycl::range<3>>::type -get_kernel_device_specific_info(sycl::detail::pi::PiKernel Kernel, - sycl::detail::pi::PiDevice Device, +get_kernel_device_specific_info(ur_kernel_handle_t Kernel, + ur_device_handle_t Device, const PluginPtr &Plugin) { static_assert(is_kernel_device_specific_info_desc::value, "Unexpected kernel_device_specific information descriptor"); @@ -117,9 +126,10 @@ get_kernel_device_specific_info(sycl::detail::pi::PiKernel Kernel, // info::kernel_device_specific::max_sub_group_size taking an input paramter. // This should be removed when the deprecated info query is removed. template -uint32_t get_kernel_device_specific_info_with_input( - sycl::detail::pi::PiKernel Kernel, sycl::detail::pi::PiDevice Device, - sycl::range<3> In, const PluginPtr &Plugin) { +uint32_t get_kernel_device_specific_info_with_input(ur_kernel_handle_t Kernel, + ur_device_handle_t Device, + sycl::range<3>, + const PluginPtr &Plugin) { static_assert(is_kernel_device_specific_info_desc::value, "Unexpected kernel_device_specific information descriptor"); static_assert(std::is_same::value, @@ -127,16 +137,14 @@ uint32_t get_kernel_device_specific_info_with_input( static_assert(IsSubGroupInfo::value, "Unexpected kernel_device_specific information descriptor for " "query with input"); - size_t Input[3] = {In[0], In[1], In[2]}; + uint32_t Result = 0; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call( - Kernel, Device, PiInfoCode::value, sizeof(size_t) * 3, Input, - sizeof(uint32_t), &Result, nullptr); + Plugin->call(urKernelGetSubGroupInfo, Kernel, Device, + UrInfoCode::value, sizeof(uint32_t), &Result, nullptr); return Result; } - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index fdaf6d6bb5728..aeb847483b9ee 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -8,12 +8,13 @@ #pragma once +#include "sycl/exception.hpp" #include #include #include #include #include -#include +#include #include #include @@ -37,7 +38,7 @@ class KernelProgramCache { /// class instance. struct BuildError { std::string Msg; - pi_int32 Code; + int32_t Code; bool isFilledIn() const { return !Msg.empty(); } }; @@ -88,7 +89,7 @@ class KernelProgramCache { } }; - struct ProgramBuildResult : public BuildResult { + struct ProgramBuildResult : public BuildResult { PluginPtr Plugin; ProgramBuildResult(const PluginPtr &Plugin) : Plugin(Plugin) { Val = nullptr; @@ -100,8 +101,7 @@ class KernelProgramCache { } ~ProgramBuildResult() { if (Val) { - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck(Val); + ur_result_t Err = Plugin->call_nocheck(urProgramRelease, Val); __SYCL_CHECK_OCL_CODE_NO_EXC(Err); } } @@ -112,10 +112,9 @@ class KernelProgramCache { * when debugging environment variables are set and we can just ignore them * since all kernels will have their build options overridden with the same * string*/ - using ProgramCacheKeyT = std::pair, - sycl::detail::pi::PiDevice>; - using CommonProgramKeyT = - std::pair; + using ProgramCacheKeyT = + std::pair, ur_device_handle_t>; + using CommonProgramKeyT = std::pair; struct ProgramCache { ::boost::unordered_map Cache; @@ -127,7 +126,7 @@ class KernelProgramCache { using ContextPtr = context_impl *; using KernelArgMaskPairT = - std::pair; + std::pair; struct KernelBuildResult : public BuildResult { PluginPtr Plugin; KernelBuildResult(const PluginPtr &Plugin) : Plugin(Plugin) { @@ -135,8 +134,7 @@ class KernelProgramCache { } ~KernelBuildResult() { if (Val.first) { - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck(Val.first); + ur_result_t Err = Plugin->call_nocheck(urKernelRelease, Val.first); __SYCL_CHECK_OCL_CODE_NO_EXC(Err); } } @@ -146,14 +144,13 @@ class KernelProgramCache { using KernelByNameT = ::boost::unordered_map; using KernelCacheT = - ::boost::unordered_map; + ::boost::unordered_map; using KernelFastCacheKeyT = - std::tuple; + std::tuple; using KernelFastCacheValT = - std::tuple; + std::tuple; // This container is used as a fast path for retrieving cached kernels. // unordered_flat_map is used here to reduce lookup overhead. // The slow path is used only once for each newly created kernel, so the @@ -195,7 +192,7 @@ class KernelProgramCache { // // Returns whether or not an insertion took place. bool insertBuiltProgram(const ProgramCacheKeyT &CacheKey, - sycl::detail::pi::PiProgram Program) { + ur_program_handle_t Program) { auto LockedCache = acquireCachedPrograms(); auto &ProgCache = LockedCache.get(); auto [It, DidInsert] = ProgCache.Cache.try_emplace(CacheKey, nullptr); @@ -212,7 +209,7 @@ class KernelProgramCache { } std::pair - getOrInsertKernel(sycl::detail::pi::PiProgram Program, + getOrInsertKernel(ur_program_handle_t Program, const std::string &KernelName) { auto LockedCache = acquireKernelsPerProgramCache(); auto &Cache = LockedCache.get()[Program]; @@ -296,7 +293,7 @@ class KernelProgramCache { if (NewState == BuildState::BS_Failed || AttemptCounter + 1 == MaxAttempts) { if (BuildResult->Error.isFilledIn()) - throw detail::set_pi_error( + throw detail::set_ur_error( exception(make_error_code(Errc), BuildResult->Error.Msg), BuildResult->Error.Code); else @@ -318,10 +315,10 @@ class KernelProgramCache { return BuildResult; } catch (const exception &Ex) { BuildResult->Error.Msg = Ex.what(); - BuildResult->Error.Code = detail::get_pi_error(Ex); + BuildResult->Error.Code = detail::get_ur_error(Ex); if (Ex.code() == errc::memory_allocation || - BuildResult->Error.Code == PI_ERROR_OUT_OF_RESOURCES || - BuildResult->Error.Code == PI_ERROR_OUT_OF_HOST_MEMORY) { + BuildResult->Error.Code == UR_RESULT_ERROR_OUT_OF_RESOURCES || + BuildResult->Error.Code == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) { reset(); BuildResult->updateAndNotify(BuildState::BS_Initial); continue; diff --git a/sycl/source/detail/mem_alloc_helper.hpp b/sycl/source/detail/mem_alloc_helper.hpp index 517058d9dece4..69759709c0b47 100644 --- a/sycl/source/detail/mem_alloc_helper.hpp +++ b/sycl/source/detail/mem_alloc_helper.hpp @@ -8,26 +8,28 @@ #pragma once -#include +#include namespace sycl { inline namespace _V1 { namespace detail { -void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, - pi_mem_flags Flags, size_t Size, void *HostPtr, - pi_mem *RetMem, - const pi_mem_properties *Props = nullptr); -void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem); -void memBufferMapHelper(const PluginPtr &Plugin, pi_queue command_queue, - pi_mem buffer, pi_bool blocking_map, - pi_map_flags map_flags, size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event, - void **ret_map); -void memUnmapHelper(const PluginPtr &Plugin, pi_queue command_queue, - pi_mem memobj, void *mapped_ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); +void memBufferCreateHelper(const PluginPtr &Plugin, ur_context_handle_t Ctx, + ur_mem_flags_t Flags, size_t Size, + ur_mem_handle_t *RetMem, + const ur_buffer_properties_t *Props = nullptr); +void memReleaseHelper(const PluginPtr &Plugin, ur_mem_handle_t Mem); +void memBufferMapHelper(const PluginPtr &Plugin, + ur_queue_handle_t command_queue, ur_mem_handle_t buffer, + bool blocking_map, ur_map_flags_t map_flags, + size_t offset, size_t size, + uint32_t num_events_in_wait_list, + const ur_event_handle_t *event_wait_list, + ur_event_handle_t *event, void **ret_map); +void memUnmapHelper(const PluginPtr &Plugin, ur_queue_handle_t command_queue, + ur_mem_handle_t memobj, void *mapped_ptr, + uint32_t num_events_in_wait_list, + const ur_event_handle_t *event_wait_list, + ur_event_handle_t *event); } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index bd9f6f356f3b4..aa58c4068d914 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -6,20 +6,21 @@ // //===----------------------------------------------------------------------===// +#include "ur_api.h" #include #include #include #include #include -#include #include +#include #include +#include +#include #include #include -#include - #include #include #include @@ -122,22 +123,25 @@ static void waitForEvents(const std::vector &Events) { // devices associated with the same Backend. if (!Events.empty()) { const PluginPtr &Plugin = Events[0]->getPlugin(); - std::vector PiEvents(Events.size()); - std::transform(Events.begin(), Events.end(), PiEvents.begin(), + std::vector UrEvents(Events.size()); + std::transform(Events.begin(), Events.end(), UrEvents.begin(), [](const EventImplPtr &EventImpl) { return EventImpl->getHandleRef(); }); - Plugin->call(PiEvents.size(), &PiEvents[0]); + if (!UrEvents.empty() && UrEvents[0]) { + Plugin->call(urEventWait, UrEvents.size(), &UrEvents[0]); + } } } -void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, - pi_mem_flags Flags, size_t Size, void *HostPtr, - pi_mem *RetMem, const pi_mem_properties *Props) { +void memBufferCreateHelper(const PluginPtr &Plugin, ur_context_handle_t Ctx, + ur_mem_flags_t Flags, size_t Size, + ur_mem_handle_t *RetMem, + const ur_buffer_properties_t *Props) { #ifdef XPTI_ENABLE_INSTRUMENTATION uint64_t CorrID = 0; #endif - // We only want to instrument piMemBufferCreate + // We only want to instrument urMemBufferCreate { #ifdef XPTI_ENABLE_INSTRUMENTATION CorrID = @@ -145,66 +149,65 @@ void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, xpti::utils::finally _{[&] { // C-style cast is required for MSVC uintptr_t MemObjID = (uintptr_t)(*RetMem); - pi_native_handle Ptr = 0; + ur_native_handle_t Ptr = 0; // Always use call_nocheck here, because call may throw an exception, // and this lambda will be called from destructor, which in combination // rewards us with UB. // When doing buffer interop we don't know what device the memory should // be resident on, so pass nullptr for Device param. Buffer interop may // not be supported by all backends. - Plugin->call_nocheck( - *RetMem, /*Dev*/ nullptr, &Ptr); + Plugin->call_nocheck(urMemGetNativeHandle, *RetMem, /*Dev*/ nullptr, + &Ptr); emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */, CorrID); }}; #endif if (Size) - Plugin->call(Ctx, Flags, Size, HostPtr, - RetMem, Props); + Plugin->call(urMemBufferCreate, Ctx, Flags, Size, Props, RetMem); } } -void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem) { - // FIXME piMemRelease does not guarante memory release. It is only true if +void memReleaseHelper(const PluginPtr &Plugin, ur_mem_handle_t Mem) { + // FIXME urMemRelease does not guarante memory release. It is only true if // reference counter is 1. However, SYCL runtime currently only calls - // piMemRetain only for OpenCL interop + // urMemRetain only for OpenCL interop #ifdef XPTI_ENABLE_INSTRUMENTATION uint64_t CorrID = 0; // C-style cast is required for MSVC uintptr_t MemObjID = (uintptr_t)(Mem); uintptr_t Ptr = 0; - // Do not make unnecessary PI calls without instrumentation enabled + // Do not make unnecessary UR calls without instrumentation enabled if (xptiTraceEnabled()) { - pi_native_handle PtrHandle = 0; + ur_native_handle_t PtrHandle = 0; // When doing buffer interop we don't know what device the memory should be // resident on, so pass nullptr for Device param. Buffer interop may not be // supported by all backends. - Plugin->call(Mem, /*Dev*/ nullptr, - &PtrHandle); + Plugin->call_nocheck(urMemGetNativeHandle, Mem, /*Dev*/ nullptr, + &PtrHandle); Ptr = (uintptr_t)(PtrHandle); } #endif - // We only want to instrument piMemRelease + // We only want to instrument urMemRelease { #ifdef XPTI_ENABLE_INSTRUMENTATION CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr); xpti::utils::finally _{ [&] { emitMemReleaseEndTrace(MemObjID, Ptr, CorrID); }}; #endif - Plugin->call(Mem); + Plugin->call(urMemRelease, Mem); } } -void memBufferMapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Buffer, - pi_bool Blocking, pi_map_flags Flags, size_t Offset, - size_t Size, pi_uint32 NumEvents, - const pi_event *WaitList, pi_event *Event, - void **RetMap) { +void memBufferMapHelper(const PluginPtr &Plugin, ur_queue_handle_t Queue, + ur_mem_handle_t Buffer, bool Blocking, + ur_map_flags_t Flags, size_t Offset, size_t Size, + uint32_t NumEvents, const ur_event_handle_t *WaitList, + ur_event_handle_t *Event, void **RetMap) { #ifdef XPTI_ENABLE_INSTRUMENTATION uint64_t CorrID = 0; uintptr_t MemObjID = (uintptr_t)(Buffer); #endif - // We only want to instrument piEnqueueMemBufferMap + // We only want to instrument urEnqueueMemBufferMap #ifdef XPTI_ENABLE_INSTRUMENTATION CorrID = emitMemAllocBeginTrace(MemObjID, Size, 0 /* guard zone */); @@ -213,20 +216,20 @@ void memBufferMapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Buffer, 0 /* guard zone */, CorrID); }}; #endif - Plugin->call(Queue, Buffer, Blocking, Flags, - Offset, Size, NumEvents, - WaitList, Event, RetMap); + Plugin->call(urEnqueueMemBufferMap, Queue, Buffer, Blocking, Flags, Offset, + Size, NumEvents, WaitList, Event, RetMap); } -void memUnmapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Mem, - void *MappedPtr, pi_uint32 NumEvents, - const pi_event *WaitList, pi_event *Event) { +void memUnmapHelper(const PluginPtr &Plugin, ur_queue_handle_t Queue, + ur_mem_handle_t Mem, void *MappedPtr, uint32_t NumEvents, + const ur_event_handle_t *WaitList, + ur_event_handle_t *Event) { #ifdef XPTI_ENABLE_INSTRUMENTATION uint64_t CorrID = 0; uintptr_t MemObjID = (uintptr_t)(Mem); uintptr_t Ptr = (uintptr_t)(MappedPtr); #endif - // We only want to instrument piEnqueueMemUnmap + // We only want to instrument urEnqueueMemUnmap { #ifdef XPTI_ENABLE_INSTRUMENTATION CorrID = emitMemReleaseBeginTrace(MemObjID, Ptr); @@ -237,19 +240,19 @@ void memUnmapHelper(const PluginPtr &Plugin, pi_queue Queue, pi_mem Mem, // Always use call_nocheck here, because call may throw an exception, // and this lambda will be called from destructor, which in combination // rewards us with UB. - Plugin->call_nocheck(1, Event); + Plugin->call_nocheck(urEventWait, 1, Event); emitMemReleaseEndTrace(MemObjID, Ptr, CorrID); }}; #endif - Plugin->call(Queue, Mem, MappedPtr, NumEvents, - WaitList, Event); + Plugin->call(urEnqueueMemUnmap, Queue, Mem, MappedPtr, NumEvents, WaitList, + Event); } } void MemoryManager::release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { + ur_event_handle_t &OutEvent) { // There is no async API for memory releasing. Explicitly wait for all // dependency events and return empty event. waitForEvents(DepEvents); @@ -272,13 +275,13 @@ void MemoryManager::releaseMemObj(ContextImplPtr TargetContext, } const PluginPtr &Plugin = TargetContext->getPlugin(); - memReleaseHelper(Plugin, pi::cast(MemAllocation)); + memReleaseHelper(Plugin, ur::cast(MemAllocation)); } void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { + ur_event_handle_t &OutEvent) { // There is no async API for memory allocation. Explicitly wait for all // dependency events and return empty event. waitForEvents(DepEvents); @@ -304,44 +307,44 @@ void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, void *MemoryManager::allocateInteropMemObject( ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, - const sycl::property_list &, sycl::detail::pi::PiEvent &OutEventToWait) { + const sycl::property_list &, ur_event_handle_t &OutEventToWait) { (void)TargetContext; (void)InteropContext; // If memory object is created with interop c'tor return cl_mem as is. assert(TargetContext == InteropContext && "Expected matching contexts"); + OutEventToWait = InteropEvent->getHandleRef(); // Retain the event since it will be released during alloca command // destruction if (nullptr != OutEventToWait) { const PluginPtr &Plugin = InteropEvent->getPlugin(); - Plugin->call(OutEventToWait); + Plugin->call(urEventRetain, OutEventToWait); } return UserPtr; } -static sycl::detail::pi::PiMemFlags -getMemObjCreationFlags(void *UserPtr, bool HostPtrReadOnly) { +static ur_mem_flags_t getMemObjCreationFlags(void *UserPtr, + bool HostPtrReadOnly) { // Create read_write mem object to handle arbitrary uses. - sycl::detail::pi::PiMemFlags Result = - HostPtrReadOnly ? PI_MEM_ACCESS_READ_ONLY : PI_MEM_FLAGS_ACCESS_RW; + ur_mem_flags_t Result = + HostPtrReadOnly ? UR_MEM_FLAG_READ_ONLY : UR_MEM_FLAG_READ_WRITE; if (UserPtr) - Result |= PI_MEM_FLAGS_HOST_PTR_USE; + Result |= UR_MEM_FLAG_USE_HOST_POINTER; return Result; } -void *MemoryManager::allocateImageObject( - ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, - const sycl::detail::pi::PiMemImageDesc &Desc, - const sycl::detail::pi::PiMemImageFormat &Format, - const sycl::property_list &) { - sycl::detail::pi::PiMemFlags CreationFlags = +void *MemoryManager::allocateImageObject(ContextImplPtr TargetContext, + void *UserPtr, bool HostPtrReadOnly, + const ur_image_desc_t &Desc, + const ur_image_format_t &Format, + const sycl::property_list &) { + ur_mem_flags_t CreationFlags = getMemObjCreationFlags(UserPtr, HostPtrReadOnly); - sycl::detail::pi::PiMem NewMem; + ur_mem_handle_t NewMem = nullptr; const PluginPtr &Plugin = TargetContext->getPlugin(); - Plugin->call(TargetContext->getHandleRef(), - CreationFlags, &Format, &Desc, - UserPtr, &NewMem); + Plugin->call(urMemImageCreate, TargetContext->getHandleRef(), CreationFlags, + &Format, &Desc, UserPtr, &NewMem); return NewMem; } @@ -349,53 +352,50 @@ void * MemoryManager::allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const size_t Size, const sycl::property_list &PropsList) { - sycl::detail::pi::PiMemFlags CreationFlags = + ur_mem_flags_t CreationFlags = getMemObjCreationFlags(UserPtr, HostPtrReadOnly); if (PropsList.has_property< sycl::ext::oneapi::property::buffer::use_pinned_host_memory>()) - CreationFlags |= PI_MEM_FLAGS_HOST_PTR_ALLOC; + CreationFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; - sycl::detail::pi::PiMem NewMem = nullptr; + ur_mem_handle_t NewMem = nullptr; const PluginPtr &Plugin = TargetContext->getPlugin(); - std::vector AllocProps; + ur_buffer_properties_t AllocProps = {UR_STRUCTURE_TYPE_BUFFER_PROPERTIES, + nullptr, UserPtr}; + void **Next = &AllocProps.pNext; + ur_buffer_alloc_location_properties_t LocationProperties = { + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES, nullptr, 0}; if (PropsList.has_property() && TargetContext->isBufferLocationSupported()) { - auto Location = + LocationProperties.location = PropsList.get_property() .get_buffer_location(); - AllocProps.reserve(AllocProps.size() + 2); - AllocProps.push_back(PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION); - AllocProps.push_back(Location); + *Next = &LocationProperties; + Next = &LocationProperties.pNext; } + ur_buffer_channel_properties_t ChannelProperties = { + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES, nullptr, 0}; if (PropsList.has_property()) { - auto Channel = + ChannelProperties.channel = PropsList.get_property().get_channel(); - AllocProps.reserve(AllocProps.size() + 2); - AllocProps.push_back(PI_MEM_PROPERTIES_CHANNEL); - AllocProps.push_back(Channel); - } - - pi_mem_properties *AllocPropsPtr = nullptr; - if (!AllocProps.empty()) { - // If there are allocation properties, push an end to the list and update - // the properties pointer. - AllocProps.push_back(0); - AllocPropsPtr = AllocProps.data(); + *Next = &ChannelProperties; } memBufferCreateHelper(Plugin, TargetContext->getHandleRef(), CreationFlags, - Size, UserPtr, &NewMem, AllocPropsPtr); + Size, &NewMem, &AllocProps); return NewMem; } -void *MemoryManager::allocateMemBuffer( - ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, - bool HostPtrReadOnly, size_t Size, const EventImplPtr &InteropEvent, - const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, - sycl::detail::pi::PiEvent &OutEventToWait) { +void *MemoryManager::allocateMemBuffer(ContextImplPtr TargetContext, + SYCLMemObjI *MemObj, void *UserPtr, + bool HostPtrReadOnly, size_t Size, + const EventImplPtr &InteropEvent, + const ContextImplPtr &InteropContext, + const sycl::property_list &PropsList, + ur_event_handle_t &OutEventToWait) { void *MemPtr; if (!TargetContext) MemPtr = @@ -413,12 +413,10 @@ void *MemoryManager::allocateMemBuffer( void *MemoryManager::allocateMemImage( ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, - bool HostPtrReadOnly, size_t Size, - const sycl::detail::pi::PiMemImageDesc &Desc, - const sycl::detail::pi::PiMemImageFormat &Format, - const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, - const sycl::property_list &PropsList, - sycl::detail::pi::PiEvent &OutEventToWait) { + bool HostPtrReadOnly, size_t Size, const ur_image_desc_t &Desc, + const ur_image_format_t &Format, const EventImplPtr &InteropEvent, + const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, + ur_event_handle_t &OutEventToWait) { if (!TargetContext) return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); @@ -433,7 +431,7 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext, void *ParentMemObj, size_t ElemSize, size_t Offset, range<3> Range, std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { + ur_event_handle_t &OutEvent) { waitForEvents(DepEvents); OutEvent = nullptr; @@ -444,25 +442,22 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext, for (size_t I = 0; I < 3; ++I) SizeInBytes *= Range[I]; - sycl::detail::pi::PiResult Error = PI_SUCCESS; - pi_buffer_region_struct Region{Offset, SizeInBytes}; - sycl::detail::pi::PiMem NewMem; + ur_result_t Error = UR_RESULT_SUCCESS; + ur_buffer_region_t Region = {UR_STRUCTURE_TYPE_BUFFER_REGION, nullptr, Offset, + SizeInBytes}; + ur_mem_handle_t NewMem; const PluginPtr &Plugin = TargetContext->getPlugin(); - Error = Plugin->call_nocheck( - pi::cast(ParentMemObj), PI_MEM_FLAGS_ACCESS_RW, - PI_BUFFER_CREATE_TYPE_REGION, &Region, &NewMem); - if (Error == PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET) - throw detail::set_pi_error( + Error = Plugin->call_nocheck( + urMemBufferPartition, ur::cast(ParentMemObj), + UR_MEM_FLAG_READ_WRITE, UR_BUFFER_CREATE_TYPE_REGION, &Region, &NewMem); + if (Error == UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET) + throw detail::set_ur_error( exception(make_error_code(errc::invalid), "Specified offset of the sub-buffer being constructed is not " "a multiple of the memory base address alignment"), Error); - if (Error != PI_SUCCESS) { - throw set_pi_error(exception(make_error_code(errc::runtime), - "allocateMemSubBuffer() failed"), - Error); - } + Plugin->checkUrResult(Error); return NewMem; } @@ -500,18 +495,17 @@ void prepTermPositions(TermPositions &pos, int Dimensions, void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, - unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, + unsigned int SrcElemSize, ur_mem_handle_t DstMem, QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, - unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent, + unsigned int DstElemSize, std::vector DepEvents, + ur_event_handle_t &OutEvent, const detail::EventImplPtr &OutEventImpl) { (void)SrcAccessRange; assert(SYCLMemObj && "The SYCLMemObj is nullptr"); assert(TgtQueue && "Destination mem object queue must be not nullptr"); - const sycl::detail::pi::PiQueue Queue = TgtQueue->getHandleRef(); + const ur_queue_handle_t Queue = TgtQueue->getHandleRef(); const PluginPtr &Plugin = TgtQueue->getPlugin(); detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType(); @@ -529,10 +523,10 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, if (1 == DimDst && 1 == DimSrc) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, DstMem, - /*blocking_write=*/PI_FALSE, DstXOffBytes, DstAccessRangeWidthBytes, - SrcMem + SrcXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent); + Plugin->call(urEnqueueMemBufferWrite, Queue, DstMem, + /*blocking_write=*/false, DstXOffBytes, + DstAccessRangeWidthBytes, SrcMem + SrcXOffBytes, + DepEvents.size(), DepEvents.data(), &OutEvent); } else { size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes; size_t BufferSlicePitch = @@ -541,56 +535,54 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, size_t HostSlicePitch = (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0; - pi_buff_rect_offset_struct BufferOffset{ - DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]}; - pi_buff_rect_offset_struct HostOffset{ - SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]}; - pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes, - DstAccessRange[DstPos.YTerm], - DstAccessRange[DstPos.ZTerm]}; + ur_rect_offset_t BufferOffset{DstXOffBytes, DstOffset[DstPos.YTerm], + DstOffset[DstPos.ZTerm]}; + ur_rect_offset_t HostOffset{SrcXOffBytes, SrcOffset[SrcPos.YTerm], + SrcOffset[SrcPos.ZTerm]}; + ur_rect_region_t RectRegion{DstAccessRangeWidthBytes, + DstAccessRange[DstPos.YTerm], + DstAccessRange[DstPos.ZTerm]}; if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, DstMem, - /*blocking_write=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, - SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent); + Plugin->call(urEnqueueMemBufferWriteRect, Queue, DstMem, + /*blocking_write=*/false, BufferOffset, HostOffset, + RectRegion, BufferRowPitch, BufferSlicePitch, HostRowPitch, + HostSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), + &OutEvent); } } else { size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes; size_t InputSlicePitch = (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0; - pi_image_offset_struct Origin{DstOffset[DstPos.XTerm], - DstOffset[DstPos.YTerm], - DstOffset[DstPos.ZTerm]}; - pi_image_region_struct Region{DstAccessRange[DstPos.XTerm], - DstAccessRange[DstPos.YTerm], - DstAccessRange[DstPos.ZTerm]}; + ur_rect_offset_t Origin{DstOffset[DstPos.XTerm], DstOffset[DstPos.YTerm], + DstOffset[DstPos.ZTerm]}; + ur_rect_region_t Region{DstAccessRange[DstPos.XTerm], + DstAccessRange[DstPos.YTerm], + DstAccessRange[DstPos.ZTerm]}; if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, DstMem, - /*blocking_write=*/PI_FALSE, &Origin, &Region, InputRowPitch, - InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent); + Plugin->call(urEnqueueMemImageWrite, Queue, DstMem, + /*blocking_write=*/false, Origin, Region, InputRowPitch, + InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), + &OutEvent); } } -void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, +void copyD2H(SYCLMemObjI *SYCLMemObj, ur_mem_handle_t SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, - unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent, + unsigned int DstElemSize, std::vector DepEvents, + ur_event_handle_t &OutEvent, const detail::EventImplPtr &OutEventImpl) { (void)DstAccessRange; assert(SYCLMemObj && "The SYCLMemObj is nullptr"); assert(SrcQueue && "Source mem object queue is expected to be not nullptr"); - const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); + const ur_queue_handle_t Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType(); @@ -614,10 +606,10 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, if (1 == DimDst && 1 == DimSrc) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, SrcMem, - /*blocking_read=*/PI_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes, - DstMem + DstXOffBytes, DepEvents.size(), DepEvents.data(), &OutEvent); + Plugin->call(urEnqueueMemBufferRead, Queue, SrcMem, + /*blocking_read=*/false, SrcXOffBytes, + SrcAccessRangeWidthBytes, DstMem + DstXOffBytes, + DepEvents.size(), DepEvents.data(), &OutEvent); } else { size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes; size_t BufferSlicePitch = @@ -626,54 +618,53 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, size_t HostSlicePitch = (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0; - pi_buff_rect_offset_struct BufferOffset{ - SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]}; - pi_buff_rect_offset_struct HostOffset{ - DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]}; - pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes, - SrcAccessRange[SrcPos.YTerm], - SrcAccessRange[SrcPos.ZTerm]}; + ur_rect_offset_t BufferOffset{SrcXOffBytes, SrcOffset[SrcPos.YTerm], + SrcOffset[SrcPos.ZTerm]}; + ur_rect_offset_t HostOffset{DstXOffBytes, DstOffset[DstPos.YTerm], + DstOffset[DstPos.ZTerm]}; + ur_rect_region_t RectRegion{SrcAccessRangeWidthBytes, + SrcAccessRange[SrcPos.YTerm], + SrcAccessRange[SrcPos.ZTerm]}; if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, SrcMem, - /*blocking_read=*/PI_FALSE, &BufferOffset, &HostOffset, &RectRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, - DstMem, DepEvents.size(), DepEvents.data(), &OutEvent); + Plugin->call(urEnqueueMemBufferReadRect, Queue, SrcMem, + /*blocking_read=*/false, BufferOffset, HostOffset, + RectRegion, BufferRowPitch, BufferSlicePitch, HostRowPitch, + HostSlicePitch, DstMem, DepEvents.size(), DepEvents.data(), + &OutEvent); } } else { size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes; size_t SlicePitch = (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0; - pi_image_offset_struct Offset{SrcOffset[SrcPos.XTerm], - SrcOffset[SrcPos.YTerm], - SrcOffset[SrcPos.ZTerm]}; - pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm], - SrcAccessRange[SrcPos.YTerm], - SrcAccessRange[SrcPos.ZTerm]}; + ur_rect_offset_t Offset{SrcOffset[SrcPos.XTerm], SrcOffset[SrcPos.YTerm], + SrcOffset[SrcPos.ZTerm]}; + ur_rect_region_t Region{SrcAccessRange[SrcPos.XTerm], + SrcAccessRange[SrcPos.YTerm], + SrcAccessRange[SrcPos.ZTerm]}; if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, SrcMem, PI_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem, - DepEvents.size(), DepEvents.data(), &OutEvent); + Plugin->call(urEnqueueMemImageRead, Queue, SrcMem, false, Offset, Region, + RowPitch, SlicePitch, DstMem, DepEvents.size(), + DepEvents.data(), &OutEvent); } } -void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, +void copyD2D(SYCLMemObjI *SYCLMemObj, ur_mem_handle_t SrcMem, QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, - unsigned int SrcElemSize, sycl::detail::pi::PiMem DstMem, - QueueImplPtr, unsigned int DimDst, sycl::range<3> DstSize, - sycl::range<3>, sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent, + unsigned int SrcElemSize, ur_mem_handle_t DstMem, QueueImplPtr, + unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3>, + sycl::id<3> DstOffset, unsigned int DstElemSize, + std::vector DepEvents, + ur_event_handle_t &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); assert(SrcQueue && "Source mem object and target mem object queues are " "expected to be not nullptr"); - const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); + const ur_queue_handle_t Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType(); @@ -691,10 +682,9 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, if (1 == DimDst && 1 == DimSrc) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes, - SrcAccessRangeWidthBytes, DepEvents.size(), DepEvents.data(), - &OutEvent); + Plugin->call(urEnqueueMemBufferCopy, Queue, SrcMem, DstMem, SrcXOffBytes, + DstXOffBytes, SrcAccessRangeWidthBytes, DepEvents.size(), + DepEvents.data(), &OutEvent); } else { // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will // calculate both src and dest pitch using region[0], which is not correct @@ -708,35 +698,33 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, ? DstSzWidthBytes : DstSzWidthBytes * DstSize[DstPos.YTerm]; - pi_buff_rect_offset_struct SrcOrigin{ - SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]}; - pi_buff_rect_offset_struct DstOrigin{ - DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]}; - pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes, - SrcAccessRange[SrcPos.YTerm], - SrcAccessRange[SrcPos.ZTerm]}; + ur_rect_offset_t SrcOrigin{SrcXOffBytes, SrcOffset[SrcPos.YTerm], + SrcOffset[SrcPos.ZTerm]}; + ur_rect_offset_t DstOrigin{DstXOffBytes, DstOffset[DstPos.YTerm], + DstOffset[DstPos.ZTerm]}; + ur_rect_region_t Region{SrcAccessRangeWidthBytes, + SrcAccessRange[SrcPos.YTerm], + SrcAccessRange[SrcPos.ZTerm]}; if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch, - SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(), - DepEvents.data(), &OutEvent); + Plugin->call(urEnqueueMemBufferCopyRect, Queue, SrcMem, DstMem, SrcOrigin, + DstOrigin, Region, SrcRowPitch, SrcSlicePitch, DstRowPitch, + DstSlicePitch, DepEvents.size(), DepEvents.data(), + &OutEvent); } } else { - pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.XTerm], - SrcOffset[SrcPos.YTerm], - SrcOffset[SrcPos.ZTerm]}; - pi_image_offset_struct DstOrigin{DstOffset[DstPos.XTerm], - DstOffset[DstPos.YTerm], - DstOffset[DstPos.ZTerm]}; - pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm], - SrcAccessRange[SrcPos.YTerm], - SrcAccessRange[SrcPos.ZTerm]}; + ur_rect_offset_t SrcOrigin{SrcOffset[SrcPos.XTerm], SrcOffset[SrcPos.YTerm], + SrcOffset[SrcPos.ZTerm]}; + ur_rect_offset_t DstOrigin{DstOffset[DstPos.XTerm], DstOffset[DstPos.YTerm], + DstOffset[DstPos.ZTerm]}; + ur_rect_region_t Region{SrcAccessRange[SrcPos.XTerm], + SrcAccessRange[SrcPos.YTerm], + SrcAccessRange[SrcPos.ZTerm]}; if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, - DepEvents.size(), DepEvents.data(), &OutEvent); + Plugin->call(urEnqueueMemImageCopy, Queue, SrcMem, DstMem, SrcOrigin, + DstOrigin, Region, DepEvents.size(), DepEvents.data(), + &OutEvent); } } @@ -746,9 +734,8 @@ static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, unsigned int SrcElemSize, char *DstMem, QueueImplPtr, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, - unsigned int DstElemSize, - std::vector, - sycl::detail::pi::PiEvent &, const detail::EventImplPtr &) { + unsigned int DstElemSize, std::vector, + ur_event_handle_t &, const detail::EventImplPtr &) { if ((DimSrc != 1 || DimDst != 1) && (SrcOffset != id<3>{0, 0, 0} || DstOffset != id<3>{0, 0, 0} || SrcSize != SrcAccessRange || DstSize != DstAccessRange)) { @@ -769,17 +756,14 @@ static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, // Copies memory between: host and device, host and host, // device and device if memory objects bound to the one context. -void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, - QueueImplPtr SrcQueue, unsigned int DimSrc, - sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, - sycl::id<3> SrcOffset, unsigned int SrcElemSize, - void *DstMem, QueueImplPtr TgtQueue, - unsigned int DimDst, sycl::range<3> DstSize, - sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, - unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent, - const detail::EventImplPtr &OutEventImpl) { +void MemoryManager::copy( + SYCLMemObjI *SYCLMemObj, void *SrcMem, QueueImplPtr SrcQueue, + unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, + sycl::id<3> SrcOffset, unsigned int SrcElemSize, void *DstMem, + QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range<3> DstSize, + sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, + unsigned int DstElemSize, std::vector DepEvents, + ur_event_handle_t &OutEvent, const detail::EventImplPtr &OutEventImpl) { if (!SrcQueue) { if (!TgtQueue) @@ -790,20 +774,20 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, else copyH2D(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, - pi::cast(DstMem), std::move(TgtQueue), - DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, + ur::cast(DstMem), std::move(TgtQueue), DimDst, + DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); } else { if (!TgtQueue) - copyD2H(SYCLMemObj, pi::cast(SrcMem), + copyD2H(SYCLMemObj, ur::cast(SrcMem), std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, nullptr, DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); else - copyD2D(SYCLMemObj, pi::cast(SrcMem), + copyD2D(SYCLMemObj, ur::cast(SrcMem), std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset, - SrcElemSize, pi::cast(DstMem), + SrcElemSize, ur::cast(DstMem), std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); } @@ -814,8 +798,8 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, unsigned int Dim, sycl::range<3> MemRange, sycl::range<3> AccRange, sycl::id<3> Offset, unsigned int ElementSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent, + std::vector DepEvents, + ur_event_handle_t &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); assert(Queue && "Fill should be called only with a valid device queue"); @@ -835,11 +819,10 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, size_t RangeMultiplier = AccRange[0] * AccRange[1] * AccRange[2]; if (RangesUsable && OffsetUsable) { - Plugin->call( - Queue->getHandleRef(), pi::cast(Mem), - Pattern, PatternSize, Offset[0] * ElementSize, - RangeMultiplier * ElementSize, DepEvents.size(), DepEvents.data(), - &OutEvent); + Plugin->call(urEnqueueMemBufferFill, Queue->getHandleRef(), + ur::cast(Mem), Pattern, PatternSize, + Offset[0] * ElementSize, RangeMultiplier * ElementSize, + DepEvents.size(), DepEvents.data(), &OutEvent); return; } // The sycl::handler uses a parallel_for kernel in the case of unusable @@ -849,12 +832,11 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, } else { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - // images don't support offset accessors and thus avoid issues of - // discontinguous data - Plugin->call( - Queue->getHandleRef(), pi::cast(Mem), Pattern, - &Offset[0], &AccRange[0], DepEvents.size(), DepEvents.data(), - &OutEvent); + // We don't have any backend implementations that support enqueueing a fill + // on non-buffer mem objects like this. The old UR function was a stub with + // an abort. + throw exception(make_error_code(errc::runtime), + "Fill operation not supported for the given mem object"); } } @@ -862,29 +844,29 @@ void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, access::mode AccessMode, unsigned int, sycl::range<3>, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, unsigned int ElementSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { + std::vector DepEvents, + ur_event_handle_t &OutEvent) { if (!Queue) { throw exception(make_error_code(errc::runtime), "Not supported configuration of map requested"); } - pi_map_flags Flags = 0; + ur_map_flags_t Flags = 0; switch (AccessMode) { case access::mode::read: - Flags |= PI_MAP_READ; + Flags |= UR_MAP_FLAG_READ; break; case access::mode::write: - Flags |= PI_MAP_WRITE; + Flags |= UR_MAP_FLAG_WRITE; break; case access::mode::read_write: case access::mode::atomic: - Flags = PI_MAP_WRITE | PI_MAP_READ; + Flags = UR_MAP_FLAG_WRITE | UR_MAP_FLAG_READ; break; case access::mode::discard_write: case access::mode::discard_read_write: - Flags |= PI_MAP_WRITE_INVALIDATE_REGION; + Flags |= UR_MAP_FLAG_WRITE_INVALIDATE_REGION; break; } @@ -898,7 +880,7 @@ void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2]; const PluginPtr &Plugin = Queue->getPlugin(); memBufferMapHelper(Plugin, Queue->getHandleRef(), - pi::cast(Mem), PI_FALSE, Flags, + ur::cast(Mem), false, Flags, AccessOffset[0], BytesToMap, DepEvents.size(), DepEvents.data(), &OutEvent, &MappedPtr); return MappedPtr; @@ -906,8 +888,8 @@ void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, void *MappedPtr, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { + std::vector DepEvents, + ur_event_handle_t &OutEvent) { // Execution on host is not supported here. if (!Queue) { @@ -918,24 +900,22 @@ void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, // Using the plugin of the Queue. const PluginPtr &Plugin = Queue->getPlugin(); - memUnmapHelper(Plugin, Queue->getHandleRef(), - pi::cast(Mem), MappedPtr, - DepEvents.size(), DepEvents.data(), &OutEvent); + memUnmapHelper(Plugin, Queue->getHandleRef(), ur::cast(Mem), + MappedPtr, DepEvents.size(), DepEvents.data(), &OutEvent); } void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, size_t Len, void *DstMem, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SrcQueue && "USM copy must be called with a valid device queue"); if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - SrcQueue->getPlugin()->call( - SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(), - OutEvent); + SrcQueue->getPlugin()->call(urEnqueueEventsWait, SrcQueue->getHandleRef(), + DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -947,24 +927,23 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, const PluginPtr &Plugin = SrcQueue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - SrcQueue->getHandleRef(), - /* blocking */ PI_FALSE, DstMem, SrcMem, Len, DepEvents.size(), - DepEvents.data(), OutEvent); + Plugin->call(urEnqueueUSMMemcpy, SrcQueue->getHandleRef(), + /* blocking */ false, DstMem, SrcMem, Len, DepEvents.size(), + DepEvents.data(), OutEvent); } void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(Queue && "USM fill must be called with a valid device queue"); if (!Length) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Queue->getPlugin()->call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); + Queue->getPlugin()->call(urEnqueueEventsWait, Queue->getHandleRef(), + DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -975,52 +954,50 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), Mem, Pattern.data(), Pattern.size(), Length, - DepEvents.size(), DepEvents.data(), OutEvent); + Plugin->call(urEnqueueUSMFill, Queue->getHandleRef(), Mem, Pattern.size(), + Pattern.data(), Length, DepEvents.size(), DepEvents.data(), + OutEvent); } -void MemoryManager::prefetch_usm( - void *Mem, QueueImplPtr Queue, size_t Length, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { +void MemoryManager::prefetch_usm(void *Mem, QueueImplPtr Queue, size_t Length, + std::vector DepEvents, + ur_event_handle_t *OutEvent, + const detail::EventImplPtr &OutEventImpl) { assert(Queue && "USM prefetch must be called with a valid device queue"); const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call( - Queue->getHandleRef(), Mem, Length, _pi_usm_migration_flags(0), - DepEvents.size(), DepEvents.data(), OutEvent); + Plugin->call(urEnqueueUSMPrefetch, Queue->getHandleRef(), Mem, Length, 0, + DepEvents.size(), DepEvents.data(), OutEvent); } -void MemoryManager::advise_usm( - const void *Mem, QueueImplPtr Queue, size_t Length, pi_mem_advice Advice, - std::vector /*DepEvents*/, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { +void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue, + size_t Length, ur_usm_advice_flags_t Advice, + std::vector /*DepEvents*/, + ur_event_handle_t *OutEvent, + const detail::EventImplPtr &OutEventImpl) { assert(Queue && "USM advise must be called with a valid device queue"); const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Plugin->call(Queue->getHandleRef(), Mem, - Length, Advice, OutEvent); + Plugin->call(urEnqueueUSMAdvise, Queue->getHandleRef(), Mem, Length, Advice, + OutEvent); } -void MemoryManager::copy_2d_usm( - const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem, - size_t DstPitch, size_t Width, size_t Height, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { +void MemoryManager::copy_2d_usm(const void *SrcMem, size_t SrcPitch, + QueueImplPtr Queue, void *DstMem, + size_t DstPitch, size_t Width, size_t Height, + std::vector DepEvents, + ur_event_handle_t *OutEvent, + const detail::EventImplPtr &OutEventImpl) { assert(Queue && "USM copy 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Queue->getPlugin()->call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); + Queue->getPlugin()->call(urEnqueueEventsWait, Queue->getHandleRef(), + DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -1031,19 +1008,18 @@ void MemoryManager::copy_2d_usm( const PluginPtr &Plugin = Queue->getPlugin(); - pi_bool SupportsUSMMemcpy2D = false; - Plugin->call( - Queue->getContextImplPtr()->getHandleRef(), - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, sizeof(pi_bool), - &SupportsUSMMemcpy2D, nullptr); + bool SupportsUSMMemcpy2D = false; + Plugin->call(urContextGetInfo, Queue->getContextImplPtr()->getHandleRef(), + UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, sizeof(bool), + &SupportsUSMMemcpy2D, nullptr); if (SupportsUSMMemcpy2D) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); // Direct memcpy2D is supported so we use this function. - Plugin->call( - Queue->getHandleRef(), /*blocking=*/PI_FALSE, DstMem, DstPitch, SrcMem, - SrcPitch, Width, Height, DepEvents.size(), DepEvents.data(), OutEvent); + Plugin->call(urEnqueueUSMMemcpy2D, Queue->getHandleRef(), + /*blocking=*/false, DstMem, DstPitch, SrcMem, SrcPitch, Width, + Height, DepEvents.size(), DepEvents.data(), OutEvent); return; } @@ -1061,42 +1037,43 @@ void MemoryManager::copy_2d_usm( #endif // NDEBUG // The fallback in this case is to insert a copy per row. - std::vector CopyEventsManaged; + std::vector CopyEventsManaged; CopyEventsManaged.reserve(Height); // We'll need continuous range of events for a wait later as well. - std::vector CopyEvents(Height); + std::vector CopyEvents(Height); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); + for (size_t I = 0; I < Height; ++I) { char *DstItBegin = static_cast(DstMem) + I * DstPitch; const char *SrcItBegin = static_cast(SrcMem) + I * SrcPitch; - Plugin->call( - Queue->getHandleRef(), /* blocking */ PI_FALSE, DstItBegin, SrcItBegin, - Width, DepEvents.size(), DepEvents.data(), CopyEvents.data() + I); + Plugin->call(urEnqueueUSMMemcpy, Queue->getHandleRef(), + /* blocking */ false, DstItBegin, SrcItBegin, Width, + DepEvents.size(), DepEvents.data(), CopyEvents.data() + I); CopyEventsManaged.emplace_back(CopyEvents[I], Plugin, /*TakeOwnership=*/true); } if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); // Then insert a wait to coalesce the copy events. - Queue->getPlugin()->call( - Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent); + Queue->getPlugin()->call(urEnqueueEventsWait, Queue->getHandleRef(), + CopyEvents.size(), CopyEvents.data(), OutEvent); } -void MemoryManager::fill_2d_usm( - void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { +void MemoryManager::fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, + size_t Width, size_t Height, + const std::vector &Pattern, + std::vector DepEvents, + ur_event_handle_t *OutEvent, + const detail::EventImplPtr &OutEventImpl) { assert(Queue && "USM fill 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Queue->getPlugin()->call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); + Queue->getPlugin()->call(urEnqueueEventsWait, Queue->getHandleRef(), + DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -1107,24 +1084,25 @@ void MemoryManager::fill_2d_usm( if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), DstMem, Pitch, Pattern.size(), Pattern.data(), - Width, Height, DepEvents.size(), DepEvents.data(), OutEvent); + Plugin->call(urEnqueueUSMFill2D, Queue->getHandleRef(), DstMem, Pitch, + Pattern.size(), Pattern.data(), Width, Height, DepEvents.size(), + DepEvents.data(), OutEvent); } -void MemoryManager::memset_2d_usm( - void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - char Value, std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { +void MemoryManager::memset_2d_usm(void *DstMem, QueueImplPtr Queue, + size_t Pitch, size_t Width, size_t Height, + char Value, + std::vector DepEvents, + ur_event_handle_t *OutEvent, + const detail::EventImplPtr &OutEventImpl) { assert(Queue && "USM memset 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - Queue->getPlugin()->call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); + Queue->getPlugin()->call(urEnqueueEventsWait, Queue->getHandleRef(), + DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -1135,19 +1113,16 @@ void MemoryManager::memset_2d_usm( "NULL pointer argument in 2D memory memset operation."); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); - const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), DstMem, Pitch, static_cast(Value), Width, - Height, DepEvents.size(), DepEvents.data(), OutEvent); + MemoryManager::fill_2d_usm(DstMem, Queue, Pitch, Width, Height, + {static_cast(Value)}, DepEvents, + OutEvent, nullptr); } -static void -memcpyToDeviceGlobalUSM(QueueImplPtr Queue, - DeviceGlobalMapEntry *DeviceGlobalEntry, - size_t NumBytes, size_t Offset, const void *Src, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { +static void memcpyToDeviceGlobalUSM( + QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, + size_t NumBytes, size_t Offset, const void *Src, + const std::vector &DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(Queue && "Copy to device global USM must be called with a valid device queue"); // Get or allocate USM memory for the device_global. @@ -1157,12 +1132,12 @@ memcpyToDeviceGlobalUSM(QueueImplPtr Queue, // OwnedPiEvent will keep the initialization event alive for the duration // of this function call. - OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin()); + OwnedUrEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin()); // We may need addtional events, so create a non-const dependency events list // to use if we need to modify it. - std::vector AuxDepEventsStorage; - const std::vector &ActualDepEvents = + std::vector AuxDepEventsStorage; + const std::vector &ActualDepEvents = ZIEvent ? AuxDepEventsStorage : DepEvents; // If there is a zero-initializer event the memory operation should wait for @@ -1180,9 +1155,8 @@ memcpyToDeviceGlobalUSM(QueueImplPtr Queue, static void memcpyFromDeviceGlobalUSM( QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, void *Dest, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { + const std::vector &DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl) { // Get or allocate USM memory for the device_global. Since we are reading from // it, we need it initialized if it has not been yet. DeviceGlobalUSMMem &DeviceGlobalUSM = @@ -1191,12 +1165,12 @@ static void memcpyFromDeviceGlobalUSM( // OwnedPiEvent will keep the initialization event alive for the duration // of this function call. - OwnedPiEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin()); + OwnedUrEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getPlugin()); // We may need addtional events, so create a non-const dependency events list // to use if we need to modify it. - std::vector AuxDepEventsStorage; - const std::vector &ActualDepEvents = + std::vector AuxDepEventsStorage; + const std::vector &ActualDepEvents = ZIEvent ? AuxDepEventsStorage : DepEvents; // If there is a zero-initializer event the memory operation should wait for @@ -1211,7 +1185,7 @@ static void memcpyFromDeviceGlobalUSM( OutEventImpl); } -static sycl::detail::pi::PiProgram +static ur_program_handle_t getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry) { assert(DeviceGlobalEntry->MIsDeviceImageScopeDecorated && @@ -1230,7 +1204,7 @@ getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, // Look for cached programs with the device_global. device Device = Queue->get_device(); ContextImplPtr ContextImpl = Queue->getContextImplPtr(); - std::optional CachedProgram = + std::optional CachedProgram = ContextImpl->getProgramForDeviceGlobal(Device, DeviceGlobalEntry); if (CachedProgram) return *CachedProgram; @@ -1243,48 +1217,47 @@ getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, device_image_plain DeviceImage = PM.getDeviceImageFromBinaryImage(&Img, Context, Device); device_image_plain BuiltImage = PM.build(DeviceImage, {Device}, {}); - return getSyclObjImpl(BuiltImage)->get_program_ref(); + return getSyclObjImpl(BuiltImage)->get_ur_program_ref(); } -static void memcpyToDeviceGlobalDirect( - QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, - size_t NumBytes, size_t Offset, const void *Src, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { +static void +memcpyToDeviceGlobalDirect(QueueImplPtr Queue, + DeviceGlobalMapEntry *DeviceGlobalEntry, + size_t NumBytes, size_t Offset, const void *Src, + const std::vector &DepEvents, + ur_event_handle_t *OutEvent) { assert( Queue && "Direct copy to device global must be called with a valid device queue"); - sycl::detail::pi::PiProgram Program = + ur_program_handle_t Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(), - false, NumBytes, Offset, Src, DepEvents.size(), DepEvents.data(), - OutEvent); + Plugin->call(urEnqueueDeviceGlobalVariableWrite, Queue->getHandleRef(), + Program, DeviceGlobalEntry->MUniqueId.c_str(), false, NumBytes, + Offset, Src, DepEvents.size(), DepEvents.data(), OutEvent); } -static void memcpyFromDeviceGlobalDirect( - QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, - size_t NumBytes, size_t Offset, void *Dest, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { +static void +memcpyFromDeviceGlobalDirect(QueueImplPtr Queue, + DeviceGlobalMapEntry *DeviceGlobalEntry, + size_t NumBytes, size_t Offset, void *Dest, + const std::vector &DepEvents, + ur_event_handle_t *OutEvent) { assert(Queue && "Direct copy from device global must be called with a valid " "device queue"); - sycl::detail::pi::PiProgram Program = + ur_program_handle_t Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(), - false, NumBytes, Offset, Dest, DepEvents.size(), DepEvents.data(), - OutEvent); + Plugin->call(urEnqueueDeviceGlobalVariableRead, Queue->getHandleRef(), + Program, DeviceGlobalEntry->MUniqueId.c_str(), false, NumBytes, + Offset, Dest, DepEvents.size(), DepEvents.data(), OutEvent); } void MemoryManager::copy_to_device_global( const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, const void *SrcMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { + const std::vector &DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl) { DeviceGlobalMapEntry *DGEntry = detail::ProgramManager::getInstance().getDeviceGlobalEntry( DeviceGlobalPtr); @@ -1305,9 +1278,8 @@ void MemoryManager::copy_to_device_global( void MemoryManager::copy_from_device_global( const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { + const std::vector &DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl) { DeviceGlobalMapEntry *DGEntry = detail::ProgramManager::getInstance().getDeviceGlobalEntry( DeviceGlobalPtr); @@ -1328,14 +1300,14 @@ void MemoryManager::copy_from_device_global( // Command buffer methods void MemoryManager::ext_oneapi_copyD2D_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, + ur_exp_command_buffer_handle_t CommandBuffer, SYCLMemObjI *SYCLMemObj, void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, unsigned int SrcElemSize, void *DstMem, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); (void)DstAccessRange; @@ -1358,11 +1330,11 @@ void MemoryManager::ext_oneapi_copyD2D_cmd_buffer( } if (1 == DimDst && 1 == DimSrc) { - Plugin->call( - CommandBuffer, sycl::detail::pi::cast(SrcMem), - sycl::detail::pi::cast(DstMem), SrcXOffBytes, - DstXOffBytes, SrcAccessRangeWidthBytes, Deps.size(), Deps.data(), - OutSyncPoint); + Plugin->call(urCommandBufferAppendMemBufferCopyExp, CommandBuffer, + sycl::detail::ur::cast(SrcMem), + sycl::detail::ur::cast(DstMem), SrcXOffBytes, + DstXOffBytes, SrcAccessRangeWidthBytes, Deps.size(), + Deps.data(), OutSyncPoint); } else { // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will // calculate both src and dest pitch using region[0], which is not correct @@ -1376,31 +1348,31 @@ void MemoryManager::ext_oneapi_copyD2D_cmd_buffer( ? DstSzWidthBytes : DstSzWidthBytes * DstSize[DstPos.YTerm]; - pi_buff_rect_offset_struct SrcOrigin{SrcXOffBytes, SrcOffset[SrcPos.YTerm], - SrcOffset[SrcPos.ZTerm]}; - pi_buff_rect_offset_struct DstOrigin{DstXOffBytes, DstOffset[DstPos.YTerm], - DstOffset[DstPos.ZTerm]}; - pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes, - SrcAccessRange[SrcPos.YTerm], - SrcAccessRange[SrcPos.ZTerm]}; - - Plugin->call( - CommandBuffer, sycl::detail::pi::cast(SrcMem), - sycl::detail::pi::cast(DstMem), &SrcOrigin, - &DstOrigin, &Region, SrcRowPitch, SrcSlicePitch, DstRowPitch, - DstSlicePitch, Deps.size(), Deps.data(), OutSyncPoint); + ur_rect_offset_t SrcOrigin{SrcXOffBytes, SrcOffset[SrcPos.YTerm], + SrcOffset[SrcPos.ZTerm]}; + ur_rect_offset_t DstOrigin{DstXOffBytes, DstOffset[DstPos.YTerm], + DstOffset[DstPos.ZTerm]}; + ur_rect_region_t Region{SrcAccessRangeWidthBytes, + SrcAccessRange[SrcPos.YTerm], + SrcAccessRange[SrcPos.ZTerm]}; + + Plugin->call(urCommandBufferAppendMemBufferCopyRectExp, CommandBuffer, + sycl::detail::ur::cast(SrcMem), + sycl::detail::ur::cast(DstMem), SrcOrigin, + DstOrigin, Region, SrcRowPitch, SrcSlicePitch, DstRowPitch, + DstSlicePitch, Deps.size(), Deps.data(), OutSyncPoint); } } void MemoryManager::ext_oneapi_copyD2H_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, + ur_exp_command_buffer_handle_t CommandBuffer, SYCLMemObjI *SYCLMemObj, void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, unsigned int SrcElemSize, char *DstMem, unsigned int DimDst, sycl::range<3> DstSize, sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); const PluginPtr &Plugin = Context->getPlugin(); @@ -1422,19 +1394,18 @@ void MemoryManager::ext_oneapi_copyD2H_cmd_buffer( } if (1 == DimDst && 1 == DimSrc) { - pi_result Result = - Plugin->call_nocheck( - CommandBuffer, - sycl::detail::pi::cast(SrcMem), - SrcXOffBytes, SrcAccessRangeWidthBytes, DstMem + DstXOffBytes, - Deps.size(), Deps.data(), OutSyncPoint); - - if (Result == PI_ERROR_UNSUPPORTED_FEATURE) { + ur_result_t Result = Plugin->call_nocheck( + urCommandBufferAppendMemBufferReadExp, CommandBuffer, + sycl::detail::ur::cast(SrcMem), SrcXOffBytes, + SrcAccessRangeWidthBytes, DstMem + DstXOffBytes, Deps.size(), + Deps.data(), OutSyncPoint); + + if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { throw sycl::exception( sycl::make_error_code(sycl::errc::feature_not_supported), "Device-to-host buffer copy command not supported by graph backend"); } else { - Plugin->checkPiResult(Result); + Plugin->checkUrResult(Result); } } else { size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes; @@ -1444,40 +1415,38 @@ void MemoryManager::ext_oneapi_copyD2H_cmd_buffer( size_t HostSlicePitch = (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm] : 0; - pi_buff_rect_offset_struct BufferOffset{ - SrcXOffBytes, SrcOffset[SrcPos.YTerm], SrcOffset[SrcPos.ZTerm]}; - pi_buff_rect_offset_struct HostOffset{DstXOffBytes, DstOffset[DstPos.YTerm], - DstOffset[DstPos.ZTerm]}; - pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes, - SrcAccessRange[SrcPos.YTerm], - SrcAccessRange[SrcPos.ZTerm]}; - - pi_result Result = - Plugin->call_nocheck( - CommandBuffer, - sycl::detail::pi::cast(SrcMem), - &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, DstMem, Deps.size(), - Deps.data(), OutSyncPoint); - if (Result == PI_ERROR_UNSUPPORTED_FEATURE) { + ur_rect_offset_t BufferOffset{SrcXOffBytes, SrcOffset[SrcPos.YTerm], + SrcOffset[SrcPos.ZTerm]}; + ur_rect_offset_t HostOffset{DstXOffBytes, DstOffset[DstPos.YTerm], + DstOffset[DstPos.ZTerm]}; + ur_rect_region_t RectRegion{SrcAccessRangeWidthBytes, + SrcAccessRange[SrcPos.YTerm], + SrcAccessRange[SrcPos.ZTerm]}; + + ur_result_t Result = Plugin->call_nocheck( + urCommandBufferAppendMemBufferReadRectExp, CommandBuffer, + sycl::detail::ur::cast(SrcMem), BufferOffset, + HostOffset, RectRegion, BufferRowPitch, BufferSlicePitch, HostRowPitch, + HostSlicePitch, DstMem, Deps.size(), Deps.data(), OutSyncPoint); + if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { throw sycl::exception( sycl::make_error_code(sycl::errc::feature_not_supported), "Device-to-host buffer copy command not supported by graph backend"); } else { - Plugin->checkPiResult(Result); + Plugin->checkUrResult(Result); } } } void MemoryManager::ext_oneapi_copyH2D_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, + ur_exp_command_buffer_handle_t CommandBuffer, SYCLMemObjI *SYCLMemObj, char *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::id<3> SrcOffset, unsigned int SrcElemSize, void *DstMem, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); const PluginPtr &Plugin = Context->getPlugin(); @@ -1499,19 +1468,18 @@ void MemoryManager::ext_oneapi_copyH2D_cmd_buffer( } if (1 == DimDst && 1 == DimSrc) { - pi_result Result = - Plugin->call_nocheck( - CommandBuffer, - sycl::detail::pi::cast(DstMem), - DstXOffBytes, DstAccessRangeWidthBytes, SrcMem + SrcXOffBytes, - Deps.size(), Deps.data(), OutSyncPoint); - - if (Result == PI_ERROR_UNSUPPORTED_FEATURE) { + ur_result_t Result = Plugin->call_nocheck( + urCommandBufferAppendMemBufferWriteExp, CommandBuffer, + sycl::detail::ur::cast(DstMem), DstXOffBytes, + DstAccessRangeWidthBytes, SrcMem + SrcXOffBytes, Deps.size(), + Deps.data(), OutSyncPoint); + + if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { throw sycl::exception( sycl::make_error_code(sycl::errc::feature_not_supported), "Host-to-device buffer copy command not supported by graph backend"); } else { - Plugin->checkPiResult(Result); + Plugin->checkUrResult(Result); } } else { size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes; @@ -1521,81 +1489,77 @@ void MemoryManager::ext_oneapi_copyH2D_cmd_buffer( size_t HostSlicePitch = (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm] : 0; - pi_buff_rect_offset_struct BufferOffset{ - DstXOffBytes, DstOffset[DstPos.YTerm], DstOffset[DstPos.ZTerm]}; - pi_buff_rect_offset_struct HostOffset{SrcXOffBytes, SrcOffset[SrcPos.YTerm], - SrcOffset[SrcPos.ZTerm]}; - pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes, - DstAccessRange[DstPos.YTerm], - DstAccessRange[DstPos.ZTerm]}; - - pi_result Result = - Plugin->call_nocheck( - CommandBuffer, - sycl::detail::pi::cast(DstMem), - &BufferOffset, &HostOffset, &RectRegion, BufferRowPitch, - BufferSlicePitch, HostRowPitch, HostSlicePitch, SrcMem, Deps.size(), - Deps.data(), OutSyncPoint); - - if (Result == PI_ERROR_UNSUPPORTED_FEATURE) { + ur_rect_offset_t BufferOffset{DstXOffBytes, DstOffset[DstPos.YTerm], + DstOffset[DstPos.ZTerm]}; + ur_rect_offset_t HostOffset{SrcXOffBytes, SrcOffset[SrcPos.YTerm], + SrcOffset[SrcPos.ZTerm]}; + ur_rect_region_t RectRegion{DstAccessRangeWidthBytes, + DstAccessRange[DstPos.YTerm], + DstAccessRange[DstPos.ZTerm]}; + + ur_result_t Result = Plugin->call_nocheck( + urCommandBufferAppendMemBufferWriteRectExp, CommandBuffer, + sycl::detail::ur::cast(DstMem), BufferOffset, + HostOffset, RectRegion, BufferRowPitch, BufferSlicePitch, HostRowPitch, + HostSlicePitch, SrcMem, Deps.size(), Deps.data(), OutSyncPoint); + + if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { throw sycl::exception( sycl::make_error_code(sycl::errc::feature_not_supported), "Host-to-device buffer copy command not supported by graph backend"); } else { - Plugin->checkPiResult(Result); + Plugin->checkUrResult(Result); } } } void MemoryManager::ext_oneapi_copy_usm_cmd_buffer( ContextImplPtr Context, const void *SrcMem, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, size_t Len, - void *DstMem, std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + ur_exp_command_buffer_handle_t CommandBuffer, size_t Len, void *DstMem, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint) { if (!SrcMem || !DstMem) throw exception(make_error_code(errc::invalid), "NULL pointer argument in memory copy operation."); const PluginPtr &Plugin = Context->getPlugin(); - pi_result Result = - Plugin->call_nocheck( - CommandBuffer, DstMem, SrcMem, Len, Deps.size(), Deps.data(), - OutSyncPoint); - if (Result == PI_ERROR_UNSUPPORTED_FEATURE) { + ur_result_t Result = Plugin->call_nocheck( + urCommandBufferAppendUSMMemcpyExp, CommandBuffer, DstMem, SrcMem, Len, + Deps.size(), Deps.data(), OutSyncPoint); + if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { throw sycl::exception( sycl::make_error_code(sycl::errc::feature_not_supported), "USM copy command not supported by graph backend"); } else { - Plugin->checkPiResult(Result); + Plugin->checkUrResult(Result); } } void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, const std::vector &Pattern, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + ur_exp_command_buffer_handle_t CommandBuffer, void *DstMem, size_t Len, + const std::vector &Pattern, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint) { if (!DstMem) throw exception(make_error_code(errc::invalid), "NULL pointer argument in memory fill operation."); const PluginPtr &Plugin = Context->getPlugin(); - - Plugin->call( - CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, Deps.size(), - Deps.data(), OutSyncPoint); + Plugin->call(urCommandBufferAppendUSMFillExp, CommandBuffer, DstMem, + Pattern.data(), Pattern.size(), Len, Deps.size(), Deps.data(), + OutSyncPoint); } void MemoryManager::ext_oneapi_fill_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, + ur_exp_command_buffer_handle_t CommandBuffer, SYCLMemObjI *SYCLMemObj, void *Mem, size_t PatternSize, const unsigned char *Pattern, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, unsigned int ElementSize, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); const PluginPtr &Plugin = Context->getPlugin(); @@ -1613,10 +1577,10 @@ void MemoryManager::ext_oneapi_fill_cmd_buffer( size_t RangeMultiplier = AccessRange[0] * AccessRange[1] * AccessRange[2]; if (RangesUsable && OffsetUsable) { - Plugin->call( - CommandBuffer, pi::cast(Mem), Pattern, - PatternSize, AccessOffset[0] * ElementSize, - RangeMultiplier * ElementSize, Deps.size(), Deps.data(), OutSyncPoint); + Plugin->call(urCommandBufferAppendMemBufferFillExp, CommandBuffer, + ur::cast(Mem), Pattern, PatternSize, + AccessOffset[0] * ElementSize, RangeMultiplier * ElementSize, + Deps.size(), Deps.data(), OutSyncPoint); return; } // The sycl::handler uses a parallel_for kernel in the case of unusable @@ -1627,47 +1591,39 @@ void MemoryManager::ext_oneapi_fill_cmd_buffer( void MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *Mem, - size_t Length, std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + ur_exp_command_buffer_handle_t CommandBuffer, void *Mem, size_t Length, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint) { const PluginPtr &Plugin = Context->getPlugin(); - Plugin->call( - CommandBuffer, Mem, Length, _pi_usm_migration_flags(0), Deps.size(), - Deps.data(), OutSyncPoint); + Plugin->call(urCommandBufferAppendUSMPrefetchExp, CommandBuffer, Mem, Length, + ur_usm_migration_flags_t(0), Deps.size(), Deps.data(), + OutSyncPoint); } void MemoryManager::ext_oneapi_advise_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const void *Mem, - size_t Length, pi_mem_advice Advice, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, + size_t Length, ur_usm_advice_flags_t Advice, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint) { const PluginPtr &Plugin = Context->getPlugin(); - Plugin->call( - CommandBuffer, Mem, Length, Advice, Deps.size(), Deps.data(), - OutSyncPoint); + Plugin->call(urCommandBufferAppendUSMAdviseExp, CommandBuffer, Mem, Length, + Advice, Deps.size(), Deps.data(), OutSyncPoint); } void MemoryManager::copy_image_bindless( QueueImplPtr Queue, const void *Src, void *Dst, - const sycl::detail::pi::PiMemImageDesc &SrcImageDesc, - const sycl::detail::pi::PiMemImageDesc &DestImageDesc, - const sycl::detail::pi::PiMemImageFormat &SrcImageFormat, - const sycl::detail::pi::PiMemImageFormat &DestImageFormat, - const sycl::detail::pi::PiImageCopyFlags Flags, - sycl::detail::pi::PiImageOffset SrcOffset, - sycl::detail::pi::PiImageOffset DstOffset, - sycl::detail::pi::PiImageRegion CopyExtent, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { + const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc, + const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat, + const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset, + ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, + const std::vector &DepEvents, + ur_event_handle_t *OutEvent) { assert(Queue && "Copy image bindless must be called with a valid device queue"); - assert((Flags == (sycl::detail::pi::PiImageCopyFlags) - ext::oneapi::experimental::image_copy_flags::HtoD || - Flags == (sycl::detail::pi::PiImageCopyFlags) - ext::oneapi::experimental::image_copy_flags::DtoH || - Flags == (sycl::detail::pi::PiImageCopyFlags) - ext::oneapi::experimental::image_copy_flags::DtoD) && + assert((Flags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || + Flags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || + Flags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE) && "Invalid flags passed to copy_image_bindless."); if (!Dst || !Src) throw sycl::exception( @@ -1675,10 +1631,16 @@ void MemoryManager::copy_image_bindless( "NULL pointer argument in bindless image copy operation."); const detail::PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), Dst, Src, &SrcImageDesc, &DestImageDesc, - &SrcImageFormat, &DestImageFormat, Flags, &SrcOffset, &DstOffset, - &CopyExtent, DepEvents.size(), DepEvents.data(), OutEvent); + + ur_exp_image_copy_region_t CopyRegion{}; + CopyRegion.stype = UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION; + CopyRegion.copyExtent = CopyExtent; + CopyRegion.srcOffset = SrcOffset; + CopyRegion.dstOffset = DstOffset; + + Plugin->call(urBindlessImagesImageCopyExp, Queue->getHandleRef(), Src, Dst, + &SrcDesc, &DstDesc, &SrcFormat, &DstFormat, &CopyRegion, Flags, + DepEvents.size(), DepEvents.data(), OutEvent); } } // namespace detail diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 17d106600d68d..cc573abc62ddb 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -15,6 +15,8 @@ #include #include +#include + #include #include @@ -39,14 +41,14 @@ class MemoryManager { // Depending on the context it releases memory on host or on device. static void release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); + ur_event_handle_t &OutEvent); // The following method allocates memory allocation of memory object. // Depending on the context it allocates memory on host or on device. static void *allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); + ur_event_handle_t &OutEvent); // The following method creates OpenCL sub buffer for specified // offset, range, and memory object. @@ -54,7 +56,7 @@ class MemoryManager { void *ParentMemObj, size_t ElemSize, size_t Offset, range<3> Range, std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); + ur_event_handle_t &OutEvent); // Allocates buffer in specified context taking into account situations such // as host ptr or cl_mem provided by user. TargetContext should be device @@ -65,20 +67,17 @@ class MemoryManager { const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, - sycl::detail::pi::PiEvent &OutEventToWait); + ur_event_handle_t &OutEventToWait); // Allocates images in specified context taking into account situations such // as host ptr or cl_mem provided by user. TargetContext should be device // one(not host). - static void * - allocateMemImage(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, - void *UserPtr, bool HostPtrReadOnly, size_t Size, - const sycl::detail::pi::PiMemImageDesc &Desc, - const sycl::detail::pi::PiMemImageFormat &Format, - const EventImplPtr &InteropEvent, - const ContextImplPtr &InteropContext, - const sycl::property_list &PropsList, - sycl::detail::pi::PiEvent &OutEventToWait); + static void *allocateMemImage( + ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *UserPtr, + bool HostPtrReadOnly, size_t Size, const ur_image_desc_t &Desc, + const ur_image_format_t &Format, const EventImplPtr &InteropEvent, + const ContextImplPtr &InteropContext, + const sycl::property_list &PropsList, ur_event_handle_t &OutEventToWait); // Releases memory object(buffer or image). TargetContext should be device // one(not host). @@ -88,19 +87,19 @@ class MemoryManager { static void *allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::property_list &PropsList); - static void * - allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, - const EventImplPtr &InteropEvent, - const ContextImplPtr &InteropContext, - const sycl::property_list &PropsList, - sycl::detail::pi::PiEvent &OutEventToWait); - - static void * - allocateImageObject(ContextImplPtr TargetContext, void *UserPtr, - bool HostPtrReadOnly, - const sycl::detail::pi::PiMemImageDesc &Desc, - const sycl::detail::pi::PiMemImageFormat &Format, - const sycl::property_list &PropsList); + + static void *allocateInteropMemObject(ContextImplPtr TargetContext, + void *UserPtr, + const EventImplPtr &InteropEvent, + const ContextImplPtr &InteropContext, + const sycl::property_list &PropsList, + ur_event_handle_t &OutEventToWait); + + static void *allocateImageObject(ContextImplPtr TargetContext, void *UserPtr, + bool HostPtrReadOnly, + const ur_image_desc_t &Desc, + const ur_image_format_t &Format, + const sycl::property_list &PropsList); static void *allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr, bool HostPtrReadOnly, const size_t Size, @@ -115,8 +114,8 @@ class MemoryManager { QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent, + std::vector DepEvents, + ur_event_handle_t &OutEvent, const detail::EventImplPtr &OutEventImpl); static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, @@ -124,164 +123,151 @@ class MemoryManager { unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, unsigned int ElementSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent, + std::vector DepEvents, + ur_event_handle_t &OutEvent, const detail::EventImplPtr &OutEventImpl); static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, access::mode AccessMode, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, unsigned int ElementSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); + std::vector DepEvents, + ur_event_handle_t &OutEvent); static void unmap(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - void *MappedPtr, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); + void *MappedPtr, std::vector DepEvents, + ur_event_handle_t &OutEvent); static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, - void *DstMem, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + void *DstMem, std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, - pi_mem_advice Advice, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + ur_usm_advice_flags_t Advice, + std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); static void copy_2d_usm(const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem, size_t DstPitch, size_t Width, size_t Height, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); static void memset_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, char Value, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, + std::vector DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); - static void - copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, - QueueImplPtr Queue, size_t NumBytes, size_t Offset, - const void *SrcMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl); + static void copy_to_device_global( + const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, + size_t NumBytes, size_t Offset, const void *SrcMem, + const std::vector &DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); static void copy_from_device_global( const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl); + const std::vector &DepEvents, + ur_event_handle_t *OutEvent, const detail::EventImplPtr &OutEventImpl); // Command buffer extension methods static void ext_oneapi_copyD2D_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - SYCLMemObjI *SYCLMemObj, void *SrcMem, unsigned int DimSrc, - sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, - sycl::id<3> SrcOffset, unsigned int SrcElemSize, void *DstMem, - unsigned int DimDst, sycl::range<3> DstSize, - sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, - unsigned int DstElemSize, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + ur_exp_command_buffer_handle_t CommandBuffer, SYCLMemObjI *SYCLMemObj, + void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize, + sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, + unsigned int SrcElemSize, void *DstMem, unsigned int DimDst, + sycl::range<3> DstSize, sycl::range<3> DstAccessRange, + sycl::id<3> DstOffset, unsigned int DstElemSize, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint); static void ext_oneapi_copyD2H_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - SYCLMemObjI *SYCLMemObj, void *SrcMem, unsigned int DimSrc, - sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, - sycl::id<3> SrcOffset, unsigned int SrcElemSize, char *DstMem, - unsigned int DimDst, sycl::range<3> DstSize, sycl::id<3> DstOffset, - unsigned int DstElemSize, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + ur_exp_command_buffer_handle_t CommandBuffer, SYCLMemObjI *SYCLMemObj, + void *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize, + sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, + unsigned int SrcElemSize, char *DstMem, unsigned int DimDst, + sycl::range<3> DstSize, sycl::id<3> DstOffset, unsigned int DstElemSize, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint); static void ext_oneapi_copyH2D_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - SYCLMemObjI *SYCLMemObj, char *SrcMem, unsigned int DimSrc, - sycl::range<3> SrcSize, sycl::id<3> SrcOffset, unsigned int SrcElemSize, - void *DstMem, unsigned int DimDst, sycl::range<3> DstSize, + ur_exp_command_buffer_handle_t CommandBuffer, SYCLMemObjI *SYCLMemObj, + char *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize, + sycl::id<3> SrcOffset, unsigned int SrcElemSize, void *DstMem, + unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint); static void ext_oneapi_copy_usm_cmd_buffer( ContextImplPtr Context, const void *SrcMem, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, size_t Len, - void *DstMem, std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + ur_exp_command_buffer_handle_t CommandBuffer, size_t Len, void *DstMem, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint); static void ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, const std::vector &Pattern, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); - - static void - ext_oneapi_fill_cmd_buffer(sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - SYCLMemObjI *SYCLMemObj, void *Mem, - size_t PatternSize, const unsigned char *Pattern, - unsigned int Dim, sycl::range<3> Size, - sycl::range<3> AccessRange, - sycl::id<3> AccessOffset, unsigned int ElementSize, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + ur_exp_command_buffer_handle_t CommandBuffer, void *DstMem, size_t Len, + const std::vector &Pattern, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint); + + static void ext_oneapi_fill_cmd_buffer( + sycl::detail::ContextImplPtr Context, + ur_exp_command_buffer_handle_t CommandBuffer, SYCLMemObjI *SYCLMemObj, + void *Mem, size_t PatternSize, const unsigned char *Pattern, + unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, + sycl::id<3> AccessOffset, unsigned int ElementSize, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint); static void ext_oneapi_prefetch_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *Mem, - size_t Length, std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + ur_exp_command_buffer_handle_t CommandBuffer, void *Mem, size_t Length, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint); static void ext_oneapi_advise_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const void *Mem, - size_t Length, pi_mem_advice Advice, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); - - static void - copy_image_bindless(QueueImplPtr Queue, const void *Src, void *Dst, - const sycl::detail::pi::PiMemImageDesc &SrcImageDesc, - const sycl::detail::pi::PiMemImageDesc &DestImageDesc, - const sycl::detail::pi::PiMemImageFormat &SrcImageFormat, - const sycl::detail::pi::PiMemImageFormat &DestImageFormat, - const sycl::detail::pi::PiImageCopyFlags Flags, - sycl::detail::pi::PiImageOffset SrcOffset, - sycl::detail::pi::PiImageOffset DstOffset, - sycl::detail::pi::PiImageRegion CopyExtent, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent); + ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, + size_t Length, ur_usm_advice_flags_t Advice, + std::vector Deps, + ur_exp_command_buffer_sync_point_t *OutSyncPoint); + + static void copy_image_bindless( + QueueImplPtr Queue, const void *Src, void *Dst, + const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc, + const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat, + const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset, + ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, + const std::vector &DepEvents, + ur_event_handle_t *OutEvent); }; } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/online_compiler/online_compiler.cpp b/sycl/source/detail/online_compiler/online_compiler.cpp index 2e5b3ce8f8807..5d3c3a381607b 100644 --- a/sycl/source/detail/online_compiler/online_compiler.cpp +++ b/sycl/source/detail/online_compiler/online_compiler.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include @@ -94,12 +94,12 @@ compileToSPIRV(const std::string &Source, sycl::info::device_type DeviceType, #else static const std::string OclocLibraryName = "libocloc.so"; #endif - void *OclocLibrary = sycl::detail::pi::loadOsLibrary(OclocLibraryName); + void *OclocLibrary = sycl::detail::ur::loadOsLibrary(OclocLibraryName); if (!OclocLibrary) throw online_compile_error("Cannot load ocloc library: " + OclocLibraryName); void *OclocVersionHandle = - sycl::detail::pi::getOsLibraryFuncAddress(OclocLibrary, "oclocVersion"); + sycl::detail::ur::getOsLibraryFuncAddress(OclocLibrary, "oclocVersion"); // The initial versions of ocloc library did not have the oclocVersion() // function. Those versions had the same API as the first version of ocloc // library having that oclocVersion() function. @@ -126,10 +126,10 @@ compileToSPIRV(const std::string &Source, sycl::info::device_type DeviceType, ".N), where (N >= " + std::to_string(CurrentVersionMinor) + ")."); CompileToSPIRVHandle = - sycl::detail::pi::getOsLibraryFuncAddress(OclocLibrary, "oclocInvoke"); + sycl::detail::ur::getOsLibraryFuncAddress(OclocLibrary, "oclocInvoke"); if (!CompileToSPIRVHandle) throw online_compile_error("Cannot load oclocInvoke() function"); - FreeSPIRVOutputsHandle = sycl::detail::pi::getOsLibraryFuncAddress( + FreeSPIRVOutputsHandle = sycl::detail::ur::getOsLibraryFuncAddress( OclocLibrary, "oclocFreeOutput"); if (!FreeSPIRVOutputsHandle) throw online_compile_error("Cannot load oclocFreeOutput() function"); diff --git a/sycl/source/detail/os_util.cpp b/sycl/source/detail/os_util.cpp index 44eedd54da7b3..860ad71f9f7ea 100644 --- a/sycl/source/detail/os_util.cpp +++ b/sycl/source/detail/os_util.cpp @@ -148,7 +148,7 @@ std::string OSUtil::getDirName(const char *Path) { #elif defined(__SYCL_RT_OS_WINDOWS) /// Returns an absolute path where the object was found. -// pi_win_proxy_loader.dll uses this same logic. If it is changed +// ur_win_proxy_loader.dll uses this same logic. If it is changed // significantly, it might be wise to change it there too. std::string OSUtil::getCurrentDSODir() { char Path[MAX_PATH]; diff --git a/sycl/source/detail/persistent_device_code_cache.cpp b/sycl/source/detail/persistent_device_code_cache.cpp index 5ce065639a69f..f41912ddd11b0 100644 --- a/sycl/source/detail/persistent_device_code_cache.cpp +++ b/sycl/source/detail/persistent_device_code_cache.cpp @@ -54,7 +54,7 @@ LockCacheItem::~LockCacheItem() { // Returns true if the specified format is either SPIRV or a native binary. static bool -IsSupportedImageFormat(sycl::detail::pi::PiDeviceBinaryType Format) { +IsSupportedImageFormat(ur::DeviceBinaryType Format) { return Format == SYCL_DEVICE_BINARY_TYPE_SPIRV || Format == SYCL_DEVICE_BINARY_TYPE_NATIVE; } @@ -115,7 +115,7 @@ getSortedImages(const std::vector &Imgs) { void PersistentDeviceCodeCache::putItemToDisc( const device &Device, const std::vector &Imgs, const SerializedObj &SpecConsts, const std::string &BuildOptionsString, - const sycl::detail::pi::PiProgram &NativePrg) { + const ur_program_handle_t &NativePrg) { if (!areImagesCacheable(Imgs)) return; @@ -131,14 +131,13 @@ void PersistentDeviceCodeCache::putItemToDisc( unsigned int DeviceNum = 0; - Plugin->call( - NativePrg, PI_PROGRAM_INFO_NUM_DEVICES, sizeof(DeviceNum), &DeviceNum, - nullptr); + Plugin->call(urProgramGetInfo, NativePrg, UR_PROGRAM_INFO_NUM_DEVICES, + sizeof(DeviceNum), &DeviceNum, nullptr); std::vector BinarySizes(DeviceNum); - Plugin->call( - NativePrg, PI_PROGRAM_INFO_BINARY_SIZES, - sizeof(size_t) * BinarySizes.size(), BinarySizes.data(), nullptr); + Plugin->call(urProgramGetInfo, NativePrg, UR_PROGRAM_INFO_BINARY_SIZES, + sizeof(size_t) * BinarySizes.size(), BinarySizes.data(), + nullptr); std::vector> Result; std::vector Pointers; @@ -147,9 +146,8 @@ void PersistentDeviceCodeCache::putItemToDisc( Pointers.push_back(Result[I].data()); } - Plugin->call(NativePrg, PI_PROGRAM_INFO_BINARIES, - sizeof(char *) * Pointers.size(), - Pointers.data(), nullptr); + Plugin->call(urProgramGetInfo, NativePrg, UR_PROGRAM_INFO_BINARIES, + sizeof(char *) * Pointers.size(), Pointers.data(), nullptr); size_t i = 0; std::string FileName; do { diff --git a/sycl/source/detail/persistent_device_code_cache.hpp b/sycl/source/detail/persistent_device_code_cache.hpp index 4de7ace5b51f9..e2b3c8f72c4da 100644 --- a/sycl/source/detail/persistent_device_code_cache.hpp +++ b/sycl/source/detail/persistent_device_code_cache.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -186,7 +186,7 @@ class PersistentDeviceCodeCache { const std::vector &Imgs, const SerializedObj &SpecConsts, const std::string &BuildOptionsString, - const sycl::detail::pi::PiProgram &NativePrg); + const ur_program_handle_t &NativePrg); /* Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is set*/ static void trace(const std::string &msg) { diff --git a/sycl/source/detail/physical_mem_impl.hpp b/sycl/source/detail/physical_mem_impl.hpp index e36830ba07bee..9e1d07372077b 100644 --- a/sycl/source/detail/physical_mem_impl.hpp +++ b/sycl/source/detail/physical_mem_impl.hpp @@ -21,15 +21,15 @@ namespace sycl { inline namespace _V1 { namespace detail { -inline sycl::detail::pi::PiVirtualAccessFlags AccessModeToVirtualAccessFlags( +inline ur_virtual_mem_access_flag_t AccessModeToVirtualAccessFlags( ext::oneapi::experimental::address_access_mode Mode) { switch (Mode) { case ext::oneapi::experimental::address_access_mode::read: - return PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + return UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; case ext::oneapi::experimental::address_access_mode::read_write: - return PI_VIRTUAL_ACCESS_FLAG_RW; + return UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; case ext::oneapi::experimental::address_access_mode::none: - return 0; + return UR_VIRTUAL_MEM_ACCESS_FLAG_NONE; } throw sycl::exception(make_error_code(errc::invalid), "Invalid address_access_mode."); @@ -43,31 +43,30 @@ class physical_mem_impl { MContext(getSyclObjImpl(SyclContext)), MNumBytes(NumBytes) { const PluginPtr &Plugin = MContext->getPlugin(); - auto Err = Plugin->call_nocheck( - MContext->getHandleRef(), MDevice->getHandleRef(), MNumBytes, - &MPhysicalMem); + auto Err = Plugin->call_nocheck( + urPhysicalMemCreate, MContext->getHandleRef(), MDevice->getHandleRef(), + MNumBytes, nullptr, &MPhysicalMem); - if (Err == PI_ERROR_OUT_OF_RESOURCES || Err == PI_ERROR_OUT_OF_HOST_MEMORY) + if (Err == UR_RESULT_ERROR_OUT_OF_RESOURCES || + Err == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) throw sycl::exception(make_error_code(errc::memory_allocation), "Failed to allocate physical memory."); - Plugin->checkPiResult(Err); + Plugin->checkUrResult(Err); } ~physical_mem_impl() noexcept(false) { const PluginPtr &Plugin = MContext->getPlugin(); - Plugin->call(MPhysicalMem); + Plugin->call(urPhysicalMemRelease, MPhysicalMem); } void *map(uintptr_t Ptr, size_t NumBytes, ext::oneapi::experimental::address_access_mode Mode, size_t Offset) const { - sycl::detail::pi::PiVirtualAccessFlags AccessFlags = - AccessModeToVirtualAccessFlags(Mode); + auto AccessFlags = AccessModeToVirtualAccessFlags(Mode); const PluginPtr &Plugin = MContext->getPlugin(); void *ResultPtr = reinterpret_cast(Ptr); - Plugin->call( - MContext->getHandleRef(), ResultPtr, NumBytes, MPhysicalMem, Offset, - AccessFlags); + Plugin->call(urVirtualMemMap, MContext->getHandleRef(), ResultPtr, NumBytes, + MPhysicalMem, Offset, AccessFlags); return ResultPtr; } @@ -77,13 +76,11 @@ class physical_mem_impl { device get_device() const { return createSyclObjFromImpl(MDevice); } size_t size() const noexcept { return MNumBytes; } - sycl::detail::pi::PiPhysicalMem &getHandleRef() { return MPhysicalMem; } - const sycl::detail::pi::PiPhysicalMem &getHandleRef() const { - return MPhysicalMem; - } + ur_physical_mem_handle_t &getHandleRef() { return MPhysicalMem; } + const ur_physical_mem_handle_t &getHandleRef() const { return MPhysicalMem; } private: - sycl::detail::pi::PiPhysicalMem MPhysicalMem = nullptr; + ur_physical_mem_handle_t MPhysicalMem = nullptr; const std::shared_ptr MDevice; const std::shared_ptr MContext; const size_t MNumBytes; diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp deleted file mode 100644 index 004d26e3f85fc..0000000000000 --- a/sycl/source/detail/pi.cpp +++ /dev/null @@ -1,654 +0,0 @@ -//===-- pi.cpp - PI utilities implementation -------------------*- C++ -*--===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \file pi.cpp -/// Implementation of C++ wrappers for PI interface. -/// -/// \ingroup sycl_pi - -#include "context_impl.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef XPTI_ENABLE_INSTRUMENTATION -// Include the headers necessary for emitting -// traces using the trace framework -#include "xpti/xpti_trace_framework.h" -#endif - -namespace sycl { -inline namespace _V1 { -namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -// Global (to the SYCL runtime) graph handle that all command groups are a -// child of -/// Event to be used by graph related activities -xpti_td *GSYCLGraphEvent = nullptr; -/// Event to be used by PI layer related activities -xpti_td *GPICallEvent = nullptr; -/// Event to be used by PI layer calls with arguments -xpti_td *GPIArgCallEvent = nullptr; -xpti_td *GPIArgCallActiveEvent = nullptr; - -uint8_t PiCallStreamID = 0; -uint8_t PiDebugCallStreamID = 0; - -#endif // XPTI_ENABLE_INSTRUMENTATION - -template void *getPluginOpaqueData(void *OpaqueDataParam) { - void *ReturnOpaqueData = nullptr; - const PluginPtr &Plugin = pi::getPlugin(); - - Plugin->call( - OpaqueDataParam, &ReturnOpaqueData); - - return ReturnOpaqueData; -} - -namespace pi { - -static void initializePlugins(std::vector &Plugins); - -bool XPTIInitDone = false; - -// Implementation of the SYCL PI API call tracing methods that use XPTI -// framework to emit these traces that will be used by tools. -uint64_t emitFunctionBeginTrace(const char *FName) { - uint64_t CorrelationID = 0; -#ifdef XPTI_ENABLE_INSTRUMENTATION - // The function_begin and function_end trace point types are defined to - // trace library API calls and they are currently enabled here for support - // tools that need the API scope. The methods emitFunctionBeginTrace() and - // emitFunctionEndTrace() can be extended to also trace the arguments of the - // PI API call using a trace point type the extends the predefined trace - // point types. - // - // You can use the sample collector in llvm/xptifw/samples/syclpi_collector - // to print the API traces and also extend them to support arguments that - // may be traced later. - // - /// Example Usage: - /// \code{cpp} - /// // Two diagnostic trace types defined for function begin and function end - /// // with different semantics than the one in the default trace type list. - /// typedef enum { - /// diagnostic_func_begin = XPTI_TRACE_POINT_BEGIN(0), - /// diagnostic_func_end = XPTI_TRACE_POINT_END(0), - /// }syclpi_extension_t; - /// ... - /// uint16_t pi_func_begin = - /// xptiRegisterUserDefinedTracePoint("sycl.pi", func_begin); - /// uint16_t pi_func_end = - /// xptiRegisterUserDefinedTracePoint("sycl.pi", func_end); - /// ... - /// // Setup argument data for the function being traced - /// ... - /// xptiNotifySubscribers(stream_id, pi_func_begin, parent, event, instance, - /// (void *)argument_data); - /// \endcode - constexpr uint16_t NotificationTraceType = - (uint16_t)xpti::trace_point_type_t::function_begin; - if (xptiCheckTraceEnabled(PiCallStreamID, NotificationTraceType)) { - CorrelationID = xptiGetUniqueId(); - xptiNotifySubscribers(PiCallStreamID, NotificationTraceType, GPICallEvent, - nullptr, CorrelationID, - static_cast(FName)); - } -#endif // XPTI_ENABLE_INSTRUMENTATION - return CorrelationID; -} - -void emitFunctionEndTrace(uint64_t CorrelationID, const char *FName) { -#ifdef XPTI_ENABLE_INSTRUMENTATION - constexpr uint16_t NotificationTraceType = - (uint16_t)xpti::trace_point_type_t::function_end; - if (xptiCheckTraceEnabled(PiCallStreamID, NotificationTraceType)) { - // CorrelationID is the unique ID that ties together a function_begin and - // function_end pair of trace calls. The splitting of a scoped_notify into - // two function calls incurs an additional overhead as the StreamID must - // be looked up twice. - xptiNotifySubscribers(PiCallStreamID, NotificationTraceType, GPICallEvent, - nullptr, CorrelationID, - static_cast(FName)); - } -#endif // XPTI_ENABLE_INSTRUMENTATION -} - -uint64_t emitFunctionWithArgsBeginTrace(uint32_t FuncID, const char *FuncName, - unsigned char *ArgsData, - pi_plugin Plugin) { - uint64_t CorrelationID = 0; -#ifdef XPTI_ENABLE_INSTRUMENTATION - constexpr uint16_t NotificationTraceType = - (uint16_t)xpti::trace_point_type_t::function_with_args_begin; - if (xptiCheckTraceEnabled(PiDebugCallStreamID, NotificationTraceType)) { - xpti::function_with_args_t Payload{FuncID, FuncName, ArgsData, nullptr, - &Plugin}; - { - detail::tls_code_loc_t Tls; - auto CodeLoc = Tls.query(); - xpti::payload_t PL = xpti::payload_t( - CodeLoc.functionName(), CodeLoc.fileName(), CodeLoc.lineNumber(), - CodeLoc.columnNumber(), nullptr); - uint64_t InstanceNumber{}; - assert(GPIArgCallActiveEvent == nullptr); - GPIArgCallActiveEvent = - xptiMakeEvent("Plugin interface call", &PL, xpti::trace_graph_event, - xpti_at::active, &InstanceNumber); - } - - CorrelationID = xptiGetUniqueId(); - xptiNotifySubscribers(PiDebugCallStreamID, NotificationTraceType, - GPIArgCallEvent, GPIArgCallActiveEvent, CorrelationID, - &Payload); - } -#endif - return CorrelationID; -} - -void emitFunctionWithArgsEndTrace(uint64_t CorrelationID, uint32_t FuncID, - const char *FuncName, unsigned char *ArgsData, - pi_result Result, pi_plugin Plugin) { -#ifdef XPTI_ENABLE_INSTRUMENTATION - constexpr uint16_t NotificationTraceType = - (uint16_t)xpti::trace_point_type_t::function_with_args_end; - if (xptiCheckTraceEnabled(PiDebugCallStreamID, NotificationTraceType)) { - xpti::function_with_args_t Payload{FuncID, FuncName, ArgsData, &Result, - &Plugin}; - - xptiNotifySubscribers(PiDebugCallStreamID, NotificationTraceType, - GPIArgCallEvent, GPIArgCallActiveEvent, CorrelationID, - &Payload); - GPIArgCallActiveEvent = nullptr; - } -#endif -} - -void contextSetExtendedDeleter(const sycl::context &context, - pi_context_extended_deleter func, - void *user_data) { - auto impl = getSyclObjImpl(context); - auto contextHandle = reinterpret_cast(impl->getHandleRef()); - const auto &Plugin = impl->getPlugin(); - Plugin->call(contextHandle, func, - user_data); -} - -std::string platformInfoToString(pi_platform_info info) { - switch (info) { - case PI_PLATFORM_INFO_PROFILE: - return "PI_PLATFORM_INFO_PROFILE"; - case PI_PLATFORM_INFO_VERSION: - return "PI_PLATFORM_INFO_VERSION"; - case PI_PLATFORM_INFO_NAME: - return "PI_PLATFORM_INFO_NAME"; - case PI_PLATFORM_INFO_VENDOR: - return "PI_PLATFORM_INFO_VENDOR"; - case PI_PLATFORM_INFO_EXTENSIONS: - return "PI_PLATFORM_INFO_EXTENSIONS"; - case PI_EXT_PLATFORM_INFO_BACKEND: - return "PI_EXT_PLATFORM_INFO_BACKEND"; - } - return "unknown PI_PLATFORM_INFO enum value"; -} - -std::string memFlagToString(pi_mem_flags Flag) { - assert(((Flag == 0u) || ((Flag & (Flag - 1)) == 0)) && - "More than one bit set"); - - std::stringstream Sstream; - - switch (Flag) { - case pi_mem_flags{0}: - Sstream << "pi_mem_flags(0)"; - break; - case PI_MEM_FLAGS_ACCESS_RW: - Sstream << "PI_MEM_FLAGS_ACCESS_RW"; - break; - case PI_MEM_FLAGS_HOST_PTR_USE: - Sstream << "PI_MEM_FLAGS_HOST_PTR_USE"; - break; - case PI_MEM_FLAGS_HOST_PTR_COPY: - Sstream << "PI_MEM_FLAGS_HOST_PTR_COPY"; - break; - default: - Sstream << "unknown pi_mem_flags bit == " << Flag; - } - - return Sstream.str(); -} - -std::string memFlagsToString(pi_mem_flags Flags) { - std::stringstream Sstream; - bool FoundFlag = false; - - auto FlagSeparator = [](bool FoundFlag) { return FoundFlag ? "|" : ""; }; - - pi_mem_flags ValidFlags[] = {PI_MEM_FLAGS_ACCESS_RW, - PI_MEM_FLAGS_HOST_PTR_USE, - PI_MEM_FLAGS_HOST_PTR_COPY}; - - if (Flags == 0u) { - Sstream << "pi_mem_flags(0)"; - } else { - for (const auto Flag : ValidFlags) { - if (Flag & Flags) { - Sstream << FlagSeparator(FoundFlag) << memFlagToString(Flag); - FoundFlag = true; - } - } - - std::bitset<64> UnkownBits(Flags & ~(PI_MEM_FLAGS_ACCESS_RW | - PI_MEM_FLAGS_HOST_PTR_USE | - PI_MEM_FLAGS_HOST_PTR_COPY)); - if (UnkownBits.any()) { - Sstream << FlagSeparator(FoundFlag) - << "unknown pi_mem_flags bits == " << UnkownBits; - } - } - - return Sstream.str(); -} - -// GlobalPlugin is a global Plugin used with Interoperability constructors that -// use OpenCL objects to construct SYCL class objects. -// TODO: GlobalPlugin does not seem to be needed anymore. Consider removing it! -std::shared_ptr GlobalPlugin; - -// Find the plugin at the appropriate location and return the location. -std::vector> findPlugins() { - std::vector> PluginNames; - - // TODO: Based on final design discussions, change the location where the - // plugin must be searched; how to identify the plugins etc. Currently the - // search is done for libpi_opencl.so/pi_opencl.dll file in LD_LIBRARY_PATH - // env only. - // - ods_target_list *OdsTargetList = SYCLConfig::get(); - if (!OdsTargetList) { - PluginNames.emplace_back(__SYCL_OPENCL_PLUGIN_NAME, backend::opencl); - PluginNames.emplace_back(__SYCL_LEVEL_ZERO_PLUGIN_NAME, - backend::ext_oneapi_level_zero); - PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::ext_oneapi_cuda); - PluginNames.emplace_back(__SYCL_HIP_PLUGIN_NAME, backend::ext_oneapi_hip); - PluginNames.emplace_back(__SYCL_UR_PLUGIN_NAME, backend::all); - PluginNames.emplace_back(__SYCL_NATIVE_CPU_PLUGIN_NAME, - backend::ext_oneapi_native_cpu); - - } else { - ods_target_list &list = *OdsTargetList; - if (list.backendCompatible(backend::opencl)) { - PluginNames.emplace_back(__SYCL_OPENCL_PLUGIN_NAME, backend::opencl); - } - if (list.backendCompatible(backend::ext_oneapi_level_zero)) { - PluginNames.emplace_back(__SYCL_LEVEL_ZERO_PLUGIN_NAME, - backend::ext_oneapi_level_zero); - } - if (list.backendCompatible(backend::ext_oneapi_cuda)) { - PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, - backend::ext_oneapi_cuda); - } - if (list.backendCompatible(backend::ext_oneapi_hip)) { - PluginNames.emplace_back(__SYCL_HIP_PLUGIN_NAME, backend::ext_oneapi_hip); - } - if (list.backendCompatible(backend::ext_oneapi_native_cpu)) { - PluginNames.emplace_back(__SYCL_NATIVE_CPU_PLUGIN_NAME, - backend::ext_oneapi_native_cpu); - } - PluginNames.emplace_back(__SYCL_UR_PLUGIN_NAME, backend::all); - } - return PluginNames; -} - -// Load the Plugin by calling the OS dependent library loading call. -// Return the handle to the Library. -void *loadPlugin(const std::string &PluginPath) { - return loadOsPluginLibrary(PluginPath); -} - -// Unload the given plugin by calling teh OS-specific library unloading call. -// \param Library OS-specific library handle created when loading. -int unloadPlugin(void *Library) { return unloadOsPluginLibrary(Library); } - -// Binds all the PI Interface APIs to Plugin Library Function Addresses. -// TODO: Remove the 'OclPtr' extension to PI_API. -// TODO: Change the functionality such that a single getOsLibraryFuncAddress -// call is done to get all Interface API mapping. The plugin interface also -// needs to setup infrastructure to route PI_CALLs to the appropriate plugins. -// Currently, we bind to a singe plugin. -bool bindPlugin(void *Library, - const std::shared_ptr &PluginInformation) { - - decltype(::piPluginInit) *PluginInitializeFunction = - (decltype(&::piPluginInit))(getOsLibraryFuncAddress(Library, - "piPluginInit")); - if (PluginInitializeFunction == nullptr) - return false; - - int Err = PluginInitializeFunction(PluginInformation.get()); - - // TODO: Compare Supported versions and check for backward compatibility. - // Make sure err is PI_SUCCESS. - assert((Err == PI_SUCCESS) && "Unexpected error when binding to Plugin."); - (void)Err; - - // TODO: Return a more meaningful value/enum. - return true; -} - -bool trace(TraceLevel Level) { - auto TraceLevelMask = SYCLConfig::get(); - return (TraceLevelMask & Level) == Level; -} - -// Initializes all available Plugins. -std::vector &initialize() { - // This uses static variable initialization to work around a gcc bug with - // std::call_once and exceptions. - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66146 - auto initializeHelper = []() { - initializePlugins(GlobalHandler::instance().getPlugins()); - return true; - }; - static bool Initialized = initializeHelper(); - std::ignore = Initialized; - - return GlobalHandler::instance().getPlugins(); -} - -// Implementation of this function is OS specific. Please see windows_pi.cpp and -// posix_pi.cpp. -// TODO: refactor code when support matrix for DPCPP changes and is -// available on all supported systems. -std::vector> -loadPlugins(const std::vector> &&PluginNames); - -static void initializePlugins(std::vector &Plugins) { - const std::vector> PluginNames = - findPlugins(); - - if (PluginNames.empty() && trace(PI_TRACE_ALL)) - std::cerr << "SYCL_PI_TRACE[all]: " - << "No Plugins Found." << std::endl; - - // Get library handles for the list of plugins. - std::vector> LoadedPlugins = - loadPlugins(std::move(PluginNames)); - - bool IsAsanUsed = ProgramManager::getInstance().kernelUsesAsan(); - - for (auto &[Name, Backend, Library] : LoadedPlugins) { - std::shared_ptr PluginInformation = - std::make_shared(PiPlugin{ - _PI_H_VERSION_STRING, _PI_H_VERSION_STRING, - /*Targets=*/nullptr, /*FunctionPointers=*/{}, - /*IsAsanUsed*/ - IsAsanUsed ? _PI_SANITIZE_TYPE_ADDRESS : _PI_SANITIZE_TYPE_NONE}); - - if (!Library) { - if (trace(PI_TRACE_ALL)) { - std::cerr << "SYCL_PI_TRACE[all]: " - << "Check if plugin is present. " - << "Failed to load plugin: " << Name << std::endl; - } - continue; - } - - if (!bindPlugin(Library, PluginInformation)) { - if (trace(PI_TRACE_ALL)) { - std::cerr << "SYCL_PI_TRACE[all]: " - << "Failed to bind PI APIs to the plugin: " << Name - << std::endl; - } - continue; - } - PluginPtr &NewPlugin = Plugins.emplace_back( - std::make_shared(PluginInformation, Backend, Library)); - if (trace(TraceLevel::PI_TRACE_BASIC)) - std::cerr << "SYCL_PI_TRACE[basic]: " - << "Plugin found and successfully loaded: " << Name - << " [ PluginVersion: " - << NewPlugin->getPiPlugin().PluginVersion << " ]" << std::endl; - } - -#ifdef XPTI_ENABLE_INSTRUMENTATION - GlobalHandler::instance().getXPTIRegistry().initializeFrameworkOnce(); - - if (!(xptiTraceEnabled() && !XPTIInitDone)) - return; - // Not sure this is the best place to initialize the framework; SYCL runtime - // team needs to advise on the right place, until then we piggy-back on the - // initialization of the PI layer. - - // Initialize the global events just once, in the case pi::initialize() is - // called multiple times - XPTIInitDone = true; - // Registers a new stream for 'sycl' and any plugin that wants to listen to - // this stream will register itself using this string or stream ID for this - // string. - uint8_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME); - // Let all tool plugins know that a stream by the name of 'sycl' has been - // initialized and will be generating the trace stream. - GlobalHandler::instance().getXPTIRegistry().initializeStream( - SYCL_STREAM_NAME, GMajVer, GMinVer, GVerStr); - // Create a tracepoint to indicate the graph creation - xpti::payload_t GraphPayload("application_graph"); - uint64_t GraphInstanceNo; - GSYCLGraphEvent = - xptiMakeEvent("application_graph", &GraphPayload, xpti::trace_graph_event, - xpti_at::active, &GraphInstanceNo); - if (GSYCLGraphEvent) { - // The graph event is a global event and will be used as the parent for - // all nodes (command groups) - xptiNotifySubscribers(StreamID, xpti::trace_graph_create, nullptr, - GSYCLGraphEvent, GraphInstanceNo, nullptr); - } - - // Let subscribers know a new stream is being initialized - GlobalHandler::instance().getXPTIRegistry().initializeStream( - SYCL_PICALL_STREAM_NAME, GMajVer, GMinVer, GVerStr); - xpti::payload_t PIPayload("Plugin Interface Layer"); - uint64_t PiInstanceNo; - GPICallEvent = - xptiMakeEvent("PI Layer", &PIPayload, xpti::trace_algorithm_event, - xpti_at::active, &PiInstanceNo); - - GlobalHandler::instance().getXPTIRegistry().initializeStream( - SYCL_PIDEBUGCALL_STREAM_NAME, GMajVer, GMinVer, GVerStr); - xpti::payload_t PIArgPayload( - "Plugin Interface Layer (with function arguments)"); - uint64_t PiArgInstanceNo; - GPIArgCallEvent = xptiMakeEvent("PI Layer with arguments", &PIArgPayload, - xpti::trace_algorithm_event, xpti_at::active, - &PiArgInstanceNo); - - PiCallStreamID = xptiRegisterStream(SYCL_PICALL_STREAM_NAME); - PiDebugCallStreamID = xptiRegisterStream(SYCL_PIDEBUGCALL_STREAM_NAME); -#endif -} - -// Get the plugin serving given backend. -template const PluginPtr &getPlugin() { - static PluginPtr *Plugin = nullptr; - if (Plugin) - return *Plugin; - - std::vector &Plugins = pi::initialize(); - for (auto &P : Plugins) - if (P->hasBackend(BE)) { - Plugin = &P; - return *Plugin; - } - - throw exception(make_error_code(errc::runtime), - "pi::getPlugin couldn't find plugin"); -} - -template const PluginPtr &getPlugin(); -template const PluginPtr &getPlugin(); -template const PluginPtr &getPlugin(); -template const PluginPtr &getPlugin(); - -// Reads an integer value from ELF data. -template -static ResT readELFValue(const unsigned char *Data, size_t NumBytes, - bool IsBigEndian) { - assert(NumBytes <= sizeof(ResT)); - ResT Result = 0; - if (IsBigEndian) { - for (size_t I = 0; I < NumBytes; ++I) { - Result = (Result << 8) | static_cast(Data[I]); - } - } else { - std::copy(Data, Data + NumBytes, reinterpret_cast(&Result)); - } - return Result; -} - -// Checks if an ELF image contains a section with a specified name. -static bool checkELFSectionPresent(const std::string &ExpectedSectionName, - const unsigned char *ImgData, - size_t ImgSize) { - // Check for 64bit and big-endian. - bool Is64bit = ImgData[4] == 2; - bool IsBigEndian = ImgData[5] == 2; - - // Make offsets based on whether the ELF file is 64bit or not. - size_t SectionHeaderOffsetInfoOffset = Is64bit ? 0x28 : 0x20; - size_t SectionHeaderSizeInfoOffset = Is64bit ? 0x3A : 0x2E; - size_t SectionHeaderNumInfoOffset = Is64bit ? 0x3C : 0x30; - size_t SectionStringsHeaderIndexInfoOffset = Is64bit ? 0x3E : 0x32; - - // if the image doesn't contain enough data for the header values, end early. - if (ImgSize < SectionStringsHeaderIndexInfoOffset + 2) - return false; - - // Read the e_shoff, e_shentsize, e_shnum, and e_shstrndx entries in the - // header. - uint64_t SectionHeaderOffset = readELFValue( - ImgData + SectionHeaderOffsetInfoOffset, Is64bit ? 8 : 4, IsBigEndian); - uint16_t SectionHeaderSize = readELFValue( - ImgData + SectionHeaderSizeInfoOffset, 2, IsBigEndian); - uint16_t SectionHeaderNum = readELFValue( - ImgData + SectionHeaderNumInfoOffset, 2, IsBigEndian); - uint16_t SectionStringsHeaderIndex = readELFValue( - ImgData + SectionStringsHeaderIndexInfoOffset, 2, IsBigEndian); - - // End early if we do not have the expected number of section headers or - // if the read section string header index is out-of-range. - if (ImgSize < SectionHeaderOffset + SectionHeaderNum * SectionHeaderSize || - SectionStringsHeaderIndex >= SectionHeaderNum) - return false; - - // Get the location of the section string data. - size_t SectionStringsInfoOffset = Is64bit ? 0x18 : 0x10; - const unsigned char *SectionStringsHeaderData = - ImgData + SectionHeaderOffset + - SectionStringsHeaderIndex * SectionHeaderSize; - uint64_t SectionStrings = readELFValue( - SectionStringsHeaderData + SectionStringsInfoOffset, Is64bit ? 8 : 4, - IsBigEndian); - const unsigned char *SectionStringsData = ImgData + SectionStrings; - - // For each section, check the name against the expected section and return - // true if we find it. - for (size_t I = 0; I < SectionHeaderNum; ++I) { - // Get the offset into the section string data of this sections name. - const unsigned char *HeaderData = - ImgData + SectionHeaderOffset + I * SectionHeaderSize; - uint32_t SectionNameOffset = - readELFValue(HeaderData, 4, IsBigEndian); - - // Read the section name and check if it is the same as the name we are - // looking for. - const char *SectionName = - reinterpret_cast(SectionStringsData + SectionNameOffset); - if (SectionName == ExpectedSectionName) - return true; - } - return false; -} - -// Returns the e_type field from an ELF image. -static uint16_t getELFHeaderType(const unsigned char *ImgData, size_t ImgSize) { - (void)ImgSize; - assert(ImgSize >= 18 && "Not enough bytes to have an ELF header type."); - - bool IsBigEndian = ImgData[5] == 2; - return readELFValue(ImgData + 16, 2, IsBigEndian); -} - -sycl::detail::pi::PiDeviceBinaryType -getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize) { - // Top-level magic numbers for the recognized binary image formats. - auto MatchMagicNumber = [&](auto Number) { - return ImgSize >= sizeof(Number) && - std::memcmp(ImgData, &Number, sizeof(Number)) == 0; - }; - - if (MatchMagicNumber(uint32_t{0x07230203})) - return SYCL_DEVICE_BINARY_TYPE_SPIRV; - - if (MatchMagicNumber(uint32_t{0xDEC04342})) - return SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; - - if (MatchMagicNumber(uint32_t{0x43544E49})) - // 'I', 'N', 'T', 'C' ; Intel native - return SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; - - // Check for ELF format, size requirements include data we'll read in case of - // succesful match. - if (ImgSize >= 18 && MatchMagicNumber(uint32_t{0x464c457F})) { - uint16_t ELFHdrType = getELFHeaderType(ImgData, ImgSize); - if (ELFHdrType == 0xFF04) - // OpenCL executable. - return SYCL_DEVICE_BINARY_TYPE_NATIVE; - - if (ELFHdrType == 0xFF12) - // ZEBIN executable. - return SYCL_DEVICE_BINARY_TYPE_NATIVE; - - // Newer ZEBIN format does not have a special header type, but can instead - // be identified by having a required .ze_info section. - if (checkELFSectionPresent(".ze_info", ImgData, ImgSize)) - return SYCL_DEVICE_BINARY_TYPE_NATIVE; - } - - if (MatchMagicNumber(std::array{'!', '<', 'a', 'r', 'c', 'h', '>', '\n'})) - // "ar" format is used to pack binaries for multiple devices, e.g. via - // - // -Xsycl-target-backend=spir64_gen "-device acm-g10,acm-g11" - // - // option. - return SYCL_DEVICE_BINARY_TYPE_NATIVE; - - return SYCL_DEVICE_BINARY_TYPE_NONE; -} - -} // namespace pi -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 1b085ed7bcaee..fc50886ada43c 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "sycl/detail/ur.hpp" +#include "sycl/info/info_desc.hpp" #include #include #include @@ -14,6 +16,7 @@ #include #include #include +#include #include #include @@ -31,7 +34,7 @@ namespace detail { using PlatformImplPtr = std::shared_ptr; PlatformImplPtr -platform_impl::getOrMakePlatformImpl(sycl::detail::pi::PiPlatform PiPlatform, +platform_impl::getOrMakePlatformImpl(ur_platform_handle_t UrPlatform, const PluginPtr &Plugin) { PlatformImplPtr Result; { @@ -43,12 +46,12 @@ platform_impl::getOrMakePlatformImpl(sycl::detail::pi::PiPlatform PiPlatform, // If we've already seen this platform, return the impl for (const auto &PlatImpl : PlatformCache) { - if (PlatImpl->getHandleRef() == PiPlatform) + if (PlatImpl->getHandleRef() == UrPlatform) return PlatImpl; } // Otherwise make the impl - Result = std::make_shared(PiPlatform, Plugin); + Result = std::make_shared(UrPlatform, Plugin); PlatformCache.emplace_back(Result); } @@ -56,13 +59,13 @@ platform_impl::getOrMakePlatformImpl(sycl::detail::pi::PiPlatform PiPlatform, } PlatformImplPtr -platform_impl::getPlatformFromPiDevice(sycl::detail::pi::PiDevice PiDevice, +platform_impl::getPlatformFromUrDevice(ur_device_handle_t UrDevice, const PluginPtr &Plugin) { - sycl::detail::pi::PiPlatform Plt = + ur_platform_handle_t Plt = nullptr; // TODO catch an exception and put it to list // of asynchronous exceptions - Plugin->call(PiDevice, PI_DEVICE_INFO_PLATFORM, - sizeof(Plt), &Plt, nullptr); + Plugin->call(urDeviceGetInfo, UrDevice, UR_DEVICE_INFO_PLATFORM, sizeof(Plt), + &Plt, nullptr); return getOrMakePlatformImpl(Plt, Plugin); } @@ -83,9 +86,8 @@ static bool IsBannedPlatform(platform Platform) { name) != std::string::npos; const auto Backend = detail::getSyclObjImpl(Platform)->getBackend(); const bool IsMatchingOCL = (HasNameMatch && Backend == backend::opencl); - if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL) && - IsMatchingOCL) { - std::cout << "SYCL_PI_TRACE[all]: " << name + if (detail::ur::trace() && IsMatchingOCL) { + std::cout << "SYCL_UR_TRACE: " << name << " OpenCL platform found but is not compatible." << std::endl; } return IsMatchingOCL; @@ -98,80 +100,47 @@ static bool IsBannedPlatform(platform Platform) { // id into each plugin, which is used for device counting. std::vector platform_impl::get_platforms() { - // Get the vector of platforms supported by a given PI plugin + // Get the vector of platforms supported by a given UR plugin + // replace uses of this with with a helper in plugin object, the plugin + // objects will own the ur adapter handles and they'll need to pass them to + // urPlatformsGet - so urPlatformsGet will need to be wrapped with a helper auto getPluginPlatforms = [](PluginPtr &Plugin) { std::vector Platforms; - pi_uint32 NumPlatforms = 0; - if (Plugin->call_nocheck( - 0, nullptr, &NumPlatforms) != PI_SUCCESS) + + auto UrPlatforms = Plugin->getUrPlatforms(); + + if (UrPlatforms.empty()) { return Platforms; + } - if (NumPlatforms) { - std::vector PiPlatforms(NumPlatforms); - if (Plugin->call_nocheck( - NumPlatforms, PiPlatforms.data(), nullptr) != PI_SUCCESS) - return Platforms; - - for (const auto &PiPlatform : PiPlatforms) { - platform Platform = detail::createSyclObjFromImpl( - getOrMakePlatformImpl(PiPlatform, Plugin)); - if (IsBannedPlatform(Platform)) { - continue; // bail as early as possible, otherwise banned platforms may - // mess up device counting - } + for (const auto &UrPlatform : UrPlatforms) { + platform Platform = detail::createSyclObjFromImpl( + getOrMakePlatformImpl(UrPlatform, Plugin)); + if (IsBannedPlatform(Platform)) { + continue; // bail as early as possible, otherwise banned platforms may + // mess up device counting + } - // The SYCL spec says that a platform has one or more devices. ( SYCL - // 2020 4.6.2 ) If we have an empty platform, we don't report it back - // from platform::get_platforms(). - if (!Platform.get_devices(info::device_type::all).empty()) { - Platforms.push_back(Platform); - } + // The SYCL spec says that a platform has one or more devices. ( SYCL + // 2020 4.6.2 ) If we have an empty platform, we don't report it back + // from platform::get_platforms(). + if (!Platform.get_devices(info::device_type::all).empty()) { + Platforms.push_back(Platform); } } return Platforms; }; - static const bool PreferUR = [] { - const char *PreferURStr = std::getenv("SYCL_PREFER_UR"); - return (PreferURStr && (std::stoi(PreferURStr) != 0)); - }(); - // See which platform we want to be served by which plugin. // There should be just one plugin serving each backend. - std::vector &Plugins = sycl::detail::pi::initialize(); + std::vector &Plugins = sycl::detail::ur::initializeUr(); std::vector> PlatformsWithPlugin; - // First check Unified Runtime - // Keep track of backends covered by UR - std::unordered_set BackendsUR; - if (PreferUR) { - PluginPtr *PluginUR = nullptr; - for (PluginPtr &Plugin : Plugins) { - if (Plugin->hasBackend(backend::all)) { // this denotes UR - PluginUR = &Plugin; - break; - } - } - if (PluginUR) { - for (const auto &P : getPluginPlatforms(*PluginUR)) { - PlatformsWithPlugin.push_back({P, *PluginUR}); - BackendsUR.insert(getSyclObjImpl(P)->getBackend()); - } - } - } - // Then check backend-specific plugins for (auto &Plugin : Plugins) { - if (Plugin->hasBackend(backend::all)) { - continue; // skip UR on this pass - } const auto &PluginPlatforms = getPluginPlatforms(Plugin); for (const auto &P : PluginPlatforms) { - // Only add those not already covered by UR - if (BackendsUR.find(getSyclObjImpl(P)->getBackend()) == - BackendsUR.end()) { - PlatformsWithPlugin.push_back({P, Plugin}); - } + PlatformsWithPlugin.push_back({P, Plugin}); } } @@ -198,9 +167,9 @@ std::vector platform_impl::get_platforms() { // The return value is a vector that represents the indices of the chosen // devices. template -std::vector platform_impl::filterDeviceFilter( - std::vector &PiDevices, - ListT *FilterList) const { +std::vector +platform_impl::filterDeviceFilter(std::vector &UrDevices, + ListT *FilterList) const { constexpr bool is_ods_target = std::is_same_v; @@ -228,25 +197,38 @@ std::vector platform_impl::filterDeviceFilter( std::vector original_indices; // Find out backend of the platform - sycl::detail::pi::PiPlatformBackend PiBackend; - MPlugin->call( - MPlatform, PI_EXT_PLATFORM_INFO_BACKEND, - sizeof(sycl::detail::pi::PiPlatformBackend), &PiBackend, nullptr); - backend Backend = convertBackend(PiBackend); + ur_platform_backend_t UrBackend = UR_PLATFORM_BACKEND_UNKNOWN; + MPlugin->call(urPlatformGetInfo, MPlatform, UR_PLATFORM_INFO_BACKEND, + sizeof(ur_platform_backend_t), &UrBackend, nullptr); + backend Backend = convertUrBackend(UrBackend); int InsertIDx = 0; // DeviceIds should be given consecutive numbers across platforms in the same // backend std::lock_guard Guard(*MPlugin->getPluginMutex()); int DeviceNum = MPlugin->getStartingDeviceId(MPlatform); - for (sycl::detail::pi::PiDevice Device : PiDevices) { - sycl::detail::pi::PiDeviceType PiDevType; - MPlugin->call( - Device, PI_DEVICE_INFO_TYPE, sizeof(sycl::detail::pi::PiDeviceType), - &PiDevType, nullptr); - // Assumption here is that there is 1-to-1 mapping between PiDevType and + for (ur_device_handle_t Device : UrDevices) { + ur_device_type_t UrDevType = UR_DEVICE_TYPE_ALL; + MPlugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_TYPE, + sizeof(ur_device_type_t), &UrDevType, nullptr); + // Assumption here is that there is 1-to-1 mapping between UrDevType and // Sycl device type for GPU, CPU, and ACC. - info::device_type DeviceType = pi::cast(PiDevType); + info::device_type DeviceType = info::device_type::all; + switch (UrDevType) { + default: + case UR_DEVICE_TYPE_ALL: + DeviceType = info::device_type::all; + break; + case UR_DEVICE_TYPE_GPU: + DeviceType = info::device_type::gpu; + break; + case UR_DEVICE_TYPE_CPU: + DeviceType = info::device_type::cpu; + break; + case UR_DEVICE_TYPE_FPGA: + DeviceType = info::device_type::accelerator; + break; + } for (const FilterT &Filter : FilterList->get()) { backend FilterBackend = Filter.Backend.value_or(backend::all); @@ -277,13 +259,13 @@ std::vector platform_impl::filterDeviceFilter( } } - PiDevices[InsertIDx++] = Device; + UrDevices[InsertIDx++] = Device; original_indices.push_back(DeviceNum); break; } DeviceNum++; } - PiDevices.resize(InsertIDx); + UrDevices.resize(InsertIDx); // remember the last backend that has gone through this filter function // to assign a unique device id number across platforms that belong to // the same backend. For example, opencl:cpu:0, opencl:acc:1, opencl:gpu:2 @@ -292,22 +274,22 @@ std::vector platform_impl::filterDeviceFilter( } std::shared_ptr -platform_impl::getDeviceImpl(sycl::detail::pi::PiDevice PiDevice) { +platform_impl::getDeviceImpl(ur_device_handle_t UrDevice) { const std::lock_guard Guard(MDeviceMapMutex); - return getDeviceImplHelper(PiDevice); + return getDeviceImplHelper(UrDevice); } std::shared_ptr platform_impl::getOrMakeDeviceImpl( - sycl::detail::pi::PiDevice PiDevice, + ur_device_handle_t UrDevice, const std::shared_ptr &PlatformImpl) { const std::lock_guard Guard(MDeviceMapMutex); // If we've already seen this device, return the impl - std::shared_ptr Result = getDeviceImplHelper(PiDevice); + std::shared_ptr Result = getDeviceImplHelper(UrDevice); if (Result) return Result; // Otherwise make the impl - Result = std::make_shared(PiDevice, PlatformImpl); + Result = std::make_shared(UrDevice, PlatformImpl); MDeviceCache.emplace_back(Result); return Result; @@ -459,11 +441,28 @@ platform_impl::get_devices(info::device_type DeviceType) const { if (DeviceType == info::device_type::host) return Res; - pi_uint32 NumDevices = 0; - MPlugin->call( - MPlatform, pi::cast(DeviceType), - 0, // CP info::device_type::all - pi::cast(nullptr), &NumDevices); + ur_device_type_t UrDeviceType = UR_DEVICE_TYPE_ALL; + + switch (DeviceType) { + default: + case info::device_type::all: + UrDeviceType = UR_DEVICE_TYPE_ALL; + break; + case info::device_type::gpu: + UrDeviceType = UR_DEVICE_TYPE_GPU; + break; + case info::device_type::cpu: + UrDeviceType = UR_DEVICE_TYPE_CPU; + break; + case info::device_type::accelerator: + UrDeviceType = UR_DEVICE_TYPE_FPGA; + break; + } + + uint32_t NumDevices = 0; + MPlugin->call(urDeviceGet, MPlatform, UrDeviceType, + 0, // CP info::device_type::all + nullptr, &NumDevices); const backend Backend = getBackend(); if (NumDevices == 0) { @@ -472,10 +471,10 @@ platform_impl::get_devices(info::device_type DeviceType) const { // analysis. Doing adjustment by simple copy of last device num from // previous platform. // Needs non const plugin reference. - std::vector &Plugins = sycl::detail::pi::initialize(); + std::vector &Plugins = sycl::detail::ur::initializeUr(); auto It = std::find_if(Plugins.begin(), Plugins.end(), [&Platform = MPlatform](PluginPtr &Plugin) { - return Plugin->containsPiPlatform(Platform); + return Plugin->containsUrPlatform(Platform); }); if (It != Plugins.end()) { PluginPtr &Plugin = *It; @@ -485,21 +484,19 @@ platform_impl::get_devices(info::device_type DeviceType) const { return Res; } - std::vector PiDevices(NumDevices); + std::vector UrDevices(NumDevices); // TODO catch an exception and put it to list of asynchronous exceptions - MPlugin->call( - MPlatform, - pi::cast( - DeviceType), // CP info::device_type::all - NumDevices, PiDevices.data(), nullptr); + MPlugin->call(urDeviceGet, MPlatform, + UrDeviceType, // CP info::device_type::all + NumDevices, UrDevices.data(), nullptr); - // Some elements of PiDevices vector might be filtered out, so make a copy of + // Some elements of UrDevices vector might be filtered out, so make a copy of // handles to do a cleanup later - std::vector PiDevicesToCleanUp = PiDevices; + std::vector UrDevicesToCleanUp = UrDevices; // Filter out devices that are not present in the SYCL_DEVICE_ALLOWLIST if (SYCLConfig::get()) - applyAllowList(PiDevices, MPlatform, MPlugin); + applyAllowList(UrDevices, MPlatform, MPlugin); // The first step is to filter out devices that are not compatible with // ONEAPI_DEVICE_SELECTOR. This is also the mechanism by which top level @@ -507,23 +504,23 @@ platform_impl::get_devices(info::device_type DeviceType) const { std::vector PlatformDeviceIndices; if (OdsTargetList) { PlatformDeviceIndices = filterDeviceFilter( - PiDevices, OdsTargetList); + UrDevices, OdsTargetList); } - // The next step is to inflate the filtered PIDevices into SYCL Device + // The next step is to inflate the filtered UrDevices into SYCL Device // objects. PlatformImplPtr PlatformImpl = getOrMakePlatformImpl(MPlatform, MPlugin); std::transform( - PiDevices.begin(), PiDevices.end(), std::back_inserter(Res), - [PlatformImpl](const sycl::detail::pi::PiDevice &PiDevice) -> device { + UrDevices.begin(), UrDevices.end(), std::back_inserter(Res), + [PlatformImpl](const ur_device_handle_t UrDevice) -> device { return detail::createSyclObjFromImpl( - PlatformImpl->getOrMakeDeviceImpl(PiDevice, PlatformImpl)); + PlatformImpl->getOrMakeDeviceImpl(UrDevice, PlatformImpl)); }); // The reference counter for handles, that we used to create sycl objects, is // incremented, so we need to call release here. - for (sycl::detail::pi::PiDevice &PiDev : PiDevicesToCleanUp) - MPlugin->call(PiDev); + for (ur_device_handle_t &UrDev : UrDevicesToCleanUp) + MPlugin->call(urDeviceRelease, UrDev); // If we aren't using ONEAPI_DEVICE_SELECTOR, then we are done. // and if there are no devices so far, there won't be any need to replace them @@ -540,7 +537,7 @@ platform_impl::get_devices(info::device_type DeviceType) const { bool platform_impl::has_extension(const std::string &ExtensionName) const { std::string AllExtensionNames = get_platform_info_string_impl( MPlatform, getPlugin(), - detail::PiInfoCode::value); + detail::UrInfoCode::value); return (AllExtensionNames.find(ExtensionName) != std::string::npos); } @@ -549,11 +546,10 @@ bool platform_impl::supports_usm() const { has_extension("cl_intel_unified_shared_memory"); } -pi_native_handle platform_impl::getNative() const { +ur_native_handle_t platform_impl::getNative() const { const auto &Plugin = getPlugin(); - pi_native_handle Handle; - Plugin->call(getHandleRef(), - &Handle); + ur_native_handle_t Handle = 0; + Plugin->call(urPlatformGetNativeHandle, getHandleRef(), &Handle); return Handle; } @@ -618,10 +614,10 @@ bool platform_impl::has(aspect Aspect) const { } std::shared_ptr -platform_impl::getDeviceImplHelper(sycl::detail::pi::PiDevice PiDevice) { +platform_impl::getDeviceImplHelper(ur_device_handle_t UrDevice) { for (const std::weak_ptr &DeviceWP : MDeviceCache) { if (std::shared_ptr Device = DeviceWP.lock()) { - if (Device->getHandleRef() == PiDevice) + if (Device->getHandleRef() == UrDevice) return Device; } } diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index dfb2597bf417b..a2d926834bf05 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include namespace sycl { @@ -37,16 +37,15 @@ class platform_impl { /// /// \param APlatform is a raw plug-in platform handle. /// \param APlugin is a plug-in handle. - explicit platform_impl(sycl::detail::pi::PiPlatform APlatform, + explicit platform_impl(ur_platform_handle_t APlatform, const std::shared_ptr &APlugin) : MPlatform(APlatform), MPlugin(APlugin) { - // Find out backend of the platform - sycl::detail::pi::PiPlatformBackend PiBackend; - APlugin->call_nocheck( - APlatform, PI_EXT_PLATFORM_INFO_BACKEND, - sizeof(sycl::detail::pi::PiPlatformBackend), &PiBackend, nullptr); - MBackend = convertBackend(PiBackend); + ur_platform_backend_t UrBackend = UR_PLATFORM_BACKEND_UNKNOWN; + APlugin->call_nocheck(urPlatformGetInfo, APlatform, + UR_PLATFORM_INFO_BACKEND, + sizeof(ur_platform_backend_t), &UrBackend, nullptr); + MBackend = convertUrBackend(UrBackend); } ~platform_impl() = default; @@ -93,14 +92,17 @@ class platform_impl { void getBackendOption(const char *frontend_option, const char **backend_option) const { const auto &Plugin = getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck( - MPlatform, frontend_option, backend_option); - Plugin->checkPiResult(Err); + ur_result_t Err = Plugin->call_nocheck( + urPlatformGetBackendOption, MPlatform, frontend_option, backend_option); + Plugin->checkUrResult(Err); } /// \return an instance of OpenCL cl_platform_id. - cl_platform_id get() const { return pi::cast(MPlatform); } + cl_platform_id get() const { + ur_native_handle_t nativeHandle = 0; + getPlugin()->call(urPlatformGetNativeHandle, MPlatform, &nativeHandle); + return ur::cast(nativeHandle); + } /// Returns raw underlying plug-in platform handle. /// @@ -109,7 +111,7 @@ class platform_impl { /// is in use. /// /// \return a raw plug-in platform handle. - const sycl::detail::pi::PiPlatform &getHandleRef() const { return MPlatform; } + const ur_platform_handle_t &getHandleRef() const { return MPlatform; } /// Returns all available SYCL platforms in the system. /// @@ -135,7 +137,7 @@ class platform_impl { /// Gets the native handle of the SYCL platform. /// /// \return a native handle. - pi_native_handle getNative() const; + ur_native_handle_t getNative() const; /// Indicates if all of the SYCL devices on this platform have the /// given feature. @@ -148,69 +150,67 @@ class platform_impl { bool has(aspect Aspect) const; /// Queries the device_impl cache to return a shared_ptr for the - /// device_impl corresponding to the PiDevice. + /// device_impl corresponding to the UrDevice. /// - /// \param PiDevice is the PiDevice whose impl is requested + /// \param UrDevice is the UrDevice whose impl is requested /// /// \return a shared_ptr corresponding to the device - std::shared_ptr - getDeviceImpl(sycl::detail::pi::PiDevice PiDevice); + std::shared_ptr getDeviceImpl(ur_device_handle_t UrDevice); /// Queries the device_impl cache to either return a shared_ptr - /// for the device_impl corresponding to the PiDevice or add + /// for the device_impl corresponding to the UrDevice or add /// a new entry to the cache /// - /// \param PiDevice is the PiDevice whose impl is requested + /// \param UrDevice is the UrDevice whose impl is requested /// /// \param PlatormImpl is the Platform for that Device /// /// \return a shared_ptr corresponding to the device std::shared_ptr - getOrMakeDeviceImpl(sycl::detail::pi::PiDevice PiDevice, + getOrMakeDeviceImpl(ur_device_handle_t UrDevice, const std::shared_ptr &PlatformImpl); - /// Queries the cache to see if the specified PiPlatform has been seen + /// Queries the cache to see if the specified UR platform has been seen /// before. If so, return the cached platform_impl, otherwise create a new /// one and cache it. /// - /// \param PiPlatform is the PI Platform handle representing the platform - /// \param Plugin is the PI plugin providing the backend for the platform - /// \return the platform_impl representing the PI platform + /// \param UrPlatform is the UR Platform handle representing the platform + /// \param Plugin is the UR plugin providing the backend for the platform + /// \return the platform_impl representing the UR platform static std::shared_ptr - getOrMakePlatformImpl(sycl::detail::pi::PiPlatform PiPlatform, + getOrMakePlatformImpl(ur_platform_handle_t UrPlatform, const PluginPtr &Plugin); /// Queries the cache for the specified platform based on an input device. /// If found, returns the the cached platform_impl, otherwise creates a new /// one and caches it. /// - /// \param PiDevice is the PI device handle for the device whose platform is + /// \param UrDevice is the UR device handle for the device whose platform is /// desired - /// \param Plugin is the PI plugin providing the backend for the device and + /// \param Plugin is the UR plugin providing the backend for the device and /// platform /// \return the platform_impl that contains the input device static std::shared_ptr - getPlatformFromPiDevice(sycl::detail::pi::PiDevice PiDevice, - const PluginPtr &Plugin); + getPlatformFromUrDevice(ur_device_handle_t UrDevice, const PluginPtr &Plugin); // when getting sub-devices for ONEAPI_DEVICE_SELECTOR we may temporarily // ensure every device is a root one. bool MAlwaysRootDevice = false; private: - std::shared_ptr - getDeviceImplHelper(sycl::detail::pi::PiDevice PiDevice); + std::shared_ptr getDeviceImplHelper(ur_device_handle_t UrDevice); // Helper to filter reportable devices in the platform template std::vector - filterDeviceFilter(std::vector &PiDevices, + filterDeviceFilter(std::vector &UrDevices, ListT *FilterList) const; - sycl::detail::pi::PiPlatform MPlatform = 0; + ur_platform_handle_t MPlatform = 0; backend MBackend; PluginPtr MPlugin; + std::vector> MDeviceCache; std::mutex MDeviceMapMutex; }; diff --git a/sycl/source/detail/platform_info.hpp b/sycl/source/detail/platform_info.hpp index 70bcd626024d9..20e698eaf2390 100644 --- a/sycl/source/detail/platform_info.hpp +++ b/sycl/source/detail/platform_info.hpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include "split_string.hpp" @@ -19,21 +19,19 @@ namespace sycl { inline namespace _V1 { namespace detail { -inline std::string -get_platform_info_string_impl(sycl::detail::pi::PiPlatform Plt, - const PluginPtr &Plugin, - pi_platform_info PiCode) { - size_t ResultSize; +inline std::string get_platform_info_string_impl(ur_platform_handle_t Plt, + const PluginPtr &Plugin, + ur_platform_info_t UrCode) { + size_t ResultSize = 0; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call(Plt, PiCode, 0, nullptr, - &ResultSize); + Plugin->call(urPlatformGetInfo, Plt, UrCode, 0, nullptr, &ResultSize); if (ResultSize == 0) { return ""; } std::unique_ptr Result(new char[ResultSize]); // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call(Plt, PiCode, ResultSize, - Result.get(), nullptr); + Plugin->call(urPlatformGetInfo, Plt, UrCode, ResultSize, Result.get(), + nullptr); return Result.get(); } // The platform information methods @@ -41,21 +39,21 @@ template typename std::enable_if< std::is_same::value, std::string>::type -get_platform_info(sycl::detail::pi::PiPlatform Plt, const PluginPtr &Plugin) { +get_platform_info(ur_platform_handle_t Plt, const PluginPtr &Plugin) { static_assert(is_platform_info_desc::value, "Invalid platform information descriptor"); return get_platform_info_string_impl(Plt, Plugin, - detail::PiInfoCode::value); + detail::UrInfoCode::value); } template typename std::enable_if::value, std::vector>::type -get_platform_info(sycl::detail::pi::PiPlatform Plt, const PluginPtr &Plugin) { +get_platform_info(ur_platform_handle_t Plt, const PluginPtr &Plugin) { static_assert(is_platform_info_desc::value, "Invalid platform information descriptor"); std::string Result = get_platform_info_string_impl( - Plt, Plugin, detail::PiInfoCode::value); + Plt, Plugin, detail::UrInfoCode::value); return split_string(Result, ' '); } diff --git a/sycl/source/detail/platform_util.cpp b/sycl/source/detail/platform_util.cpp index 14fa713674e3e..68692ce14b96b 100644 --- a/sycl/source/detail/platform_util.cpp +++ b/sycl/source/detail/platform_util.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #if defined(__SYCL_RT_OS_LINUX) #include diff --git a/sycl/source/detail/plugin.hpp b/sycl/source/detail/plugin.hpp index a0fa663ab3099..6cf40da421ff3 100644 --- a/sycl/source/detail/plugin.hpp +++ b/sycl/source/detail/plugin.hpp @@ -8,13 +8,14 @@ #pragma once #include -#include #include #include #include #include -#include #include +#include + +#include #ifdef XPTI_ENABLE_INSTRUMENTATION // Include the headers necessary for emitting traces using the trace framework @@ -23,90 +24,31 @@ #include -#define __SYCL_REPORT_PI_ERR_TO_STREAM(expr) \ +#define __SYCL_REPORT_UR_ERR_TO_STREAM(expr) \ { \ auto code = expr; \ - if (code != PI_SUCCESS) { \ - std::cerr << __SYCL_PI_ERROR_REPORT << sycl::detail::codeToString(code) \ + if (code != UR_RESULT_SUCCESS) { \ + std::cerr << __SYCL_UR_ERROR_REPORT << sycl::detail::codeToString(code) \ << std::endl; \ } \ } -#define __SYCL_CHECK_OCL_CODE_NO_EXC(X) __SYCL_REPORT_PI_ERR_TO_STREAM(X) +#define __SYCL_CHECK_OCL_CODE_NO_EXC(X) __SYCL_REPORT_UR_ERR_TO_STREAM(X) namespace sycl { inline namespace _V1 { namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -extern xpti::trace_event_data_t *GPICallEvent; -extern xpti::trace_event_data_t *GPIArgCallEvent; -extern uint8_t PiCallStreamID; -extern uint8_t PiDebugCallStreamID; -#endif - -template -struct array_fill_helper; - -template struct PiApiArgTuple; - -#define _PI_API(api) \ - template <> struct PiApiArgTuple { \ - using type = typename function_traits::args_type; \ - }; - -#include -#undef _PI_API - -template -struct array_fill_helper { - static void fill(unsigned char *Dst, T &&Arg) { - using ArgsTuple = typename PiApiArgTuple::type; - // C-style cast is required here. - auto RealArg = (std::tuple_element_t)(Arg); - *(std::remove_cv_t> *)Dst = RealArg; - } -}; - -template -struct array_fill_helper { - static void fill(unsigned char *Dst, const T &&Arg, Args &&...Rest) { - using ArgsTuple = typename PiApiArgTuple::type; - // C-style cast is required here. - auto RealArg = (std::tuple_element_t)(Arg); - *(std::remove_cv_t> *)Dst = RealArg; - array_fill_helper::fill( - Dst + sizeof(decltype(RealArg)), std::forward(Rest)...); - } -}; - -template -constexpr size_t totalSize(const std::tuple &) { - return (sizeof(Ts) + ...); -} - -template -auto packCallArguments(ArgsT &&...Args) { - using ArgsTuple = typename PiApiArgTuple::type; - - constexpr size_t TotalSize = totalSize(ArgsTuple{}); - - std::array ArgsData; - array_fill_helper::fill(ArgsData.data(), - std::forward(Args)...); - - return ArgsData; -} /// The plugin class provides a unified interface to the underlying low-level /// runtimes for the device-agnostic SYCL runtime. /// -/// \ingroup sycl_pi +/// \ingroup sycl_ur class plugin { public: plugin() = delete; - plugin(const std::shared_ptr &Plugin, - backend UseBackend, void *LibraryHandle) - : MPlugin(Plugin), MBackend(UseBackend), MLibraryHandle(LibraryHandle), + + plugin(ur_adapter_handle_t adapter, backend UseBackend) + : MAdapter(adapter), MBackend(UseBackend), TracingMutex(std::make_shared()), MPluginMutex(std::make_shared()) {} @@ -118,150 +60,83 @@ class plugin { ~plugin() = default; - const sycl::detail::pi::PiPlugin &getPiPlugin() const { return *MPlugin; } - sycl::detail::pi::PiPlugin &getPiPlugin() { return *MPlugin; } - const std::shared_ptr &getPiPluginPtr() const { - return MPlugin; - } - - /// \throw SYCL 2020 exception(errc) if pi_result is not PI_SUCCESS + /// \throw SYCL 2020 exception(errc) if ur_result is not UR_RESULT_SUCCESS template - void checkPiResult(sycl::detail::pi::PiResult pi_result) const { - char *message = nullptr; - if (pi_result == PI_ERROR_PLUGIN_SPECIFIC_ERROR) { - pi_result = call_nocheck(&message); + void checkUrResult(ur_result_t ur_result) const { + const char *message = nullptr; + if (ur_result == UR_RESULT_ERROR_ADAPTER_SPECIFIC) { + int32_t adapter_error = 0; + ur_result = call_nocheck(urAdapterGetLastError, MAdapter, &message, &adapter_error); // If the warning level is greater then 2 emit the message - if (detail::SYCLConfig::get() >= 2) + if (detail::SYCLConfig::get() >= 2) { std::clog << message << std::endl; + } // If it is a warning do not throw code - if (pi_result == PI_SUCCESS) + if (ur_result == UR_RESULT_SUCCESS) { return; + } } - if (pi_result != PI_SUCCESS) { - throw sycl::detail::set_pi_error( + if (ur_result != UR_RESULT_SUCCESS) { + throw sycl::detail::set_ur_error( sycl::exception(sycl::make_error_code(errc), - __SYCL_PI_ERROR_REPORT + - sycl::detail::codeToString(pi_result) + + __SYCL_UR_ERROR_REPORT + + sycl::detail::codeToString(ur_result) + (message ? "\n" + std::string(message) + "\n" : std::string{})), - pi_result); + ur_result); } } - /// Calls the PiApi, traces the call, and returns the result. + std::vector &getUrPlatforms() { + std::call_once(PlatformsPopulated, [&]() { + uint32_t platformCount = 0; + call(urPlatformGet, &MAdapter, 1, 0, nullptr, &platformCount); + UrPlatforms.resize(platformCount); + call(urPlatformGet, &MAdapter, 1, platformCount, UrPlatforms.data(), + nullptr); + // We need one entry in this per platform + LastDeviceIds.resize(platformCount); + }); + return UrPlatforms; + } + + ur_adapter_handle_t getUrAdapter() { return MAdapter; } + + /// Calls the UR Api, traces the call, and returns the result. /// /// Usage: /// \code{cpp} - /// PiResult Err = Plugin->call(Args); - /// Plugin->checkPiResult(Err); // Checks Result and throws an exception with - /// an errc::runtime error code. + /// ur_result_t Err = Plugin->call(urEntryPoint, Args); + /// Plugin->checkUrResult(Err); // Checks Result and throws a runtime_error + /// // exception. /// \endcode /// - /// \sa plugin::checkPiResult - - template - sycl::detail::pi::PiResult call_nocheck(ArgsT... Args) const { - sycl::detail::pi::PiFuncInfo PiCallInfo; -#ifdef XPTI_ENABLE_INSTRUMENTATION - bool CorrelationIDAvailable = false, CorrelationIDWithArgsAvailable = false; - // Emit a function_begin trace for the PI API before the call is executed. - // If arguments need to be captured, then a data structure can be sent in - // the per_instance_user_data field. - const char *PIFnName = PiCallInfo.getFuncName(); - uint64_t CorrelationIDWithArgs = 0, CorrelationID = 0; - - if (xptiCheckTraceEnabled( - PiCallStreamID, - (uint16_t)xpti::trace_point_type_t::function_begin)) { - CorrelationID = pi::emitFunctionBeginTrace(PIFnName); - CorrelationIDAvailable = true; - } - using PackCallArgumentsTy = - decltype(packCallArguments(std::forward(Args)...)); - std::unique_ptr ArgsDataPtr = nullptr; - // If subscribers are listening to Pi debug call stream, only then prepare - // the data for the notifications and emit notifications. Even though the - // function emitFunctionWithArgsBeginTrace() checks for the trqace typoe - // using xptiTraceCheckEnabled(), we add a guard here before we prepare the - // data for the notification, as it comes with a cost - if (xptiCheckTraceEnabled( - PiDebugCallStreamID, - (uint16_t)xpti::trace_point_type_t::function_with_args_begin)) { - // TODO check if stream is observed when corresponding API is present. - ArgsDataPtr = std::make_unique( - xptiTraceEnabled() - ? packCallArguments(std::forward(Args)...) - : PackCallArgumentsTy{}); - CorrelationIDWithArgs = pi::emitFunctionWithArgsBeginTrace( - static_cast(PiApiOffset), PIFnName, ArgsDataPtr->data(), - *MPlugin); - CorrelationIDWithArgsAvailable = true; + /// \sa plugin::checkUrResult + template + ur_result_t call_nocheck(UrFunc F, ArgsT... Args) const { + ur_result_t R = UR_RESULT_SUCCESS; + if (!adapterReleased) { + R = F(Args...); } -#endif - sycl::detail::pi::PiResult R = PI_SUCCESS; - if (pi::trace(pi::TraceLevel::PI_TRACE_CALLS)) { - std::unique_lock Guard(*TracingMutex, std::defer_lock); - const char *FnName = PiCallInfo.getFuncName(); - if (std::string{FnName} != "piextEnqueueNativeCommand") { - Guard.lock(); - } - std::cout << "---> " << FnName << "(" << std::endl; - sycl::detail::pi::printArgs(Args...); - if (!pluginReleased) { - if (std::string{FnName} == "piextEnqueueNativeCommand") { - std::cout << "---> Begin Native Command\n\n[\n" << std::endl; - } - R = PiCallInfo.getFuncPtr(*MPlugin)(Args...); - if (std::string{FnName} == "piextEnqueueNativeCommand") { - std::cout << "]\n---> End Native Command\n" << std::endl; - } - std::cout << ") ---> "; - sycl::detail::pi::printArgs(R); - sycl::detail::pi::printOuts(Args...); - std::cout << std::endl; - } else { - std::cout << ") ---> "; - std::cout << "API Called After Plugin Teardown, Functon Call ignored."; - std::cout << std::endl; - } - } else { - if (!pluginReleased) { - R = PiCallInfo.getFuncPtr(*MPlugin)(Args...); - } - } -#ifdef XPTI_ENABLE_INSTRUMENTATION - // Close the function begin with a call to function end; we do not need to - // check th xptiTraceCheckEnbled() here as it is performed within the - // function - if (CorrelationIDAvailable) { - // Only send function_end notification if function_begin is subscribed to - pi::emitFunctionEndTrace(CorrelationID, PIFnName); - } - if (CorrelationIDWithArgsAvailable) { - pi::emitFunctionWithArgsEndTrace( - CorrelationIDWithArgs, static_cast(PiApiOffset), PIFnName, - ArgsDataPtr->data(), R, *MPlugin); - } -#endif return R; } /// Calls the API, traces the call, checks the result /// /// \throw sycl::runtime_exception if the call was not successful. - template - void call(ArgsT... Args) const { - sycl::detail::pi::PiResult Err = call_nocheck(Args...); - checkPiResult(Err); + template + void call(UrFunc F, ArgsT... Args) const { + auto Err = call_nocheck(F, Args...); + checkUrResult(Err); } /// \throw sycl::exceptions(errc) if the call was not successful. - template - void call(ArgsT... Args) const { - sycl::detail::pi::PiResult Err = call_nocheck(Args...); - checkPiResult(Err); + template + void call(UrFunc F, ArgsT... Args) const { + auto Err = call_nocheck(F, Args...); + checkUrResult(Err); } /// Tells if this plugin can serve specified backend. @@ -269,24 +144,22 @@ class plugin { /// multiple backends as determined by the platforms reported by the plugin. bool hasBackend(backend Backend) const { return Backend == MBackend; } - void *getLibraryHandle() const { return MLibraryHandle; } - void *getLibraryHandle() { return MLibraryHandle; } - int unload() { - this->pluginReleased = true; - return sycl::detail::pi::unloadPlugin(MLibraryHandle); + void release() { + call(urAdapterRelease, MAdapter); + this->adapterReleased = true; } - // return the index of PiPlatforms. + // Return the index of a UR platform. // If not found, add it and return its index. // The function is expected to be called in a thread safe manner. - int getPlatformId(sycl::detail::pi::PiPlatform Platform) { - auto It = std::find(PiPlatforms.begin(), PiPlatforms.end(), Platform); - if (It != PiPlatforms.end()) - return It - PiPlatforms.begin(); + int getPlatformId(ur_platform_handle_t Platform) { + auto It = std::find(UrPlatforms.begin(), UrPlatforms.end(), Platform); + if (It != UrPlatforms.end()) + return It - UrPlatforms.begin(); - PiPlatforms.push_back(Platform); + UrPlatforms.push_back(Platform); LastDeviceIds.push_back(0); - return PiPlatforms.size() - 1; + return UrPlatforms.size() - 1; } // Device ids are consecutive across platforms within a plugin. @@ -294,7 +167,7 @@ class plugin { // So, instead of returing the last device id of the given platform, // return the last device id of the predecessor platform. // The function is expected to be called in a thread safe manner. - int getStartingDeviceId(sycl::detail::pi::PiPlatform Platform) { + int getStartingDeviceId(ur_platform_handle_t Platform) { int PlatformId = getPlatformId(Platform); if (PlatformId == 0) return 0; @@ -303,7 +176,7 @@ class plugin { // set the id of the last device for the given platform // The function is expected to be called in a thread safe manner. - void setLastDeviceId(sycl::detail::pi::PiPlatform Platform, int Id) { + void setLastDeviceId(ur_platform_handle_t Platform, int Id) { int PlatformId = getPlatformId(Platform); LastDeviceIds[PlatformId] = Id; } @@ -311,34 +184,34 @@ class plugin { // Adjust the id of the last device for the given platform. // Involved when there is no device on that platform at all. // The function is expected to be called in a thread safe manner. - void adjustLastDeviceId(sycl::detail::pi::PiPlatform Platform) { + void adjustLastDeviceId(ur_platform_handle_t Platform) { int PlatformId = getPlatformId(Platform); if (PlatformId > 0 && LastDeviceIds[PlatformId] < LastDeviceIds[PlatformId - 1]) LastDeviceIds[PlatformId] = LastDeviceIds[PlatformId - 1]; } - bool containsPiPlatform(sycl::detail::pi::PiPlatform Platform) { - auto It = std::find(PiPlatforms.begin(), PiPlatforms.end(), Platform); - return It != PiPlatforms.end(); + bool containsUrPlatform(ur_platform_handle_t Platform) { + auto It = std::find(UrPlatforms.begin(), UrPlatforms.end(), Platform); + return It != UrPlatforms.end(); } std::shared_ptr getPluginMutex() { return MPluginMutex; } - bool pluginReleased = false; + bool adapterReleased = false; private: - std::shared_ptr MPlugin; + ur_adapter_handle_t MAdapter; backend MBackend; - void *MLibraryHandle; // the handle returned from dlopen std::shared_ptr TracingMutex; - // Mutex to guard PiPlatforms and LastDeviceIds. + // Mutex to guard UrPlatforms and LastDeviceIds. // Note that this is a temporary solution until we implement the global // Device/Platform cache later. std::shared_ptr MPluginMutex; - // vector of PiPlatforms that belong to this plugin - std::vector PiPlatforms; + // vector of UrPlatforms that belong to this plugin + std::once_flag PlatformsPopulated; + std::vector UrPlatforms; // represents the unique ids of the last device of each platform - // index of this vector corresponds to the index in PiPlatforms vector. + // index of this vector corresponds to the index in UrPlatforms vector. std::vector LastDeviceIds; }; // class plugin diff --git a/sycl/source/detail/plugin_printers.hpp b/sycl/source/detail/plugin_printers.hpp deleted file mode 100644 index a71c5e48d9b08..0000000000000 --- a/sycl/source/detail/plugin_printers.hpp +++ /dev/null @@ -1,183 +0,0 @@ -//==--------- plugin_printers.hpp - Printers for the Plugin Interface ------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Print functions used for the Plguin Interface tracing. - -#pragma once - -#include -#include - -#include - -namespace sycl { -inline namespace _V1 { -namespace detail { -namespace pi { - -template -inline typename std::enable_if::value, void>::type -print(T val) { - std::cout << " : " << val << std::endl; -} - -template -inline typename std::enable_if::value, void>::type -print(T val) { - std::cout << " : " << reinterpret_cast(val) - << std::endl; -} - -template <> inline void print<>(PiPlatform val) { - std::cout << "pi_platform : " << val << std::endl; -} - -template <> inline void print<>(PiEvent val) { - std::cout << "pi_event : " << val << std::endl; -} - -template <> inline void print<>(PiMem val) { - std::cout << "pi_mem : " << val << std::endl; -} - -template <> inline void print<>(PiEvent *val) { - std::cout << "pi_event * : " << val; - if (val) - std::cout << "[ " << *val << " ... ]"; - else - std::cout << "[ nullptr ]"; - std::cout << std::endl; -} - -template <> inline void print<>(const PiEvent *val) { - std::cout << "const pi_event * : " << val; - if (val) - std::cout << "[ " << *val << " ... ]"; - else - std::cout << "[ nullptr ]"; - std::cout << std::endl; -} - -template <> inline void print<>(pi_buffer_region rgn) { - std::cout << "pi_buffer_region origin/size : " << rgn->origin << "/" - << rgn->size << std::endl; -} - -template <> inline void print<>(pi_buff_rect_region rgn) { - std::cout << "pi_buff_rect_region width_bytes/height/depth : " - << rgn->width_bytes << "/" << rgn->height_scalar << "/" - << rgn->depth_scalar << std::endl; -} - -template <> inline void print<>(pi_buff_rect_offset off) { - std::cout << "pi_buff_rect_offset x_bytes/y/z : " << off->x_bytes << "/" - << off->y_scalar << "/" << off->z_scalar << std::endl; -} - -template <> inline void print<>(pi_image_region rgn) { - std::cout << "pi_image_region width/height/depth : " << rgn->width << "/" - << rgn->height << "/" << rgn->depth << std::endl; -} - -template <> inline void print<>(pi_image_offset off) { - std::cout << "pi_image_offset x/y/z : " << off->x << "/" << off->y << "/" - << off->z << std::endl; -} - -template <> inline void print<>(const pi_image_desc *desc) { - std::cout << "image_desc w/h/d : " << desc->image_width << " / " - << desc->image_height << " / " << desc->image_depth - << " -- arrSz/row/slice : " << desc->image_array_size << " / " - << desc->image_row_pitch << " / " << desc->image_slice_pitch - << " -- num_mip_lvls/num_smpls/image_type : " - << desc->num_mip_levels << " / " << desc->num_samples << " / " - << desc->image_type << std::endl; -} - -template <> inline void print<>(PiResult val) { - std::cout << "pi_result : "; - if (val == PI_SUCCESS) - std::cout << "PI_SUCCESS" << std::endl; - else - std::cout << val << std::endl; -} - -// cout does not resolve a nullptr. -template <> inline void print<>(std::nullptr_t) { - std::cout << "" << std::endl; -} - -template <> inline void print<>(char *val) { - std::cout << " : " << static_cast(val) << std::endl; -} - -template <> inline void print<>(const char *val) { - std::cout << ": " << val << std::endl; -} - -inline void printArgs(void) {} -template -void printArgs(Arg0 arg0, Args... args) { - std::cout << "\t"; - print(arg0); - pi::printArgs(std::forward(args)...); -} - -template struct printOut { - printOut(T) {} -}; // Do nothing - -template <> struct printOut { - printOut(PiEvent *val) { - std::cout << "\t[out]pi_event * : " << val; - if (val) - std::cout << "[ " << *val << " ... ]"; - else - std::cout << "[ nullptr ]"; - std::cout << std::endl; - } -}; - -template <> struct printOut { - printOut(PiMem *val) { - std::cout << "\t[out]pi_mem * : " << val; - if (val) - std::cout << "[ " << *val << " ... ]"; - else - std::cout << "[ nullptr ]"; - std::cout << std::endl; - } -}; - -template <> struct printOut { - printOut(void *val) { std::cout << "\t[out]void * : " << val << std::endl; } -}; - -template struct printOut { - printOut(T **val) { - std::cout << "\t[out] ** : " << val; - if (val) - std::cout << "[ " << *val << " ... ]"; - else - std::cout << "[ nullptr ]"; - std::cout << std::endl; - } -}; - -inline void printOuts(void) {} -template -void printOuts(Arg0 arg0, Args... args) { - using T = decltype(arg0); - printOut a(arg0); - printOuts(std::forward(args)...); -} - -} // namespace pi -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/source/detail/posix_pi.cpp b/sycl/source/detail/posix_pi.cpp deleted file mode 100644 index 220727f3bb59a..0000000000000 --- a/sycl/source/detail/posix_pi.cpp +++ /dev/null @@ -1,69 +0,0 @@ -//==---------------- posix_pi.cpp ------------------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include - -#include -#include - -namespace sycl { -inline namespace _V1 { -namespace detail::pi { - -void *loadOsLibrary(const std::string &LibraryPath) { - // TODO: Check if the option RTLD_NOW is correct. Explore using - // RTLD_DEEPBIND option when there are multiple plugins. - void *so = dlopen(LibraryPath.c_str(), RTLD_NOW); - if (!so && trace(TraceLevel::PI_TRACE_ALL)) { - char *Error = dlerror(); - std::cerr << "SYCL_PI_TRACE[-1]: dlopen(" << LibraryPath - << ") failed with <" << (Error ? Error : "unknown error") << ">" - << std::endl; - } - return so; -} - -void *loadOsPluginLibrary(const std::string &PluginPath) { - return loadOsLibrary(PluginPath); -} - -int unloadOsLibrary(void *Library) { return dlclose(Library); } - -int unloadOsPluginLibrary(void *Library) { - // The mock plugin does not have an associated library, so we allow nullptr - // here to avoid it trying to free a non-existent library. - if (!Library) - return 0; - return dlclose(Library); -} - -void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { - return dlsym(Library, FunctionName.c_str()); -} - -// Load plugins corresponding to provided list of plugin names. -std::vector> -loadPlugins(const std::vector> &&PluginNames) { - std::vector> LoadedPlugins; - const std::string LibSYCLDir = - sycl::detail::OSUtil::getCurrentDSODir() + sycl::detail::OSUtil::DirSep; - - for (auto &PluginName : PluginNames) { - void *Library = loadOsPluginLibrary(LibSYCLDir + PluginName.first); - LoadedPlugins.push_back(std::make_tuple( - std::move(PluginName.first), std::move(PluginName.second), Library)); - } - - return LoadedPlugins; -} - -} // namespace detail::pi -} // namespace _V1 -} // namespace sycl diff --git a/sycl/source/detail/posix_ur.cpp b/sycl/source/detail/posix_ur.cpp new file mode 100644 index 0000000000000..8ca9991a03363 --- /dev/null +++ b/sycl/source/detail/posix_ur.cpp @@ -0,0 +1,40 @@ +//==---------------- posix_ur.cpp ------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail::ur { + +void *loadOsLibrary(const std::string &LibraryPath) { + // TODO: Check if the option RTLD_NOW is correct. Explore using + // RTLD_DEEPBIND option when there are multiple plugins. + void *so = dlopen(LibraryPath.c_str(), RTLD_NOW); + if (!so && detail::ur::trace()) { + char *Error = dlerror(); + std::cerr << "SYCL_UR_TRACE: dlopen(" << LibraryPath << ") failed with <" + << (Error ? Error : "unknown error") << ">" << std::endl; + } + return so; +} + +int unloadOsLibrary(void *Library) { return dlclose(Library); } + +void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { + return dlsym(Library, FunctionName.c_str()); +} + +} // namespace detail::ur +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 1a421214a4e81..ab0eb8a00f13a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -56,13 +57,13 @@ static constexpr char UseSpvEnv[]("SYCL_USE_KERNEL_SPV"); /// This function enables ITT annotations in SPIR-V module by setting /// a specialization constant if INTEL_LIBITTNOTIFY64 env variable is set. -static void -enableITTAnnotationsIfNeeded(const sycl::detail::pi::PiProgram &Prog, - const PluginPtr &Plugin) { +static void enableITTAnnotationsIfNeeded(const ur_program_handle_t &Prog, + const PluginPtr &Plugin) { if (SYCLConfig::get() != nullptr) { constexpr char SpecValue = 1; - Plugin->call( - Prog, ITTSpecConstId, sizeof(char), &SpecValue); + ur_specialization_constant_info_t SpecConstInfo = { + ITTSpecConstId, sizeof(char), &SpecValue}; + Plugin->call(urProgramSetSpecializationConstants, Prog, 1, &SpecConstInfo); } } @@ -70,31 +71,33 @@ ProgramManager &ProgramManager::getInstance() { return GlobalHandler::instance().getProgramManager(); } -static sycl::detail::pi::PiProgram +static ur_program_handle_t createBinaryProgram(const ContextImplPtr Context, const device &Device, const unsigned char *Data, size_t DataLen, - const std::vector Metadata) { + const std::vector Metadata) { const PluginPtr &Plugin = Context->getPlugin(); #ifndef _NDEBUG - pi_uint32 NumDevices = 0; - Plugin->call(Context->getHandleRef(), - PI_CONTEXT_INFO_NUM_DEVICES, - sizeof(NumDevices), &NumDevices, - /*param_value_size_ret=*/nullptr); + uint32_t NumDevices = 0; + Plugin->call(urContextGetInfo, Context->getHandleRef(), + UR_CONTEXT_INFO_NUM_DEVICES, sizeof(NumDevices), &NumDevices, + /*param_value_size_ret=*/nullptr); assert(NumDevices > 0 && "Only a single device is supported for AOT compilation"); #endif - sycl::detail::pi::PiProgram Program; - const sycl::detail::pi::PiDevice PiDevice = - getSyclObjImpl(Device)->getHandleRef(); - pi_int32 BinaryStatus = CL_SUCCESS; - Plugin->call( - Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data, - Metadata.size(), Metadata.data(), &BinaryStatus, &Program); - - if (BinaryStatus != CL_SUCCESS) { - throw detail::set_pi_error( + ur_program_handle_t Program; + ur_device_handle_t UrDevice = getSyclObjImpl(Device)->getHandleRef(); + ur_result_t BinaryStatus = UR_RESULT_SUCCESS; + ur_program_properties_t Properties = {}; + Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; + Properties.pNext = nullptr; + Properties.count = Metadata.size(); + Properties.pMetadatas = Metadata.data(); + Plugin->call(urProgramCreateWithBinary, Context->getHandleRef(), UrDevice, + DataLen, Data, &Properties, &Program); + + if (BinaryStatus != UR_RESULT_SUCCESS) { + throw detail::set_ur_error( exception(make_error_code(errc::runtime), "Creating program with binary failed."), BinaryStatus); @@ -103,20 +106,20 @@ createBinaryProgram(const ContextImplPtr Context, const device &Device, return Program; } -static sycl::detail::pi::PiProgram -createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, - size_t DataLen) { - sycl::detail::pi::PiProgram Program = nullptr; +static ur_program_handle_t createSpirvProgram(const ContextImplPtr Context, + const unsigned char *Data, + size_t DataLen) { + ur_program_handle_t Program = nullptr; const PluginPtr &Plugin = Context->getPlugin(); - Plugin->call(Context->getHandleRef(), Data, - DataLen, &Program); + Plugin->call(urProgramCreateWithIL, Context->getHandleRef(), Data, DataLen, + nullptr, &Program); return Program; } -// TODO replace this with a new PI API function +// TODO replace this with a new UR API function static bool isDeviceBinaryTypeSupported(const context &C, - sycl::detail::pi::PiDeviceBinaryType Format) { + ur::DeviceBinaryType Format) { // All formats except SYCL_DEVICE_BINARY_TYPE_SPIRV are supported. if (Format != SYCL_DEVICE_BINARY_TYPE_SPIRV) return true; @@ -159,8 +162,7 @@ isDeviceBinaryTypeSupported(const context &C, } // getFormatStr is used for debug-printing, so it may be unused. -[[maybe_unused]] static const char * -getFormatStr(sycl::detail::pi::PiDeviceBinaryType Format) { +[[maybe_unused]] static const char *getFormatStr(ur::DeviceBinaryType Format) { switch (Format) { case SYCL_DEVICE_BINARY_TYPE_NONE: return "none"; @@ -175,8 +177,8 @@ getFormatStr(sycl::detail::pi::PiDeviceBinaryType Format) { return "unknown"; } -sycl::detail::pi::PiProgram -ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, +ur_program_handle_t +ProgramManager::createURProgram(const RTDeviceBinaryImage &Img, const context &Context, const device &Device) { if constexpr (DbgProgMgr > 0) std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", " @@ -201,10 +203,10 @@ ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, // implementation, so will be implemented together with it. // Img->Format can't be updated as it is inside of the in-memory // OS module binary. - sycl::detail::pi::PiDeviceBinaryType Format = Img.getFormat(); + ur::DeviceBinaryType Format = Img.getFormat(); if (Format == SYCL_DEVICE_BINARY_TYPE_NONE) - Format = pi::getBinaryImageFormat(RawImg.BinaryStart, ImgSize); + Format = ur::getBinaryImageFormat(RawImg.BinaryStart, ImgSize); // sycl::detail::pi::PiDeviceBinaryType Format = Img->Format; // assert(Format != SYCL_DEVICE_BINARY_TYPE_NONE && "Image format not set"); @@ -214,21 +216,19 @@ ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, "SPIR-V online compilation is not supported in this context"); // Get program metadata from properties - auto ProgMetadata = Img.getProgramMetadata(); - std::vector ProgMetadataVector{ - ProgMetadata.begin(), ProgMetadata.end()}; + auto ProgMetadata = Img.getProgramMetadataUR(); // Load the image const ContextImplPtr Ctx = getSyclObjImpl(Context); - sycl::detail::pi::PiProgram Res = + ur_program_handle_t Res = Format == SYCL_DEVICE_BINARY_TYPE_SPIRV ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize) : createBinaryProgram(Ctx, Device, RawImg.BinaryStart, ImgSize, - ProgMetadataVector); + ProgMetadata); { std::lock_guard Lock(MNativeProgramsMutex); - // associate the PI program with the image it was created for + // associate the UR program with the image it was created for NativePrograms.insert({Res, &Img}); } @@ -487,37 +487,38 @@ static void applyOptionsFromEnvironment(std::string &CompileOpts, applyLinkOptionsFromEnvironment(LinkOpts); } -std::pair -ProgramManager::getOrCreatePIProgram( +std::pair ProgramManager::getOrCreateURProgram( const RTDeviceBinaryImage &MainImg, const std::vector &AllImages, const context &Context, const device &Device, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts) { - sycl::detail::pi::PiProgram NativePrg; + ur_program_handle_t NativePrg; // TODO: Or native? auto BinProg = PersistentDeviceCodeCache::getItemFromDisc( Device, AllImages, SpecConsts, CompileAndLinkOptions); if (BinProg.size()) { // Get program metadata from properties - std::vector ProgMetadataVector; + std::vector ProgMetadataVector; for (const RTDeviceBinaryImage *Img : AllImages) { auto ProgMetadata = Img->getProgramMetadata(); - std::copy(ProgMetadata.begin(), ProgMetadata.end(), - std::back_inserter(ProgMetadataVector)); + for (const auto &Prop : ProgMetadata) { + ProgMetadataVector.push_back( + ur::mapDeviceBinaryPropertyToProgramMetadata(Prop)); + } } // TODO: Build for multiple devices once supported by program manager NativePrg = createBinaryProgram(getSyclObjImpl(Context), Device, (const unsigned char *)BinProg[0].data(), BinProg[0].size(), ProgMetadataVector); } else { - NativePrg = createPIProgram(MainImg, Context, Device); + NativePrg = createURProgram(MainImg, Context, Device); } return {NativePrg, BinProg.size()}; } /// Emits information about built programs if the appropriate contitions are /// met, namely when SYCL_RT_WARNING_LEVEL is greater than or equal to 2. -static void emitBuiltProgramInfo(const pi_program &Prog, +static void emitBuiltProgramInfo(const ur_program_handle_t &Prog, const ContextImplPtr &Context) { if (SYCLConfig::get() >= 2) { std::string ProgramBuildLog = @@ -526,26 +527,56 @@ static void emitBuiltProgramInfo(const pi_program &Prog, } } +static const char *getUrDeviceTarget(const char *URDeviceTarget) { + if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_UNKNOWN) == 0) + return UR_DEVICE_BINARY_TARGET_UNKNOWN; + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV32) == 0) + return UR_DEVICE_BINARY_TARGET_SPIRV32; + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) + return UR_DEVICE_BINARY_TARGET_SPIRV64; + else if (strcmp(URDeviceTarget, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) + return UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN) == + 0) + return UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + else if (strcmp(URDeviceTarget, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) + return UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NVPTX64) == 0) + return UR_DEVICE_BINARY_TARGET_NVPTX64; + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_AMDGCN) == 0) + return UR_DEVICE_BINARY_TARGET_AMDGCN; + else if (strcmp(URDeviceTarget, __SYCL_DEVICE_BINARY_TARGET_NATIVE_CPU) == + 0) + return "native_cpu"; // todo: define UR_DEVICE_BINARY_TARGET_NATIVE_CPU; + + return UR_DEVICE_BINARY_TARGET_UNKNOWN; +} + static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev) { const std::shared_ptr &DeviceImpl = detail::getSyclObjImpl(Dev); auto &Plugin = DeviceImpl->getPlugin(); - const sycl::detail::pi::PiDevice &PIDeviceHandle = DeviceImpl->getHandleRef(); + const ur_device_handle_t &URDeviceHandle = DeviceImpl->getHandleRef(); - // Call piextDeviceSelectBinary with only one image to check if an image is + // Call urDeviceSelectBinary with only one image to check if an image is // compatible with implementation. The function returns invalid index if no // device images are compatible. - pi_uint32 SuitableImageID = std::numeric_limits::max(); - auto DevBin = reinterpret_cast( - const_cast(&BinImage->getRawData())); - sycl::detail::pi::PiResult Error = - Plugin->call_nocheck( - PIDeviceHandle, &DevBin, - /*num bin images = */ (pi_uint32)1, &SuitableImageID); - if (Error != PI_SUCCESS && Error != PI_ERROR_INVALID_BINARY) - throw detail::set_pi_error(exception(make_error_code(errc::runtime), + uint32_t SuitableImageID = std::numeric_limits::max(); + sycl_device_binary DevBin = + const_cast(&BinImage->getRawData()); + + ur_device_binary_t UrBinary{}; + UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin->DeviceTargetSpec); + + ur_result_t Error = + Plugin->call_nocheck(urDeviceSelectBinary, URDeviceHandle, &UrBinary, + /*num bin images = */ (uint32_t)1, &SuitableImageID); + if (Error != UR_RESULT_SUCCESS && Error != UR_RESULT_ERROR_INVALID_BINARY) + throw detail::set_ur_error(exception(make_error_code(errc::runtime), "Invalid binary image or device"), Error); @@ -563,7 +594,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( WorkList.push(ISProp->Name); HandledSymbols.insert(ISProp->Name); } - sycl::detail::pi::PiDeviceBinaryType Format = MainImg.getFormat(); + ur::DeviceBinaryType Format = MainImg.getFormat(); if (!WorkList.empty() && Format != SYCL_DEVICE_BINARY_TYPE_SPIRV) throw exception(make_error_code(errc::feature_not_supported), "Dynamic linking is not supported for AOT compilation yet"); @@ -666,8 +697,7 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( static void setSpecializationConstants(const std::shared_ptr &InputImpl, - sycl::detail::pi::PiProgram Prog, - const PluginPtr &Plugin) { + ur_program_handle_t Prog, const PluginPtr &Plugin) { // Set ITT annotation specialization constant if needed. enableITTAnnotationsIfNeeded(Prog, Plugin); @@ -681,17 +711,19 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, std::ignore = SpecConstNames; for (const device_image_impl::SpecConstDescT &SpecIDDesc : SpecConstDescs) { if (SpecIDDesc.IsSet) { - Plugin->call( - Prog, SpecIDDesc.ID, SpecIDDesc.Size, - SpecConsts.data() + SpecIDDesc.BlobOffset); + ur_specialization_constant_info_t SpecConstInfo = { + SpecIDDesc.ID, SpecIDDesc.Size, + SpecConsts.data() + SpecIDDesc.BlobOffset}; + Plugin->call(urProgramSetSpecializationConstants, Prog, 1, + &SpecConstInfo); } } } } -// When caching is enabled, the returned PiProgram will already have +// When caching is enabled, the returned UrProgram will already have // its ref count incremented. -sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( +ur_program_handle_t ProgramManager::getBuiltURProgram( const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc, bool JITCompilationIsRequired) { @@ -716,13 +748,13 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( RootDevImpl = ParentDev; } - pi_bool MustBuildOnSubdevice = PI_TRUE; - ContextImpl->getPlugin()->call( - RootDevImpl->getHandleRef(), PI_DEVICE_INFO_BUILD_ON_SUBDEVICE, - sizeof(pi_bool), &MustBuildOnSubdevice, nullptr); + ur_bool_t MustBuildOnSubdevice = true; + ContextImpl->getPlugin()->call(urDeviceGetInfo, RootDevImpl->getHandleRef(), + UR_DEVICE_INFO_BUILD_ON_SUBDEVICE, + sizeof(ur_bool_t), &MustBuildOnSubdevice, + nullptr); - DeviceImplPtr Dev = - (MustBuildOnSubdevice == PI_TRUE) ? DeviceImpl : RootDevImpl; + DeviceImplPtr Dev = (MustBuildOnSubdevice == true) ? DeviceImpl : RootDevImpl; auto Context = createSyclObjFromImpl(ContextImpl); auto Device = createSyclObjFromImpl(Dev); const RTDeviceBinaryImage &Img = @@ -754,7 +786,7 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( // Should always come last! appendCompileEnvironmentVariablesThatAppend(CompileOpts); appendLinkEnvironmentVariablesThatAppend(LinkOpts); - auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram( + auto [NativePrg, DeviceCodeWasInCache] = getOrCreateURProgram( Img, AllImages, Context, Device, CompileOpts + LinkOpts, SpecConsts); if (!DeviceCodeWasInCache) { @@ -762,8 +794,7 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( enableITTAnnotationsIfNeeded(NativePrg, Plugin); } - ProgramPtr ProgramManaged( - NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease); + ProgramPtr ProgramManaged(NativePrg, urProgramRelease); // Link a fallback implementation of device libraries if they are not // supported by a device compiler. @@ -777,7 +808,7 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( !SYCLConfig::get()) DeviceLibReqMask = getDeviceLibReqMask(Img); - std::vector ProgramsToLink; + std::vector ProgramsToLink; // If we had a program in cache, then it should have been the fully linked // program already. if (!DeviceCodeWasInCache) { @@ -790,8 +821,8 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( SerializedObj ImgSpecConsts = DeviceImageImpl->get_spec_const_blob_ref(); - sycl::detail::pi::PiProgram NativePrg = - createPIProgram(*BinImg, Context, Device); + ur_program_handle_t NativePrg = + createURProgram(*BinImg, Context, Device); if (BinImg->supportsSpecConstants()) setSpecializationConstants(DeviceImageImpl, NativePrg, Plugin); @@ -804,8 +835,8 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( getSyclObjImpl(Device).get()->getHandleRef(), DeviceLibReqMask, ProgramsToLink); // Those extra programs won't be used anymore, just the final linked result - for (sycl::detail::pi::PiProgram Prg : ProgramsToLink) - Plugin->call(Prg); + for (ur_program_handle_t Prg : ProgramsToLink) + Plugin->call(urProgramRelease, Prg); emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl); @@ -829,8 +860,9 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( }; uint32_t ImgId = Img.getImageID(); - const sycl::detail::pi::PiDevice PiDevice = Dev->getHandleRef(); - auto CacheKey = std::make_pair(std::make_pair(SpecConsts, ImgId), PiDevice); + const ur_device_handle_t UrDevice = Dev->getHandleRef(); + auto CacheKey = + std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), UrDevice); auto GetCachedBuildF = [&Cache, &CacheKey]() { return Cache.getOrInsertProgram(CacheKey); @@ -843,7 +875,7 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( // getOrBuild is not supposed to return nullptr assert(BuildResult != nullptr && "Invalid build result"); - sycl::detail::pi::PiProgram ResProgram = BuildResult->Val; + ur_program_handle_t ResProgram = BuildResult->Val; auto Plugin = ContextImpl->getPlugin(); // If we linked any extra device images, then we need to @@ -855,7 +887,7 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); if (DidInsert) { // For every cached copy of the program, we need to increment its refcount - Plugin->call(ResProgram); + Plugin->call(urProgramRetain, ResProgram); } } @@ -863,14 +895,14 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( // stored in the cache, and one handle is returned to the // caller. In that case, we need to increase the ref count of the // program. - Plugin->call(ResProgram); + ContextImpl->getPlugin()->call(urProgramRetain, ResProgram); return ResProgram; } -// When caching is enabled, the returned PiProgram and PiKernel will +// When caching is enabled, the returned UrProgram and UrKernel will // already have their ref count incremented. -std::tuple +std::tuple ProgramManager::getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, @@ -890,9 +922,9 @@ ProgramManager::getOrCreateKernel(const ContextImplPtr &ContextImpl, // Should always come last! appendCompileEnvironmentVariablesThatAppend(CompileOpts); appendLinkEnvironmentVariablesThatAppend(LinkOpts); - const sycl::detail::pi::PiDevice PiDevice = DeviceImpl->getHandleRef(); + ur_device_handle_t UrDevice = DeviceImpl->getHandleRef(); - auto key = std::make_tuple(std::move(SpecConsts), PiDevice, + auto key = std::make_tuple(std::move(SpecConsts), UrDevice, CompileOpts + LinkOpts, KernelName); if (SYCLConfig::get()) { auto ret_tuple = Cache.tryToGetKernelFast(key); @@ -901,30 +933,32 @@ ProgramManager::getOrCreateKernel(const ContextImplPtr &ContextImpl, if (std::get(ret_tuple)) { // Pulling a copy of a kernel and program from the cache, // so we need to retain those resources. - ContextImpl->getPlugin()->call( - std::get(ret_tuple)); - ContextImpl->getPlugin()->call( - std::get(ret_tuple)); + ContextImpl->getPlugin()->call(urKernelRetain, + std::get(ret_tuple)); + ContextImpl->getPlugin()->call(urProgramRetain, + std::get(ret_tuple)); return ret_tuple; } } - sycl::detail::pi::PiProgram Program = - getBuiltPIProgram(ContextImpl, DeviceImpl, KernelName, NDRDesc); + ur_program_handle_t Program = + getBuiltURProgram(ContextImpl, DeviceImpl, KernelName, NDRDesc); auto BuildF = [this, &Program, &KernelName, &ContextImpl] { - sycl::detail::pi::PiKernel Kernel = nullptr; + ur_kernel_handle_t Kernel = nullptr; const PluginPtr &Plugin = ContextImpl->getPlugin(); - Plugin->call( - Program, KernelName.c_str(), &Kernel); + Plugin->call(urKernelCreate, Program, + KernelName.c_str(), &Kernel); - // Only set PI_USM_INDIRECT_ACCESS if the platform can handle it. + // Only set UR_USM_INDIRECT_ACCESS if the platform can handle it. if (ContextImpl->getPlatformImpl()->supports_usm()) { - // Some PI Plugins (like OpenCL) require this call to enable USM - // For others, PI will turn this into a NOP. - Plugin->call( - Kernel, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE); + // Some UR Plugins (like OpenCL) require this call to enable USM + // For others, UR will turn this into a NOP. + const ur_bool_t UrTrue = true; + Plugin->call(urKernelSetExecInfo, Kernel, + UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t), + nullptr, &UrTrue); } const KernelArgMask *ArgMask = nullptr; @@ -956,60 +990,56 @@ ProgramManager::getOrCreateKernel(const ContextImplPtr &ContextImpl, // stored in the cache, and one handle is returned to the // caller. In that case, we need to increase the ref count of the // kernel. - ContextImpl->getPlugin()->call( - KernelArgMaskPair.first); + ContextImpl->getPlugin()->call(urKernelRetain, KernelArgMaskPair.first); Cache.saveKernel(key, ret_val); return ret_val; } -sycl::detail::pi::PiProgram -ProgramManager::getPiProgramFromPiKernel(sycl::detail::pi::PiKernel Kernel, +ur_program_handle_t +ProgramManager::getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr Context) { - sycl::detail::pi::PiProgram Program; + ur_program_handle_t Program; const PluginPtr &Plugin = Context->getPlugin(); - Plugin->call(Kernel, PI_KERNEL_INFO_PROGRAM, - sizeof(sycl::detail::pi::PiProgram), - &Program, nullptr); + Plugin->call(urKernelGetInfo, Kernel, UR_KERNEL_INFO_PROGRAM, + sizeof(ur_program_handle_t), &Program, nullptr); return Program; } std::string -ProgramManager::getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, +ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, const ContextImplPtr Context) { - size_t PIDevicesSize = 0; + size_t URDevicesSize = 0; const PluginPtr &Plugin = Context->getPlugin(); - Plugin->call(Program, PI_PROGRAM_INFO_DEVICES, 0, - nullptr, &PIDevicesSize); - std::vector PIDevices( - PIDevicesSize / sizeof(sycl::detail::pi::PiDevice)); - Plugin->call(Program, PI_PROGRAM_INFO_DEVICES, - PIDevicesSize, PIDevices.data(), - nullptr); + Plugin->call(urProgramGetInfo, Program, UR_PROGRAM_INFO_DEVICES, 0, nullptr, + &URDevicesSize); + std::vector URDevices(URDevicesSize / + sizeof(ur_device_handle_t)); + Plugin->call(urProgramGetInfo, Program, UR_PROGRAM_INFO_DEVICES, + URDevicesSize, URDevices.data(), nullptr); std::string Log = "The program was built for " + - std::to_string(PIDevices.size()) + " devices"; - for (sycl::detail::pi::PiDevice &Device : PIDevices) { + std::to_string(URDevices.size()) + " devices"; + for (ur_device_handle_t &Device : URDevices) { std::string DeviceBuildInfoString; size_t DeviceBuildInfoStrSize = 0; - Plugin->call( - Program, Device, PI_PROGRAM_BUILD_INFO_LOG, 0, nullptr, - &DeviceBuildInfoStrSize); + Plugin->call(urProgramGetBuildInfo, Program, Device, + UR_PROGRAM_BUILD_INFO_LOG, 0, nullptr, + &DeviceBuildInfoStrSize); if (DeviceBuildInfoStrSize > 0) { std::vector DeviceBuildInfo(DeviceBuildInfoStrSize); - Plugin->call( - Program, Device, PI_PROGRAM_BUILD_INFO_LOG, DeviceBuildInfoStrSize, - DeviceBuildInfo.data(), nullptr); + Plugin->call(urProgramGetBuildInfo, Program, Device, + UR_PROGRAM_BUILD_INFO_LOG, DeviceBuildInfoStrSize, + DeviceBuildInfo.data(), nullptr); DeviceBuildInfoString = std::string(DeviceBuildInfo.data()); } std::string DeviceNameString; size_t DeviceNameStrSize = 0; - Plugin->call(Device, PI_DEVICE_INFO_NAME, 0, - nullptr, &DeviceNameStrSize); + Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_NAME, 0, nullptr, + &DeviceNameStrSize); if (DeviceNameStrSize > 0) { std::vector DeviceName(DeviceNameStrSize); - Plugin->call(Device, PI_DEVICE_INFO_NAME, - DeviceNameStrSize, - DeviceName.data(), nullptr); + Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_NAME, + DeviceNameStrSize, DeviceName.data(), nullptr); DeviceNameString = std::string(DeviceName.data()); } Log += "\nBuild program log for '" + DeviceNameString + "':\n" + @@ -1022,7 +1052,7 @@ ProgramManager::getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, // To support that they need to be delivered in a different container - so that // sycl_device_binary_struct can be created for each of them. static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, - sycl::detail::pi::PiProgram &Prog) { + ur_program_handle_t &Prog) { std::string LibSyclDir = OSUtil::getCurrentDSODir(); std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, std::ifstream::in | std::ifstream::binary); @@ -1104,10 +1134,24 @@ static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { return Ext->second; } -static sycl::detail::pi::PiProgram -loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, - const sycl::detail::pi::PiDevice &Device, - bool UseNativeLib) { +static ur_result_t doCompile(const PluginPtr &Plugin, + ur_program_handle_t Program, uint32_t NumDevs, + ur_device_handle_t *Devs, ur_context_handle_t Ctx, + const char *Opts) { + // Try to compile with given devices, fall back to compiling with the program + // context if unsupported by the adapter + auto Result = + Plugin->call_nocheck(urProgramCompileExp, Program, NumDevs, Devs, Opts); + if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + return Plugin->call_nocheck(urProgramCompile, Ctx, Program, Opts); + } + return Result; +} + +static ur_program_handle_t loadDeviceLibFallback(const ContextImplPtr Context, + DeviceLibExt Extension, + ur_device_handle_t Device, + bool UseNativeLib) { auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); @@ -1117,7 +1161,7 @@ loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, std::make_pair(std::make_pair(Extension, Device), nullptr)); bool Cached = !CacheResult.second; auto LibProgIt = CacheResult.first; - sycl::detail::pi::PiProgram &LibProg = LibProgIt->second; + ur_program_handle_t &LibProg = LibProgIt->second; if (Cached) return LibProg; @@ -1130,18 +1174,14 @@ loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, const PluginPtr &Plugin = Context->getPlugin(); // TODO no spec constants are used in the std libraries, support in the future - sycl::detail::pi::PiResult Error = - Plugin->call_nocheck( - LibProg, - /*num devices = */ 1, &Device, - // Do not use compile options for library programs: it is not clear - // if user options (image options) are supposed to be applied to - // library program as well, and what actually happens to a SPIR-V - // program if we apply them. - "", 0, nullptr, nullptr, nullptr, nullptr); - if (Error != PI_SUCCESS) { + // Do not use compile options for library programs: it is not clear if user + // options (image options) are supposed to be applied to library program as + // well, and what actually happens to a SPIR-V program if we apply them. + ur_result_t Error = + doCompile(Plugin, LibProg, 1, &Device, Context->getHandleRef(), ""); + if (Error != UR_RESULT_SUCCESS) { CachedLibPrograms.erase(LibProgIt); - throw detail::set_pi_error( + throw detail::set_ur_error( exception(make_error_code(errc::build), ProgramManager::getProgramBuildLog(LibProg, Context)), Error); @@ -1213,20 +1253,24 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( if (ItBegin == ItEnd) return nullptr; - std::vector RawImgs(std::distance(ItBegin, ItEnd)); + std::vector RawImgs(std::distance(ItBegin, ItEnd)); auto It = ItBegin; for (unsigned I = 0; It != ItEnd; ++It, ++I) - RawImgs[I] = reinterpret_cast( + RawImgs[I] = reinterpret_cast( const_cast(&It->second->getRawData())); - pi_uint32 ImgInd = 0; + std::vector UrBinaries(RawImgs.size()); + for (uint32_t BinaryCount = 0; BinaryCount < RawImgs.size(); BinaryCount++) { + UrBinaries[BinaryCount].pDeviceTargetSpec = + getUrDeviceTarget(RawImgs[BinaryCount]->DeviceTargetSpec); + } + + uint32_t ImgInd = 0; // Ask the native runtime under the given context to choose the device image // it prefers. - getSyclObjImpl(Context) - ->getPlugin() - ->call( - getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(), - (pi_uint32)RawImgs.size(), &ImgInd); + getSyclObjImpl(Context)->getPlugin()->call( + urDeviceSelectBinary, getSyclObjImpl(Device)->getHandleRef(), + UrBinaries.data(), UrBinaries.size(), &ImgInd); std::advance(ItBegin, ImgInd); return ItBegin->second; } @@ -1297,19 +1341,24 @@ RTDeviceBinaryImage &ProgramManager::getDeviceImage( } std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - std::vector RawImgs(ImageSet.size()); + std::vector RawImgs(ImageSet.size()); auto ImageIterator = ImageSet.begin(); for (size_t i = 0; i < ImageSet.size(); i++, ImageIterator++) - RawImgs[i] = reinterpret_cast( + RawImgs[i] = reinterpret_cast( const_cast(&(*ImageIterator)->getRawData())); - pi_uint32 ImgInd = 0; + uint32_t ImgInd = 0; // Ask the native runtime under the given context to choose the device image // it prefers. - getSyclObjImpl(Context) - ->getPlugin() - ->call( - getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(), - (pi_uint32)RawImgs.size(), &ImgInd); + + std::vector UrBinaries(RawImgs.size()); + for (uint32_t BinaryCount = 0; BinaryCount < RawImgs.size(); BinaryCount++) { + UrBinaries[BinaryCount].pDeviceTargetSpec = + getUrDeviceTarget(RawImgs[BinaryCount]->DeviceTargetSpec); + } + + getSyclObjImpl(Context)->getPlugin()->call( + urDeviceSelectBinary, getSyclObjImpl(Device)->getHandleRef(), + UrBinaries.data(), UrBinaries.size(), &ImgInd); ImageIterator = ImageSet.begin(); std::advance(ImageIterator, ImgInd); @@ -1331,11 +1380,11 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { return ((DeviceLibReqMask & Mask) == Mask); } -static std::vector +static std::vector getDeviceLibPrograms(const ContextImplPtr Context, - const sycl::detail::pi::PiDevice &Device, + const ur_device_handle_t &Device, uint32_t DeviceLibReqMask) { - std::vector Programs; + std::vector Programs; std::pair RequiredDeviceLibExt[] = { {DeviceLibExt::cl_intel_devicelib_assert, @@ -1354,7 +1403,7 @@ getDeviceLibPrograms(const ContextImplPtr Context, // one underlying device doesn't support cl_khr_fp64. std::string DevExtList = Context->getPlatformImpl()->getDeviceImpl(Device)->get_device_info_string( - PiInfoCode::value); + UrInfoCode::value); const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64")); // Load a fallback library for an extension if the device does not @@ -1405,8 +1454,8 @@ getDeviceLibPrograms(const ContextImplPtr Context, ProgramManager::ProgramPtr ProgramManager::build( ProgramPtr Program, const ContextImplPtr Context, const std::string &CompileOptions, const std::string &LinkOptions, - const sycl::detail::pi::PiDevice &Device, uint32_t DeviceLibReqMask, - const std::vector &ExtraProgramsToLink) { + ur_device_handle_t Device, uint32_t DeviceLibReqMask, + const std::vector &ExtraProgramsToLink) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::build(" << Program.get() << ", " @@ -1423,7 +1472,7 @@ ProgramManager::ProgramPtr ProgramManager::build( CompileOptions.find(std::string("-vc-codegen")) != std::string::npos) LinkDeviceLibs = false; - std::vector LinkPrograms; + std::vector LinkPrograms; if (LinkDeviceLibs) { LinkPrograms = getDeviceLibPrograms(Context, Device, DeviceLibReqMask); } @@ -1436,41 +1485,58 @@ ProgramManager::ProgramPtr ProgramManager::build( const std::string &Options = LinkOptions.empty() ? CompileOptions : (CompileOptions + " " + LinkOptions); - sycl::detail::pi::PiResult Error = - Plugin->call_nocheck( - Program.get(), /*num devices =*/1, &Device, Options.c_str(), - nullptr, nullptr); - if (Error != PI_SUCCESS) - throw detail::set_pi_error( + ur_result_t Error = + Plugin->call_nocheck(urProgramBuildExp, Program.get(), + /*num devices =*/1, &Device, Options.c_str()); + if (Error == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Error = Plugin->call_nocheck(urProgramBuild, Context->getHandleRef(), + Program.get(), Options.c_str()); + } + + if (Error != UR_RESULT_SUCCESS) + throw detail::set_ur_error( exception(make_error_code(errc::build), getProgramBuildLog(Program.get(), Context)), Error); + return Program; } // Include the main program and compile/link everything together - Plugin->call(Program.get(), /*num devices =*/1, - &Device, CompileOptions.c_str(), 0, - nullptr, nullptr, nullptr, nullptr); + auto Res = doCompile(Plugin, Program.get(), /*num devices =*/1, &Device, + Context->getHandleRef(), CompileOptions.c_str()); + Plugin->checkUrResult(Res); LinkPrograms.push_back(Program.get()); - for (sycl::detail::pi::PiProgram Prg : ExtraProgramsToLink) { - Plugin->call( - Prg, /*num devices =*/1, &Device, CompileOptions.c_str(), 0, nullptr, - nullptr, nullptr, nullptr); + for (ur_program_handle_t Prg : ExtraProgramsToLink) { + auto Result = + Plugin->call_nocheck(urProgramCompileExp, Prg, /* num devices =*/1, + &Device, CompileOptions.c_str()); + if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Plugin->call(urProgramCompile, Context->getHandleRef(), Prg, + CompileOptions.c_str()); + } + Plugin->checkUrResult(Result); + LinkPrograms.push_back(Prg); } - sycl::detail::pi::PiProgram LinkedProg = nullptr; + ur_program_handle_t LinkedProg = nullptr; auto doLink = [&] { - return Plugin->call_nocheck( - Context->getHandleRef(), /*num devices =*/1, &Device, - LinkOptions.c_str(), LinkPrograms.size(), LinkPrograms.data(), nullptr, - nullptr, &LinkedProg); + auto Res = Plugin->call_nocheck(urProgramLinkExp, Context->getHandleRef(), + /*num devices =*/1, &Device, + LinkPrograms.size(), LinkPrograms.data(), + LinkOptions.c_str(), &LinkedProg); + if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Res = Plugin->call_nocheck(urProgramLink, Context->getHandleRef(), + LinkPrograms.size(), LinkPrograms.data(), + LinkOptions.c_str(), &LinkedProg); + } + return Res; }; - sycl::detail::pi::PiResult Error = doLink(); - if (Error == PI_ERROR_OUT_OF_RESOURCES || - Error == PI_ERROR_OUT_OF_HOST_MEMORY) { + ur_result_t Error = doLink(); + if (Error == UR_RESULT_ERROR_OUT_OF_RESOURCES || + Error == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) { Context->getKernelProgramCache().reset(); Error = doLink(); } @@ -1478,16 +1544,16 @@ ProgramManager::ProgramPtr ProgramManager::build( // Link program call returns a new program object if all parameters are valid, // or NULL otherwise. Release the original (user) program. Program.reset(LinkedProg); - if (Error != PI_SUCCESS) { + if (Error != UR_RESULT_SUCCESS) { if (LinkedProg) { // A non-trivial error occurred during linkage: get a build log, release // an incomplete (but valid) LinkedProg, and throw. - throw detail::set_pi_error( + throw detail::set_ur_error( exception(make_error_code(errc::build), getProgramBuildLog(LinkedProg, Context)), Error); } - Plugin->checkPiResult(Error); + Plugin->checkUrResult(Error); } return Program; } @@ -1692,7 +1758,7 @@ void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img, Fname += '_' + std::to_string(SequenceID); std::string Ext; - sycl::detail::pi::PiDeviceBinaryType Format = Img.getFormat(); + ur::DeviceBinaryType Format = Img.getFormat(); if (Format == SYCL_DEVICE_BINARY_TYPE_SPIRV) Ext = ".spv"; else if (Format == SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE) @@ -1720,7 +1786,7 @@ uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { } const KernelArgMask * -ProgramManager::getEliminatedKernelArgMask(pi::PiProgram NativePrg, +ProgramManager::getEliminatedKernelArgMask(ur_program_handle_t NativePrg, const std::string &KernelName) { // Bail out if there are no eliminated kernel arg masks in our images if (m_EliminatedKernelArgMasks.empty()) @@ -2212,8 +2278,8 @@ ProgramManager::compile(const device_image_plain &DeviceImage, // Device is not used when creating program from SPIRV, so passing only one // device is OK. - sycl::detail::pi::PiProgram Prog = createPIProgram( - *InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs[0]); + ur_program_handle_t Prog = createURProgram(*InputImpl->get_bin_image_ref(), + InputImpl->get_context(), Devs[0]); if (InputImpl->get_bin_image_ref()->supportsSpecConstants()) setSpecializationConstants(InputImpl, Prog, Plugin); @@ -2224,10 +2290,10 @@ ProgramManager::compile(const device_image_plain &DeviceImage, InputImpl->get_spec_const_data_ref(), InputImpl->get_spec_const_blob_ref()); - std::vector PIDevices; - PIDevices.reserve(Devs.size()); + std::vector URDevices; + URDevices.reserve(Devs.size()); for (const device &Dev : Devs) - PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); + URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); // TODO: Handle zero sized Device list. std::string CompileOptions; @@ -2236,17 +2302,14 @@ ProgramManager::compile(const device_image_plain &DeviceImage, CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Plugin); // Should always come last! appendCompileEnvironmentVariablesThatAppend(CompileOptions); - sycl::detail::pi::PiResult Error = - Plugin->call_nocheck( - ObjectImpl->get_program_ref(), /*num devices=*/Devs.size(), - PIDevices.data(), CompileOptions.c_str(), - /*num_input_headers=*/0, /*input_headers=*/nullptr, - /*header_include_names=*/nullptr, - /*pfn_notify=*/nullptr, /*user_data*/ nullptr); - if (Error != PI_SUCCESS) + ur_result_t Error = doCompile( + Plugin, ObjectImpl->get_ur_program_ref(), Devs.size(), URDevices.data(), + getSyclObjImpl(InputImpl->get_context()).get()->getHandleRef(), + CompileOptions.c_str()); + if (Error != UR_RESULT_SUCCESS) throw sycl::exception( make_error_code(errc::build), - getProgramBuildLog(ObjectImpl->get_program_ref(), + getProgramBuildLog(ObjectImpl->get_ur_program_ref(), getSyclObjImpl(ObjectImpl->get_context()))); return createSyclObjFromImpl(ObjectImpl); @@ -2258,13 +2321,13 @@ ProgramManager::link(const device_image_plain &DeviceImage, const property_list &PropList) { (void)PropList; - std::vector PIPrograms; - PIPrograms.push_back(getSyclObjImpl(DeviceImage)->get_program_ref()); + std::vector URPrograms; + URPrograms.push_back(getSyclObjImpl(DeviceImage)->get_ur_program_ref()); - std::vector PIDevices; - PIDevices.reserve(Devs.size()); + std::vector URDevices; + URDevices.reserve(Devs.size()); for (const device &Dev : Devs) - PIDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); + URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); std::string LinkOptionsStr; applyLinkOptionsFromEnvironment(LinkOptionsStr); @@ -2280,28 +2343,32 @@ ProgramManager::link(const device_image_plain &DeviceImage, const ContextImplPtr ContextImpl = getSyclObjImpl(Context); const PluginPtr &Plugin = ContextImpl->getPlugin(); - sycl::detail::pi::PiProgram LinkedProg = nullptr; + ur_program_handle_t LinkedProg = nullptr; auto doLink = [&] { - return Plugin->call_nocheck( - ContextImpl->getHandleRef(), PIDevices.size(), PIDevices.data(), - /*options=*/LinkOptionsStr.c_str(), PIPrograms.size(), - PIPrograms.data(), - /*pfn_notify=*/nullptr, - /*user_data=*/nullptr, &LinkedProg); + auto Res = Plugin->call_nocheck( + urProgramLinkExp, ContextImpl->getHandleRef(), URDevices.size(), + URDevices.data(), URPrograms.size(), URPrograms.data(), + LinkOptionsStr.c_str(), &LinkedProg); + if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Res = Plugin->call_nocheck(urProgramLink, ContextImpl->getHandleRef(), + URPrograms.size(), URPrograms.data(), + LinkOptionsStr.c_str(), &LinkedProg); + } + return Res; }; - sycl::detail::pi::PiResult Error = doLink(); - if (Error == PI_ERROR_OUT_OF_RESOURCES || - Error == PI_ERROR_OUT_OF_HOST_MEMORY) { + ur_result_t Error = doLink(); + if (Error == UR_RESULT_ERROR_OUT_OF_RESOURCES || + Error == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) { ContextImpl->getKernelProgramCache().reset(); Error = doLink(); } - if (Error != PI_SUCCESS) { + if (Error != UR_RESULT_SUCCESS) { if (LinkedProg) { const std::string ErrorMsg = getProgramBuildLog(LinkedProg, ContextImpl); throw sycl::exception(make_error_code(errc::build), ErrorMsg); } - throw set_pi_error(exception(make_error_code(errc::build), "link() failed"), + throw set_ur_error(exception(make_error_code(errc::build), "link() failed"), Error); } @@ -2312,7 +2379,7 @@ ProgramManager::link(const device_image_plain &DeviceImage, std::shared_ptr DeviceImageImpl = getSyclObjImpl(DeviceImage); - // Duplicates are not expected here, otherwise piProgramLink should fail + // Duplicates are not expected here, otherwise urProgramLink should fail KernelIDs->insert(KernelIDs->end(), DeviceImageImpl->get_kernel_ids_ptr()->begin(), DeviceImageImpl->get_kernel_ids_ptr()->end()); @@ -2410,15 +2477,14 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, // Device is not used when creating program from SPIRV, so passing only one // device is OK. - auto [NativePrg, DeviceCodeWasInCache] = getOrCreatePIProgram( + auto [NativePrg, DeviceCodeWasInCache] = getOrCreateURProgram( Img, {&Img}, Context, Devs[0], CompileOpts + LinkOpts, SpecConsts); if (!DeviceCodeWasInCache && InputImpl->get_bin_image_ref()->supportsSpecConstants()) setSpecializationConstants(InputImpl, NativePrg, Plugin); - ProgramPtr ProgramManaged( - NativePrg, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease); + ProgramPtr ProgramManaged(NativePrg, urProgramRelease); // Link a fallback implementation of device libraries if they are not // supported by a device compiler. @@ -2430,9 +2496,8 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, !SYCLConfig::get()) DeviceLibReqMask = getDeviceLibReqMask(Img); - // TODO: Add support for using virtual functions with kernel bundles // TODO: Add support for dynamic linking with kernel bundles - std::vector ExtraProgramsToLink; + std::vector ExtraProgramsToLink; ProgramPtr BuiltProgram = build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, getSyclObjImpl(Devs[0]).get()->getHandleRef(), DeviceLibReqMask, @@ -2468,10 +2533,9 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, } uint32_t ImgId = Img.getImageID(); - const sycl::detail::pi::PiDevice PiDevice = - getSyclObjImpl(Devs[0]).get()->getHandleRef(); + ur_device_handle_t UrDevice = getSyclObjImpl(Devs[0]).get()->getHandleRef(); auto CacheKey = - std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), PiDevice); + std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), UrDevice); // CacheKey is captured by reference so when we overwrite it later we can // reuse this function. @@ -2483,25 +2547,25 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, // getOrBuild is not supposed to return nullptr assert(BuildResult != nullptr && "Invalid build result"); - sycl::detail::pi::PiProgram ResProgram = BuildResult->Val; + ur_program_handle_t ResProgram = BuildResult->Val; // Cache supports key with once device only, but here we have multiple // devices a program is built for, so add the program to the cache for all // other devices. const PluginPtr &Plugin = ContextImpl->getPlugin(); auto CacheOtherDevices = [ResProgram, &Plugin]() { - Plugin->call(ResProgram); + Plugin->call(urProgramRetain, ResProgram); return ResProgram; }; // The program for device "0" is already added to the cache during the first // call to getOrBuild, so starting with "1" for (size_t Idx = 1; Idx < Devs.size(); ++Idx) { - const sycl::detail::pi::PiDevice PiDeviceAdd = + const ur_device_handle_t UrDeviceAdd = getSyclObjImpl(Devs[Idx]).get()->getHandleRef(); // Change device in the cache key to reduce copying of spec const data. - CacheKey.second = PiDeviceAdd; + CacheKey.second = UrDeviceAdd; Cache.getOrBuild(GetCachedBuildF, CacheOtherDevices); // getOrBuild is not supposed to return nullptr assert(BuildResult != nullptr && "Invalid build result"); @@ -2510,7 +2574,7 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, // devive_image_impl shares ownership of PIProgram with, at least, program // cache. The ref counter will be descremented in the destructor of // device_image_impl - Plugin->call(ResProgram); + Plugin->call(urProgramRetain, ResProgram); DeviceImageImplPtr ExecImpl = std::make_shared( InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable, @@ -2521,13 +2585,13 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, return createSyclObjFromImpl(ExecImpl); } -// When caching is enabled, the returned PiKernel will already have +// When caching is enabled, the returned UrKernel will already have // its ref count incremented. -std::tuple +std::tuple ProgramManager::getOrCreateKernel(const context &Context, const std::string &KernelName, const property_list &PropList, - sycl::detail::pi::PiProgram Program) { + ur_program_handle_t Program) { (void)PropList; @@ -2536,16 +2600,18 @@ ProgramManager::getOrCreateKernel(const context &Context, KernelProgramCache &Cache = Ctx->getKernelProgramCache(); auto BuildF = [this, &Program, &KernelName, &Ctx] { - sycl::detail::pi::PiKernel Kernel = nullptr; + ur_kernel_handle_t Kernel = nullptr; const PluginPtr &Plugin = Ctx->getPlugin(); - Plugin->call(Program, KernelName.c_str(), - &Kernel); - - // Only set PI_USM_INDIRECT_ACCESS if the platform can handle it. - if (Ctx->getPlatformImpl()->supports_usm()) - Plugin->call( - Kernel, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE); + Plugin->call(urKernelCreate, Program, KernelName.c_str(), &Kernel); + + // Only set UR_USM_INDIRECT_ACCESS if the platform can handle it. + if (Ctx->getPlatformImpl()->supports_usm()) { + bool EnableAccess = true; + Plugin->call(urKernelSetExecInfo, Kernel, + UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t), + nullptr, &EnableAccess); + } // Ignore possible m_UseSpvFile for now. // TODO consider making m_UseSpvFile interact with kernel bundles as well. @@ -2574,13 +2640,13 @@ ProgramManager::getOrCreateKernel(const context &Context, // stored in the cache, and one handle is returned to the // caller. In that case, we need to increase the ref count of the // kernel. - Ctx->getPlugin()->call(BuildResult->Val.first); + Ctx->getPlugin()->call(urKernelRetain, BuildResult->Val.first); return std::make_tuple(BuildResult->Val.first, &(BuildResult->MBuildResultMutex), BuildResult->Val.second); } -sycl::detail::pi::PiKernel ProgramManager::getCachedMaterializedKernel( +ur_kernel_handle_t ProgramManager::getCachedMaterializedKernel( const std::string &KernelName, const std::vector &SpecializationConsts) { if constexpr (DbgProgMgr > 0) @@ -2610,7 +2676,7 @@ sycl::detail::pi::PiKernel ProgramManager::getCachedMaterializedKernel( return nullptr; } -sycl::detail::pi::PiKernel ProgramManager::getOrCreateMaterializedKernel( +ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( const RTDeviceBinaryImage &Img, const context &Context, const device &Device, const std::string &KernelName, const std::vector &SpecializationConsts) { @@ -2625,31 +2691,30 @@ sycl::detail::pi::PiKernel ProgramManager::getOrCreateMaterializedKernel( if constexpr (DbgProgMgr > 0) std::cerr << ">>> Adding the kernel to the cache.\n"; - auto Program = createPIProgram(Img, Context, Device); + auto Program = createURProgram(Img, Context, Device); auto DeviceImpl = detail::getSyclObjImpl(Device); auto &Plugin = DeviceImpl->getPlugin(); - ProgramPtr ProgramManaged( - Program, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease); + ProgramPtr ProgramManaged(Program, urProgramRelease); std::string CompileOpts; std::string LinkOpts; applyOptionsFromEnvironment(CompileOpts, LinkOpts); // No linking of extra programs reqruired. - std::vector ExtraProgramsToLink; + std::vector ExtraProgramsToLink; auto BuildProgram = build(std::move(ProgramManaged), detail::getSyclObjImpl(Context), CompileOpts, LinkOpts, DeviceImpl->getHandleRef(), /*For non SPIR-V devices DeviceLibReqdMask is always 0*/ 0, ExtraProgramsToLink); - sycl::detail::pi::PiKernel PiKernel{nullptr}; - Plugin->call( - BuildProgram.get(), KernelName.c_str(), &PiKernel); + ur_kernel_handle_t UrKernel{nullptr}; + Plugin->call(urKernelCreate, + BuildProgram.get(), KernelName.c_str(), &UrKernel); { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - m_MaterializedKernels[KernelName][SpecializationConsts] = PiKernel; + m_MaterializedKernels[KernelName][SpecializationConsts] = UrKernel; } - return PiKernel; + return UrKernel; } bool doesDevSupportDeviceRequirements(const device &Dev, diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index ae5813e392038..f973e8043b769 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -101,18 +101,18 @@ class ProgramManager { const context &Context, const device &Device, bool JITCompilationIsRequired = false); - sycl::detail::pi::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, - const context &Context, - const device &Device); - /// Creates a PI program using either a cached device code binary if present + ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img, + const context &Context, + const device &Device); + /// Creates a UR program using either a cached device code binary if present /// in the persistent cache or from the supplied device image otherwise. /// \param Img The device image used to create the program. /// \param AllImages All images needed to build the program, used for cache /// lookup. - /// \param Context The context to find or create the PI program with. - /// \param Device The device to find or create the PI program for. + /// \param Context The context to find or create the UR program with. + /// \param Device The device to find or create the UR program for. /// \param CompileAndLinkOptions The compile and linking options to be used - /// for building the PI program. These options must appear in the + /// for building the UR program. These options must appear in the /// mentioned order. This parameter is used as a partial key in the /// cache and has no effect if no cached device code binary is found in /// the persistent cache. @@ -120,14 +120,17 @@ class ProgramManager { /// image. This parameter is used as a partial key in the cache and /// has no effect if no cached device code binary is found in the /// persistent cache. - /// \return A pair consisting of the PI program created with the corresponding + /// \return A pair consisting of the UR program created with the corresponding /// device code binary and a boolean that is true if the device code /// binary was found in the persistent cache and false otherwise. - std::pair getOrCreatePIProgram( + std::pair + getOrCreateURProgram( const RTDeviceBinaryImage &Img, const std::vector &AllImages, - const context &Context, const device &Device, - const std::string &CompileAndLinkOptions, SerializedObj SpecConsts); + const context &Context, + const device &Device, + const std::string &CompileAndLinkOptions, + SerializedObj SpecConsts); /// Builds or retrieves from cache a program defining the kernel with given /// name. /// \param M identifies the OS module the kernel comes from (multiple OS @@ -137,52 +140,50 @@ class ProgramManager { /// \param KernelName the kernel's name /// \param JITCompilationIsRequired If JITCompilationIsRequired is true /// add a check that kernel is compiled, otherwise don't add the check. - sycl::detail::pi::PiProgram - getBuiltPIProgram(const ContextImplPtr &ContextImpl, - const DeviceImplPtr &DeviceImpl, - const std::string &KernelName, const NDRDescT &NDRDesc = {}, - bool JITCompilationIsRequired = false); - - sycl::detail::pi::PiProgram - getBuiltPIProgram(const context &Context, const device &Device, - const std::string &KernelName, - const property_list &PropList, - bool JITCompilationIsRequired = false); - - std::tuple + ur_program_handle_t getBuiltURProgram(const ContextImplPtr &ContextImpl, + const DeviceImplPtr &DeviceImpl, + const std::string &KernelName, + const NDRDescT &NDRDesc = {}, + bool JITCompilationIsRequired = false); + + ur_program_handle_t getBuiltURProgram(const context &Context, + const device &Device, + const std::string &KernelName, + const property_list &PropList, + bool JITCompilationIsRequired = false); + + std::tuple getOrCreateKernel(const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc = {}); - sycl::detail::pi::PiKernel getCachedMaterializedKernel( + ur_kernel_handle_t getCachedMaterializedKernel( const std::string &KernelName, const std::vector &SpecializationConsts); - sycl::detail::pi::PiKernel getOrCreateMaterializedKernel( + ur_kernel_handle_t getOrCreateMaterializedKernel( const RTDeviceBinaryImage &Img, const context &Context, const device &Device, const std::string &KernelName, const std::vector &SpecializationConsts); - sycl::detail::pi::PiProgram - getPiProgramFromPiKernel(sycl::detail::pi::PiKernel Kernel, - const ContextImplPtr Context); + ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, + const ContextImplPtr Context); void addImages(sycl_device_binaries DeviceImages); void debugPrintBinaryImages() const; - static std::string - getProgramBuildLog(const sycl::detail::pi::PiProgram &Program, - const ContextImplPtr Context); + static std::string getProgramBuildLog(const ur_program_handle_t &Program, + const ContextImplPtr Context); uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); /// Returns the mask for eliminated kernel arguments for the requested kernel /// within the native program. - /// \param NativePrg the PI program associated with the kernel. + /// \param NativePrg the UR program associated with the kernel. /// \param KernelName the name of the kernel. const KernelArgMask * - getEliminatedKernelArgMask(pi::PiProgram NativePrg, + getEliminatedKernelArgMask(ur_program_handle_t NativePrg, const std::string &KernelName); // The function returns the unique SYCL kernel identifier associated with a @@ -281,10 +282,9 @@ class ProgramManager { const std::vector &Devs, const property_list &PropList); - std::tuple + std::tuple getOrCreateKernel(const context &Context, const std::string &KernelName, - const property_list &PropList, - sycl::detail::pi::PiProgram Program); + const property_list &PropList, ur_program_handle_t Program); ProgramManager(); ~ProgramManager() = default; @@ -300,14 +300,14 @@ class ProgramManager { ProgramManager(ProgramManager const &) = delete; ProgramManager &operator=(ProgramManager const &) = delete; - using ProgramPtr = - std::unique_ptr, - decltype(&::piProgramRelease)>; - ProgramPtr - build(ProgramPtr Program, const ContextImplPtr Context, - const std::string &CompileOptions, const std::string &LinkOptions, - const sycl::detail::pi::PiDevice &Device, uint32_t DeviceLibReqMask, - const std::vector &ProgramsToLink); + using ProgramPtr = std::unique_ptr, + decltype(&::urProgramRelease)>; + ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context, + const std::string &CompileOptions, + const std::string &LinkOptions, ur_device_handle_t Device, + uint32_t DeviceLibReqMask, + const std::vector &ProgramsToLink); + /// Dumps image to current directory void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const; @@ -385,20 +385,20 @@ class ProgramManager { /// Protects built-in kernel ID cache. std::mutex m_BuiltInKernelIDsMutex; - // Keeps track of pi_program to image correspondence. Needed for: + // Keeps track of ur_program to image correspondence. Needed for: // - knowing which specialization constants are used in the program and // injecting their current values before compiling the SPIR-V; the binary // image object has info about all spec constants used in the module // - finding kernel argument masks for kernels associated with each - // pi_program + // ur_program // NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not // referenced from outside SYCL runtime and RTDeviceBinaryImage object // lifetime matches program manager's one. // NOTE: keys in the map can be invalid (reference count went to zero and // the underlying program disposed of), so the map can't be used in any way - // other than binary image lookup with known live PiProgram as the key. + // other than binary image lookup with known live UrProgram as the key. // NOTE: access is synchronized via the MNativeProgramsMutex - std::unordered_multimap + std::unordered_map NativePrograms; /// Protects NativePrograms that can be changed by class' methods. @@ -436,7 +436,7 @@ class ProgramManager { std::mutex m_HostPipesMutex; using MaterializedEntries = - std::map, pi::PiKernel>; + std::map, ur_kernel_handle_t>; std::unordered_map m_MaterializedKernels; }; } // namespace detail diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 6bc27710e7663..2a980c9de9701 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include @@ -44,23 +44,22 @@ class NestedCallsTracker { ~NestedCallsTracker() { NestedCallsDetector = false; } }; -static std::vector -getPIEvents(const std::vector &DepEvents) { - std::vector RetPiEvents; +static std::vector +getUrEvents(const std::vector &DepEvents) { + std::vector RetUrEvents; for (const sycl::event &Event : DepEvents) { const EventImplPtr &EventImpl = detail::getSyclObjImpl(Event); if (EventImpl->getHandleRef() != nullptr) - RetPiEvents.push_back(EventImpl->getHandleRef()); + RetUrEvents.push_back(EventImpl->getHandleRef()); } - return RetPiEvents; + return RetUrEvents; } template <> uint32_t queue_impl::get_info() const { - sycl::detail::pi::PiResult result = PI_SUCCESS; - getPlugin()->call( - MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, - nullptr); + ur_result_t result = UR_RESULT_SUCCESS; + getPlugin()->call(urQueueGetInfo, MQueues[0], UR_QUEUE_INFO_REFERENCE_COUNT, + sizeof(result), &result, nullptr); return result; } @@ -234,7 +233,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, event queue_impl::mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, - pi_mem_advice Advice, + ur_usm_advice_flags_t Advice, const std::vector &DepEvents, bool CallerNeedsEvent) { return submitMemOpHelper( @@ -302,11 +301,11 @@ void queue_impl::addEvent(const event &Event) { if (!Cmd) { // if there is no command on the event, we cannot track it with MEventsWeak // as that will leave it with no owner. Track in MEventsShared only if we're - // unable to call piQueueFinish during wait. + // unable to call urQueueFinish during wait. if (MEmulateOOO) addSharedEvent(Event); } - // As long as the queue supports piQueueFinish we only need to store events + // As long as the queue supports urQueueFinish we only need to store events // for unenqueued commands and host tasks. else if (MEmulateOOO || EImpl->getHandleRef() == nullptr) { std::weak_ptr EventWeakPtr{EImpl}; @@ -439,7 +438,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, if ((MDiscardEvents || !CallerNeedsEvent) && supportsDiscardingPiEvents()) { NestedCallsTracker tracker; - MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), + MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr); return createDiscardedEvent(); } @@ -448,7 +447,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, auto EventImpl = detail::getSyclObjImpl(ResEvent); { NestedCallsTracker tracker; - MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), + MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), &EventImpl->getHandleRef(), EventImpl); } @@ -457,7 +456,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, : MExtGraphDeps.LastEventPtr; EventToStoreIn = EventImpl; } - // Track only if we won't be able to handle it with piQueueFinish. + // Track only if we won't be able to handle it with urQueueFinish. if (MEmulateOOO) addSharedEvent(ResEvent); return discard_or_return(ResEvent); @@ -596,13 +595,13 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { // multiple in-order queues as a result of that), wait for each event // directly. Otherwise, only wait for unenqueued or host task events, starting // from the latest submitted task in order to minimize total amount of calls, - // then handle the rest with piQueueFinish. + // then handle the rest with urQueueFinish. const bool SupportsPiFinish = !MEmulateOOO; for (auto EventImplWeakPtrIt = WeakEvents.rbegin(); EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) { if (std::shared_ptr EventImplSharedPtr = EventImplWeakPtrIt->lock()) { - // A nullptr PI event indicates that piQueueFinish will not cover it, + // A nullptr UR event indicates that urQueueFinish will not cover it, // either because it's a host task event or an unenqueued one. if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef()) { EventImplSharedPtr->wait(EventImplSharedPtr); @@ -611,7 +610,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { } if (SupportsPiFinish) { const PluginPtr &Plugin = getPlugin(); - Plugin->call(getHandleRef()); + Plugin->call(urQueueFinish, getHandleRef()); assert(SharedEvents.empty() && "Queues that support calling piQueueFinish " "shouldn't have shared events"); } else { @@ -632,13 +631,16 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { #endif } -pi_native_handle queue_impl::getNative(int32_t &NativeHandleDesc) const { +ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const { const PluginPtr &Plugin = getPlugin(); if (getContextImplPtr()->getBackend() == backend::opencl) - Plugin->call(MQueues[0]); - pi_native_handle Handle{}; - Plugin->call(MQueues[0], &Handle, - &NativeHandleDesc); + Plugin->call(urQueueRetain, MQueues[0]); + ur_native_handle_t Handle{}; + ur_queue_native_desc_t UrNativeDesc{UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC, + nullptr, nullptr}; + UrNativeDesc.pNativeData = &NativeHandleDesc; + + Plugin->call(urQueueGetNativeHandle, MQueues[0], &UrNativeDesc, &Handle); return Handle; } @@ -666,11 +668,10 @@ bool queue_impl::ext_oneapi_empty() const { info::event_command_status::complete; } - // Check the status of the backend queue. - pi_bool IsReady = false; - getPlugin()->call( - MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady, - nullptr); + // Check the status of the backend queue if this is not a host queue. + ur_bool_t IsReady = false; + getPlugin()->call(urQueueGetInfo, MQueues[0], UR_QUEUE_INFO_EMPTY, + sizeof(IsReady), &IsReady, nullptr); if (!IsReady) return false; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index b0c989c35fa53..7b694ad796f40 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include @@ -59,7 +59,7 @@ using DeviceImplPtr = std::shared_ptr; /// Sets max number of queues supported by FPGA RT. static constexpr size_t MaxNumQueues = 256; -//// Possible CUDA context types supported by PI CUDA backend +//// Possible CUDA context types supported by UR CUDA backend /// TODO: Implement this as a property once there is an extension document enum class CUDAContextT : char { primary, custom }; @@ -121,7 +121,7 @@ class queue_impl { "discard_events and enable_profiling."); // fallback profiling support. See MFallbackProfiling if (MDevice->has(aspect::queue_profiling)) { - // When piGetDeviceAndHostTimer is not supported, compute the + // When urDeviceGetGlobalTimestamps is not supported, compute the // profiling time OpenCL version < 2.1 case if (!getDeviceImplPtr()->isGetDeviceAndHostTimerSupported()) MFallbackProfiling = true; @@ -164,7 +164,6 @@ class queue_impl { "since the device is neither a member of the context nor a " "descendant of its member."); } - const QueueOrder QOrder = MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO; MQueues.push_back(createQueue(QOrder)); @@ -211,7 +210,7 @@ class queue_impl { event getLastEvent(); private: - void queue_impl_interop(sycl::detail::pi::PiQueue PiQueue) { + void queue_impl_interop(ur_queue_handle_t UrQueue) { if (has_property() && has_property()) { throw sycl::exception(make_error_code(errc::invalid), @@ -219,14 +218,14 @@ class queue_impl { "discard_events and enable_profiling."); } - MQueues.push_back(pi::cast(PiQueue)); + MQueues.push_back(UrQueue); - sycl::detail::pi::PiDevice DevicePI{}; + ur_device_handle_t DeviceUr{}; const PluginPtr &Plugin = getPlugin(); // TODO catch an exception and put it to list of asynchronous exceptions - Plugin->call( - MQueues[0], PI_QUEUE_INFO_DEVICE, sizeof(DevicePI), &DevicePI, nullptr); - MDevice = MContext->findMatchingDeviceImpl(DevicePI); + Plugin->call(urQueueGetInfo, MQueues[0], UR_QUEUE_INFO_DEVICE, + sizeof(DeviceUr), &DeviceUr, nullptr); + MDevice = MContext->findMatchingDeviceImpl(DeviceUr); if (MDevice == nullptr) { throw sycl::exception( make_error_code(errc::invalid), @@ -272,11 +271,11 @@ class queue_impl { public: /// Constructs a SYCL queue from plugin interoperability handle. /// - /// \param PiQueue is a raw PI queue handle. + /// \param UrQueue is a raw UR queue handle. /// \param Context is a SYCL context to associate with the queue being /// constructed. /// \param AsyncHandler is a SYCL asynchronous exception handler. - queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, + queue_impl(ur_queue_handle_t UrQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler) : MContext(Context), MAsyncHandler(AsyncHandler), MIsInorder(has_property()), @@ -285,24 +284,24 @@ class queue_impl { MIsProfilingEnabled(has_property()), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { - queue_impl_interop(PiQueue); + queue_impl_interop(UrQueue); } /// Constructs a SYCL queue from plugin interoperability handle. /// - /// \param PiQueue is a raw PI queue handle. + /// \param UrQueue is a raw UR queue handle. /// \param Context is a SYCL context to associate with the queue being /// constructed. /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is the queue properties. - queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, + queue_impl(ur_queue_handle_t UrQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), MIsInorder(has_property()), MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()) { - queue_impl_interop(PiQueue); + queue_impl_interop(UrQueue); } ~queue_impl() { @@ -324,16 +323,20 @@ class queue_impl { #endif throw_asynchronous(); cleanup_fusion_cmd(); - getPlugin()->call(MQueues[0]); + getPlugin()->call(urQueueRelease, MQueues[0]); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~queue_impl", e); } } /// \return an OpenCL interoperability queue handle. + cl_command_queue get() { - getPlugin()->call(MQueues[0]); - return pi::cast(MQueues[0]); + getPlugin()->call(urQueueRetain, MQueues[0]); + ur_native_handle_t nativeHandle = 0; + getPlugin()->call(urQueueGetNativeHandle, MQueues[0], nullptr, + &nativeHandle); + return ur::cast(nativeHandle); } /// \return an associated SYCL context. @@ -379,7 +382,7 @@ class queue_impl { "recording to a command graph."); } for (const auto &queue : MQueues) { - getPlugin()->call(queue); + getPlugin()->call(urQueueFlush, queue); } } @@ -479,29 +482,29 @@ class queue_impl { MAsyncHandler(std::move(Exceptions)); } - /// Creates PI properties array. + /// Creates UR properties array. /// /// \param PropList SYCL properties. /// \param Order specifies whether queue is in-order or out-of-order. - /// \param Properties PI properties array created from SYCL properties. - static sycl::detail::pi::PiQueueProperties - createPiQueueProperties(const property_list &PropList, QueueOrder Order) { - sycl::detail::pi::PiQueueProperties CreationFlags = 0; + /// \param Properties UR properties array created from SYCL properties. + static ur_queue_flags_t createUrQueueFlags(const property_list &PropList, + QueueOrder Order) { + ur_queue_flags_t CreationFlags = 0; if (Order == QueueOrder::OOO) { - CreationFlags = PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; + CreationFlags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; } if (PropList.has_property()) { - CreationFlags |= PI_QUEUE_FLAG_PROFILING_ENABLE; + CreationFlags |= UR_QUEUE_FLAG_PROFILING_ENABLE; } if (PropList.has_property< ext::oneapi::cuda::property::queue::use_default_stream>()) { - CreationFlags |= __SYCL_PI_CUDA_USE_DEFAULT_STREAM; + CreationFlags |= UR_QUEUE_FLAG_USE_DEFAULT_STREAM; } if (PropList.has_property()) { // Pass this flag to the Level Zero plugin to be able to check it from // queue property. - CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS; + CreationFlags |= UR_QUEUE_FLAG_DISCARD_EVENTS; } // Track that priority settings are not ambiguous. bool PrioritySeen = false; @@ -516,7 +519,7 @@ class queue_impl { make_error_code(errc::invalid), "Queue cannot be constructed with different priorities."); } - CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW; + CreationFlags |= UR_QUEUE_FLAG_PRIORITY_LOW; PrioritySeen = true; } if (PropList.has_property()) { @@ -525,14 +528,14 @@ class queue_impl { make_error_code(errc::invalid), "Queue cannot be constructed with different priorities."); } - CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH; + CreationFlags |= UR_QUEUE_FLAG_PRIORITY_HIGH; } // Track that submission modes do not conflict. bool SubmissionSeen = false; if (PropList.has_property< ext::intel::property::queue::no_immediate_command_list>()) { SubmissionSeen = true; - CreationFlags |= PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE; + CreationFlags |= UR_QUEUE_FLAG_SUBMISSION_BATCHED; } if (PropList.has_property< ext::intel::property::queue::immediate_command_list>()) { @@ -542,50 +545,56 @@ class queue_impl { "Queue cannot be constructed with different submission modes."); } SubmissionSeen = true; - CreationFlags |= PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE; + CreationFlags |= UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE; } return CreationFlags; } - /// Creates PI queue. + /// Creates UR queue. /// /// \param Order specifies whether the queue being constructed as in-order /// or out-of-order. - sycl::detail::pi::PiQueue createQueue(QueueOrder Order) { - sycl::detail::pi::PiQueue Queue{}; - sycl::detail::pi::PiContext Context = MContext->getHandleRef(); - sycl::detail::pi::PiDevice Device = MDevice->getHandleRef(); + ur_queue_handle_t createQueue(QueueOrder Order) { + ur_queue_handle_t Queue{}; + ur_context_handle_t Context = MContext->getHandleRef(); + ur_device_handle_t Device = MDevice->getHandleRef(); const PluginPtr &Plugin = getPlugin(); - - sycl::detail::pi::PiQueueProperties Properties[] = { - PI_QUEUE_FLAGS, createPiQueueProperties(MPropList, Order), 0, 0, 0}; + /* + sycl::detail::pi::PiQueueProperties Properties[] = { + PI_QUEUE_FLAGS, createPiQueueProperties(MPropList, Order), 0, 0, 0}; + */ + ur_queue_properties_t Properties = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, + nullptr, 0}; + Properties.flags = createUrQueueFlags(MPropList, Order); + ur_queue_index_properties_t IndexProperties = { + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES, nullptr, 0}; if (has_property()) { - int Idx = get_property() - .get_index(); - Properties[2] = PI_QUEUE_COMPUTE_INDEX; - Properties[3] = static_cast(Idx); + IndexProperties.computeIndex = + get_property() + .get_index(); + Properties.pNext = &IndexProperties; } - sycl::detail::pi::PiResult Error = - Plugin->call_nocheck(Context, Device, - Properties, &Queue); + ur_result_t Error = Plugin->call_nocheck(urQueueCreate, Context, Device, + &Properties, &Queue); // If creating out-of-order queue failed and this property is not // supported (for example, on FPGA), it will return - // PI_ERROR_INVALID_QUEUE_PROPERTIES and will try to create in-order queue. - if (!MEmulateOOO && Error == PI_ERROR_INVALID_QUEUE_PROPERTIES) { + // UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES and will try to create in-order + // queue. + if (!MEmulateOOO && Error == UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES) { MEmulateOOO = true; Queue = createQueue(QueueOrder::Ordered); } else { - Plugin->checkPiResult(Error); + Plugin->checkUrResult(Error); } return Queue; } - /// \return a raw PI handle for a free queue. The returned handle is not + /// \return a raw UR handle for a free queue. The returned handle is not /// retained. It is caller responsibility to make sure queue is still alive. - sycl::detail::pi::PiQueue &getExclusiveQueueHandleRef() { - sycl::detail::pi::PiQueue *PIQ = nullptr; + ur_queue_handle_t &getExclusiveUrQueueHandleRef() { + ur_queue_handle_t *PIQ = nullptr; bool ReuseQueue = false; { std::lock_guard Lock(MMutex); @@ -608,18 +617,18 @@ class queue_impl { if (!ReuseQueue) *PIQ = createQueue(QueueOrder::Ordered); else - getPlugin()->call(*PIQ); + getPlugin()->call(urQueueFinish, *PIQ); return *PIQ; } - /// \return a raw PI queue handle. The returned handle is not retained. It + /// \return a raw UR queue handle. The returned handle is not retained. It /// is caller responsibility to make sure queue is still alive. - sycl::detail::pi::PiQueue &getHandleRef() { + ur_queue_handle_t &getHandleRef() { if (!MEmulateOOO) return MQueues[0]; - return getExclusiveQueueHandleRef(); + return getExclusiveUrQueueHandleRef(); } /// \return true if the queue was constructed with property specified by @@ -675,7 +684,7 @@ class queue_impl { /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing advise operation. event mem_advise(const std::shared_ptr &Self, const void *Ptr, - size_t Length, pi_mem_advice Advice, + size_t Length, ur_usm_advice_flags_t Advice, const std::vector &DepEvents, bool CallerNeedsEvent); /// Puts exception to the list of asynchronous ecxeptions. @@ -693,7 +702,7 @@ class queue_impl { /// Gets the native handle of the SYCL queue. /// /// \return a native handle. - pi_native_handle getNative(int32_t &NativeHandleDesc) const; + ur_native_handle_t getNative(int32_t &NativeHandleDesc) const; void registerStreamServiceEvent(const EventImplPtr &Event) { std::lock_guard Lock(MStreamsServiceEventsMutex); @@ -757,13 +766,13 @@ class queue_impl { std::unique_lock &QueueLock); // Helps to manage host tasks presence in scenario with barrier usage. - // Approach that tracks almost all tasks to provide barrier sync for both pi + // Approach that tracks almost all tasks to provide barrier sync for both ur // tasks and host tasks is applicable for out of order queues only. No-op // for in order ones. void tryToResetEnqueuedBarrierDep(const EventImplPtr &EnqueuedBarrierEvent); // Called on host task completion that could block some kernels from enqueue. - // Approach that tracks almost all tasks to provide barrier sync for both pi + // Approach that tracks almost all tasks to provide barrier sync for both ur // tasks and host tasks is applicable for out of order queues only. Not neede // for in order ones. void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask); @@ -787,8 +796,9 @@ class queue_impl { template EventImplPtr insertHelperBarrier(const HandlerType &Handler) { auto ResEvent = std::make_shared(Handler.MQueue); - getPlugin()->call( - Handler.MQueue->getHandleRef(), 0, nullptr, &ResEvent->getHandleRef()); + getPlugin()->call(urEnqueueEventsWaitWithBarrier, + Handler.MQueue->getHandleRef(), 0, nullptr, + &ResEvent->getHandleRef()); return ResEvent; } @@ -908,7 +918,7 @@ class queue_impl { /// \param MemMngrFunc is a function that forwards its arguments to the /// appropriate memory manager function. /// \param MemMngrArgs are all the arguments that need to be passed to memory - /// manager except the last three: dependencies, PI event and + /// manager except the last three: dependencies, UR event and /// EventImplPtr are filled out by this helper. /// \return an event representing the submitted operation. template MQueues; + std::vector MQueues; /// Iterator through MQueues. size_t MNextQueueIdx = 0; diff --git a/sycl/source/detail/sampler_impl.cpp b/sycl/source/detail/sampler_impl.cpp index edca8eb1be025..dd117814fba2c 100644 --- a/sycl/source/detail/sampler_impl.cpp +++ b/sycl/source/detail/sampler_impl.cpp @@ -22,21 +22,55 @@ sampler_impl::sampler_impl(coordinate_normalization_mode normalizationMode, MFiltMode(filteringMode), MPropList(propList) {} sampler_impl::sampler_impl(cl_sampler clSampler, const context &syclContext) { + const PluginPtr &Plugin = getSyclObjImpl(syclContext)->getPlugin(); + ur_sampler_handle_t Sampler{}; + Plugin->call(urSamplerCreateWithNativeHandle, + reinterpret_cast(clSampler), + getSyclObjImpl(syclContext)->getHandleRef(), nullptr, &Sampler); - sycl::detail::pi::PiSampler Sampler = - pi::cast(clSampler); MContextToSampler[syclContext] = Sampler; - const PluginPtr &Plugin = getSyclObjImpl(syclContext)->getPlugin(); - Plugin->call(Sampler); - Plugin->call( - Sampler, PI_SAMPLER_INFO_NORMALIZED_COORDS, sizeof(pi_bool), - &MCoordNormMode, nullptr); - Plugin->call( - Sampler, PI_SAMPLER_INFO_ADDRESSING_MODE, - sizeof(pi_sampler_addressing_mode), &MAddrMode, nullptr); - Plugin->call( - Sampler, PI_SAMPLER_INFO_FILTER_MODE, sizeof(pi_sampler_filter_mode), - &MFiltMode, nullptr); + bool NormalizedCoords; + + Plugin->call(urSamplerGetInfo, Sampler, UR_SAMPLER_INFO_NORMALIZED_COORDS, + sizeof(ur_bool_t), &NormalizedCoords, nullptr); + MCoordNormMode = NormalizedCoords + ? coordinate_normalization_mode::normalized + : coordinate_normalization_mode::unnormalized; + + ur_sampler_addressing_mode_t AddrMode; + Plugin->call(urSamplerGetInfo, Sampler, UR_SAMPLER_INFO_ADDRESSING_MODE, + sizeof(ur_sampler_addressing_mode_t), &AddrMode, nullptr); + switch (AddrMode) { + case UR_SAMPLER_ADDRESSING_MODE_CLAMP: + MAddrMode = addressing_mode::clamp; + break; + case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: + MAddrMode = addressing_mode::clamp_to_edge; + break; + case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT: + MAddrMode = addressing_mode::mirrored_repeat; + break; + case UR_SAMPLER_ADDRESSING_MODE_REPEAT: + MAddrMode = addressing_mode::repeat; + break; + case UR_SAMPLER_ADDRESSING_MODE_NONE: + default: + MAddrMode = addressing_mode::none; + break; + } + + ur_sampler_filter_mode_t FiltMode; + Plugin->call(urSamplerGetInfo, Sampler, UR_SAMPLER_INFO_FILTER_MODE, + sizeof(ur_sampler_filter_mode_t), &FiltMode, nullptr); + switch (FiltMode) { + case UR_SAMPLER_FILTER_MODE_LINEAR: + MFiltMode = filtering_mode::linear; + break; + case UR_SAMPLER_FILTER_MODE_NEAREST: + default: + MFiltMode = filtering_mode::nearest; + break; + } } sampler_impl::~sampler_impl() { @@ -46,15 +80,14 @@ sampler_impl::~sampler_impl() { // TODO catch an exception and add it to the list of asynchronous // exceptions const PluginPtr &Plugin = getSyclObjImpl(Iter.first)->getPlugin(); - Plugin->call(Iter.second); + Plugin->call(urSamplerRelease, Iter.second); } } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~sample_impl", e); } } -sycl::detail::pi::PiSampler -sampler_impl::getOrCreateSampler(const context &Context) { +ur_sampler_handle_t sampler_impl::getOrCreateSampler(const context &Context) { { std::lock_guard Lock(MMutex); auto It = MContextToSampler.find(Context); @@ -62,27 +95,49 @@ sampler_impl::getOrCreateSampler(const context &Context) { return It->second; } - const pi_sampler_properties sprops[] = { - PI_SAMPLER_INFO_NORMALIZED_COORDS, - static_cast(MCoordNormMode), - PI_SAMPLER_INFO_ADDRESSING_MODE, - static_cast(MAddrMode), - PI_SAMPLER_INFO_FILTER_MODE, - static_cast(MFiltMode), - 0}; - - sycl::detail::pi::PiResult errcode_ret = PI_SUCCESS; - sycl::detail::pi::PiSampler resultSampler = nullptr; + ur_sampler_desc_t desc{}; + desc.stype = UR_STRUCTURE_TYPE_SAMPLER_DESC; + switch (MAddrMode) { + case addressing_mode::clamp: + desc.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP; + break; + case addressing_mode::clamp_to_edge: + desc.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + break; + case addressing_mode::repeat: + desc.addressingMode = UR_SAMPLER_ADDRESSING_MODE_REPEAT; + break; + case addressing_mode::none: + desc.addressingMode = UR_SAMPLER_ADDRESSING_MODE_NONE; + break; + case addressing_mode::mirrored_repeat: + desc.addressingMode = UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; + break; + } + switch (MFiltMode) { + case filtering_mode::linear: + desc.filterMode = UR_SAMPLER_FILTER_MODE_LINEAR; + break; + case filtering_mode::nearest: + desc.filterMode = UR_SAMPLER_FILTER_MODE_NEAREST; + break; + } + desc.normalizedCoords = + (MCoordNormMode == coordinate_normalization_mode::normalized); + + ur_result_t errcode_ret = UR_RESULT_SUCCESS; + ur_sampler_handle_t resultSampler = nullptr; const PluginPtr &Plugin = getSyclObjImpl(Context)->getPlugin(); - errcode_ret = Plugin->call_nocheck( - getSyclObjImpl(Context)->getHandleRef(), sprops, &resultSampler); + errcode_ret = Plugin->call_nocheck(urSamplerCreate, + getSyclObjImpl(Context)->getHandleRef(), + &desc, &resultSampler); - if (errcode_ret == PI_ERROR_UNSUPPORTED_FEATURE) + if (errcode_ret == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) throw sycl::exception(sycl::errc::feature_not_supported, "Images are not supported by this device."); - Plugin->checkPiResult(errcode_ret); + Plugin->checkUrResult(errcode_ret); std::lock_guard Lock(MMutex); MContextToSampler[Context] = resultSampler; diff --git a/sycl/source/detail/sampler_impl.hpp b/sycl/source/detail/sampler_impl.hpp index 8e6f52bf5720b..cfa4c83b15607 100644 --- a/sycl/source/detail/sampler_impl.hpp +++ b/sycl/source/detail/sampler_impl.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include @@ -39,7 +39,7 @@ class sampler_impl { coordinate_normalization_mode get_coordinate_normalization_mode() const; - sycl::detail::pi::PiSampler getOrCreateSampler(const context &Context); + ur_sampler_handle_t getOrCreateSampler(const context &Context); ~sampler_impl(); @@ -49,7 +49,7 @@ class sampler_impl { /// Protects all the fields that can be changed by class' methods. std::mutex MMutex; - std::unordered_map MContextToSampler; + std::unordered_map MContextToSampler; coordinate_normalization_mode MCoordNormMode; addressing_mode MAddrMode; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 76320c2833891..010d366c85416 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1,4 +1,3 @@ -//===----------- commands.cpp - SYCL commands -------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,6 +5,7 @@ // //===----------------------------------------------------------------------===// +#include "ur_api.h" #include #include @@ -232,34 +232,34 @@ static std::string commandToName(Command::CommandType Type) { } #endif -std::vector -Command::getPiEvents(const std::vector &EventImpls) const { - std::vector RetPiEvents; +std::vector +Command::getUrEvents(const std::vector &EventImpls) const { + std::vector RetUrEvents; for (auto &EventImpl : EventImpls) { if (EventImpl->getHandleRef() == nullptr) continue; // Do not add redundant event dependencies for in-order queues. - // At this stage dependency is definitely pi task and need to check if - // current one is a host task. In this case we should not skip pi event due + // At this stage dependency is definitely ur task and need to check if + // current one is a host task. In this case we should not skip ur event due // to different sync mechanisms for different task types on in-order queue. if (MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue && MWorkerQueue->isInOrder() && !isHostTask()) continue; - RetPiEvents.push_back(EventImpl->getHandleRef()); + RetUrEvents.push_back(EventImpl->getHandleRef()); } - return RetPiEvents; + return RetUrEvents; } -// This function is implemented (duplicating getPiEvents a lot) as short term +// This function is implemented (duplicating getUrEvents a lot) as short term // solution for the issue that barrier with wait list could not -// handle empty pi event handles when kernel is enqueued on host task +// handle empty ur event handles when kernel is enqueued on host task // completion. -std::vector Command::getPiEventsBlocking( +std::vector Command::getUrEventsBlocking( const std::vector &EventImpls) const { - std::vector RetPiEvents; + std::vector RetUrEvents; for (auto &EventImpl : EventImpls) { // Throwaway events created with empty constructor will not have a context // (which is set lazily) calling getContextImpl() would set that @@ -279,17 +279,17 @@ std::vector Command::getPiEventsBlocking( BLOCKING); } // Do not add redundant event dependencies for in-order queues. - // At this stage dependency is definitely pi task and need to check if + // At this stage dependency is definitely ur task and need to check if // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. if (MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue && MWorkerQueue->isInOrder() && !isHostTask()) continue; - RetPiEvents.push_back(EventImpl->getHandleRef()); + RetUrEvents.push_back(EventImpl->getHandleRef()); } - return RetPiEvents; + return RetUrEvents; } bool Command::isHostTask() const { @@ -322,7 +322,7 @@ struct EnqueueNativeCommandData { std::function func; }; -void InteropFreeFunc(pi_queue, void *InteropData) { +void InteropFreeFunc(ur_queue_handle_t, void *InteropData) { auto *Data = reinterpret_cast(InteropData); return Data->func(Data->ih); } @@ -331,7 +331,7 @@ void InteropFreeFunc(pi_queue, void *InteropData) { class DispatchHostTask { ExecCGCommand *MThisCmd; std::vector MReqToMem; - std::vector MReqPiMem; + std::vector MReqUrMem; bool waitForEvents() const { std::map> @@ -348,13 +348,13 @@ class DispatchHostTask { // sophisticated waiting mechanism to allow to utilize this thread for any // other available job and resume once all required events are ready. for (auto &PluginWithEvents : RequiredEventsPerPlugin) { - std::vector RawEvents = - MThisCmd->getPiEvents(PluginWithEvents.second); + std::vector RawEvents = + MThisCmd->getUrEvents(PluginWithEvents.second); if (RawEvents.size() == 0) continue; try { - PluginWithEvents.first->call(RawEvents.size(), - RawEvents.data()); + PluginWithEvents.first->call(urEventWait, RawEvents.size(), + RawEvents.data()); } catch (const sycl::exception &) { MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( std::current_exception()); @@ -378,9 +378,9 @@ class DispatchHostTask { public: DispatchHostTask(ExecCGCommand *ThisCmd, std::vector ReqToMem, - std::vector ReqPiMem) + std::vector ReqUrMem) : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)), - MReqPiMem(std::move(ReqPiMem)) {} + MReqUrMem(std::move(ReqUrMem)) {} void operator()() const { assert(MThisCmd->getCG().getType() == CGType::CodeplayHostTask); @@ -423,9 +423,10 @@ class DispatchHostTask { // for host task? auto &Queue = HostTask.MQueue; bool NativeCommandSupport = false; - Queue->getPlugin()->call( + Queue->getPlugin()->call( + urDeviceGetInfo, detail::getSyclObjImpl(Queue->get_device())->getHandleRef(), - PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT, + UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP, sizeof(NativeCommandSupport), &NativeCommandSupport, nullptr); if (NativeCommandSupport) { EnqueueNativeCommandData CustomOpData{ @@ -438,9 +439,10 @@ class DispatchHostTask { // // This entry point is needed in order to migrate memory across // devices in the same context for CUDA and HIP backends - Queue->getPlugin()->call( - HostTask.MQueue->getHandleRef(), InteropFreeFunc, &CustomOpData, - MReqPiMem.size(), MReqPiMem.data(), 0, nullptr, nullptr); + Queue->getPlugin()->call( + urEnqueueNativeCommandExp, HostTask.MQueue->getHandleRef(), + InteropFreeFunc, &CustomOpData, MReqUrMem.size(), + MReqUrMem.data(), nullptr, 0, nullptr, nullptr); } else { HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), IH); @@ -479,7 +481,7 @@ class DispatchHostTask { #endif try { - // If we enqueue blocked users - pi level could throw exception that + // If we enqueue blocked users - ur level could throw exception that // should be treated as async now. Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); } catch (...) { @@ -497,7 +499,7 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, - sycl::detail::pi::PiEvent &Event) { + ur_event_handle_t &Event) { #ifndef NDEBUG for (const EventImplPtr &Event : EventImpls) assert(!Event->isHost() && @@ -515,7 +517,7 @@ void Command::waitForEvents(QueueImplPtr Queue, // Also we have default host queue. This queue is accessible via // Scheduler. Now, let's assume we have three different events: E1(C1), // E2(C1), E3(C2). The command's MPreparedDepsEvents will contain all - // three events (E1, E2, E3). Now, if piEventsWait is called for all + // three events (E1, E2, E3). Now, if urEventWait is called for all // three events we'll experience failure with CL_INVALID_CONTEXT 'cause // these events refer to different contexts. std::map> @@ -529,21 +531,22 @@ void Command::waitForEvents(QueueImplPtr Queue, } for (auto &CtxWithEvents : RequiredEventsPerContext) { - std::vector RawEvents = - getPiEvents(CtxWithEvents.second); - CtxWithEvents.first->getPlugin()->call( - RawEvents.size(), RawEvents.data()); + std::vector RawEvents = + getUrEvents(CtxWithEvents.second); + if (!RawEvents.empty()) { + CtxWithEvents.first->getPlugin()->call(urEventWait, RawEvents.size(), + RawEvents.data()); + } } } else { - std::vector RawEvents = - getPiEvents(EventImpls); + std::vector RawEvents = getUrEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); const PluginPtr &Plugin = Queue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); - Plugin->call( - Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); + Plugin->call(urEnqueueEventsWait, Queue->getHandleRef(), RawEvents.size(), + &RawEvents[0], &Event); } } } @@ -553,8 +556,8 @@ void Command::waitForEvents(QueueImplPtr Queue, /// should not outlive the event connected to it. Command::Command( CommandType Type, QueueImplPtr Queue, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - const std::vector &SyncPoints) + ur_exp_command_buffer_handle_t CommandBuffer, + const std::vector &SyncPoints) : MQueue(std::move(Queue)), MEvent(std::make_shared(MQueue)), MPreparedDepsEvents(MEvent->getPreparedDepsEvents()), @@ -642,10 +645,10 @@ void Command::emitEdgeEventForCommandDependence( /// Creates an edge when the dependency is due to an event. /// @param Cmd The command object of the source of the edge -/// @param PiEventAddr The address that defines the edge dependency, which in +/// @param UrEventAddr The address that defines the edge dependency, which in /// this case is an event -void Command::emitEdgeEventForEventDependence( - Command *Cmd, sycl::detail::pi::PiEvent &PiEventAddr) { +void Command::emitEdgeEventForEventDependence(Command *Cmd, + ur_event_handle_t &UrEventAddr) { #ifdef XPTI_ENABLE_INSTRUMENTATION // If we have failed to create an event to represent the Command, then we // cannot emit an edge event. Bail early! @@ -655,13 +658,12 @@ void Command::emitEdgeEventForEventDependence( if (Cmd && Cmd->MTraceEvent) { // If the event is associated with a command, we use this command's trace // event as the source of edge, hence modeling the control flow - emitEdgeEventForCommandDependence(Cmd, (void *)PiEventAddr, false); + emitEdgeEventForCommandDependence(Cmd, (void *)UrEventAddr, false); return; } - if (PiEventAddr) { + if (UrEventAddr) { xpti::utils::StringHelper SH; - std::string AddressStr = - SH.addressAsString(PiEventAddr); + std::string AddressStr = SH.addressAsString(UrEventAddr); // This is the case when it is a OCL event enqueued by the user or another // event is registered by the runtime as a dependency The dependency on // this occasion is an OCL event; so we build a virtual node in the graph @@ -692,7 +694,7 @@ void Command::emitEdgeEventForEventDependence( EdgeEvent->source_id = NodeEvent->unique_id; EdgeEvent->target_id = TgtEvent->unique_id; xpti::addMetadata(EdgeEvent, "event", - reinterpret_cast(PiEventAddr)); + reinterpret_cast(UrEventAddr)); xptiNotifySubscribers(MStreamID, xpti::trace_edge_create, detail::GSYCLGraphEvent, EdgeEvent, EdgeInstanceNo, nullptr); @@ -752,8 +754,8 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, const ContextImplPtr &WorkerContext = getWorkerContext(); // 1. Non-host events can be ignored if they are not fully initialized. - // 2. Some types of commands do not produce PI events after they are - // enqueued (e.g. alloca). Note that we can't check the pi event to make that + // 2. Some types of commands do not produce UR events after they are + // enqueued (e.g. alloca). Note that we can't check the ur event to make that // distinction since the command might still be unenqueued at this point. bool PiEventExpected = (!DepEvent->isHost() && !DepEvent->isDefaultConstructed()); @@ -826,22 +828,22 @@ Command *Command::addDep(EventImplPtr Event, // We need this for just the instrumentation, so guarding it will prevent // unused variable warnings when instrumentation is turned off Command *Cmd = (Command *)Event->getCommand(); - sycl::detail::pi::PiEvent &PiEventAddr = Event->getHandleRef(); + ur_event_handle_t &UrEventAddr = Event->getHandleRef(); // Now make an edge for the dependent event - emitEdgeEventForEventDependence(Cmd, PiEventAddr); + emitEdgeEventForEventDependence(Cmd, UrEventAddr); #endif return processDepEvent(std::move(Event), DepDesc{nullptr, nullptr, nullptr}, ToCleanUp); } -void Command::emitEnqueuedEventSignal(sycl::detail::pi::PiEvent &PiEventAddr) { +void Command::emitEnqueuedEventSignal(ur_event_handle_t &UrEventAddr) { #ifdef XPTI_ENABLE_INSTRUMENTATION emitInstrumentationGeneral( MStreamID, MInstanceID, static_cast(MTraceEvent), - xpti::trace_signal, static_cast(PiEventAddr)); + xpti::trace_signal, static_cast(UrEventAddr)); #endif - std::ignore = PiEventAddr; + std::ignore = UrEventAddr; } void Command::emitInstrumentation(uint16_t Type, const char *Txt) { @@ -860,7 +862,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, #ifdef XPTI_ENABLE_INSTRUMENTATION // If command is enqueued from host task thread - it will not have valid // submission code location set. So we set it manually to properly trace - // failures if pi level report any. + // failures if ur level report any. std::unique_ptr AsyncCodeLocationPtr; if (xptiTraceEnabled() && !CurrentCodeLocationValid()) { AsyncCodeLocationPtr.reset( @@ -916,9 +918,9 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, // This will avoid execution of the same failed command twice. MEnqueueStatus = EnqueueResultT::SyclEnqueueFailed; MShouldCompleteEventIfPossible = true; - pi_int32 Res = enqueueImp(); + ur_result_t Res = enqueueImp(); - if (PI_SUCCESS != Res) + if (UR_RESULT_SUCCESS != Res) EnqueueResult = EnqueueResultT(EnqueueResultT::SyclEnqueueFailed, this, Res); else { @@ -928,7 +930,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, MEvent->setComplete(); // Consider the command is successfully enqueued if return code is - // PI_SUCCESS + // UR_RESULT_SUCCESS MEnqueueStatus = EnqueueResultT::SyclEnqueueSuccess; if (MLeafCounter == 0 && supportsPostEnqueueCleanup() && !SYCLConfig::get() && @@ -1086,11 +1088,11 @@ void AllocaCommand::emitInstrumentationData() { #endif } -pi_int32 AllocaCommand::enqueueImp() { +ur_result_t AllocaCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); void *HostPtr = nullptr; if (!MIsLeaderAlloca) { @@ -1099,7 +1101,7 @@ pi_int32 AllocaCommand::enqueueImp() { // Do not need to make allocation if we have a linked device allocation Command::waitForEvents(MQueue, EventImpls, Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } HostPtr = MLinkedAllocaCmd->getMemAllocation(); } @@ -1109,7 +1111,7 @@ pi_int32 AllocaCommand::enqueueImp() { MInitFromUserData, HostPtr, std::move(EventImpls), Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void AllocaCommand::printDot(std::ostream &Stream) const { @@ -1183,10 +1185,10 @@ void *AllocaSubBufCommand::getMemAllocation() const { return MMemAllocation; } -pi_int32 AllocaSubBufCommand::enqueueImp() { +ur_result_t AllocaSubBufCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); MMemAllocation = MemoryManager::allocateMemSubBuffer( getContext(MQueue), MParentAlloca->getMemAllocation(), @@ -1195,7 +1197,7 @@ pi_int32 AllocaSubBufCommand::enqueueImp() { XPTIRegistry::bufferAssociateNotification(MParentAlloca->getSYCLMemObj(), MMemAllocation); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void AllocaSubBufCommand::printDot(std::ostream &Stream) const { @@ -1247,10 +1249,10 @@ void ReleaseCommand::emitInstrumentationData() { #endif } -pi_int32 ReleaseCommand::enqueueImp() { +ur_result_t ReleaseCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - std::vector RawEvents = getPiEvents(EventImpls); + std::vector RawEvents = getUrEvents(EventImpls); bool SkipRelease = false; // On host side we only allocate memory for full buffers. @@ -1283,7 +1285,7 @@ pi_int32 ReleaseCommand::enqueueImp() { EventImplPtr UnmapEventImpl(new event_impl(Queue)); UnmapEventImpl->setContextImpl(getContext(Queue)); UnmapEventImpl->setStateIncomplete(); - sycl::detail::pi::PiEvent &UnmapEvent = UnmapEventImpl->getHandleRef(); + ur_event_handle_t &UnmapEvent = UnmapEventImpl->getHandleRef(); void *Src = CurAllocaIsHost ? MAllocaCmd->getMemAllocation() @@ -1300,7 +1302,7 @@ pi_int32 ReleaseCommand::enqueueImp() { EventImpls.clear(); EventImpls.push_back(UnmapEventImpl); } - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); if (SkipRelease) Command::waitForEvents(MQueue, EventImpls, Event); else { @@ -1308,7 +1310,7 @@ pi_int32 ReleaseCommand::enqueueImp() { MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void ReleaseCommand::printDot(std::ostream &Stream) const { @@ -1366,19 +1368,19 @@ void MapMemObject::emitInstrumentationData() { #endif } -pi_int32 MapMemObject::enqueueImp() { +ur_result_t MapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - std::vector RawEvents = getPiEvents(EventImpls); + std::vector RawEvents = getUrEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); *MDstPtr = MemoryManager::map( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), MQueue, MMapMode, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, MSrcReq.MElemSize, std::move(RawEvents), Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void MapMemObject::printDot(std::ostream &Stream) const { @@ -1448,18 +1450,18 @@ bool UnMapMemObject::producesPiEvent() const { MEvent->getHandleRef() != nullptr); } -pi_int32 UnMapMemObject::enqueueImp() { +ur_result_t UnMapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - std::vector RawEvents = getPiEvents(EventImpls); + std::vector RawEvents = getUrEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(), MDstAllocaCmd->getMemAllocation(), MQueue, *MSrcPtr, std::move(RawEvents), Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void UnMapMemObject::printDot(std::ostream &Stream) const { @@ -1553,13 +1555,13 @@ bool MemCpyCommand::producesPiEvent() const { MEvent->getHandleRef() != nullptr; } -pi_int32 MemCpyCommand::enqueueImp() { +ur_result_t MemCpyCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); - auto RawEvents = getPiEvents(EventImpls); + auto RawEvents = getUrEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); MemoryManager::copy( @@ -1569,7 +1571,7 @@ pi_int32 MemCpyCommand::enqueueImp() { MQueue, MDstReq.MDims, MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), Event, MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void MemCpyCommand::printDot(std::ostream &Stream) const { @@ -1614,10 +1616,10 @@ void ExecCGCommand::clearAuxiliaryResources() { ((CGExecKernel *)MCommandGroup.get())->clearAuxiliaryResources(); } -pi_int32 UpdateHostRequirementCommand::enqueueImp() { +ur_result_t UpdateHostRequirementCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); Command::waitForEvents(MQueue, EventImpls, Event); assert(MSrcAllocaCmd && "Expected valid alloca command"); @@ -1625,7 +1627,7 @@ pi_int32 UpdateHostRequirementCommand::enqueueImp() { assert(MDstPtr && "Expected valid target pointer"); *MDstPtr = MSrcAllocaCmd->getMemAllocation(); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { @@ -1702,13 +1704,13 @@ ContextImplPtr MemCpyCommandHost::getWorkerContext() const { return MWorkerQueue->getContextImplPtr(); } -pi_int32 MemCpyCommandHost::enqueueImp() { +ur_result_t MemCpyCommandHost::enqueueImp() { const QueueImplPtr &Queue = MWorkerQueue; waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - std::vector RawEvents = getPiEvents(EventImpls); + std::vector RawEvents = getUrEvents(EventImpls); - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); // Omit copying if mode is discard one. // TODO: Handle this at the graph building time by, for example, creating // empty node instead of memcpy. @@ -1716,7 +1718,7 @@ pi_int32 MemCpyCommandHost::enqueueImp() { MDstReq.MAccessMode == access::mode::discard_write) { Command::waitForEvents(Queue, EventImpls, Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } flushCrossQueueDeps(EventImpls, MWorkerQueue); @@ -1726,19 +1728,18 @@ pi_int32 MemCpyCommandHost::enqueueImp() { MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); - - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } EmptyCommand::EmptyCommand() : Command(CommandType::EMPTY_TASK, nullptr) { emitInstrumentationDataProxy(); } -pi_int32 EmptyCommand::enqueueImp() { +ur_result_t EmptyCommand::enqueueImp() { waitForPreparedHostEvents(); waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef()); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void EmptyCommand::addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, @@ -1925,8 +1926,8 @@ static std::string_view cgTypeToString(detail::CGType Type) { ExecCGCommand::ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, - bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - const std::vector &Dependencies) + bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer, + const std::vector &Dependencies) : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer, Dependencies), MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { @@ -1948,7 +1949,7 @@ std::string instrumentationGetKernelName( std::string KernelName; if (SyclKernel && SyclKernel->isCreatedFromSource()) { FromSource = true; - pi_kernel KernelHandle = SyclKernel->getHandleRef(); + ur_kernel_handle_t KernelHandle = SyclKernel->getHandleRef(); Address = KernelHandle; KernelName = FunctionName; } else { @@ -1972,8 +1973,8 @@ void instrumentationAddExtraKernelMetadata( auto FilterArgs = [&Args](detail::ArgDesc &Arg, int NextTrueIndex) { Args.push_back({Arg.MType, Arg.MPtr, Arg.MSize, NextTrueIndex}); }; - sycl::detail::pi::PiProgram Program = nullptr; - sycl::detail::pi::PiKernel Kernel = nullptr; + ur_program_handle_t Program = nullptr; + ur_kernel_handle_t Kernel = nullptr; std::mutex *KernelMutex = nullptr; const KernelArgMask *EliminatedArgMask = nullptr; @@ -1994,7 +1995,7 @@ void instrumentationAddExtraKernelMetadata( detail::getSyclObjImpl(SyclKernel); EliminatedArgMask = KernelImpl->getKernelArgMask(); - Program = KernelImpl->getDeviceImage()->get_program_ref(); + Program = KernelImpl->getDeviceImage()->get_ur_program_ref(); } else if (nullptr != SyclKernel) { Program = SyclKernel->getProgramRef(); if (!SyclKernel->isCreatedFromSource()) @@ -2227,8 +2228,7 @@ std::string_view ExecCGCommand::getTypeString() const { // the number of work - groups, such that the size of each group is chosen by // the runtime, or by the number of work - groups and number of work - items // for users who need more control. -static void adjustNDRangePerKernel(NDRDescT &NDR, - sycl::detail::pi::PiKernel Kernel, +static void adjustNDRangePerKernel(NDRDescT &NDR, ur_kernel_handle_t Kernel, const device_impl &DeviceImpl) { if (NDR.GlobalSize[0] != 0) return; // GlobalSize is set - no need to adjust @@ -2264,20 +2264,20 @@ void ReverseRangeDimensionsForKernel(NDRDescT &NDR) { } } -pi_mem_obj_access AccessModeToPi(access::mode AccessorMode) { +ur_mem_flags_t AccessModeToUr(access::mode AccessorMode) { switch (AccessorMode) { case access::mode::read: - return PI_ACCESS_READ_ONLY; + return UR_MEM_FLAG_READ_ONLY; case access::mode::write: case access::mode::discard_write: - return PI_ACCESS_WRITE_ONLY; + return UR_MEM_FLAG_WRITE_ONLY; default: - return PI_ACCESS_READ_WRITE; + return UR_MEM_FLAG_READ_WRITE; } } void SetArgBasedOnType( - const PluginPtr &Plugin, sycl::detail::pi::PiKernel Kernel, + const PluginPtr &Plugin, ur_kernel_handle_t Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex) { @@ -2291,86 +2291,70 @@ void SetArgBasedOnType( // we may pass default constructed accessors to a command, which don't add // requirements. In such case, getMemAllocationFunc is nullptr, but it's a // valid case, so we need to properly handle it. - sycl::detail::pi::PiMem MemArg = + ur_mem_handle_t MemArg = getMemAllocationFunc - ? (sycl::detail::pi::PiMem)getMemAllocationFunc(Req) + ? reinterpret_cast(getMemAllocationFunc(Req)) : nullptr; - // Only call piKernelSetArg for opencl plugin. Although for now opencl - // plugin is a thin wrapper for UR plugin, but they still produce different - // MemArg. For opencl plugin, the MemArg is a straight-forward cl_mem, so it - // will be fine using piKernelSetArg, which will call urKernelSetArgValue to - // pass the cl_mem object directly to clSetKernelArg. But when in - // SYCL_PREFER_UR=1, the MemArg is a cl_mem wrapped by ur_mem_object_t, - // which will need to unpack by calling piextKernelSetArgMemObj, which calls - // urKernelSetArgMemObj. If we call piKernelSetArg in such case, the - // clSetKernelArg will report CL_INVALID_MEM_OBJECT since the arg_value is - // not a valid cl_mem object but a ur_mem_object_t object. - if (Context.get_backend() == backend::opencl && - !Plugin->hasBackend(backend::all)) { - // clSetKernelArg (corresponding to piKernelSetArg) returns an error - // when MemArg is null, which is the case when zero-sized buffers are - // handled. Below assignment provides later call to clSetKernelArg with - // acceptable arguments. - if (!MemArg) - MemArg = sycl::detail::pi::PiMem(); - - Plugin->call( - Kernel, NextTrueIndex, sizeof(sycl::detail::pi::PiMem), &MemArg); - } else { - pi_mem_obj_property MemObjData{}; - MemObjData.mem_access = AccessModeToPi(Req->MAccessMode); - MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS; - Plugin->call(Kernel, NextTrueIndex, - &MemObjData, &MemArg); - } + ur_kernel_arg_mem_obj_properties_t MemObjData{}; + MemObjData.stype = UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES; + MemObjData.memoryAccess = AccessModeToUr(Req->MAccessMode); + Plugin->call(urKernelSetArgMemObj, Kernel, NextTrueIndex, &MemObjData, + MemArg); break; } case kernel_param_kind_t::kind_std_layout: { - Plugin->call(Kernel, NextTrueIndex, Arg.MSize, - Arg.MPtr); + if (Arg.MPtr) { + Plugin->call(urKernelSetArgValue, Kernel, NextTrueIndex, Arg.MSize, + nullptr, Arg.MPtr); + } else { + Plugin->call(urKernelSetArgLocal, Kernel, NextTrueIndex, Arg.MSize, + nullptr); + } + break; } case kernel_param_kind_t::kind_sampler: { sampler *SamplerPtr = (sampler *)Arg.MPtr; - sycl::detail::pi::PiSampler Sampler = - detail::getSyclObjImpl(*SamplerPtr)->getOrCreateSampler(Context); - Plugin->call(Kernel, NextTrueIndex, - &Sampler); + ur_sampler_handle_t Sampler = + (ur_sampler_handle_t)detail::getSyclObjImpl(*SamplerPtr) + ->getOrCreateSampler(Context); + Plugin->call(urKernelSetArgSampler, Kernel, NextTrueIndex, nullptr, + Sampler); break; } case kernel_param_kind_t::kind_pointer: { - Plugin->call(Kernel, NextTrueIndex, - Arg.MSize, Arg.MPtr); + // We need to de-rerence this to get the actual USM allocation - that's the + // pointer UR is expecting. + const void *Ptr = *static_cast(Arg.MPtr); + Plugin->call(urKernelSetArgPointer, Kernel, NextTrueIndex, nullptr, Ptr); break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { assert(DeviceImageImpl != nullptr); - sycl::detail::pi::PiMem SpecConstsBuffer = + ur_mem_handle_t SpecConstsBuffer = DeviceImageImpl->get_spec_const_buffer_ref(); - // Avoid taking an address of nullptr - sycl::detail::pi::PiMem *SpecConstsBufferArg = - SpecConstsBuffer ? &SpecConstsBuffer : nullptr; - - pi_mem_obj_property MemObjData{}; - MemObjData.mem_access = PI_ACCESS_READ_ONLY; - MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS; - Plugin->call( - Kernel, NextTrueIndex, &MemObjData, SpecConstsBufferArg); + + ur_kernel_arg_mem_obj_properties_t MemObjProps{}; + MemObjProps.pNext = nullptr; + MemObjProps.stype = UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES; + MemObjProps.memoryAccess = UR_MEM_FLAG_READ_ONLY; + Plugin->call(urKernelSetArgMemObj, Kernel, NextTrueIndex, &MemObjProps, + SpecConstsBuffer); break; } case kernel_param_kind_t::kind_invalid: throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), "Invalid kernel param kind " + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); break; } } -static pi_result SetKernelParamsAndLaunch( +static ur_result_t SetKernelParamsAndLaunch( const QueueImplPtr &Queue, std::vector &Args, const std::shared_ptr &DeviceImageImpl, - sycl::detail::pi::PiKernel Kernel, NDRDescT &NDRDesc, - std::vector &RawEvents, + ur_kernel_handle_t Kernel, NDRDescT &NDRDesc, + std::vector &RawEvents, const detail::EventImplPtr &OutEventImpl, const KernelArgMask *EliminatedArgMask, const std::function &getMemAllocationFunc, @@ -2408,10 +2392,11 @@ static pi_result SetKernelParamsAndLaunch( if (HasLocalSize) LocalSize = &NDRDesc.LocalSize[0]; else { - Plugin->call( - Kernel, Queue->getDeviceImplPtr()->getHandleRef(), - PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize), - RequiredWGSize, /* param_value_size_ret = */ nullptr); + Plugin->call(urKernelGetGroupInfo, Kernel, + Queue->getDeviceImplPtr()->getHandleRef(), + UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(RequiredWGSize), RequiredWGSize, + /* pPropSizeRet = */ nullptr); const bool EnforcedLocalSize = (RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 || @@ -2422,42 +2407,40 @@ static pi_result SetKernelParamsAndLaunch( if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); if (KernelUsesClusterLaunch) { - std::vector property_list; + std::vector property_list; - pi_launch_property_value launch_property_value_cluster_range; - launch_property_value_cluster_range.cluster_dims[0] = + ur_exp_launch_property_value_t launch_property_value_cluster_range; + launch_property_value_cluster_range.clusterDim[0] = NDRDesc.ClusterDimensions[0]; - launch_property_value_cluster_range.cluster_dims[1] = + launch_property_value_cluster_range.clusterDim[1] = NDRDesc.ClusterDimensions[1]; - launch_property_value_cluster_range.cluster_dims[2] = + launch_property_value_cluster_range.clusterDim[2] = NDRDesc.ClusterDimensions[2]; - property_list.push_back( - {pi_launch_property_id::PI_LAUNCH_PROPERTY_CLUSTER_DIMENSION, - launch_property_value_cluster_range}); + property_list.push_back({UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION, + launch_property_value_cluster_range}); if (IsCooperative) { - pi_launch_property_value launch_property_value_cooperative; + ur_exp_launch_property_value_t launch_property_value_cooperative; launch_property_value_cooperative.cooperative = 1; - property_list.push_back( - {pi_launch_property_id::PI_LAUNCH_PROPERTY_COOPERATIVE, - launch_property_value_cooperative}); + property_list.push_back({UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE, + launch_property_value_cooperative}); } - return Plugin->call_nocheck( - Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalSize[0], - LocalSize, property_list.size(), property_list.data(), RawEvents.size(), + return Plugin->call_nocheck( + urEnqueueKernelLaunchCustomExp, Queue->getHandleRef(), Kernel, + NDRDesc.Dims, &NDRDesc.GlobalSize[0], LocalSize, property_list.size(), + property_list.data(), RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], OutEventImpl ? &OutEventImpl->getHandleRef() : nullptr); } - pi_result Error = + ur_result_t Error = [&](auto... Args) { if (IsCooperative) { - return Plugin - ->call_nocheck( - Args...); + return Plugin->call_nocheck(urEnqueueCooperativeKernelLaunchExp, + Args...); } - return Plugin->call_nocheck(Args...); + return Plugin->call_nocheck(urEnqueueKernelLaunch, Args...); }(Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], LocalSize, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], @@ -2465,18 +2448,18 @@ static pi_result SetKernelParamsAndLaunch( return Error; } -pi_int32 enqueueImpCommandBufferKernel( +ur_result_t enqueueImpCommandBufferKernel( context Ctx, DeviceImplPtr DeviceImpl, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, - std::vector &SyncPoints, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint, - sycl::detail::pi::PiExtCommandBufferCommand *OutCommand, + std::vector &SyncPoints, + ur_exp_command_buffer_sync_point_t *OutSyncPoint, + ur_exp_command_buffer_command_handle_t *OutCommand, const std::function &getMemAllocationFunc) { auto ContextImpl = sycl::detail::getSyclObjImpl(Ctx); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); - pi_kernel PiKernel = nullptr; - pi_program PiProgram = nullptr; + ur_kernel_handle_t UrKernel = nullptr; + ur_program_handle_t UrProgram = nullptr; std::shared_ptr SyclKernelImpl = nullptr; std::shared_ptr DeviceImageImpl = nullptr; @@ -2496,24 +2479,24 @@ pi_int32 enqueueImpCommandBufferKernel( kernel SyclKernel = KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); - PiKernel = SyclKernelImpl->getHandleRef(); + UrKernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = SyclKernelImpl->getDeviceImage(); - PiProgram = DeviceImageImpl->get_program_ref(); + UrProgram = DeviceImageImpl->get_ur_program_ref(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else if (Kernel != nullptr) { - PiKernel = Kernel->getHandleRef(); - PiProgram = Kernel->getProgramRef(); + UrKernel = Kernel->getHandleRef(); + UrProgram = Kernel->getProgramRef(); EliminatedArgMask = Kernel->getKernelArgMask(); } else { - std::tie(PiKernel, std::ignore, EliminatedArgMask, PiProgram) = + std::tie(UrKernel, std::ignore, EliminatedArgMask, UrProgram) = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName); } - auto SetFunc = [&Plugin, &PiKernel, &DeviceImageImpl, &Ctx, + auto SetFunc = [&Plugin, &UrKernel, &DeviceImageImpl, &Ctx, &getMemAllocationFunc](sycl::detail::ArgDesc &Arg, size_t NextTrueIndex) { - sycl::detail::SetArgBasedOnType(Plugin, PiKernel, DeviceImageImpl, + sycl::detail::SetArgBasedOnType(Plugin, UrKernel, DeviceImageImpl, getMemAllocationFunc, Ctx, Arg, NextTrueIndex); }; @@ -2535,11 +2518,10 @@ pi_int32 enqueueImpCommandBufferKernel( if (HasLocalSize) LocalSize = &NDRDesc.LocalSize[0]; else { - Plugin->call( - PiKernel, DeviceImpl->getHandleRef(), - PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize), - RequiredWGSize, - /* param_value_size_ret = */ nullptr); + Plugin->call(urKernelGetGroupInfo, UrKernel, DeviceImpl->getHandleRef(), + UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(RequiredWGSize), RequiredWGSize, + /* pPropSizeRet = */ nullptr); const bool EnforcedLocalSize = (RequiredWGSize[0] != 0 || RequiredWGSize[1] != 0 || @@ -2548,22 +2530,21 @@ pi_int32 enqueueImpCommandBufferKernel( LocalSize = RequiredWGSize; } - pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferNDRangeKernel>( - CommandBuffer, PiKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], - &NDRDesc.GlobalSize[0], LocalSize, SyncPoints.size(), - SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint, - OutCommand); + ur_result_t Res = Plugin->call_nocheck( + urCommandBufferAppendKernelLaunchExp, CommandBuffer, UrKernel, + NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], LocalSize, + SyncPoints.size(), SyncPoints.size() ? SyncPoints.data() : nullptr, + OutSyncPoint, OutCommand); if (!SyclKernelImpl && !Kernel) { - Plugin->call(PiKernel); - Plugin->call(PiProgram); + Plugin->call(urKernelRelease, UrKernel); + Plugin->call(urProgramRelease, UrProgram); } - if (Res != pi_result::PI_SUCCESS) { + if (Res != UR_RESULT_SUCCESS) { const device_impl &DeviceImplem = *(DeviceImpl); detail::enqueue_kernel_launch::handleErrorOrWarning(Res, DeviceImplem, - PiKernel, NDRDesc); + UrKernel, NDRDesc); } return Res; @@ -2573,20 +2554,18 @@ void enqueueImpKernel( const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector &Args, const std::shared_ptr &KernelBundleImplPtr, const std::shared_ptr &MSyclKernel, - const std::string &KernelName, - std::vector &RawEvents, + const std::string &KernelName, std::vector &RawEvents, const detail::EventImplPtr &OutEventImpl, const std::function &getMemAllocationFunc, - sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, - const bool KernelIsCooperative, const bool KernelUsesClusterLaunch, - const RTDeviceBinaryImage *BinImage) { + ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, + const bool KernelUsesClusterLaunch, const RTDeviceBinaryImage *BinImage) { assert(Queue && "Kernel submissions should have an associated queue"); // Run OpenCL kernel auto ContextImpl = Queue->getContextImplPtr(); auto DeviceImpl = Queue->getDeviceImplPtr(); - sycl::detail::pi::PiKernel Kernel = nullptr; + ur_kernel_handle_t Kernel = nullptr; std::mutex *KernelMutex = nullptr; - sycl::detail::pi::PiProgram Program = nullptr; + ur_program_handle_t Program = nullptr; const KernelArgMask *EliminatedArgMask; std::shared_ptr SyclKernelImpl; @@ -2608,7 +2587,7 @@ void enqueueImpKernel( Kernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = SyclKernelImpl->getDeviceImage(); - Program = DeviceImageImpl->get_program_ref(); + Program = DeviceImageImpl->get_ur_program_ref(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); KernelMutex = SyclKernelImpl->getCacheMutex(); @@ -2621,7 +2600,7 @@ void enqueueImpKernel( // Non-cacheable kernels use mutexes from kernel_impls. // TODO this can still result in a race condition if multiple SYCL // kernels are created with the same native handle. To address this, - // we need to either store and use a pi_native_handle -> mutex map or + // we need to either store and use a ur_native_handle_t -> mutex map or // reuse and return existing SYCL kernels from make_native to avoid // their duplication in such cases. KernelMutex = &MSyclKernel->getNoncacheableEnqueueMutex(); @@ -2633,12 +2612,12 @@ void enqueueImpKernel( } // We may need more events for the launch, so we make another reference. - std::vector &EventsWaitList = RawEvents; + std::vector &EventsWaitList = RawEvents; // Initialize device globals associated with this. - std::vector DeviceGlobalInitEvents = + std::vector DeviceGlobalInitEvents = ContextImpl->initializeDeviceGlobals(Program, Queue); - std::vector EventsWithDeviceGlobalInits; + std::vector EventsWithDeviceGlobalInits; if (!DeviceGlobalInitEvents.empty()) { EventsWithDeviceGlobalInits.reserve(RawEvents.size() + DeviceGlobalInitEvents.size()); @@ -2650,7 +2629,7 @@ void enqueueImpKernel( EventsWaitList = EventsWithDeviceGlobalInits; } - pi_result Error = PI_SUCCESS; + ur_result_t Error = UR_RESULT_SUCCESS; { // When KernelMutex is null, this means that in-memory caching is // disabled, which means that kernel object is not shared, so no locking @@ -2660,12 +2639,12 @@ void enqueueImpKernel( // Set SLM/Cache configuration for the kernel if non-default value is // provided. - if (KernelCacheConfig == PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_SLM || - KernelCacheConfig == PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_DATA) { + if (KernelCacheConfig == UR_KERNEL_CACHE_CONFIG_LARGE_SLM || + KernelCacheConfig == UR_KERNEL_CACHE_CONFIG_LARGE_DATA) { const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Kernel, PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG, - sizeof(sycl::detail::pi::PiKernelCacheConfig), &KernelCacheConfig); + Plugin->call( + urKernelSetExecInfo, Kernel, UR_KERNEL_EXEC_INFO_CACHE_CONFIG, + sizeof(ur_kernel_cache_config_t), nullptr, &KernelCacheConfig); } Error = SetKernelParamsAndLaunch( @@ -2675,11 +2654,11 @@ void enqueueImpKernel( const PluginPtr &Plugin = Queue->getPlugin(); if (!SyclKernelImpl && !MSyclKernel) { - Plugin->call(Kernel); - Plugin->call(Program); + Plugin->call(urKernelRelease, Kernel); + Plugin->call(urProgramRelease, Program); } } - if (PI_SUCCESS != Error) { + if (UR_RESULT_SUCCESS != Error) { // If we have got non-success error code, let's analyze it to emit nice // exception explaining what was wrong const device_impl &DeviceImpl = *(Queue->getDeviceImplPtr()); @@ -2688,20 +2667,21 @@ void enqueueImpKernel( } } -pi_int32 -enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, - bool blocking, void *ptr, size_t size, - std::vector &RawEvents, - const detail::EventImplPtr &OutEventImpl, bool read) { +ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue, + const std::string &PipeName, bool blocking, + void *ptr, size_t size, + std::vector &RawEvents, + const detail::EventImplPtr &OutEventImpl, + bool read) { assert(Queue && "ReadWrite host pipe submissions should have an associated queue"); detail::HostPipeMapEntry *hostPipeEntry = ProgramManager::getInstance().getHostPipeEntry(PipeName); - sycl::detail::pi::PiProgram Program = nullptr; + ur_program_handle_t Program = nullptr; device Device = Queue->get_device(); ContextImplPtr ContextImpl = Queue->getContextImplPtr(); - std::optional CachedProgram = + std::optional CachedProgram = ContextImpl->getProgramForHostPipe(Device, hostPipeEntry); if (CachedProgram) Program = *CachedProgram; @@ -2713,37 +2693,34 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, Queue->get_device()); device_image_plain BuiltImage = ProgramManager::getInstance().build(devImgPlain, {Device}, {}); - Program = getSyclObjImpl(BuiltImage)->get_program_ref(); + Program = getSyclObjImpl(BuiltImage)->get_ur_program_ref(); } assert(Program && "Program for this hostpipe is not compiled."); - // Get plugin for calling opencl functions const PluginPtr &Plugin = Queue->getPlugin(); - pi_queue pi_q = Queue->getHandleRef(); - pi_result Error; + ur_queue_handle_t ur_q = Queue->getHandleRef(); + ur_result_t Error; auto OutEvent = OutEventImpl ? &OutEventImpl->getHandleRef() : nullptr; if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); if (read) { - Error = - Plugin->call_nocheck( - pi_q, Program, PipeName.c_str(), blocking, ptr, size, - RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], - OutEvent); + Error = Plugin->call_nocheck( + urEnqueueReadHostPipe, ur_q, Program, PipeName.c_str(), blocking, ptr, + size, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], + OutEvent); } else { - Error = - Plugin - ->call_nocheck( - pi_q, Program, PipeName.c_str(), blocking, ptr, size, - RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], - OutEvent); + Error = Plugin->call_nocheck( + urEnqueueWriteHostPipe, ur_q, Program, PipeName.c_str(), blocking, ptr, + size, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], + OutEvent); } + return Error; } -pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { +ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { assert(MQueue && "Command buffer enqueue should have an associated queue"); // Wait on host command dependencies waitForPreparedHostEvents(); @@ -2753,23 +2730,21 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { // them, e.g. initial copies from host to device std::vector EventImpls = MPreparedDepsEvents; flushCrossQueueDeps(EventImpls, MWorkerQueue); - std::vector RawEvents = getPiEvents(EventImpls); + std::vector RawEvents = getUrEvents(EventImpls); if (!RawEvents.empty()) { - const PluginPtr &Plugin = MQueue->getPlugin(); - Plugin->call(RawEvents.size(), &RawEvents[0]); + MQueue->getPlugin()->call(urEventWait, RawEvents.size(), &RawEvents[0]); } - // We can omit creating a PI event and create a "discarded" event if either + // We can omit creating a UR event and create a "discarded" event if either // the queue has the discard property or the command has been explicitly // marked as not needing an event, e.g. if the user did not ask for one, and - // if the queue supports discarded PI event and there are no requirements. - bool DiscardPiEvent = (MQueue->MDiscardEvents || !MEventNeeded) && + // if the queue supports discarded UR event and there are no requirements. + bool DiscardUrEvent = (MQueue->MDiscardEvents || !MEventNeeded) && MQueue->supportsDiscardingPiEvents() && MCommandGroup->getRequirements().size() == 0; - sycl::detail::pi::PiEvent *Event = - DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); - sycl::detail::pi::PiExtSyncPoint OutSyncPoint; - sycl::detail::pi::PiExtCommandBufferCommand OutCommand = nullptr; + ur_event_handle_t *Event = DiscardUrEvent ? nullptr : &MEvent->getHandleRef(); + ur_exp_command_buffer_sync_point_t OutSyncPoint; + ur_exp_command_buffer_command_handle_t OutCommand = nullptr; switch (MCommandGroup->getType()) { case CGType::Kernel: { CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); @@ -2803,7 +2778,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MQueue->getContextImplPtr(), Copy->getSrc(), MCommandBuffer, Copy->getLength(), Copy->getDst(), MSyncPointDeps, &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CopyAccToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); @@ -2822,7 +2797,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { ReqDst->MOffset, ReqDst->MElemSize, std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CopyAccToPtr: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); @@ -2837,7 +2812,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { /*DstOffset=*/{0, 0, 0}, Req->MElemSize, std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CopyPtrToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); @@ -2851,7 +2826,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::Fill: { CGFill *Fill = (CGFill *)MCommandGroup.get(); @@ -2864,7 +2839,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { Fill->MPattern.data(), Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); @@ -2873,7 +2848,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { Fill->getLength(), Fill->getPattern(), std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::PrefetchUSM: { CGPrefetchUSM *Prefetch = (CGPrefetchUSM *)MCommandGroup.get(); @@ -2881,7 +2856,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MQueue->getContextImplPtr(), MCommandBuffer, Prefetch->getDst(), Prefetch->getLength(), std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::AdviseUSM: { CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get(); @@ -2890,7 +2865,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { Advise->getLength(), Advise->getAdvice(), std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } default: @@ -2899,7 +2874,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { } } -pi_int32 ExecCGCommand::enqueueImp() { +ur_result_t ExecCGCommand::enqueueImp() { if (MCommandBuffer) { return enqueueImpCommandBuffer(); } else { @@ -2907,30 +2882,30 @@ pi_int32 ExecCGCommand::enqueueImp() { } } -pi_int32 ExecCGCommand::enqueueImpQueue() { +ur_result_t ExecCGCommand::enqueueImpQueue() { if (getCG().getType() != CGType::CodeplayHostTask) waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - auto RawEvents = getPiEvents(EventImpls); + auto RawEvents = getUrEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); - // We can omit creating a PI event and create a "discarded" event if either + // We can omit creating a UR event and create a "discarded" event if either // the queue has the discard property or the command has been explicitly // marked as not needing an event, e.g. if the user did not ask for one, and - // if the queue supports discarded PI event and there are no requirements. - bool DiscardPiEvent = MQueue && (MQueue->MDiscardEvents || !MEventNeeded) && + // if the queue supports discarded UR event and there are no requirements. + bool DiscardUrEvent = MQueue && (MQueue->MDiscardEvents || !MEventNeeded) && MQueue->supportsDiscardingPiEvents() && MCommandGroup->getRequirements().size() == 0; - sycl::detail::pi::PiEvent *Event = - DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); - detail::EventImplPtr EventImpl = DiscardPiEvent ? nullptr : MEvent; + + ur_event_handle_t *Event = DiscardUrEvent ? nullptr : &MEvent->getHandleRef(); + detail::EventImplPtr EventImpl = DiscardUrEvent ? nullptr : MEvent; switch (MCommandGroup->getType()) { case CGType::UpdateHost: { throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), "Update host should be handled by the Scheduler. " + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); } case CGType::CopyAccToPtr: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); @@ -2944,7 +2919,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CopyPtrToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); @@ -2958,7 +2933,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CopyAccToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); @@ -2976,7 +2951,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::Fill: { CGFill *Fill = (CGFill *)MCommandGroup.get(); @@ -2989,7 +2964,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::Kernel: { assert(MQueue && "Kernel submissions should have an associated queue"); @@ -3031,7 +3006,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { ExecKernel->MKernelIsCooperative, ExecKernel->MKernelUsesClusterLaunch, BinImage); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CopyUSM: { CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get(); @@ -3039,7 +3014,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Copy->getDst(), std::move(RawEvents), Event, MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); @@ -3047,7 +3022,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Fill->getPattern(), std::move(RawEvents), Event, MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::PrefetchUSM: { CGPrefetchUSM *Prefetch = (CGPrefetchUSM *)MCommandGroup.get(); @@ -3055,7 +3030,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Prefetch->getLength(), std::move(RawEvents), Event, MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::AdviseUSM: { CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get(); @@ -3063,7 +3038,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Advise->getAdvice(), std::move(RawEvents), Event, MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::Copy2DUSM: { CGCopy2DUSM *Copy = (CGCopy2DUSM *)MCommandGroup.get(); @@ -3071,7 +3046,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Copy->getDst(), Copy->getDstPitch(), Copy->getWidth(), Copy->getHeight(), std::move(RawEvents), Event, MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::Fill2DUSM: { CGFill2DUSM *Fill = (CGFill2DUSM *)MCommandGroup.get(); @@ -3079,7 +3054,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Fill->getWidth(), Fill->getHeight(), Fill->getPattern(), std::move(RawEvents), Event, MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::Memset2DUSM: { CGMemset2DUSM *Memset = (CGMemset2DUSM *)MCommandGroup.get(); @@ -3087,7 +3062,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Memset->getWidth(), Memset->getHeight(), Memset->getValue(), std::move(RawEvents), Event, MEvent); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CodeplayHostTask: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); @@ -3105,29 +3080,29 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { default: throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), "Unsupported arg type " + - codeToString(PI_ERROR_INVALID_VALUE)); + codeToString(UR_RESULT_ERROR_INVALID_VALUE)); } } std::vector ReqToMem; - std::vector ReqPiMem; + std::vector ReqUrMem; if (HostTask->MHostTask->isInteropTask()) { // Extract the Mem Objects for all Requirements, to ensure they are // available if a user asks for them inside the interop task scope const std::vector &HandlerReq = HostTask->getRequirements(); - auto ReqToMemConv = [&ReqToMem, &ReqPiMem, HostTask](Requirement *Req) { + auto ReqToMemConv = [&ReqToMem, &ReqUrMem, HostTask](Requirement *Req) { const std::vector &AllocaCmds = Req->MSYCLMemObj->MRecord->MAllocaCommands; for (AllocaCommandBase *AllocaCmd : AllocaCmds) if (getContext(HostTask->MQueue) == getContext(AllocaCmd->getQueue())) { - auto MemArg = - reinterpret_cast(AllocaCmd->getMemAllocation()); + auto MemArg = reinterpret_cast( + AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); - ReqPiMem.emplace_back(MemArg); + ReqUrMem.emplace_back(MemArg); return; } @@ -3138,7 +3113,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { throw sycl::exception( sycl::make_error_code(sycl::errc::runtime), "Can't get memory object due to no allocation available " + - codeToString(PI_ERROR_INVALID_MEM_OBJECT)); + codeToString(UR_RESULT_ERROR_INVALID_MEM_OBJECT)); }; std::for_each(std::begin(HandlerReq), std::end(HandlerReq), ReqToMemConv); std::sort(std::begin(ReqToMem), std::end(ReqToMem)); @@ -3149,11 +3124,11 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { copySubmissionCodeLocation(); queue_impl::getThreadPool().submit( - DispatchHostTask(this, std::move(ReqToMem), std::move(ReqPiMem))); + DispatchHostTask(this, std::move(ReqToMem), std::move(ReqUrMem))); MShouldCompleteEventIfPossible = false; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::EnqueueNativeCommand: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); @@ -3175,7 +3150,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } std::vector ReqToMem; - std::vector ReqMems; + std::vector ReqMems; if (HostTask->MHostTask->isInteropTask()) { // Extract the Mem Objects for all Requirements, to ensure they are @@ -3189,8 +3164,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { for (AllocaCommandBase *AllocaCmd : AllocaCmds) if (HostTask->MQueue->getContextImplPtr() == AllocaCmd->getQueue()->getContextImplPtr()) { - auto MemArg = - reinterpret_cast(AllocaCmd->getMemAllocation()); + auto MemArg = reinterpret_cast( + AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); ReqMems.emplace_back(MemArg); @@ -3214,62 +3189,64 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { HostTask->MQueue->getContextImplPtr()}, HostTask->MHostTask->MInteropTask}; - bool NativeCommandSupport = false; - MQueue->getPlugin()->call( + ur_bool_t NativeCommandSupport = false; + MQueue->getPlugin()->call( + urDeviceGetInfo, detail::getSyclObjImpl(MQueue->get_device())->getHandleRef(), - PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT, + UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP, sizeof(NativeCommandSupport), &NativeCommandSupport, nullptr); assert(NativeCommandSupport && "ext_codeplay_enqueue_native_command is not " "supported on this device"); - MQueue->getPlugin()->call( - MQueue->getHandleRef(), InteropFreeFunc, &CustomOpData, ReqMems.size(), - ReqMems.data(), RawEvents.size(), RawEvents.data(), Event); + MQueue->getPlugin()->call(urEnqueueNativeCommandExp, MQueue->getHandleRef(), + InteropFreeFunc, &CustomOpData, ReqMems.size(), + ReqMems.data(), nullptr, RawEvents.size(), + RawEvents.data(), Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::Barrier: { assert(MQueue && "Barrier submission should have an associated queue"); const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); - Plugin->call( - MQueue->getHandleRef(), 0, nullptr, Event); + Plugin->call(urEnqueueEventsWaitWithBarrier, MQueue->getHandleRef(), 0, + nullptr, Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::BarrierWaitlist: { assert(MQueue && "Barrier submission should have an associated queue"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; - std::vector PiEvents = - getPiEventsBlocking(Events); - if (PiEvents.empty()) { + std::vector UrEvents = getUrEventsBlocking(Events); + if (UrEvents.empty()) { // If Events is empty, then the barrier has no effect. - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); - Plugin->call( - MQueue->getHandleRef(), PiEvents.size(), &PiEvents[0], Event); + Plugin->call(urEnqueueEventsWaitWithBarrier, MQueue->getHandleRef(), + UrEvents.size(), &UrEvents[0], Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::ProfilingTag: { - const PluginPtr &Plugin = MQueue->getPlugin(); + const auto &Plugin = MQueue->getPlugin(); // If the queue is not in-order, we need to insert a barrier. This barrier // does not need output events as it will implicitly enforce the following // enqueue is blocked until it finishes. if (!MQueue->isInOrder()) - Plugin->call( - MQueue->getHandleRef(), /*num_events_in_wait_list=*/0, - /*event_wait_list=*/nullptr, /*event=*/nullptr); + Plugin->call(urEnqueueEventsWaitWithBarrier, MQueue->getHandleRef(), + /*num_events_in_wait_list=*/0, + /*event_wait_list=*/nullptr, /*event=*/nullptr); - Plugin->call( - MQueue->getHandleRef(), /*blocking=*/false, - /*num_events_in_wait_list=*/0, /*event_wait_list=*/nullptr, Event); + Plugin->call(urEnqueueTimestampRecordingExp, MQueue->getHandleRef(), + /*blocking=*/false, + /*num_events_in_wait_list=*/0, /*event_wait_list=*/nullptr, + Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CopyToDeviceGlobal: { CGCopyToDeviceGlobal *Copy = (CGCopyToDeviceGlobal *)MCommandGroup.get(); @@ -3278,7 +3255,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Copy->getNumBytes(), Copy->getOffset(), Copy->getSrc(), std::move(RawEvents), Event, MEvent); - return CL_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::CopyFromDeviceGlobal: { CGCopyFromDeviceGlobal *Copy = @@ -3288,7 +3265,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Copy->getNumBytes(), Copy->getOffset(), Copy->getDest(), std::move(RawEvents), Event, MEvent); - return CL_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::ReadWriteHostPipe: { CGReadWriteHostPipe *ExecReadWriteHostPipe = @@ -3312,11 +3289,10 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { static_cast(MCommandGroup.get()); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); - return MQueue->getPlugin() - ->call_nocheck( - CmdBufferCG->MCommandBuffer, MQueue->getHandleRef(), - RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], - Event); + return MQueue->getPlugin()->call_nocheck( + urCommandBufferEnqueueExp, CmdBufferCG->MCommandBuffer, + MQueue->getHandleRef(), RawEvents.size(), + RawEvents.empty() ? nullptr : &RawEvents[0], Event); } case CGType::CopyImage: { CGCopyImage *Copy = (CGCopyImage *)MCommandGroup.get(); @@ -3326,43 +3302,41 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Copy->getDstDesc(), Copy->getSrcFormat(), Copy->getDstFormat(), Copy->getCopyFlags(), Copy->getSrcOffset(), Copy->getDstOffset(), Copy->getCopyExtent(), std::move(RawEvents), Event); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::SemaphoreWait: { assert(MQueue && "Semaphore wait submissions should have an associated queue"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); - const detail::PluginPtr &Plugin = MQueue->getPlugin(); auto OptWaitValue = SemWait->getWaitValue(); uint64_t WaitValue = OptWaitValue.has_value() ? OptWaitValue.value() : 0; - Plugin->call( - MQueue->getHandleRef(), SemWait->getInteropSemaphoreHandle(), - OptWaitValue.has_value(), WaitValue, 0, nullptr, nullptr); + Plugin->call(urBindlessImagesWaitExternalSemaphoreExp, + MQueue->getHandleRef(), SemWait->getInteropSemaphoreHandle(), + OptWaitValue.has_value(), WaitValue, 0, nullptr, nullptr); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::SemaphoreSignal: { assert(MQueue && "Semaphore signal submissions should have an associated queue"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); - const detail::PluginPtr &Plugin = MQueue->getPlugin(); auto OptSignalValue = SemSignal->getSignalValue(); uint64_t SignalValue = OptSignalValue.has_value() ? OptSignalValue.value() : 0; - Plugin->call( - MQueue->getHandleRef(), SemSignal->getInteropSemaphoreHandle(), - OptSignalValue.has_value(), SignalValue, 0, nullptr, nullptr); + Plugin->call(urBindlessImagesSignalExternalSemaphoreExp, + MQueue->getHandleRef(), SemSignal->getInteropSemaphoreHandle(), + OptSignalValue.has_value(), SignalValue, 0, nullptr, nullptr); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } case CGType::None: throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), "CG type not implemented. " + - codeToString(PI_ERROR_INVALID_OPERATION)); + codeToString(UR_RESULT_ERROR_INVALID_OPERATION)); } - return PI_ERROR_INVALID_OPERATION; + return UR_RESULT_ERROR_INVALID_OPERATION; } bool ExecCGCommand::producesPiEvent() const { @@ -3402,14 +3376,14 @@ std::vector &KernelFusionCommand::getFusionList() { bool KernelFusionCommand::producesPiEvent() const { return false; } -pi_int32 KernelFusionCommand::enqueueImp() { +ur_result_t KernelFusionCommand::enqueueImp() { waitForPreparedHostEvents(); waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef()); // We need to release the queue here because KernelFusionCommands are // held back by the scheduler thus prevent the deallocation of the queue. resetQueue(); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void KernelFusionCommand::setFusionStatus(FusionStatus Status) { @@ -3518,10 +3492,10 @@ UpdateCommandBufferCommand::UpdateCommandBufferCommand( : Command(CommandType::UPDATE_CMD_BUFFER, Queue), MGraph(Graph), MNodes(Nodes) {} -pi_int32 UpdateCommandBufferCommand::enqueueImp() { +ur_result_t UpdateCommandBufferCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); + ur_event_handle_t &Event = MEvent->getHandleRef(); Command::waitForEvents(MQueue, EventImpls, Event); for (auto &Node : MNodes) { @@ -3547,7 +3521,7 @@ pi_int32 UpdateCommandBufferCommand::enqueueImp() { MGraph->updateImpl(Node); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } void UpdateCommandBufferCommand::printDot(std::ostream &Stream) const { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 463ddecd63b77..6c5ba62e3e317 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -69,14 +69,14 @@ struct EnqueueResultT { SyclEnqueueFailed }; EnqueueResultT(ResultT Result = SyclEnqueueSuccess, Command *Cmd = nullptr, - pi_int32 ErrCode = PI_SUCCESS) + ur_result_t ErrCode = UR_RESULT_SUCCESS) : MResult(Result), MCmd(Cmd), MErrCode(ErrCode) {} /// Indicates the result of enqueueing. ResultT MResult; /// Pointer to the command which failed to enqueue. Command *MCmd; /// Error code which is set when enqueueing fails. - pi_int32 MErrCode; + ur_result_t MErrCode; }; /// Dependency between two commands. @@ -124,9 +124,10 @@ class Command { UPDATE_CMD_BUFFER }; - Command(CommandType Type, QueueImplPtr Queue, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, - const std::vector &SyncPoints = {}); + Command( + CommandType Type, QueueImplPtr Queue, + ur_exp_command_buffer_handle_t CommandBuffer = nullptr, + const std::vector &SyncPoints = {}); /// \param NewDep dependency to be added /// \param ToCleanUp container for commands that can be cleaned up. @@ -196,9 +197,9 @@ class Command { std::optional AccMode = std::nullopt); /// Creates an edge event when the dependency is an event. void emitEdgeEventForEventDependence(Command *Cmd, - sycl::detail::pi::PiEvent &EventAddr); + ur_event_handle_t &EventAddr); /// Creates a signal event with the enqueued kernel event handle. - void emitEnqueuedEventSignal(sycl::detail::pi::PiEvent &PiEventAddr); + void emitEnqueuedEventSignal(ur_event_handle_t &UrEventAddr); /// Create a trace event of node_create type; this must be guarded by a /// check for xptiTraceEnabled(). /// Post Condition: MTraceEvent will be set to the event created. @@ -227,7 +228,7 @@ class Command { /// differ from the context of MQueue for memory copy commands. virtual ContextImplPtr getWorkerContext() const; - /// Returns true iff the command produces a PI event on non-host devices. + /// Returns true iff the command produces a UR event on non-host devices. virtual bool producesPiEvent() const; /// Returns true iff this command can be freed by post enqueue cleanup. @@ -236,15 +237,15 @@ class Command { /// Returns true iff this command is ready to be submitted for cleanup. virtual bool readyForCleanup() const; - /// Collect PI events from EventImpls and filter out some of them in case of + /// Collect UR events from EventImpls and filter out some of them in case of /// in order queue - std::vector - getPiEvents(const std::vector &EventImpls) const; - /// Collect PI events from EventImpls and filter out some of them in case of - /// in order queue. Does blocking enqueue if event is expected to produce pi + std::vector + getUrEvents(const std::vector &EventImpls) const; + /// Collect UR events from EventImpls and filter out some of them in case of + /// in order queue. Does blocking enqueue if event is expected to produce ur /// event but has empty native handle. - std::vector - getPiEventsBlocking(const std::vector &EventImpls) const; + std::vector + getUrEventsBlocking(const std::vector &EventImpls) const; bool isHostTask() const; @@ -261,7 +262,7 @@ class Command { std::vector &MPreparedHostDepsEvents; void waitForEvents(QueueImplPtr Queue, std::vector &RawEvents, - sycl::detail::pi::PiEvent &Event); + ur_event_handle_t &Event); void waitForPreparedHostEvents() const; @@ -281,7 +282,7 @@ class Command { std::vector &ToCleanUp); /// Private interface. Derived classes should implement this method. - virtual pi_int32 enqueueImp() = 0; + virtual ur_result_t enqueueImp() = 0; /// The type of the command. CommandType MType; @@ -398,15 +399,15 @@ class Command { protected: /// Gets the command buffer (if any) associated with this command. - sycl::detail::pi::PiExtCommandBuffer getCommandBuffer() const { + ur_exp_command_buffer_handle_t getCommandBuffer() const { return MCommandBuffer; } /// CommandBuffer which will be used to submit to instead of the queue, if /// set. - sycl::detail::pi::PiExtCommandBuffer MCommandBuffer; + ur_exp_command_buffer_handle_t MCommandBuffer; /// List of sync points for submissions to a command buffer. - std::vector MSyncPointDeps; + std::vector MSyncPointDeps; }; /// The empty command does nothing during enqueue. The task can be used to @@ -425,7 +426,7 @@ class EmptyCommand : public Command { bool producesPiEvent() const final; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; // Employing deque here as it allows to push_back/emplace_back without // invalidation of pointer or reference to stored data item regardless of @@ -446,7 +447,7 @@ class ReleaseCommand : public Command { bool readyForCleanup() const final; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; /// Command which allocates memory release command should dealocate. AllocaCommandBase *MAllocaCmd = nullptr; @@ -510,7 +511,7 @@ class AllocaCommand : public AllocaCommandBase { void emitInstrumentationData() override; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; /// The flag indicates that alloca should try to reuse pointer provided by /// the user during memory object construction. @@ -531,7 +532,7 @@ class AllocaSubBufCommand : public AllocaCommandBase { void emitInstrumentationData() override; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; AllocaCommandBase *MParentAlloca = nullptr; }; @@ -547,7 +548,7 @@ class MapMemObject : public Command { void emitInstrumentationData() override; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; AllocaCommandBase *MSrcAllocaCmd = nullptr; Requirement MSrcReq; @@ -567,7 +568,7 @@ class UnMapMemObject : public Command { bool producesPiEvent() const final; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; AllocaCommandBase *MDstAllocaCmd = nullptr; Requirement MDstReq; @@ -589,7 +590,7 @@ class MemCpyCommand : public Command { bool producesPiEvent() const final; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; QueueImplPtr MSrcQueue; Requirement MSrcReq; @@ -612,7 +613,7 @@ class MemCpyCommandHost : public Command { ContextImplPtr getWorkerContext() const final; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; QueueImplPtr MSrcQueue; Requirement MSrcReq; @@ -621,22 +622,22 @@ class MemCpyCommandHost : public Command { void **MDstPtr = nullptr; }; -pi_int32 -enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, - bool blocking, void *ptr, size_t size, - std::vector &RawEvents, - const detail::EventImplPtr &OutEventImpl, bool read); +ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue, + const std::string &PipeName, bool blocking, + void *ptr, size_t size, + std::vector &RawEvents, + const detail::EventImplPtr &OutEventImpl, + bool read); void enqueueImpKernel( const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector &Args, const std::shared_ptr &KernelBundleImplPtr, const std::shared_ptr &MSyclKernel, - const std::string &KernelName, - std::vector &RawEvents, + const std::string &KernelName, std::vector &RawEvents, const detail::EventImplPtr &Event, const std::function &getMemAllocationFunc, - sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, - bool KernelIsCooperative, const bool KernelUsesClusterLaunch, + ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative, + const bool KernelUsesClusterLaunch, const RTDeviceBinaryImage *BinImage = nullptr); class KernelFusionCommand; @@ -647,9 +648,8 @@ class ExecCGCommand : public Command { public: ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, - bool EventNeeded, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, - const std::vector &Dependencies = {}); + bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer = nullptr, + const std::vector &Dependencies = {}); std::vector> getAuxiliaryResources() const; @@ -684,9 +684,9 @@ class ExecCGCommand : public Command { bool readyForCleanup() const final; private: - pi_int32 enqueueImp() final; - pi_int32 enqueueImpCommandBuffer(); - pi_int32 enqueueImpQueue(); + ur_result_t enqueueImp() final; + ur_result_t enqueueImpCommandBuffer(); + ur_result_t enqueueImpQueue(); AllocaCommandBase *getAllocaForReq(Requirement *Req); @@ -717,7 +717,7 @@ class UpdateHostRequirementCommand : public Command { void emitInstrumentationData() final; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; AllocaCommandBase *MSrcAllocaCmd = nullptr; Requirement MDstReq; @@ -757,7 +757,7 @@ class KernelFusionCommand : public Command { bool readyForDeletion() const { return MStatus == FusionStatus::DELETED; } private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; std::vector MFusionList; @@ -779,28 +779,28 @@ class UpdateCommandBufferCommand : public Command { bool producesPiEvent() const final; private: - pi_int32 enqueueImp() final; + ur_result_t enqueueImp() final; ext::oneapi::experimental::detail::exec_graph_impl *MGraph; std::vector> MNodes; }; -// Enqueues a given kernel to a PiExtCommandBuffer -pi_int32 enqueueImpCommandBufferKernel( +// Enqueues a given kernel to a ur_exp_command_buffer_handle_t +ur_result_t enqueueImpCommandBufferKernel( context Ctx, DeviceImplPtr DeviceImpl, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, - std::vector &SyncPoints, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint, - sycl::detail::pi::PiExtCommandBufferCommand *OutCommand, + std::vector &SyncPoints, + ur_exp_command_buffer_sync_point_t *OutSyncPoint, + ur_exp_command_buffer_command_handle_t *OutCommand, const std::function &getMemAllocationFunc); // Sets arguments for a given kernel and device based on the argument type. // Refactored from SetKernelParamsAndLaunch to allow it to be used in the graphs // extension. void SetArgBasedOnType( - const detail::plugin &Plugin, sycl::detail::pi::PiKernel Kernel, + const detail::PluginPtr &Plugin, ur_kernel_handle_t Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 863752506cd0f..12684f254fc8a 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -345,7 +345,6 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, Command *Scheduler::GraphBuilder::insertMemoryMove( MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - AllocaCommandBase *AllocaCmdDst = getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); if (!AllocaCmdDst) @@ -938,8 +937,8 @@ static void combineAccessModesOfReqs(std::vector &Reqs) { Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, std::vector &ToEnqueue, bool EventNeeded, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - const std::vector &Dependencies) { + ur_exp_command_buffer_handle_t CommandBuffer, + const std::vector &Dependencies) { std::vector &Reqs = CommandGroup->getRequirements(); std::vector &Events = CommandGroup->getEvents(); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 9b8c6b358cc3f..5f04439b7ea0a 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -98,8 +98,8 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, EventImplPtr Scheduler::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, - const std::vector &Dependencies) { + bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer, + const std::vector &Dependencies) { EventImplPtr NewEvent = nullptr; const CGType Type = CommandGroup->getType(); std::vector AuxiliaryCmds; @@ -607,7 +607,7 @@ void Scheduler::cancelFusion(QueueImplPtr Queue) { enqueueCommandForCG(nullptr, ToEnqueue); } -sycl::detail::pi::PiKernel Scheduler::completeSpecConstMaterialization( +ur_kernel_handle_t Scheduler::completeSpecConstMaterialization( [[maybe_unused]] QueueImplPtr Queue, [[maybe_unused]] const RTDeviceBinaryImage *BinImage, [[maybe_unused]] const std::string &KernelName, @@ -726,7 +726,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, if (SyclEventImplPtr->getContextImpl() != Context) return false; - // A nullptr here means that the commmand does not produce a PI event or it + // A nullptr here means that the commmand does not produce a UR event or it // hasn't been enqueued yet. return SyclEventImplPtr->getHandleRef() != nullptr; } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 8907a9ee1fe93..5306adaecb2a9 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -377,11 +377,10 @@ class Scheduler { /// \param Dependencies Optional list of dependency /// sync points when enqueuing to a command buffer. /// \return an event object to wait on for command group completion. - EventImplPtr - addCG(std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - bool EventNeeded, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, - const std::vector &Dependencies = {}); + EventImplPtr addCG( + std::unique_ptr CommandGroup, const QueueImplPtr &Queue, + bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer = nullptr, + const std::vector &Dependencies = {}); /// Registers a command group, that copies most recent memory to the memory /// pointed by the requirement. @@ -462,7 +461,7 @@ class Scheduler { void cancelFusion(QueueImplPtr Queue); EventImplPtr completeFusion(QueueImplPtr Queue, const property_list &); - sycl::detail::pi::PiKernel completeSpecConstMaterialization( + ur_kernel_handle_t completeSpecConstMaterialization( QueueImplPtr Queue, const RTDeviceBinaryImage *BinImage, const std::string &KernelName, std::vector &SpecConstBlob); @@ -598,11 +597,12 @@ class Scheduler { /// \return a command that represents command group execution and a bool /// indicating whether this command should be enqueued to the graph /// processor right away or not. - GraphBuildResult addCG( - std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - std::vector &ToEnqueue, bool EventNeeded, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, - const std::vector &Dependencies = {}); + GraphBuildResult + addCG(std::unique_ptr CommandGroup, const QueueImplPtr &Queue, + std::vector &ToEnqueue, bool EventNeeded, + ur_exp_command_buffer_handle_t CommandBuffer = nullptr, + const std::vector &Dependencies = + {}); /// Registers a \ref CG "command group" that updates host memory to the /// latest state. diff --git a/sycl/source/detail/spec_constant_impl.cpp b/sycl/source/detail/spec_constant_impl.cpp index 43306382a2dea..8696cc69ee612 100644 --- a/sycl/source/detail/spec_constant_impl.cpp +++ b/sycl/source/detail/spec_constant_impl.cpp @@ -10,9 +10,9 @@ #include #include -#include #include #include +#include #include diff --git a/sycl/source/detail/sycl_mem_obj_i.hpp b/sycl/source/detail/sycl_mem_obj_i.hpp index d73775b2ee70e..4d1e84d2ef836 100644 --- a/sycl/source/detail/sycl_mem_obj_i.hpp +++ b/sycl/source/detail/sycl_mem_obj_i.hpp @@ -8,7 +8,8 @@ #pragma once -#include +#include +#include namespace sycl { inline namespace _V1 { @@ -43,8 +44,7 @@ class SYCLMemObjI { // Method returns a pointer to host allocation if Context is host one and // cl_mem obect if not. virtual void *allocateMem(ContextImplPtr Context, bool InitFromUserData, - void *HostPtr, - sycl::detail::pi::PiEvent &InteropEvent) = 0; + void *HostPtr, ur_event_handle_t &InteropEvent) = 0; // Should be used for memory object created without use_host_ptr property. virtual void *allocateHostMem() = 0; diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 0436c02cb9bdb..83353aff4b65e 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -17,14 +17,16 @@ namespace sycl { inline namespace _V1 { namespace detail { -SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, - const size_t, event AvailableEvent, +SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject, + const context &SyclContext, const size_t, + event AvailableEvent, std::unique_ptr Allocator) : SYCLMemObjT(MemObject, SyclContext, true, AvailableEvent, std::move(Allocator)) {} -SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, - bool OwnNativeHandle, event AvailableEvent, +SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject, + const context &SyclContext, bool OwnNativeHandle, + event AvailableEvent, std::unique_ptr Allocator) : MAllocator(std::move(Allocator)), MProps(), MInteropEvent(detail::getSyclObjImpl(std::move(AvailableEvent))), @@ -33,19 +35,21 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, MUserPtr(nullptr), MShadowCopy(nullptr), MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr), MHostPtrProvided(true), MOwnNativeHandle(OwnNativeHandle) { - sycl::detail::pi::PiContext Context = nullptr; + ur_context_handle_t Context = nullptr; const PluginPtr &Plugin = getPlugin(); - Plugin->call( - MemObject, MInteropContext->getHandleRef(), OwnNativeHandle, - &MInteropMemObject); + ur_mem_native_properties_t MemProperties = { + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, nullptr, OwnNativeHandle}; + Plugin->call(urMemBufferCreateWithNativeHandle, MemObject, + MInteropContext->getHandleRef(), &MemProperties, + &MInteropMemObject); // Get the size of the buffer in bytes - Plugin->call( - MInteropMemObject, PI_MEM_SIZE, sizeof(size_t), &MSizeInBytes, nullptr); + Plugin->call(urMemGetInfo, MInteropMemObject, UR_MEM_INFO_SIZE, + sizeof(size_t), &MSizeInBytes, nullptr); - Plugin->call(MInteropMemObject, PI_MEM_CONTEXT, - sizeof(Context), &Context, nullptr); + Plugin->call(urMemGetInfo, MInteropMemObject, UR_MEM_INFO_CONTEXT, + sizeof(Context), &Context, nullptr); if (MInteropContext->getHandleRef() != Context) throw sycl::exception( @@ -53,24 +57,23 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, "Input context must be the same as the context of cl_mem"); if (MInteropContext->getBackend() == backend::opencl) - Plugin->call(MInteropMemObject); + Plugin->call(urMemRetain, MInteropMemObject); } -sycl::detail::pi::PiMemObjectType getImageType(int Dimensions) { +ur_mem_type_t getImageType(int Dimensions) { if (Dimensions == 1) - return PI_MEM_TYPE_IMAGE1D; + return UR_MEM_TYPE_IMAGE1D; if (Dimensions == 2) - return PI_MEM_TYPE_IMAGE2D; - return PI_MEM_TYPE_IMAGE3D; + return UR_MEM_TYPE_IMAGE2D; + return UR_MEM_TYPE_IMAGE3D; } -SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, - bool OwnNativeHandle, event AvailableEvent, +SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject, + const context &SyclContext, bool OwnNativeHandle, + event AvailableEvent, std::unique_ptr Allocator, - sycl::detail::pi::PiMemImageChannelOrder Order, - sycl::detail::pi::PiMemImageChannelType Type, - range<3> Range3WithOnes, unsigned Dimensions, - size_t ElementSize) + ur_image_format_t Format, range<3> Range3WithOnes, + unsigned Dimensions, size_t ElementSize) : MAllocator(std::move(Allocator)), MProps(), MInteropEvent(detail::getSyclObjImpl(std::move(AvailableEvent))), MInteropContext(detail::getSyclObjImpl(SyclContext)), @@ -78,35 +81,37 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, MUserPtr(nullptr), MShadowCopy(nullptr), MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr), MHostPtrProvided(true), MOwnNativeHandle(OwnNativeHandle) { - sycl::detail::pi::PiContext Context = nullptr; + ur_context_handle_t Context = nullptr; const PluginPtr &Plugin = getPlugin(); - sycl::detail::pi::PiMemImageFormat Format{Order, Type}; - sycl::detail::pi::PiMemImageDesc Desc; - Desc.image_type = getImageType(Dimensions); - Desc.image_width = Range3WithOnes[0]; - Desc.image_height = Range3WithOnes[1]; - Desc.image_depth = Range3WithOnes[2]; - Desc.image_array_size = 0; - Desc.image_row_pitch = ElementSize * Desc.image_width; - Desc.image_slice_pitch = Desc.image_row_pitch * Desc.image_height; - Desc.num_mip_levels = 0; - Desc.num_samples = 0; - Desc.buffer = nullptr; - - Plugin->call( - MemObject, MInteropContext->getHandleRef(), OwnNativeHandle, &Format, - &Desc, &MInteropMemObject); - - Plugin->call(MInteropMemObject, PI_MEM_CONTEXT, - sizeof(Context), &Context, nullptr); + ur_image_desc_t Desc = {}; + Desc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + Desc.type = getImageType(Dimensions); + Desc.width = Range3WithOnes[0]; + Desc.height = Range3WithOnes[1]; + Desc.depth = Range3WithOnes[2]; + Desc.arraySize = 0; + Desc.rowPitch = ElementSize * Desc.width; + Desc.slicePitch = Desc.rowPitch * Desc.height; + Desc.numMipLevel = 0; + Desc.numSamples = 0; + + ur_mem_native_properties_t NativeProperties = { + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, nullptr, OwnNativeHandle}; + + Plugin->call(urMemImageCreateWithNativeHandle, MemObject, + MInteropContext->getHandleRef(), &Format, &Desc, + &NativeProperties, &MInteropMemObject); + + Plugin->call(urMemGetInfo, MInteropMemObject, UR_MEM_INFO_CONTEXT, + sizeof(Context), &Context, nullptr); if (MInteropContext->getHandleRef() != Context) throw sycl::exception(make_error_code(errc::invalid), "Input context must be the same as the context of cl_mem"); if (MInteropContext->getBackend() == backend::opencl) - Plugin->call(MInteropMemObject); + Plugin->call(urMemRetain, MInteropMemObject); } void SYCLMemObjT::releaseMem(ContextImplPtr Context, void *MemAllocation) { @@ -149,8 +154,7 @@ void SYCLMemObjT::updateHostMemory() { if (MOpenCLInterop) { const PluginPtr &Plugin = getPlugin(); - Plugin->call( - pi::cast(MInteropMemObject)); + Plugin->call(urMemRelease, MInteropMemObject); } } const PluginPtr &SYCLMemObjT::getPlugin() const { @@ -160,13 +164,12 @@ const PluginPtr &SYCLMemObjT::getPlugin() const { } size_t SYCLMemObjT::getBufSizeForContext(const ContextImplPtr &Context, - pi_native_handle MemObject) { + ur_native_handle_t MemObject) { size_t BufSize = 0; const PluginPtr &Plugin = Context->getPlugin(); // TODO is there something required to support non-OpenCL backends? - Plugin->call( - detail::pi::cast(MemObject), PI_MEM_SIZE, - sizeof(size_t), &BufSize, nullptr); + Plugin->call(urMemGetInfo, detail::ur::cast(MemObject), + UR_MEM_INFO_SIZE, sizeof(size_t), &BufSize, nullptr); return BufSize; } diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index 59ce8bead7883..93a95de976889 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -65,27 +66,26 @@ class SYCLMemObjT : public SYCLMemObjI { std::unique_ptr Allocator) : SYCLMemObjT(/*SizeInBytes*/ 0, Props, std::move(Allocator)) {} - SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, + SYCLMemObjT(ur_native_handle_t MemObject, const context &SyclContext, const size_t SizeInBytes, event AvailableEvent, std::unique_ptr Allocator); SYCLMemObjT(cl_mem MemObject, const context &SyclContext, event AvailableEvent, std::unique_ptr Allocator) - : SYCLMemObjT(pi::cast(MemObject), SyclContext, + : SYCLMemObjT(ur::cast(MemObject), SyclContext, /*SizeInBytes*/ (size_t)0, AvailableEvent, std::move(Allocator)) {} - SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, + SYCLMemObjT(ur_native_handle_t MemObject, const context &SyclContext, bool OwnNativeHandle, event AvailableEvent, std::unique_ptr Allocator); - SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, + SYCLMemObjT(ur_native_handle_t MemObject, const context &SyclContext, bool OwnNativeHandle, event AvailableEvent, std::unique_ptr Allocator, - sycl::detail::pi::PiMemImageChannelOrder Order, - sycl::detail::pi::PiMemImageChannelType Type, - range<3> Range3WithOnes, unsigned Dimensions, size_t ElementSize); + ur_image_format_t Format, range<3> Range3WithOnes, + unsigned Dimensions, size_t ElementSize); virtual ~SYCLMemObjT() = default; @@ -265,13 +265,12 @@ class SYCLMemObjT : public SYCLMemObjI { } static size_t getBufSizeForContext(const ContextImplPtr &Context, - pi_native_handle MemObject); + ur_native_handle_t MemObject); void handleWriteAccessorCreation(); void *allocateMem(ContextImplPtr Context, bool InitFromUserData, - void *HostPtr, - sycl::detail::pi::PiEvent &InteropEvent) override { + void *HostPtr, ur_event_handle_t &InteropEvent) override { (void)Context; (void)InitFromUserData; (void)HostPtr; @@ -342,7 +341,7 @@ class SYCLMemObjT : public SYCLMemObjI { ContextImplPtr MInteropContext; // Native backend memory object handle passed by user to interoperability // constructor. - sycl::detail::pi::PiMem MInteropMemObject; + ur_mem_handle_t MInteropMemObject; // Indicates whether memory object is created using interoperability // constructor or not. bool MOpenCLInterop; diff --git a/sycl/source/detail/ur.cpp b/sycl/source/detail/ur.cpp new file mode 100644 index 0000000000000..3671d6945abb0 --- /dev/null +++ b/sycl/source/detail/ur.cpp @@ -0,0 +1,410 @@ +//==---------- ur.cpp - Unified Runtime integration helpers ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// \file +/// +/// Implementation of C++ utilities for Unified Runtime integration. +/// +/// \ingroup sycl_ur + +#include "context_impl.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef XPTI_ENABLE_INSTRUMENTATION +// Include the headers necessary for emitting +// traces using the trace framework +#include "xpti/xpti_trace_framework.h" +#endif + +namespace sycl { +inline namespace _V1 { +namespace detail { +namespace pi { +void contextSetExtendedDeleter(const sycl::context &context, + pi_context_extended_deleter func, + void *user_data) { + auto impl = getSyclObjImpl(context); + const auto &Plugin = impl->getPlugin(); + Plugin->call(urContextSetExtendedDeleter, impl->getHandleRef(), + reinterpret_cast(func), + user_data); +} +} // namespace pi + +#ifdef XPTI_ENABLE_INSTRUMENTATION +// Global (to the SYCL runtime) graph handle that all command groups are a +// child of +/// Event to be used by graph related activities +xpti_td *GSYCLGraphEvent = nullptr; +#endif // XPTI_ENABLE_INSTRUMENTATION + +template +void *getPluginOpaqueData([[maybe_unused]] void *OpaqueDataParam) { + // This was formerly a call to piextPluginGetOpaqueData, a deprecated PI entry + // point introduced for the now deleted ESIMD plugin. All calls to this entry + // point returned a similar error code to INVALID_OPERATION and would have + // resulted in a similar throw to this one + throw exception( + make_error_code(errc::feature_not_supported), + "This operation is not supported by any existing backends."); + return nullptr; +} + +namespace ur { +bool trace() { return SYCLConfig::get(); } + +static void initializePlugins(std::vector &Plugins, + ur_loader_config_handle_t LoaderConfig); + +bool XPTIInitDone = false; + +// Initializes all available Plugins. +std::vector &initializeUr(ur_loader_config_handle_t LoaderConfig) { + static std::once_flag PluginsInitDone; + // std::call_once is blocking all other threads if a thread is already + // creating a vector of plugins. So, no additional lock is needed. + std::call_once(PluginsInitDone, [&]() { + initializePlugins(GlobalHandler::instance().getPlugins(), LoaderConfig); + }); + return GlobalHandler::instance().getPlugins(); +} + +static void initializePlugins(std::vector &Plugins, + ur_loader_config_handle_t LoaderConfig) { +#define CHECK_UR_SUCCESS(Call) \ + __SYCL_CHECK_OCL_CODE_NO_EXC(Call) + + bool OwnLoaderConfig = false; + // If we weren't provided with a custom config handle create our own and + // enable full validation by default. + if(!LoaderConfig) { + CHECK_UR_SUCCESS(urLoaderConfigCreate(&LoaderConfig)) + CHECK_UR_SUCCESS(urLoaderConfigEnableLayer(LoaderConfig, + "UR_LAYER_PARAMETER_VALIDATION")) + OwnLoaderConfig = true; + } + + auto SyclURTrace = SYCLConfig::get(); + if (SyclURTrace && (std::atoi(SyclURTrace) != 0)) { + const char *LogOptions = "level:info;output:stdout;flush:info"; +#ifdef _WIN32 + _putenv_s("UR_LOG_TRACING", LogOptions); + _putenv_s("UR_LOG_LOADER", LogOptions); +#else + setenv("UR_LOG_TRACING", LogOptions, 1); + setenv("UR_LOG_LOADER", LogOptions, 1); +#endif + } + + if (std::getenv("UR_LOG_TRACING")) { + CHECK_UR_SUCCESS(urLoaderConfigEnableLayer(LoaderConfig, "UR_LAYER_TRACING")); + } + + CHECK_UR_SUCCESS(urLoaderConfigSetCodeLocationCallback( + LoaderConfig, codeLocationCallback, nullptr)); + + if (ProgramManager::getInstance().kernelUsesAsan()) { + if (urLoaderConfigEnableLayer(LoaderConfig, "UR_LAYER_ASAN")) { + urLoaderConfigRelease(LoaderConfig); + std::cerr << "Failed to enable ASAN layer\n"; + return; + } + } + + urLoaderConfigSetCodeLocationCallback(LoaderConfig, codeLocationCallback, + nullptr); + + if (ProgramManager::getInstance().kernelUsesAsan()) { + if (urLoaderConfigEnableLayer(LoaderConfig, "UR_LAYER_ASAN")) { + urLoaderConfigRelease(LoaderConfig); + std::cerr << "Failed to enable ASAN layer\n"; + return; + } + } + + ur_device_init_flags_t device_flags = 0; + CHECK_UR_SUCCESS(urLoaderInit(device_flags, LoaderConfig)); + + if (OwnLoaderConfig) { + CHECK_UR_SUCCESS(urLoaderConfigRelease(LoaderConfig)); + } + + uint32_t adapterCount = 0; + CHECK_UR_SUCCESS(urAdapterGet(0, nullptr, &adapterCount)); + std::vector adapters(adapterCount); + CHECK_UR_SUCCESS(urAdapterGet(adapterCount, adapters.data(), nullptr)); + + auto UrToSyclBackend = [](ur_adapter_backend_t backend) -> sycl::backend { + switch (backend) { + case UR_ADAPTER_BACKEND_LEVEL_ZERO: + return backend::ext_oneapi_level_zero; + case UR_ADAPTER_BACKEND_OPENCL: + return backend::opencl; + case UR_ADAPTER_BACKEND_CUDA: + return backend::ext_oneapi_cuda; + case UR_ADAPTER_BACKEND_HIP: + return backend::ext_oneapi_hip; + case UR_ADAPTER_BACKEND_NATIVE_CPU: + return backend::ext_oneapi_native_cpu; + default: + // Throw an exception, this should be unreachable. + CHECK_UR_SUCCESS(UR_RESULT_ERROR_INVALID_ENUMERATION) + return backend::all; + } + }; + + for (const auto &adapter : adapters) { + ur_adapter_backend_t adapterBackend = UR_ADAPTER_BACKEND_UNKNOWN; + CHECK_UR_SUCCESS(urAdapterGetInfo(adapter, UR_ADAPTER_INFO_BACKEND, + sizeof(adapterBackend), &adapterBackend, + nullptr)); + auto syclBackend = UrToSyclBackend(adapterBackend); + Plugins.emplace_back(std::make_shared(adapter, syclBackend)); + } + +#ifdef XPTI_ENABLE_INSTRUMENTATION + GlobalHandler::instance().getXPTIRegistry().initializeFrameworkOnce(); + + if (!(xptiTraceEnabled() && !XPTIInitDone)) + return; + // Not sure this is the best place to initialize the framework; SYCL runtime + // team needs to advise on the right place, until then we piggy-back on the + // initialization of the UR layer. + + // Initialize the global events just once, in the case ur::initialize() is + // called multiple times + XPTIInitDone = true; + // Registers a new stream for 'sycl' and any plugin that wants to listen to + // this stream will register itself using this string or stream ID for this + // string. + uint8_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME); + // Let all tool plugins know that a stream by the name of 'sycl' has been + // initialized and will be generating the trace stream. + GlobalHandler::instance().getXPTIRegistry().initializeStream( + SYCL_STREAM_NAME, GMajVer, GMinVer, GVerStr); + // Create a tracepoint to indicate the graph creation + xpti::payload_t GraphPayload("application_graph"); + uint64_t GraphInstanceNo; + GSYCLGraphEvent = + xptiMakeEvent("application_graph", &GraphPayload, xpti::trace_graph_event, + xpti_at::active, &GraphInstanceNo); + if (GSYCLGraphEvent) { + // The graph event is a global event and will be used as the parent for + // all nodes (command groups) + xptiNotifySubscribers(StreamID, xpti::trace_graph_create, nullptr, + GSYCLGraphEvent, GraphInstanceNo, nullptr); + } +#endif +#undef CHECK_UR_SUCCESS +} + +// Get the plugin serving given backend. +template const PluginPtr &getPlugin() { + static PluginPtr *Plugin = nullptr; + if (Plugin) + return *Plugin; + + std::vector &Plugins = ur::initializeUr(); + for (auto &P : Plugins) + if (P->hasBackend(BE)) { + Plugin = &P; + return *Plugin; + } + + throw exception(errc::runtime, "ur::getPlugin couldn't find plugin"); +} + +template const PluginPtr &getPlugin(); +template const PluginPtr &getPlugin(); +template const PluginPtr &getPlugin(); +template const PluginPtr &getPlugin(); + +// Reads an integer value from ELF data. +template +static ResT readELFValue(const unsigned char *Data, size_t NumBytes, + bool IsBigEndian) { + assert(NumBytes <= sizeof(ResT)); + ResT Result = 0; + if (IsBigEndian) { + for (size_t I = 0; I < NumBytes; ++I) { + Result = (Result << 8) | static_cast(Data[I]); + } + } else { + std::copy(Data, Data + NumBytes, reinterpret_cast(&Result)); + } + return Result; +} + +// Checks if an ELF image contains a section with a specified name. +static bool checkELFSectionPresent(const std::string &ExpectedSectionName, + const unsigned char *ImgData, + size_t ImgSize) { + // Check for 64bit and big-endian. + bool Is64bit = ImgData[4] == 2; + bool IsBigEndian = ImgData[5] == 2; + + // Make offsets based on whether the ELF file is 64bit or not. + size_t SectionHeaderOffsetInfoOffset = Is64bit ? 0x28 : 0x20; + size_t SectionHeaderSizeInfoOffset = Is64bit ? 0x3A : 0x2E; + size_t SectionHeaderNumInfoOffset = Is64bit ? 0x3C : 0x30; + size_t SectionStringsHeaderIndexInfoOffset = Is64bit ? 0x3E : 0x32; + + // if the image doesn't contain enough data for the header values, end early. + if (ImgSize < SectionStringsHeaderIndexInfoOffset + 2) + return false; + + // Read the e_shoff, e_shentsize, e_shnum, and e_shstrndx entries in the + // header. + uint64_t SectionHeaderOffset = readELFValue( + ImgData + SectionHeaderOffsetInfoOffset, Is64bit ? 8 : 4, IsBigEndian); + uint16_t SectionHeaderSize = readELFValue( + ImgData + SectionHeaderSizeInfoOffset, 2, IsBigEndian); + uint16_t SectionHeaderNum = readELFValue( + ImgData + SectionHeaderNumInfoOffset, 2, IsBigEndian); + uint16_t SectionStringsHeaderIndex = readELFValue( + ImgData + SectionStringsHeaderIndexInfoOffset, 2, IsBigEndian); + + // End early if we do not have the expected number of section headers or + // if the read section string header index is out-of-range. + if (ImgSize < SectionHeaderOffset + SectionHeaderNum * SectionHeaderSize || + SectionStringsHeaderIndex >= SectionHeaderNum) + return false; + + // Get the location of the section string data. + size_t SectionStringsInfoOffset = Is64bit ? 0x18 : 0x10; + const unsigned char *SectionStringsHeaderData = + ImgData + SectionHeaderOffset + + SectionStringsHeaderIndex * SectionHeaderSize; + uint64_t SectionStrings = readELFValue( + SectionStringsHeaderData + SectionStringsInfoOffset, Is64bit ? 8 : 4, + IsBigEndian); + const unsigned char *SectionStringsData = ImgData + SectionStrings; + + // For each section, check the name against the expected section and return + // true if we find it. + for (size_t I = 0; I < SectionHeaderNum; ++I) { + // Get the offset into the section string data of this sections name. + const unsigned char *HeaderData = + ImgData + SectionHeaderOffset + I * SectionHeaderSize; + uint32_t SectionNameOffset = + readELFValue(HeaderData, 4, IsBigEndian); + + // Read the section name and check if it is the same as the name we are + // looking for. + const char *SectionName = + reinterpret_cast(SectionStringsData + SectionNameOffset); + if (SectionName == ExpectedSectionName) + return true; + } + return false; +} + +// Returns the e_type field from an ELF image. +static uint16_t getELFHeaderType(const unsigned char *ImgData, size_t ImgSize) { + (void)ImgSize; + assert(ImgSize >= 18 && "Not enough bytes to have an ELF header type."); + + bool IsBigEndian = ImgData[5] == 2; + return readELFValue(ImgData + 16, 2, IsBigEndian); +} + +sycl_device_binary_type getBinaryImageFormat(const unsigned char *ImgData, + size_t ImgSize) { + // Top-level magic numbers for the recognized binary image formats. + auto MatchMagicNumber = [&](auto Number) { + return ImgSize >= sizeof(Number) && + std::memcmp(ImgData, &Number, sizeof(Number)) == 0; + }; + + if (MatchMagicNumber(uint32_t{0x07230203})) + return SYCL_DEVICE_BINARY_TYPE_SPIRV; + + if (MatchMagicNumber(uint32_t{0xDEC04342})) + return SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; + + if (MatchMagicNumber(uint32_t{0x43544E49})) + // 'I', 'N', 'T', 'C' ; Intel native + return SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; + + // Check for ELF format, size requirements include data we'll read in case of + // succesful match. + if (ImgSize >= 18 && MatchMagicNumber(uint32_t{0x464c457F})) { + uint16_t ELFHdrType = getELFHeaderType(ImgData, ImgSize); + if (ELFHdrType == 0xFF04) + // OpenCL executable. + return SYCL_DEVICE_BINARY_TYPE_NATIVE; + + if (ELFHdrType == 0xFF12) + // ZEBIN executable. + return SYCL_DEVICE_BINARY_TYPE_NATIVE; + + // Newer ZEBIN format does not have a special header type, but can instead + // be identified by having a required .ze_info section. + if (checkELFSectionPresent(".ze_info", ImgData, ImgSize)) + return SYCL_DEVICE_BINARY_TYPE_NATIVE; + } + + if (MatchMagicNumber(std::array{'!', '<', 'a', 'r', 'c', 'h', '>', '\n'})) + // "ar" format is used to pack binaries for multiple devices, e.g. via + // + // -Xsycl-target-backend=spir64_gen "-device acm-g10,acm-g11" + // + // option. + return SYCL_DEVICE_BINARY_TYPE_NATIVE; + + return SYCL_DEVICE_BINARY_TYPE_NONE; +} + +ur_program_metadata_t mapDeviceBinaryPropertyToProgramMetadata( + const sycl_device_binary_property &DeviceBinaryProperty) { + ur_program_metadata_t URMetadata{}; + URMetadata.pName = DeviceBinaryProperty->Name; + URMetadata.size = DeviceBinaryProperty->ValSize; + switch (DeviceBinaryProperty->Type) { + case SYCL_PROPERTY_TYPE_UINT32: + URMetadata.type = UR_PROGRAM_METADATA_TYPE_UINT32; + URMetadata.value.data32 = DeviceBinaryProperty->ValSize; + break; + case SYCL_PROPERTY_TYPE_BYTE_ARRAY: + URMetadata.type = UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY; + URMetadata.value.pData = DeviceBinaryProperty->ValAddr; + break; + case SYCL_PROPERTY_TYPE_STRING: + URMetadata.type = UR_PROGRAM_METADATA_TYPE_STRING; + URMetadata.value.pString = + reinterpret_cast(DeviceBinaryProperty->ValAddr); + break; + default: + break; + } + return URMetadata; +} + +} // namespace ur +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/pi_utils.hpp b/sycl/source/detail/ur_utils.hpp similarity index 61% rename from sycl/source/detail/pi_utils.hpp rename to sycl/source/detail/ur_utils.hpp index 51c5db5a7edf3..335d6ecd11baa 100644 --- a/sycl/source/detail/pi_utils.hpp +++ b/sycl/source/detail/ur_utils.hpp @@ -1,4 +1,4 @@ -//==------------- pi_utils.hpp - Common PI utilities -----------------------==// +//==------------- ur_utils.hpp - Common UR utilities -----------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include @@ -19,63 +19,63 @@ namespace sycl { inline namespace _V1 { namespace detail { -// RAII object for keeping ownership of a PI event. -struct OwnedPiEvent { - OwnedPiEvent(const PluginPtr &Plugin) +// RAII object for keeping ownership of a UR event. +struct OwnedUrEvent { + OwnedUrEvent(const PluginPtr &Plugin) : MEvent{std::nullopt}, MPlugin{Plugin} {} - OwnedPiEvent(sycl::detail::pi::PiEvent Event, const PluginPtr &Plugin, + OwnedUrEvent(ur_event_handle_t Event, const PluginPtr &Plugin, bool TakeOwnership = false) : MEvent(Event), MPlugin(Plugin) { // If it is not instructed to take ownership, retain the event to share // ownership of it. if (!TakeOwnership) - MPlugin->call(*MEvent); + MPlugin->call(urEventRetain, *MEvent); } - ~OwnedPiEvent() { + ~OwnedUrEvent() { try { // Release the event if the ownership was not transferred. if (MEvent.has_value()) - MPlugin->call(*MEvent); + MPlugin->call(urEventRelease, *MEvent); } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~OwnedPiEvent", e); + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~OwnedUrEvent", e); } } - OwnedPiEvent(OwnedPiEvent &&Other) + OwnedUrEvent(OwnedUrEvent &&Other) : MEvent(Other.MEvent), MPlugin(Other.MPlugin) { Other.MEvent = std::nullopt; } // Copy constructor explicitly deleted for simplicity as it is not currently // used. Implement if needed. - OwnedPiEvent(const OwnedPiEvent &Other) = delete; + OwnedUrEvent(const OwnedUrEvent &Other) = delete; operator bool() { return MEvent.has_value(); } - sycl::detail::pi::PiEvent GetEvent() { return *MEvent; } + ur_event_handle_t GetEvent() { return *MEvent; } // Transfers the ownership of the event to the caller. The destructor will // no longer release the event. - sycl::detail::pi::PiEvent TransferOwnership() { - sycl::detail::pi::PiEvent Event = *MEvent; + ur_event_handle_t TransferOwnership() { + ur_event_handle_t Event = *MEvent; MEvent = std::nullopt; return Event; } private: - std::optional MEvent; + std::optional MEvent; const PluginPtr &MPlugin; }; -namespace pi { -using PiDeviceBinaryType = ::sycl_device_binary_type; +namespace ur { +using DeviceBinaryType = ::sycl_device_binary_type; /// Tries to determine the device binary image foramat. Returns /// SYCL_DEVICE_BINARY_TYPE_NONE if unsuccessful. -PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, - size_t ImgSize); -} // namespace pi +DeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, + size_t ImgSize); +} // namespace ur } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 5039ce78f21cd..377c9bb01c378 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -73,31 +73,32 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, return nullptr; std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_result Error = PI_ERROR_INVALID_VALUE; + ur_result_t Error = UR_RESULT_ERROR_INVALID_VALUE; switch (Kind) { case alloc::host: { - std::array Props; - auto PropsIter = Props.begin(); + ur_usm_desc_t UsmDesc{}; + UsmDesc.align = Alignment; + + ur_usm_alloc_location_desc_t UsmLocationDesc{}; + UsmLocationDesc.stype = UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC; if (PropList.has_property< sycl::ext::intel::experimental::property::usm::buffer_location>() && Ctxt.get_platform().has_extension( "cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); + UsmLocationDesc.location = static_cast( + PropList + .get_property() + .get_buffer_location()); + UsmDesc.pNext = &UsmLocationDesc; } - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list - - Error = Plugin->call_nocheck( - &RetVal, C, Props.data(), Size, Alignment); + Error = Plugin->call_nocheck(urUSMHostAlloc, C, &UsmDesc, + /* pool= */ nullptr, Size, &RetVal); break; } @@ -105,14 +106,14 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, case alloc::shared: case alloc::unknown: { RetVal = nullptr; - Error = PI_ERROR_INVALID_VALUE; + Error = UR_RESULT_ERROR_INVALID_VALUE; break; } } // Error is for debugging purposes. // The spec wants a nullptr returned, not an exception. - if (Error != PI_SUCCESS) + if (Error != UR_RESULT_SUCCESS) return nullptr; #ifdef XPTI_ENABLE_INSTRUMENTATION xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr", @@ -139,78 +140,85 @@ void *alignedAllocInternal(size_t Alignment, size_t Size, if (Size == 0) return nullptr; - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_result Error = PI_ERROR_INVALID_VALUE; - pi_device Id; + ur_result_t Error = UR_RESULT_ERROR_INVALID_VALUE; + ur_device_handle_t Dev; switch (Kind) { case alloc::device: { - Id = DevImpl->getHandleRef(); + Dev = DevImpl->getHandleRef(); - std::array Props; - auto PropsIter = Props.begin(); + ur_usm_desc_t UsmDesc{}; + UsmDesc.align = Alignment; + + ur_usm_alloc_location_desc_t UsmLocationDesc{}; + UsmLocationDesc.stype = UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC; // Buffer location is only supported on FPGA devices if (PropList.has_property< sycl::ext::intel::experimental::property::usm::buffer_location>() && DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); + UsmLocationDesc.location = static_cast( + PropList + .get_property() + .get_buffer_location()); + UsmDesc.pNext = &UsmLocationDesc; } - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list - - Error = Plugin->call_nocheck( - &RetVal, C, Id, Props.data(), Size, Alignment); + Error = Plugin->call_nocheck(urUSMDeviceAlloc, C, Dev, &UsmDesc, + /*pool=*/nullptr, Size, &RetVal); break; } case alloc::shared: { - Id = DevImpl->getHandleRef(); + Dev = DevImpl->getHandleRef(); - std::array Props; - auto PropsIter = Props.begin(); + ur_usm_desc_t UsmDesc{}; + UsmDesc.align = Alignment; + + ur_usm_alloc_location_desc_t UsmLocationDesc{}; + UsmLocationDesc.stype = UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC; + + ur_usm_device_desc_t UsmDeviceDesc{}; + UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC; + UsmDeviceDesc.flags = 0; + + UsmDesc.pNext = &UsmDeviceDesc; if (PropList.has_property< sycl::ext::oneapi::property::usm::device_read_only>()) { - *PropsIter++ = PI_MEM_ALLOC_FLAGS; - *PropsIter++ = PI_MEM_ALLOC_DEVICE_READ_ONLY; + UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY; } if (PropList.has_property< sycl::ext::intel::experimental::property::usm::buffer_location>() && DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); + UsmLocationDesc.location = static_cast( + PropList + .get_property() + .get_buffer_location()); + UsmDeviceDesc.pNext = &UsmLocationDesc; } - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list - - Error = Plugin->call_nocheck( - &RetVal, C, Id, Props.data(), Size, Alignment); + Error = Plugin->call_nocheck(urUSMSharedAlloc, C, Dev, &UsmDesc, + /*pool=*/nullptr, Size, &RetVal); break; } case alloc::host: case alloc::unknown: { RetVal = nullptr; - Error = PI_ERROR_INVALID_VALUE; + Error = UR_RESULT_ERROR_INVALID_VALUE; break; } } // Error is for debugging purposes. // The spec wants a nullptr returned, not an exception. - if (Error != PI_SUCCESS) + if (Error != UR_RESULT_SUCCESS) return nullptr; return RetVal; } @@ -250,9 +258,9 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, void freeInternal(void *Ptr, const context_impl *CtxImpl) { if (Ptr == nullptr) return; - pi_context C = CtxImpl->getHandleRef(); + ur_context_handle_t C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); - Plugin->call(C, Ptr); + Plugin->call(urUSMFree, C, Ptr); } void free(void *Ptr, const context &Ctxt, @@ -539,35 +547,34 @@ alloc get_pointer_type(const void *Ptr, const context &Ctxt) { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - pi_context PICtx = CtxImpl->getHandleRef(); - pi_usm_type AllocTy; + ur_context_handle_t URCtx = CtxImpl->getHandleRef(); + ur_usm_type_t AllocTy; - // query type using PI function + // query type using UR function const detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck( - PICtx, Ptr, PI_MEM_ALLOC_TYPE, sizeof(pi_usm_type), &AllocTy, - nullptr); + ur_result_t Err = Plugin->call_nocheck( + urUSMGetMemAllocInfo, URCtx, Ptr, UR_USM_ALLOC_INFO_TYPE, + sizeof(ur_usm_type_t), &AllocTy, nullptr); - // PI_ERROR_INVALID_VALUE means USM doesn't know about this ptr - if (Err == PI_ERROR_INVALID_VALUE) + // UR_RESULT_ERROR_INVALID_VALUE means USM doesn't know about this ptr + if (Err == UR_RESULT_ERROR_INVALID_VALUE) return alloc::unknown; - // otherwise PI_SUCCESS is expected - if (Err != PI_SUCCESS) { - throw detail::set_pi_error( + // otherwise UR_RESULT_SUCCESS is expected + if (Err != UR_RESULT_SUCCESS) { + throw detail::set_ur_error( exception(make_error_code(errc::runtime), "get_pointer_type() failed"), Err); } alloc ResultAlloc; switch (AllocTy) { - case PI_MEM_TYPE_HOST: + case UR_USM_TYPE_HOST: ResultAlloc = alloc::host; break; - case PI_MEM_TYPE_DEVICE: + case UR_USM_TYPE_DEVICE: ResultAlloc = alloc::device; break; - case PI_MEM_TYPE_SHARED: + case UR_USM_TYPE_SHARED: ResultAlloc = alloc::shared; break; default: @@ -601,13 +608,13 @@ device get_pointer_device(const void *Ptr, const context &Ctxt) { return Devs[0]; } - pi_context PICtx = CtxImpl->getHandleRef(); - pi_device DeviceId; + ur_context_handle_t URCtx = CtxImpl->getHandleRef(); + ur_device_handle_t DeviceId; - // query device using PI function + // query device using UR function const detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - Plugin->call( - PICtx, Ptr, PI_MEM_ALLOC_DEVICE, sizeof(pi_device), &DeviceId, nullptr); + Plugin->call(urUSMGetMemAllocInfo, URCtx, Ptr, UR_USM_ALLOC_INFO_DEVICE, + sizeof(ur_device_handle_t), &DeviceId, nullptr); // The device is not necessarily a member of the context, it could be a // member's descendant instead. Fetch the corresponding device from the cache. @@ -625,18 +632,18 @@ device get_pointer_device(const void *Ptr, const context &Ctxt) { static void prepare_for_usm_device_copy(const void *Ptr, size_t Size, const context &Ctxt) { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - pi_context PICtx = CtxImpl->getHandleRef(); - // Call the PI function + ur_context_handle_t URCtx = CtxImpl->getHandleRef(); + // Call the UR function const detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - Plugin->call(Ptr, Size, PICtx); + Plugin->call(urUSMImportExp, URCtx, const_cast(Ptr), Size); } static void release_from_usm_device_copy(const void *Ptr, const context &Ctxt) { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - pi_context PICtx = CtxImpl->getHandleRef(); - // Call the PI function + ur_context_handle_t URCtx = CtxImpl->getHandleRef(); + // Call the UR function const detail::PluginPtr &Plugin = CtxImpl->getPlugin(); - Plugin->call(Ptr, PICtx); + Plugin->call(urUSMReleaseExp, URCtx, const_cast(Ptr)); } namespace ext::oneapi::experimental { diff --git a/sycl/source/detail/windows_pi.cpp b/sycl/source/detail/windows_ur.cpp similarity index 63% rename from sycl/source/detail/windows_pi.cpp rename to sycl/source/detail/windows_ur.cpp index 44c432e8d8f4f..f730b087a67af 100644 --- a/sycl/source/detail/windows_pi.cpp +++ b/sycl/source/detail/windows_ur.cpp @@ -1,4 +1,4 @@ -//==---------------- windows_pi.cpp ----------------------------------------==// +//==---------------- windows_ur.cpp ----------------------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -15,12 +15,12 @@ #include #include "detail/windows_os_utils.hpp" -#include "pi_win_proxy_loader.hpp" +#include "ur_win_proxy_loader.hpp" namespace sycl { inline namespace _V1 { namespace detail { -namespace pi { +namespace ur { void *loadOsLibrary(const std::string &LibraryPath) { // Tells the system to not display the critical-error-handler message box. @@ -43,26 +43,10 @@ void *loadOsLibrary(const std::string &LibraryPath) { return Result; } -void *loadOsPluginLibrary(const std::string &PluginPath) { - // We fetch the preloaded plugin from the pi_win_proxy_loader. - // The proxy_loader handles any required error suppression. - auto Result = getPreloadedPlugin(PluginPath); - - return Result; -} - int unloadOsLibrary(void *Library) { return (int)FreeLibrary((HMODULE)Library); } -int unloadOsPluginLibrary(void *Library) { - // The mock plugin does not have an associated library, so we allow nullptr - // here to avoid it trying to free a non-existent library. - if (!Library) - return 1; - return (int)FreeLibrary((HMODULE)Library); -} - void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { return reinterpret_cast( GetProcAddress((HMODULE)Library, FunctionName.c_str())); @@ -86,22 +70,7 @@ static std::filesystem::path getCurrentDSODirPath() { return std::filesystem::path(Path); } -// Load plugins corresponding to provided list of plugin names. -std::vector> -loadPlugins(const std::vector> &&PluginNames) { - std::vector> LoadedPlugins; - const std::filesystem::path LibSYCLDir = getCurrentDSODirPath(); - - for (auto &PluginName : PluginNames) { - void *Library = getPreloadedPlugin(LibSYCLDir / PluginName.first); - LoadedPlugins.push_back(std::make_tuple( - std::move(PluginName.first), std::move(PluginName.second), Library)); - } - - return LoadedPlugins; -} - -} // namespace pi +} // namespace ur } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp index 1884f5cd34265..78a1c66cb346d 100644 --- a/sycl/source/detail/xpti_registry.cpp +++ b/sycl/source/detail/xpti_registry.cpp @@ -82,7 +82,7 @@ void XPTIRegistry::bufferAssociateNotification(const void *UserObj, xpti::offload_association_data_t BufAssoc{(uintptr_t)UserObj, (uintptr_t)MemObj}; - // Add association between user level and PI level memory object + // Add association between user level and UR level memory object xptiNotifySubscribers(GBufferStreamID, NotificationTraceType, nullptr, nullptr, IId, &BufAssoc); #endif @@ -101,7 +101,7 @@ void XPTIRegistry::bufferReleaseNotification(const void *UserObj, xpti::offload_association_data_t BufRelease{(uintptr_t)UserObj, (uintptr_t)MemObj}; - // Release PI level memory object + // Release UR level memory object xptiNotifySubscribers(GBufferStreamID, NotificationTraceType, nullptr, nullptr, IId, &BufRelease); #endif diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index 356679a75c2fb..0b96dc2f99a78 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -29,7 +29,7 @@ namespace detail { inline constexpr const char *SYCL_STREAM_NAME = "sycl"; // Stream name being used for traces generated from the SYCL plugin layer inline constexpr const char *SYCL_PICALL_STREAM_NAME = "sycl.pi"; -// Stream name being used for traces generated from PI calls. This stream +// Stream name being used for traces generated from UR calls. This stream // contains information about function arguments. inline constexpr const char *SYCL_PIDEBUGCALL_STREAM_NAME = "sycl.pi.debug"; inline constexpr auto SYCL_MEM_ALLOC_STREAM_NAME = diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index c9024363846d1..9207189c7ea03 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -32,16 +33,17 @@ void force_type(info::device_type &t, const info::device_type &ft) { device::device() : device(default_selector_v) {} device::device(cl_device_id DeviceId) { + auto Plugin = sycl::detail::ur::getPlugin(); // The implementation constructor takes ownership of the native handle so we // must retain it in order to adhere to SYCL 1.2.1 spec (Rev6, section 4.3.1.) - sycl::detail::pi::PiDevice Device; - auto Plugin = sycl::detail::pi::getPlugin(); - Plugin->call( - detail::pi::cast(DeviceId), nullptr, &Device); + ur_device_handle_t Device; + Plugin->call(urDeviceCreateWithNativeHandle, + detail::ur::cast(DeviceId), + Plugin->getUrPlatforms()[0], nullptr, &Device); auto Platform = - detail::platform_impl::getPlatformFromPiDevice(Device, Plugin); + detail::platform_impl::getPlatformFromUrDevice(Device, Plugin); impl = Platform->getOrMakeDeviceImpl(Device, Platform); - Plugin->call(impl->getHandleRef()); + Plugin->call(urDeviceRetain, impl->getHandleRef()); } device::device(const device_selector &deviceSelector) { @@ -130,7 +132,7 @@ template <> __SYCL_EXPORT device device::get_info_impl() const { // With ONEAPI_DEVICE_SELECTOR the impl.MRootDevice is preset and may be - // overridden (ie it may be nullptr on a sub-device) The PI of the sub-devices + // overridden (ie it may be nullptr on a sub-device) The sub-devices // have parents, but we don't want to return them. They must pretend to be // parentless root devices. if (impl->isRootDevice()) @@ -200,32 +202,32 @@ device::get_backend_info() const { backend device::get_backend() const noexcept { return impl->getBackend(); } -pi_native_handle device::getNative() const { return impl->getNative(); } +ur_native_handle_t device::getNative() const { return impl->getNative(); } bool device::has(aspect Aspect) const { return impl->has(Aspect); } void device::ext_oneapi_enable_peer_access(const device &peer) { - const sycl::detail::pi::PiDevice Device = impl->getHandleRef(); - const sycl::detail::pi::PiDevice Peer = peer.impl->getHandleRef(); + ur_device_handle_t Device = impl->getHandleRef(); + ur_device_handle_t Peer = peer.impl->getHandleRef(); if (Device != Peer) { auto Plugin = impl->getPlugin(); - Plugin->call(Device, Peer); + Plugin->call(urUsmP2PEnablePeerAccessExp, Device, Peer); } } void device::ext_oneapi_disable_peer_access(const device &peer) { - const sycl::detail::pi::PiDevice Device = impl->getHandleRef(); - const sycl::detail::pi::PiDevice Peer = peer.impl->getHandleRef(); + ur_device_handle_t Device = impl->getHandleRef(); + ur_device_handle_t Peer = peer.impl->getHandleRef(); if (Device != Peer) { auto Plugin = impl->getPlugin(); - Plugin->call(Device, Peer); + Plugin->call(urUsmP2PDisablePeerAccessExp, Device, Peer); } } bool device::ext_oneapi_can_access_peer(const device &peer, ext::oneapi::peer_access attr) { - const sycl::detail::pi::PiDevice Device = impl->getHandleRef(); - const sycl::detail::pi::PiDevice Peer = peer.impl->getHandleRef(); + ur_device_handle_t Device = impl->getHandleRef(); + ur_device_handle_t Peer = peer.impl->getHandleRef(); if (Device == Peer) { return true; @@ -234,19 +236,19 @@ bool device::ext_oneapi_can_access_peer(const device &peer, size_t returnSize; int value; - sycl::detail::pi::PiPeerAttr PiAttr = [&]() { + ur_exp_peer_info_t UrAttr = [&]() { switch (attr) { case ext::oneapi::peer_access::access_supported: - return PI_PEER_ACCESS_SUPPORTED; + return UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED; case ext::oneapi::peer_access::atomics_supported: - return PI_PEER_ATOMICS_SUPPORTED; + return UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED; } throw sycl::exception(make_error_code(errc::invalid), "Unrecognized peer access attribute."); }(); auto Plugin = impl->getPlugin(); - Plugin->call( - Device, Peer, PiAttr, sizeof(int), &value, &returnSize); + Plugin->call(urUsmP2PPeerAccessGetInfoExp, Device, Peer, UrAttr, sizeof(int), + &value, &returnSize); return value == 1; } @@ -268,14 +270,13 @@ bool device::ext_oneapi_can_compile( } bool device::ext_oneapi_supports_cl_c_feature(detail::string_view Feature) { - - const detail::pi::PiDevice Device = impl->getHandleRef(); + ur_device_handle_t Device = impl->getHandleRef(); auto Plugin = impl->getPlugin(); uint32_t ipVersion = 0; - auto res = Plugin->call_nocheck( - Device, PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - if (res != PI_SUCCESS) + auto res = + Plugin->call_nocheck(urDeviceGetInfo, Device, UR_DEVICE_INFO_IP_VERSION, + sizeof(uint32_t), &ipVersion, nullptr); + if (res != UR_RESULT_SUCCESS) return false; return ext::oneapi::experimental::detail::OpenCLC_Feature_Available( @@ -284,13 +285,13 @@ bool device::ext_oneapi_supports_cl_c_feature(detail::string_view Feature) { bool device::ext_oneapi_supports_cl_c_version( const ext::oneapi::experimental::cl_version &Version) const { - const detail::pi::PiDevice Device = impl->getHandleRef(); + ur_device_handle_t Device = impl->getHandleRef(); auto Plugin = impl->getPlugin(); uint32_t ipVersion = 0; - auto res = Plugin->call_nocheck( - Device, PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - if (res != PI_SUCCESS) + auto res = + Plugin->call_nocheck(urDeviceGetInfo, Device, UR_DEVICE_INFO_IP_VERSION, + sizeof(uint32_t), &ipVersion, nullptr); + if (res != UR_RESULT_SUCCESS) return false; return ext::oneapi::experimental::detail::OpenCLC_Supports_Version(Version, @@ -300,13 +301,13 @@ bool device::ext_oneapi_supports_cl_c_version( bool device::ext_oneapi_supports_cl_extension( detail::string_view Name, ext::oneapi::experimental::cl_version *VersionPtr) const { - const detail::pi::PiDevice Device = impl->getHandleRef(); + ur_device_handle_t Device = impl->getHandleRef(); auto Plugin = impl->getPlugin(); uint32_t ipVersion = 0; - auto res = Plugin->call_nocheck( - Device, PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - if (res != PI_SUCCESS) + auto res = + Plugin->call_nocheck(urDeviceGetInfo, Device, UR_DEVICE_INFO_IP_VERSION, + sizeof(uint32_t), &ipVersion, nullptr); + if (res != UR_RESULT_SUCCESS) return false; return ext::oneapi::experimental::detail::OpenCLC_Supports_Extension( @@ -314,13 +315,13 @@ bool device::ext_oneapi_supports_cl_extension( } std::string device::ext_oneapi_cl_profile() const { - const detail::pi::PiDevice Device = impl->getHandleRef(); + ur_device_handle_t Device = impl->getHandleRef(); auto Plugin = impl->getPlugin(); uint32_t ipVersion = 0; - auto res = Plugin->call_nocheck( - Device, PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - if (res != PI_SUCCESS) + auto res = + Plugin->call_nocheck(urDeviceGetInfo, Device, UR_DEVICE_INFO_IP_VERSION, + sizeof(uint32_t), &ipVersion, nullptr); + if (res != UR_RESULT_SUCCESS) return ""; return ext::oneapi::experimental::detail::OpenCLC_Profile(ipVersion); diff --git a/sycl/source/device_selector.cpp b/sycl/source/device_selector.cpp index 1be82372e6248..b0e5bfb18fcf0 100644 --- a/sycl/source/device_selector.cpp +++ b/sycl/source/device_selector.cpp @@ -48,12 +48,7 @@ static int getDevicePreference(const device &Device) { } static void traceDeviceSelection(const device &Device, int Score, bool Chosen) { - bool shouldTrace = false; - if (Chosen) { - shouldTrace = detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_BASIC); - } else { - shouldTrace = detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL); - } + bool shouldTrace = detail::ur::trace(); if (shouldTrace) { std::string PlatformName = Device.get_info() .get_info(); @@ -61,11 +56,11 @@ static void traceDeviceSelection(const device &Device, int Score, bool Chosen) { auto selectionMsg = Chosen ? "Selected device: -> final score = " : "Candidate device: -> score = "; - std::cout << "SYCL_PI_TRACE[all]: " << selectionMsg << Score + std::cout << "SYCL_UR_TRACE: " << selectionMsg << Score << ((Score < 0) ? " (REJECTED)" : "") << std::endl - << "SYCL_PI_TRACE[all]: " + << "SYCL_UR_TRACE: " << " platform: " << PlatformName << std::endl - << "SYCL_PI_TRACE[all]: " + << "SYCL_UR_TRACE: " << " device: " << DeviceName << std::endl; } } @@ -167,10 +162,9 @@ select_device(const DSelectorInvocableType &DeviceSelectorInvocable, /// 4. Accelerator static void traceDeviceSelector(const std::string &DeviceType) { - bool ShouldTrace = false; - ShouldTrace = detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_BASIC); + bool ShouldTrace = detail::ur::trace(); if (ShouldTrace) { - std::cout << "SYCL_PI_TRACE[all]: Requested device_type: " << DeviceType + std::cout << "SYCL_UR_TRACE: Requested device_type: " << DeviceType << std::endl; } } diff --git a/sycl/source/enqueue_functions.cpp b/sycl/source/enqueue_functions.cpp index b2e4f3f712f4b..4cfe1c46d8d47 100644 --- a/sycl/source/enqueue_functions.cpp +++ b/sycl/source/enqueue_functions.cpp @@ -33,8 +33,9 @@ __SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice, const sycl::detail::code_location &CodeLoc) { sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); - QueueImplPtr->mem_advise(QueueImplPtr, Ptr, NumBytes, pi_mem_advice(Advice), - {}, /*CallerNeedsEvent=*/false); + QueueImplPtr->mem_advise(QueueImplPtr, Ptr, NumBytes, + ur_usm_advice_flags_t(Advice), {}, + /*CallerNeedsEvent=*/false); } } // namespace ext::oneapi::experimental diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 65d163df465df..3f9048284abac 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -26,11 +26,12 @@ event::event() : impl(std::make_shared(std::nullopt)) {} event::event(cl_event ClEvent, const context &SyclContext) : impl(std::make_shared( - detail::pi::cast(ClEvent), SyclContext)) { + detail::ur::cast(ClEvent), SyclContext)) { // This is a special interop constructor for OpenCL, so the event must be // retained. - impl->getPlugin()->call( - detail::pi::cast(ClEvent)); + // TODO(pi2ur): Don't just cast from cl_event above + impl->getPlugin()->call(urEventRetain, + detail::ur::cast(ClEvent)); } bool event::operator==(const event &rhs) const { return rhs.impl == impl; } @@ -118,10 +119,10 @@ event::get_profiling_info() const { backend event::get_backend() const noexcept { return getImplBackend(impl); } -pi_native_handle event::getNative() const { return impl->getNative(); } +ur_native_handle_t event::getNative() const { return impl->getNative(); } -std::vector event::getNativeVector() const { - std::vector ReturnVector = {impl->getNative()}; +std::vector event::getNativeVector() const { + std::vector ReturnVector = {impl->getNative()}; return ReturnVector; } diff --git a/sycl/source/exception.cpp b/sycl/source/exception.cpp index 3ec0bbd89c4ec..06d1d30ffad14 100644 --- a/sycl/source/exception.cpp +++ b/sycl/source/exception.cpp @@ -12,6 +12,7 @@ #include #include +#include namespace sycl { inline namespace _V1 { @@ -33,7 +34,7 @@ exception::exception(int EV, const std::error_category &ECat) exception::exception(std::error_code EC, std::shared_ptr SharedPtrCtx, const char *WhatArg) : MMsg(std::make_shared(WhatArg)), - MPIErr(PI_ERROR_INVALID_VALUE), MContext(SharedPtrCtx), MErrC(EC) { + MErr(UR_RESULT_ERROR_INVALID_VALUE), MContext(SharedPtrCtx), MErrC(EC) { detail::GlobalHandler::instance().TraceEventXPTI(MMsg->c_str()); } @@ -66,17 +67,92 @@ std::error_code make_error_code(sycl::errc Err) noexcept { } namespace detail { -__SYCL_EXPORT const char *stringifyErrorCode(pi_int32 error) { +__SYCL_EXPORT const char *stringifyErrorCode(int32_t error) { switch (error) { -#define _PI_ERRC(NAME, VAL) \ +#define _UR_ERRC(NAME) \ case NAME: \ return #NAME; -#define _PI_ERRC_WITH_MSG(NAME, VAL, MSG) \ + // TODO: bring back old code specific messages? +#define _UR_ERRC_WITH_MSG(NAME, MSG) \ case NAME: \ return MSG; -#include -#undef _PI_ERRC -#undef _PI_ERRC_WITH_MSG + _UR_ERRC(UR_RESULT_SUCCESS) + _UR_ERRC(UR_RESULT_ERROR_INVALID_OPERATION) + _UR_ERRC(UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES) + _UR_ERRC(UR_RESULT_ERROR_INVALID_QUEUE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_VALUE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_CONTEXT) + _UR_ERRC(UR_RESULT_ERROR_INVALID_PLATFORM) + _UR_ERRC(UR_RESULT_ERROR_INVALID_BINARY) + _UR_ERRC(UR_RESULT_ERROR_INVALID_PROGRAM) + _UR_ERRC(UR_RESULT_ERROR_INVALID_SAMPLER) + _UR_ERRC(UR_RESULT_ERROR_INVALID_BUFFER_SIZE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_MEM_OBJECT) + _UR_ERRC(UR_RESULT_ERROR_INVALID_EVENT) + _UR_ERRC(UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST) + _UR_ERRC(UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET) + _UR_ERRC(UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE) + _UR_ERRC(UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE) + _UR_ERRC(UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE) + _UR_ERRC(UR_RESULT_ERROR_DEVICE_NOT_FOUND) + _UR_ERRC(UR_RESULT_ERROR_INVALID_DEVICE) + _UR_ERRC(UR_RESULT_ERROR_DEVICE_LOST) + _UR_ERRC(UR_RESULT_ERROR_DEVICE_REQUIRES_RESET) + _UR_ERRC(UR_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE) + _UR_ERRC(UR_RESULT_ERROR_DEVICE_PARTITION_FAILED) + _UR_ERRC(UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT) + _UR_ERRC(UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_WORK_DIMENSION) + _UR_ERRC(UR_RESULT_ERROR_INVALID_KERNEL_ARGS) + _UR_ERRC(UR_RESULT_ERROR_INVALID_KERNEL) + _UR_ERRC(UR_RESULT_ERROR_INVALID_KERNEL_NAME) + _UR_ERRC(UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX) + _UR_ERRC(UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_IMAGE_SIZE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR) + _UR_ERRC(UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE) + _UR_ERRC(UR_RESULT_ERROR_UNINITIALIZED) + _UR_ERRC(UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) + _UR_ERRC(UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) + _UR_ERRC(UR_RESULT_ERROR_OUT_OF_RESOURCES) + _UR_ERRC(UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE) + _UR_ERRC(UR_RESULT_ERROR_PROGRAM_LINK_FAILURE) + _UR_ERRC(UR_RESULT_ERROR_UNSUPPORTED_VERSION) + _UR_ERRC(UR_RESULT_ERROR_UNSUPPORTED_FEATURE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_ARGUMENT) + _UR_ERRC(UR_RESULT_ERROR_INVALID_NULL_HANDLE) + _UR_ERRC(UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_NULL_POINTER) + _UR_ERRC(UR_RESULT_ERROR_INVALID_SIZE) + _UR_ERRC(UR_RESULT_ERROR_UNSUPPORTED_SIZE) + _UR_ERRC(UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT) + _UR_ERRC(UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT) + _UR_ERRC(UR_RESULT_ERROR_INVALID_ENUMERATION) + _UR_ERRC(UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) + _UR_ERRC(UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT) + _UR_ERRC(UR_RESULT_ERROR_INVALID_NATIVE_BINARY) + _UR_ERRC(UR_RESULT_ERROR_INVALID_GLOBAL_NAME) + _UR_ERRC(UR_RESULT_ERROR_FUNCTION_ADDRESS_NOT_AVAILABLE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION) + _UR_ERRC(UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION) + _UR_ERRC(UR_RESULT_ERROR_PROGRAM_UNLINKED) + _UR_ERRC(UR_RESULT_ERROR_OVERLAPPING_REGIONS) + _UR_ERRC(UR_RESULT_ERROR_INVALID_HOST_PTR) + _UR_ERRC(UR_RESULT_ERROR_INVALID_USM_SIZE) + _UR_ERRC(UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE) + _UR_ERRC(UR_RESULT_ERROR_ADAPTER_SPECIFIC) + _UR_ERRC(UR_RESULT_ERROR_LAYER_NOT_PRESENT) + _UR_ERRC(UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS) + _UR_ERRC(UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE) + _UR_ERRC(UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP) + _UR_ERRC(UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP) + _UR_ERRC(UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP) + _UR_ERRC(UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP) + _UR_ERRC(UR_RESULT_ERROR_UNKNOWN) +#undef _UR_ERRC +#undef _UR_ERRC_WITH_MSG default: return "Unknown error code"; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 99f22eca198d1..b788daf78c76a 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "ur_api.h" #include "sycl/detail/helpers.hpp" #include @@ -24,8 +25,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -46,15 +46,15 @@ bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr) { return DGEntry && !DGEntry->MImageIdentifiers.empty(); } -sycl::detail::pi::PiImageCopyFlags -getPiImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType) { +ur_exp_image_copy_flags_t getUrImageCopyFlags(sycl::usm::alloc SrcPtrType, + sycl::usm::alloc DstPtrType) { if (DstPtrType == sycl::usm::alloc::device) { // Dest is on device if (SrcPtrType == sycl::usm::alloc::device) - return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_DEVICE; + return UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE; if (SrcPtrType == sycl::usm::alloc::host || SrcPtrType == sycl::usm::alloc::unknown) - return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE; + return UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; throw sycl::exception(make_error_code(errc::invalid), "Unknown copy source location"); } @@ -62,7 +62,7 @@ getPiImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType) { DstPtrType == sycl::usm::alloc::unknown) { // Dest is on host if (SrcPtrType == sycl::usm::alloc::device) - return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST; + return UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; if (SrcPtrType == sycl::usm::alloc::host || SrcPtrType == sycl::usm::alloc::unknown) throw sycl::exception(make_error_code(errc::invalid), @@ -259,7 +259,7 @@ event handler::finalize() { // this faster path is used to submit kernel bypassing scheduler and // avoiding CommandGroup, Command objects creation. - std::vector RawEvents; + std::vector RawEvents; detail::EventImplPtr NewEvent; #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -477,8 +477,8 @@ event handler::finalize() { } break; case detail::CGType::CopyImage: CommandGroup.reset(new detail::CGCopyImage( - MSrcPtr, MDstPtr, impl->MSrcImageDesc, impl->MDestImageDesc, - impl->MSrcImageFormat, impl->MDestImageFormat, impl->MImageCopyFlags, + MSrcPtr, MDstPtr, impl->MSrcImageDesc, impl->MDstImageDesc, + impl->MSrcImageFormat, impl->MDstImageFormat, impl->MImageCopyFlags, impl->MSrcOffset, impl->MDestOffset, impl->MCopyExtent, std::move(impl->CGData), MCodeLoc)); break; @@ -493,7 +493,7 @@ event handler::finalize() { std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::None: - if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL)) { + if (detail::ur::trace()) { std::cout << "WARNING: An empty command group is submitted." << std::endl; } @@ -959,7 +959,7 @@ void handler::mem_advise(const void *Ptr, size_t Count, int Advice) { throwIfActionIsCreated(); MDstPtr = const_cast(Ptr); MLength = Count; - impl->MAdvice = static_cast(Advice); + impl->MAdvice = static_cast(Advice); setType(detail::CGType::AdviseUSM); } @@ -1018,47 +1018,46 @@ void handler::ext_oneapi_copy( Desc.verify(); MSrcPtr = const_cast(Src); - MDstPtr = Dest.raw_handle; + MDstPtr = reinterpret_cast(Dest.raw_handle); - sycl::detail::pi::PiMemImageDesc PiDesc = {}; - PiDesc.image_width = Desc.width; - PiDesc.image_height = Desc.height; - PiDesc.image_depth = Desc.depth; - PiDesc.image_array_size = Desc.array_size; + ur_image_desc_t UrDesc = {}; + UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc.width = Desc.width; + UrDesc.height = Desc.height; + UrDesc.depth = Desc.depth; + UrDesc.arraySize = Desc.array_size; if (Desc.array_size > 1) { // Image Array. - PiDesc.image_type = - Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + UrDesc.type = + Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY; // Cubemap. - PiDesc.image_type = + UrDesc.type = Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? PI_MEM_TYPE_IMAGE_CUBEMAP - : PiDesc.image_type; + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; } else { - PiDesc.image_type = - Desc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D + : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); } - sycl::detail::pi::PiMemImageFormat PiFormat; - PiFormat.image_channel_data_type = + ur_image_format_t UrFormat; + UrFormat.channelType = sycl::_V1::detail::convertChannelType(Desc.channel_type); - PiFormat.image_channel_order = sycl::detail::convertChannelOrder( + UrFormat.channelOrder = sycl::detail::convertChannelOrder( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); impl->MSrcOffset = {0, 0, 0}; impl->MDestOffset = {0, 0, 0}; impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - impl->MSrcImageDesc = PiDesc; - impl->MDestImageDesc = PiDesc; - impl->MSrcImageFormat = PiFormat; - impl->MDestImageFormat = PiFormat; - impl->MImageCopyFlags = - sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE; + impl->MSrcImageDesc = UrDesc; + impl->MDstImageDesc = UrDesc; + impl->MSrcImageFormat = UrFormat; + impl->MDstImageFormat = UrFormat; + impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; setType(detail::CGType::CopyImage); } @@ -1073,50 +1072,50 @@ void handler::ext_oneapi_copy( DestImgDesc.verify(); MSrcPtr = const_cast(Src); - MDstPtr = Dest.raw_handle; + MDstPtr = reinterpret_cast(Dest.raw_handle); - sycl::detail::pi::PiMemImageDesc PiDesc = {}; - PiDesc.image_width = DestImgDesc.width; - PiDesc.image_height = DestImgDesc.height; - PiDesc.image_depth = DestImgDesc.depth; - PiDesc.image_array_size = DestImgDesc.array_size; + ur_image_desc_t UrDesc = {}; + UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc.width = DestImgDesc.width; + UrDesc.height = DestImgDesc.height; + UrDesc.depth = DestImgDesc.depth; + UrDesc.arraySize = DestImgDesc.array_size; if (DestImgDesc.array_size > 1) { // Image Array. - PiDesc.image_type = DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY - : PI_MEM_TYPE_IMAGE1D_ARRAY; + UrDesc.type = DestImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY + : UR_MEM_TYPE_IMAGE1D_ARRAY; // Cubemap. - PiDesc.image_type = + UrDesc.type = DestImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? PI_MEM_TYPE_IMAGE_CUBEMAP - : PiDesc.image_type; + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; } else { - PiDesc.image_type = DestImgDesc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + UrDesc.type = DestImgDesc.depth > 0 + ? UR_MEM_TYPE_IMAGE3D + : (DestImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); } - sycl::detail::pi::PiMemImageFormat PiFormat; - PiFormat.image_channel_data_type = + ur_image_format_t UrFormat; + UrFormat.channelType = sycl::_V1::detail::convertChannelType(DestImgDesc.channel_type); - PiFormat.image_channel_order = sycl::detail::convertChannelOrder( + UrFormat.channelOrder = sycl::detail::convertChannelOrder( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(DestImgDesc.num_channels)); impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - impl->MSrcImageDesc = PiDesc; - impl->MSrcImageDesc.image_width = SrcExtent[0]; - impl->MSrcImageDesc.image_height = SrcExtent[1]; - impl->MSrcImageDesc.image_depth = SrcExtent[2]; - impl->MDestImageDesc = PiDesc; - impl->MSrcImageFormat = PiFormat; - impl->MDestImageFormat = PiFormat; - impl->MImageCopyFlags = - sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE; + impl->MSrcImageDesc = UrDesc; + impl->MSrcImageDesc.width = SrcExtent[0]; + impl->MSrcImageDesc.height = SrcExtent[1]; + impl->MSrcImageDesc.depth = SrcExtent[2]; + impl->MDstImageDesc = UrDesc; + impl->MSrcImageFormat = UrFormat; + impl->MDstImageFormat = UrFormat; + impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; setType(detail::CGType::CopyImage); } @@ -1128,48 +1127,47 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); Desc.verify(); - MSrcPtr = Src.raw_handle; + MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = Dest; - sycl::detail::pi::PiMemImageDesc PiDesc = {}; - PiDesc.image_width = Desc.width; - PiDesc.image_height = Desc.height; - PiDesc.image_depth = Desc.depth; - PiDesc.image_array_size = Desc.array_size; + ur_image_desc_t UrDesc = {}; + UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc.width = Desc.width; + UrDesc.height = Desc.height; + UrDesc.depth = Desc.depth; + UrDesc.arraySize = Desc.array_size; if (Desc.array_size > 1) { // Image Array. - PiDesc.image_type = - Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + UrDesc.type = + Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY; // Cubemap. - PiDesc.image_type = + UrDesc.type = Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? PI_MEM_TYPE_IMAGE_CUBEMAP - : PiDesc.image_type; + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; } else { - PiDesc.image_type = - Desc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D + : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); } - sycl::detail::pi::PiMemImageFormat PiFormat; - PiFormat.image_channel_data_type = + ur_image_format_t UrFormat; + UrFormat.channelType = sycl::_V1::detail::convertChannelType(Desc.channel_type); - PiFormat.image_channel_order = sycl::detail::convertChannelOrder( + UrFormat.channelOrder = sycl::detail::convertChannelOrder( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); impl->MSrcOffset = {0, 0, 0}; impl->MDestOffset = {0, 0, 0}; impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - impl->MSrcImageDesc = PiDesc; - impl->MDestImageDesc = PiDesc; - impl->MSrcImageFormat = PiFormat; - impl->MDestImageFormat = PiFormat; - impl->MImageCopyFlags = - sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST; + impl->MSrcImageDesc = UrDesc; + impl->MDstImageDesc = UrDesc; + impl->MSrcImageFormat = UrFormat; + impl->MDstImageFormat = UrFormat; + impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; setType(detail::CGType::CopyImage); } @@ -1182,47 +1180,47 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); ImageDesc.verify(); - MSrcPtr = Src.raw_handle; - MDstPtr = Dest.raw_handle; + MSrcPtr = reinterpret_cast(Src.raw_handle); + MDstPtr = reinterpret_cast(Dest.raw_handle); - sycl::detail::pi::PiMemImageDesc PiDesc = {}; - PiDesc.image_width = ImageDesc.width; - PiDesc.image_height = ImageDesc.height; - PiDesc.image_depth = ImageDesc.depth; - PiDesc.image_array_size = ImageDesc.array_size; + ur_image_desc_t UrDesc = {}; + UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc.width = ImageDesc.width; + UrDesc.height = ImageDesc.height; + UrDesc.depth = ImageDesc.depth; + UrDesc.arraySize = ImageDesc.array_size; if (ImageDesc.array_size > 1) { // Image Array. - PiDesc.image_type = ImageDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY - : PI_MEM_TYPE_IMAGE1D_ARRAY; + UrDesc.type = ImageDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY + : UR_MEM_TYPE_IMAGE1D_ARRAY; // Cubemap. - PiDesc.image_type = + UrDesc.type = ImageDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? PI_MEM_TYPE_IMAGE_CUBEMAP - : PiDesc.image_type; + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; } else { - PiDesc.image_type = ImageDesc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (ImageDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + UrDesc.type = ImageDesc.depth > 0 + ? UR_MEM_TYPE_IMAGE3D + : (ImageDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); } - sycl::detail::pi::PiMemImageFormat PiFormat; - PiFormat.image_channel_data_type = + ur_image_format_t UrFormat; + UrFormat.channelType = sycl::_V1::detail::convertChannelType(ImageDesc.channel_type); - PiFormat.image_channel_order = sycl::detail::convertChannelOrder( + UrFormat.channelOrder = sycl::detail::convertChannelOrder( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(ImageDesc.num_channels)); impl->MSrcOffset = {0, 0, 0}; impl->MDestOffset = {0, 0, 0}; impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; - impl->MSrcImageDesc = PiDesc; - impl->MDestImageDesc = PiDesc; - impl->MSrcImageFormat = PiFormat; - impl->MDestImageFormat = PiFormat; - impl->MImageCopyFlags = - sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_DEVICE; + impl->MSrcImageDesc = UrDesc; + impl->MDstImageDesc = UrDesc; + impl->MSrcImageFormat = UrFormat; + impl->MDstImageFormat = UrFormat; + impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE; setType(detail::CGType::CopyImage); } @@ -1237,51 +1235,51 @@ void handler::ext_oneapi_copy( sycl_ext_oneapi_bindless_images>(); SrcImgDesc.verify(); - MSrcPtr = Src.raw_handle; + MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = Dest; - sycl::detail::pi::PiMemImageDesc PiDesc = {}; - PiDesc.image_width = SrcImgDesc.width; - PiDesc.image_height = SrcImgDesc.height; - PiDesc.image_depth = SrcImgDesc.depth; - PiDesc.image_array_size = SrcImgDesc.array_size; + ur_image_desc_t UrDesc = {}; + UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc.width = SrcImgDesc.width; + UrDesc.height = SrcImgDesc.height; + UrDesc.depth = SrcImgDesc.depth; + UrDesc.arraySize = SrcImgDesc.array_size; if (SrcImgDesc.array_size > 1) { // Image Array. - PiDesc.image_type = SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY - : PI_MEM_TYPE_IMAGE1D_ARRAY; + UrDesc.type = SrcImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY + : UR_MEM_TYPE_IMAGE1D_ARRAY; // Cubemap. - PiDesc.image_type = + UrDesc.type = SrcImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? PI_MEM_TYPE_IMAGE_CUBEMAP - : PiDesc.image_type; + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; } else { - PiDesc.image_type = SrcImgDesc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + UrDesc.type = SrcImgDesc.depth > 0 + ? UR_MEM_TYPE_IMAGE3D + : (SrcImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); } - sycl::detail::pi::PiMemImageFormat PiFormat; - PiFormat.image_channel_data_type = + ur_image_format_t UrFormat; + UrFormat.channelType = sycl::_V1::detail::convertChannelType(SrcImgDesc.channel_type); - PiFormat.image_channel_order = sycl::detail::convertChannelOrder( + UrFormat.channelOrder = sycl::detail::convertChannelOrder( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(SrcImgDesc.num_channels)); impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - impl->MSrcImageDesc = PiDesc; - impl->MDestImageDesc = PiDesc; - impl->MDestImageDesc.image_width = DestExtent[0]; - impl->MDestImageDesc.image_height = DestExtent[1]; - impl->MDestImageDesc.image_depth = DestExtent[2]; - impl->MSrcImageFormat = PiFormat; - impl->MDestImageFormat = PiFormat; - impl->MImageCopyFlags = - sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST; + impl->MSrcImageDesc = UrDesc; + impl->MDstImageDesc = UrDesc; + impl->MDstImageDesc.width = DestExtent[0]; + impl->MDstImageDesc.height = DestExtent[1]; + impl->MDstImageDesc.depth = DestExtent[2]; + impl->MSrcImageFormat = UrFormat; + impl->MDstImageFormat = UrFormat; + impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; setType(detail::CGType::CopyImage); } @@ -1296,46 +1294,46 @@ void handler::ext_oneapi_copy( MSrcPtr = const_cast(Src); MDstPtr = Dest; - sycl::detail::pi::PiMemImageDesc PiDesc = {}; - PiDesc.image_width = Desc.width; - PiDesc.image_height = Desc.height; - PiDesc.image_depth = Desc.depth; - PiDesc.image_array_size = Desc.array_size; + ur_image_desc_t UrDesc = {}; + UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc.width = Desc.width; + UrDesc.height = Desc.height; + UrDesc.depth = Desc.depth; + UrDesc.arraySize = Desc.array_size; if (Desc.array_size > 1) { // Image Array. - PiDesc.image_type = - Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY : PI_MEM_TYPE_IMAGE1D_ARRAY; + UrDesc.type = + Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY; // Cubemap. - PiDesc.image_type = + UrDesc.type = Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? PI_MEM_TYPE_IMAGE_CUBEMAP - : PiDesc.image_type; + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; } else { - PiDesc.image_type = - Desc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D); + UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D + : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); } - sycl::detail::pi::PiMemImageFormat PiFormat; - PiFormat.image_channel_data_type = + ur_image_format_t UrFormat; + UrFormat.channelType = sycl::_V1::detail::convertChannelType(Desc.channel_type); - PiFormat.image_channel_order = sycl::detail::convertChannelOrder( + UrFormat.channelOrder = sycl::detail::convertChannelOrder( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); impl->MSrcOffset = {0, 0, 0}; impl->MDestOffset = {0, 0, 0}; impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - impl->MSrcImageDesc = PiDesc; - impl->MDestImageDesc = PiDesc; - impl->MSrcImageFormat = PiFormat; - impl->MDestImageFormat = PiFormat; - impl->MSrcImageDesc.image_row_pitch = Pitch; - impl->MDestImageDesc.image_row_pitch = Pitch; - impl->MImageCopyFlags = detail::getPiImageCopyFlags( + impl->MSrcImageDesc = UrDesc; + impl->MDstImageDesc = UrDesc; + impl->MSrcImageFormat = UrFormat; + impl->MDstImageFormat = UrFormat; + impl->MSrcImageDesc.rowPitch = Pitch; + impl->MDstImageDesc.rowPitch = Pitch; + impl->MImageCopyFlags = detail::getUrImageCopyFlags( get_pointer_type(Src, MQueue->get_context()), get_pointer_type(Dest, MQueue->get_context())); setType(detail::CGType::CopyImage); @@ -1355,66 +1353,64 @@ void handler::ext_oneapi_copy( MSrcPtr = const_cast(Src); MDstPtr = Dest; - sycl::detail::pi::PiMemImageDesc PiDesc = {}; - PiDesc.image_width = DeviceImgDesc.width; - PiDesc.image_height = DeviceImgDesc.height; - PiDesc.image_depth = DeviceImgDesc.depth; - PiDesc.image_array_size = DeviceImgDesc.array_size; + ur_image_desc_t UrDesc = {}; + UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc.width = DeviceImgDesc.width; + UrDesc.height = DeviceImgDesc.height; + UrDesc.depth = DeviceImgDesc.depth; + UrDesc.arraySize = DeviceImgDesc.array_size; if (DeviceImgDesc.array_size > 1) { // Image Array. - PiDesc.image_type = DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY - : PI_MEM_TYPE_IMAGE1D_ARRAY; + UrDesc.type = DeviceImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY + : UR_MEM_TYPE_IMAGE1D_ARRAY; // Cubemap. - PiDesc.image_type = - DeviceImgDesc.type == - sycl::ext::oneapi::experimental::image_type::cubemap - ? PI_MEM_TYPE_IMAGE_CUBEMAP - : PiDesc.image_type; + UrDesc.type = DeviceImgDesc.type == + sycl::ext::oneapi::experimental::image_type::cubemap + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; } else { - PiDesc.image_type = DeviceImgDesc.depth > 0 - ? PI_MEM_TYPE_IMAGE3D - : (DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D - : PI_MEM_TYPE_IMAGE1D); + UrDesc.type = DeviceImgDesc.depth > 0 + ? UR_MEM_TYPE_IMAGE3D + : (DeviceImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); } - sycl::detail::pi::PiMemImageFormat PiFormat; - PiFormat.image_channel_data_type = + ur_image_format_t UrFormat; + UrFormat.channelType = sycl::_V1::detail::convertChannelType(DeviceImgDesc.channel_type); - PiFormat.image_channel_order = sycl::detail::convertChannelOrder( + UrFormat.channelOrder = sycl::detail::convertChannelOrder( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(DeviceImgDesc.num_channels)); impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - impl->MSrcImageFormat = PiFormat; - impl->MDestImageFormat = PiFormat; - impl->MImageCopyFlags = detail::getPiImageCopyFlags( + impl->MSrcImageFormat = UrFormat; + impl->MDstImageFormat = UrFormat; + impl->MImageCopyFlags = detail::getUrImageCopyFlags( get_pointer_type(Src, MQueue->get_context()), get_pointer_type(Dest, MQueue->get_context())); - impl->MSrcImageDesc = PiDesc; - impl->MDestImageDesc = PiDesc; + impl->MSrcImageDesc = UrDesc; + impl->MDstImageDesc = UrDesc; // Fill the descriptor row pitch and host extent based on the type of copy. - if (impl->MImageCopyFlags == - sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE) { - impl->MDestImageDesc.image_row_pitch = DeviceRowPitch; - impl->MSrcImageDesc.image_row_pitch = 0; - impl->MSrcImageDesc.image_width = HostExtent[0]; - impl->MSrcImageDesc.image_height = HostExtent[1]; - impl->MSrcImageDesc.image_depth = HostExtent[2]; - } else if (impl->MImageCopyFlags == - sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST) { - impl->MSrcImageDesc.image_row_pitch = DeviceRowPitch; - impl->MDestImageDesc.image_row_pitch = 0; - impl->MDestImageDesc.image_width = HostExtent[0]; - impl->MDestImageDesc.image_height = HostExtent[1]; - impl->MDestImageDesc.image_depth = HostExtent[2]; + if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { + impl->MDstImageDesc.rowPitch = DeviceRowPitch; + impl->MSrcImageDesc.rowPitch = 0; + impl->MSrcImageDesc.width = HostExtent[0]; + impl->MSrcImageDesc.height = HostExtent[1]; + impl->MSrcImageDesc.depth = HostExtent[2]; + } else if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { + impl->MSrcImageDesc.rowPitch = DeviceRowPitch; + impl->MDstImageDesc.rowPitch = 0; + impl->MDstImageDesc.width = HostExtent[0]; + impl->MDstImageDesc.height = HostExtent[1]; + impl->MDstImageDesc.depth = HostExtent[2]; } else { - impl->MDestImageDesc.image_row_pitch = DeviceRowPitch; - impl->MSrcImageDesc.image_row_pitch = DeviceRowPitch; + impl->MDstImageDesc.rowPitch = DeviceRowPitch; + impl->MSrcImageDesc.rowPitch = DeviceRowPitch; } setType(detail::CGType::CopyImage); @@ -1437,7 +1433,7 @@ void handler::ext_oneapi_wait_external_semaphore( "type of semaphore used needs a user passed wait value."); } impl->MInteropSemaphoreHandle = - (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; + (ur_exp_interop_semaphore_handle_t)SemaphoreHandle.raw_handle; impl->MWaitValue = {}; setType(detail::CGType::SemaphoreWait); } @@ -1457,7 +1453,7 @@ void handler::ext_oneapi_wait_external_semaphore( "type of semaphore does not support user passed wait values."); } impl->MInteropSemaphoreHandle = - (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; + (ur_exp_interop_semaphore_handle_t)SemaphoreHandle.raw_handle; impl->MWaitValue = WaitValue; setType(detail::CGType::SemaphoreWait); } @@ -1479,7 +1475,7 @@ void handler::ext_oneapi_signal_external_semaphore( "type of semaphore used needs a user passed signal value."); } impl->MInteropSemaphoreHandle = - (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; + (ur_exp_interop_semaphore_handle_t)SemaphoreHandle.raw_handle; impl->MSignalValue = {}; setType(detail::CGType::SemaphoreSignal); } @@ -1499,7 +1495,7 @@ void handler::ext_oneapi_signal_external_semaphore( "type of semaphore does not support user passed signal values."); } impl->MInteropSemaphoreHandle = - (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; + (ur_exp_interop_semaphore_handle_t)SemaphoreHandle.raw_handle; impl->MSignalValue = SignalValue; setType(detail::CGType::SemaphoreSignal); } @@ -1572,12 +1568,11 @@ void handler::depends_on(const std::vector &Events) { static bool checkContextSupports(const std::shared_ptr &ContextImpl, - sycl::detail::pi::PiContextInfo InfoQuery) { + ur_context_info_t InfoQuery) { auto &Plugin = ContextImpl->getPlugin(); - pi_bool SupportsOp = false; - Plugin->call(ContextImpl->getHandleRef(), - InfoQuery, sizeof(pi_bool), - &SupportsOp, nullptr); + ur_bool_t SupportsOp = false; + Plugin->call(urContextGetInfo, ContextImpl->getHandleRef(), InfoQuery, + sizeof(ur_bool_t), &SupportsOp, nullptr); return SupportsOp; } @@ -1633,7 +1628,7 @@ bool handler::supportsUSMMemcpy2D() { {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT)) + UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT)) return false; } return true; @@ -1642,9 +1637,8 @@ bool handler::supportsUSMMemcpy2D() { bool handler::supportsUSMFill2D() { for (const std::shared_ptr &QueueImpl : {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { - if (QueueImpl && - !checkContextSupports(QueueImpl->getContextImplPtr(), - PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT)) + if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), + UR_CONTEXT_INFO_USM_FILL2D_SUPPORT)) return false; } return true; @@ -1653,9 +1647,8 @@ bool handler::supportsUSMFill2D() { bool handler::supportsUSMMemset2D() { for (const std::shared_ptr &QueueImpl : {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { - if (QueueImpl && - !checkContextSupports(QueueImpl->getContextImplPtr(), - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT)) + if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), + UR_CONTEXT_INFO_USM_FILL2D_SUPPORT)) return false; } return true; @@ -1766,15 +1759,15 @@ handler::getContextImplPtr() const { void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { switch (Config) { - case handler::StableKernelCacheConfig::Default: - impl->MKernelCacheConfig = PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT; - break; - case handler::StableKernelCacheConfig::LargeSLM: - impl->MKernelCacheConfig = PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_SLM; - break; - case handler::StableKernelCacheConfig::LargeData: - impl->MKernelCacheConfig = PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_DATA; - break; + case handler::StableKernelCacheConfig::Default: + impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + break; + case handler::StableKernelCacheConfig::LargeSLM: + impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + break; + case handler::StableKernelCacheConfig::LargeData: + impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + break; } } @@ -1812,14 +1805,14 @@ void handler::setUserFacingNodeType(ext::oneapi::experimental::node_type Type) { std::optional> handler::getMaxWorkGroups() { auto Dev = detail::getSyclObjImpl(detail::getDeviceFromHandler(*this)); - std::array PiResult = {}; - auto Ret = Dev->getPlugin()->call_nocheck( - Dev->getHandleRef(), - PiInfoCode< + std::array UrResult = {}; + auto Ret = Dev->getPlugin()->call_nocheck( + urDeviceGetInfo, Dev->getHandleRef(), + UrInfoCode< ext::oneapi::experimental::info::device::max_work_groups<3>>::value, - sizeof(PiResult), &PiResult, nullptr); - if (Ret == PI_SUCCESS) { - return PiResult; + sizeof(UrResult), &UrResult, nullptr); + if (Ret == UR_RESULT_SUCCESS) { + return UrResult; } return {}; } diff --git a/sycl/source/image.cpp b/sycl/source/image.cpp index fe8995e571d9c..511d816e5d31f 100644 --- a/sycl/source/image.cpp +++ b/sycl/source/image.cpp @@ -130,8 +130,8 @@ image_plain::image_plain(cl_mem ClMemObject, const context &SyclContext, } #endif -image_plain::image_plain(pi_native_handle MemObject, const context &SyclContext, - event AvailableEvent, +image_plain::image_plain(ur_native_handle_t MemObject, + const context &SyclContext, event AvailableEvent, std::unique_ptr Allocator, uint8_t Dimensions, image_channel_order Order, image_channel_type Type, bool OwnNativeHandle, diff --git a/sycl/source/interop_handle.cpp b/sycl/source/interop_handle.cpp index 6aa6a0a05d5e8..0d70f12de3d2b 100644 --- a/sycl/source/interop_handle.cpp +++ b/sycl/source/interop_handle.cpp @@ -23,7 +23,8 @@ backend interop_handle::get_backend() const noexcept { return detail::getImplBackend(MQueue); } -pi_native_handle interop_handle::getNativeMem(detail::Requirement *Req) const { +ur_native_handle_t +interop_handle::getNativeMem(detail::Requirement *Req) const { auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), [=](ReqToMem Elem) { return (Elem.first == Req); }); @@ -33,21 +34,21 @@ pi_native_handle interop_handle::getNativeMem(detail::Requirement *Req) const { } auto Plugin = MQueue->getPlugin(); - pi_native_handle Handle; - Plugin->call( - Iter->second, MDevice->getHandleRef(), &Handle); + ur_native_handle_t Handle; + Plugin->call(urMemGetNativeHandle, Iter->second, MDevice->getHandleRef(), + &Handle); return Handle; } -pi_native_handle interop_handle::getNativeDevice() const { +ur_native_handle_t interop_handle::getNativeDevice() const { return MDevice->getNative(); } -pi_native_handle interop_handle::getNativeContext() const { +ur_native_handle_t interop_handle::getNativeContext() const { return MContext->getNative(); } -pi_native_handle +ur_native_handle_t interop_handle::getNativeQueue(int32_t &NativeHandleDesc) const { return MQueue->getNative(NativeHandleDesc); } diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index 4461d26281150..83b6f2b346450 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -10,21 +10,27 @@ #include #include #include -#include +#include #include namespace sycl { inline namespace _V1 { -kernel::kernel(cl_kernel ClKernel, const context &SyclContext) - : impl(std::make_shared( - detail::pi::cast(ClKernel), - detail::getSyclObjImpl(SyclContext), nullptr, nullptr)) { +// TODO(pi2ur): Don't cast straight from cl_kernel below +kernel::kernel(cl_kernel ClKernel, const context &SyclContext) { + auto Plugin = sycl::detail::ur::getPlugin(); + ur_kernel_handle_t hKernel = nullptr; + ur_native_handle_t nativeHandle = + reinterpret_cast(ClKernel); + Plugin->call(urKernelCreateWithNativeHandle, nativeHandle, + detail::getSyclObjImpl(SyclContext)->getHandleRef(), nullptr, + nullptr, &hKernel); + impl = std::make_shared( + hKernel, detail::getSyclObjImpl(SyclContext), nullptr, nullptr); // This is a special interop constructor for OpenCL, so the kernel must be // retained. if (get_backend() == backend::opencl) { - impl->getPlugin()->call( - detail::pi::cast(ClKernel)); + impl->getPlugin()->call(urKernelRetain, hKernel); } } @@ -113,9 +119,9 @@ template __SYCL_EXPORT typename ext::oneapi::experimental::info:: kernel::kernel(std::shared_ptr Impl) : impl(Impl) {} -pi_native_handle kernel::getNative() const { return impl->getNative(); } +ur_native_handle_t kernel::getNative() const { return impl->getNative(); } -pi_native_handle kernel::getNativeImpl() const { return impl->getNative(); } +ur_native_handle_t kernel::getNativeImpl() const { return impl->getNative(); } } // namespace _V1 } // namespace sycl diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index e753348690818..c666f437b30dd 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -41,7 +41,7 @@ bool device_image_plain::has_kernel(const kernel_id &KernelID, return impl->has_kernel(KernelID, Dev); } -pi_native_handle device_image_plain::getNative() const { +ur_native_handle_t device_image_plain::getNative() const { return impl->getNative(); } diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index 3d4089c07f9e1..4bc02c9b983a4 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -22,9 +23,12 @@ inline namespace _V1 { platform::platform() : platform(default_selector_v) {} platform::platform(cl_platform_id PlatformId) { - impl = detail::platform_impl::getOrMakePlatformImpl( - detail::pi::cast(PlatformId), - sycl::detail::pi::getPlugin()); + auto Plugin = sycl::detail::ur::getPlugin(); + ur_platform_handle_t UrPlatform = nullptr; + Plugin->call(urPlatformCreateWithNativeHandle, + detail::ur::cast(PlatformId), + Plugin->getUrAdapter(), /* pProperties = */ nullptr, &UrPlatform); + impl = detail::platform_impl::getOrMakePlatformImpl(UrPlatform, Plugin); } // protected constructor for internal use @@ -57,7 +61,7 @@ platform::get_info_impl() const { return detail::convert_to_abi_neutral(impl->template get_info()); } -pi_native_handle platform::getNative() const { return impl->getNative(); } +ur_native_handle_t platform::getNative() const { return impl->getNative(); } bool platform::has(aspect Aspect) const { return impl->has(Aspect); } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 2c8e0fac3a62c..4837b087eb43c 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -65,7 +65,8 @@ queue::queue(cl_command_queue clQueue, const context &SyclContext, const async_handler &AsyncHandler) { const property_list PropList{}; impl = std::make_shared( - reinterpret_cast(clQueue), + // TODO(pi2ur): Don't cast straight from cl_command_queue + reinterpret_cast(clQueue), detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); } @@ -143,14 +144,15 @@ event queue::memcpy(void *Dest, const void *Src, size_t Count, event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {}, + return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice), {}, /*CallerNeedsEvent=*/true); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {DepEvent}, + return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice), + {DepEvent}, /*CallerNeedsEvent=*/true); } @@ -158,7 +160,8 @@ event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents, + return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice), + DepEvents, /*CallerNeedsEvent=*/true); } @@ -294,7 +297,7 @@ bool queue::ext_oneapi_empty() const { return impl->ext_oneapi_empty(); } void queue::ext_oneapi_prod() { impl->flush(); } -pi_native_handle queue::getNative(int32_t &NativeHandleDesc) const { +ur_native_handle_t queue::getNative(int32_t &NativeHandleDesc) const { return impl->getNative(NativeHandleDesc); } diff --git a/sycl/source/sycl.manifest b/sycl/source/sycl.manifest index f9d77565f0b42..7253c62963a04 100644 --- a/sycl/source/sycl.manifest +++ b/sycl/source/sycl.manifest @@ -1,6 +1,6 @@ - + @@ -8,4 +8,4 @@ - \ No newline at end of file + diff --git a/sycl/source/sycld.manifest b/sycl/source/sycld.manifest index c0ae8a26c2cf6..f132197a196d3 100644 --- a/sycl/source/sycld.manifest +++ b/sycl/source/sycld.manifest @@ -1,6 +1,6 @@ - + @@ -8,4 +8,4 @@ - \ No newline at end of file + diff --git a/sycl/source/virtual_mem.cpp b/sycl/source/virtual_mem.cpp index 8cdc5ffba0223..e9fe20b51f00a 100644 --- a/sycl/source/virtual_mem.cpp +++ b/sycl/source/virtual_mem.cpp @@ -30,12 +30,12 @@ __SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, sycl::make_error_code(sycl::errc::feature_not_supported), "Device does not support aspect::ext_oneapi_virtual_mem."); - pi_virtual_mem_granularity_info GranularityQuery = [=]() { + ur_virtual_mem_granularity_info_t GranularityQuery = [=]() { switch (Mode) { case granularity_mode::minimum: - return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + return UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; case granularity_mode::recommended: - return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + return UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; } throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), "Unrecognized granularity mode."); @@ -48,16 +48,16 @@ __SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); #ifndef NDEBUG size_t InfoOutputSize; - Plugin->call( - ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, - 0, nullptr, &InfoOutputSize); + Plugin->call(urVirtualMemGranularityGetInfo, ContextImpl->getHandleRef(), + DeviceImpl->getHandleRef(), GranularityQuery, 0, nullptr, + &InfoOutputSize); assert(InfoOutputSize == sizeof(size_t) && "Unexpected output size of granularity info query."); #endif // NDEBUG size_t Granularity = 0; - Plugin->call( - ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, - sizeof(size_t), &Granularity, nullptr); + Plugin->call(urVirtualMemGranularityGetInfo, ContextImpl->getHandleRef(), + DeviceImpl->getHandleRef(), GranularityQuery, sizeof(size_t), + &Granularity, nullptr); return Granularity; } @@ -115,9 +115,8 @@ __SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, sycl::detail::getSyclObjImpl(SyclContext); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); void *OutPtr = nullptr; - Plugin->call( - ContextImpl->getHandleRef(), reinterpret_cast(Start), NumBytes, - &OutPtr); + Plugin->call(urVirtualMemReserve, ContextImpl->getHandleRef(), + reinterpret_cast(Start), NumBytes, &OutPtr); return reinterpret_cast(OutPtr); } @@ -126,20 +125,19 @@ __SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, std::shared_ptr ContextImpl = sycl::detail::getSyclObjImpl(SyclContext); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); - Plugin->call( - ContextImpl->getHandleRef(), reinterpret_cast(Ptr), NumBytes); + Plugin->call(urVirtualMemFree, ContextImpl->getHandleRef(), + reinterpret_cast(Ptr), NumBytes); } __SYCL_EXPORT void set_access_mode(const void *Ptr, size_t NumBytes, address_access_mode Mode, const context &SyclContext) { - sycl::detail::pi::PiVirtualAccessFlags AccessFlags = - sycl::detail::AccessModeToVirtualAccessFlags(Mode); + auto AccessFlags = sycl::detail::AccessModeToVirtualAccessFlags(Mode); std::shared_ptr ContextImpl = sycl::detail::getSyclObjImpl(SyclContext); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); - Plugin->call( - ContextImpl->getHandleRef(), Ptr, NumBytes, AccessFlags); + Plugin->call(urVirtualMemSetAccess, ContextImpl->getHandleRef(), Ptr, + NumBytes, AccessFlags); } __SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, @@ -149,22 +147,20 @@ __SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, sycl::detail::getSyclObjImpl(SyclContext); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); #ifndef NDEBUG - size_t InfoOutputSize; - Plugin->call( - ContextImpl->getHandleRef(), Ptr, NumBytes, - PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, 0, nullptr, &InfoOutputSize); - assert(InfoOutputSize == sizeof(sycl::detail::pi::PiVirtualAccessFlags) && + size_t InfoOutputSize = 0; + Plugin->call(urVirtualMemGetInfo, ContextImpl->getHandleRef(), Ptr, NumBytes, + UR_VIRTUAL_MEM_INFO_ACCESS_MODE, 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(ur_virtual_mem_access_flags_t) && "Unexpected output size of access mode info query."); #endif // NDEBUG - sycl::detail::pi::PiVirtualAccessFlags AccessFlags; - Plugin->call( - ContextImpl->getHandleRef(), Ptr, NumBytes, - PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, - sizeof(sycl::detail::pi::PiVirtualAccessFlags), &AccessFlags, nullptr); + ur_virtual_mem_access_flags_t AccessFlags; + Plugin->call(urVirtualMemGetInfo, ContextImpl->getHandleRef(), Ptr, NumBytes, + UR_VIRTUAL_MEM_INFO_ACCESS_MODE, + sizeof(ur_virtual_mem_access_flags_t), &AccessFlags, nullptr); - if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_RW) + if (AccessFlags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) return address_access_mode::read_write; - if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + if (AccessFlags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) return address_access_mode::read; return address_access_mode::none; } @@ -174,8 +170,7 @@ __SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, std::shared_ptr ContextImpl = sycl::detail::getSyclObjImpl(SyclContext); const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); - Plugin->call( - ContextImpl->getHandleRef(), Ptr, NumBytes); + Plugin->call(urVirtualMemUnmap, ContextImpl->getHandleRef(), Ptr, NumBytes); } } // Namespace ext::oneapi::experimental diff --git a/sycl/test-e2e/AbiNeutral/submit-kernel.cpp b/sycl/test-e2e/AbiNeutral/submit-kernel.cpp index 3acee07f4e6d3..3577774ef3961 100644 --- a/sycl/test-e2e/AbiNeutral/submit-kernel.cpp +++ b/sycl/test-e2e/AbiNeutral/submit-kernel.cpp @@ -21,7 +21,7 @@ int main() { event.wait_and_throw(); } catch (const sycl::exception &ep) { const std::string_view err_msg(ep.what()); - if (err_msg.find("PI_ERROR_OUT_OF_RESOURCES") != std::string::npos) { + if (err_msg.find("UR_RESULT_ERROR_OUT_OF_RESOURCES") != std::string::npos) { std::cout << "Allocation is out of device memory on the current platform." << std::endl; } else { diff --git a/sycl/test-e2e/AddressSanitizer/common/config-red-zone-size.cpp b/sycl/test-e2e/AddressSanitizer/common/config-red-zone-size.cpp index 5e673101c19e0..757f704279e04 100644 --- a/sycl/test-e2e/AddressSanitizer/common/config-red-zone-size.cpp +++ b/sycl/test-e2e/AddressSanitizer/common/config-red-zone-size.cpp @@ -1,9 +1,12 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -DUNSAFE -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 UR_LAYER_ASAN_OPTIONS=redzone:64 %{run} not %t 2>&1 | FileCheck %s +// RUN: env UR_LAYER_ASAN_OPTIONS=redzone:64 %{run} not %t 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -DSAFE -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 UR_LOG_SANITIZER=level:debug UR_LAYER_ASAN_OPTIONS=redzone:8 %{run} %t 2>&1 | FileCheck --check-prefixes CHECK-MIN %s -// RUN: env SYCL_PREFER_UR=1 UR_LOG_SANITIZER=level:debug UR_LAYER_ASAN_OPTIONS=max_redzone:4096 %{run} %t 2>&1 | FileCheck --check-prefixes CHECK-MAX %s +// RUN: env UR_LOG_SANITIZER=level:debug UR_LAYER_ASAN_OPTIONS=redzone:8 %{run} %t 2>&1 | FileCheck --check-prefixes CHECK-MIN %s +// RUN: env UR_LOG_SANITIZER=level:debug UR_LAYER_ASAN_OPTIONS=max_redzone:4096 %{run} %t 2>&1 | FileCheck --check-prefixes CHECK-MAX %s + +// TODO: Reenable, see https://github.com/intel/llvm/issues/14658 +// UNSUPPORTED: windows, linux #include diff --git a/sycl/test-e2e/AddressSanitizer/common/demangle-kernel-name.cpp b/sycl/test-e2e/AddressSanitizer/common/demangle-kernel-name.cpp index 54ba6b74a5985..7c8cb8d3569f5 100644 --- a/sycl/test-e2e/AddressSanitizer/common/demangle-kernel-name.cpp +++ b/sycl/test-e2e/AddressSanitizer/common/demangle-kernel-name.cpp @@ -1,6 +1,6 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --input-file %t.txt %s +// RUN: %{run} not %t &> %t.txt ; FileCheck --input-file %t.txt %s #include #include diff --git a/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp b/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp index a44c15d594a95..eff8b9698bec0 100644 --- a/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp +++ b/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp @@ -1,7 +1,7 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 UR_LAYER_ASAN_OPTIONS=debug:1 %{run} %t 2>&1 | FileCheck --check-prefixes CHECK-DEBUG %s -// RUN: env SYCL_PREFER_UR=1 UR_LAYER_ASAN_OPTIONS=debug:0 %{run} %t 2>&1 | FileCheck %s +// RUN: env UR_LAYER_ASAN_OPTIONS=debug:1 %{run} %t 2>&1 | FileCheck --check-prefixes CHECK-DEBUG %s +// RUN: env UR_LAYER_ASAN_OPTIONS=debug:0 %{run} %t 2>&1 | FileCheck %s #include /// This test is used to check enabling/disabling kernel debug message diff --git a/sycl/test-e2e/AddressSanitizer/lit.local.cfg b/sycl/test-e2e/AddressSanitizer/lit.local.cfg index b2281d588f4e5..8eb37fb1a7b43 100644 --- a/sycl/test-e2e/AddressSanitizer/lit.local.cfg +++ b/sycl/test-e2e/AddressSanitizer/lit.local.cfg @@ -5,7 +5,7 @@ config.substitutions.append( ) config.substitutions.append( - ("%force_device_asan_rt", "env SYCL_PREFER_UR=1 UR_ENABLE_LAYERS=UR_LAYER_ASAN") + ("%force_device_asan_rt", "env UR_ENABLE_LAYERS=UR_LAYER_ASAN") ) config.unsupported_features += ['cuda', 'hip'] diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global.cpp index cb3eb72f0c07a..0294e7201915b 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O1 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O2 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global_image_scope.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global_image_scope.cpp index e59be7cd37b1b..2a30758819b12 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global_image_scope.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global_image_scope.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O1 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O2 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global_image_scope_unaligned.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global_image_scope_unaligned.cpp index 2532016165b7a..258ce9c849d4a 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global_image_scope_unaligned.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/device_global_image_scope_unaligned.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O1 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O2 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/multi_device_images.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/multi_device_images.cpp index 117e0ba8be81e..914b80a89521f 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/multi_device_images.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/DeviceGlobal/multi_device_images.cpp @@ -2,7 +2,7 @@ // RUN: %{build} %device_asan_flags -O2 -g -DUSER_CODE_1 -c -o %t1.o // RUN: %{build} %device_asan_flags -O2 -g -DUSER_CODE_2 -c -o %t2.o // RUN: %clangxx -fsycl %device_asan_flags -O2 -g %t1.o %t2.o -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp index f7353ea7c835c..54acba761ae6e 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp @@ -1,14 +1,14 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O1 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_HOST -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s // RUN: %{build} %device_asan_flags -DMALLOC_SHARED -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +// RUN: %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp index 75da70920b016..f6c12fcc75818 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp @@ -1,14 +1,14 @@ // REQUIRES: linux, aspect-fp64 // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O1 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_HOST -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s // RUN: %{build} %device_asan_flags -DMALLOC_SHARED -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +// RUN: %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp index 489e554d59b47..f582ec78226cb 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp @@ -1,14 +1,14 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O1 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_HOST -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s // RUN: %{build} %device_asan_flags -DMALLOC_SHARED -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +// RUN: %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp index 729264d5039a4..d51e3263ea7c5 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp @@ -1,14 +1,14 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O1 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_HOST -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s // RUN: %{build} %device_asan_flags -DMALLOC_SHARED -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +// RUN: %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp index f92668adbaea4..8b16598531605 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp @@ -1,14 +1,14 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O1 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s // RUN: %{build} %device_asan_flags -DMALLOC_HOST -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s +// RUN: %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s // RUN: %{build} %device_asan_flags -DMALLOC_SHARED -O2 -g -o %t -// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +// RUN: %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer.cpp index e95665f18e733..57f16fd71cfa1 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O1 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O2 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_2d.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_2d.cpp index 60b2c8290c17b..416faacb90198 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_2d.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_2d.cpp @@ -1,8 +1,8 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O1 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_3d.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_3d.cpp index 4590293b4ed58..d91be40fcd9d5 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_3d.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_3d.cpp @@ -1,8 +1,8 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O1 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_copy_fill.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_copy_fill.cpp index e61a794f40d1c..b695c3c4df835 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_copy_fill.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/buffer_copy_fill.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O1 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O2 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/subbuffer.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/subbuffer.cpp index 50297eec3cf1e..3b5862d860653 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/subbuffer.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/buffer/subbuffer.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O1 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -O2 -g -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory.cpp index 77f85cb4387b6..619bca302fcbf 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/group_local_memory.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -g -O0 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -g -O1 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -g -O2 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_basic.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_basic.cpp index efcad68ebea65..8ac696aafb466 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_basic.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_basic.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -g -O0 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -g -O1 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -g -O2 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include constexpr std::size_t N = 4; diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_function.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_function.cpp index c0cd20f5605ec..5d61bd5c95153 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_function.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_function.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -g -O0 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -g -O1 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -g -O2 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include #include diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_multiargs.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_multiargs.cpp index 661b739ad6439..63c2c6ef14c68 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_multiargs.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local_accessor_multiargs.cpp @@ -1,10 +1,10 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -g -O0 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -g -O1 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s // RUN: %{build} %device_asan_flags -g -O2 -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include constexpr std::size_t N = 8; diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/multiple_source.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/multiple_source.cpp index 452ec566a1c53..aab9beab711a4 100644 --- a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/multiple_source.cpp +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/multiple_source.cpp @@ -2,7 +2,7 @@ // RUN: %{build} %device_asan_flags -O2 -g -DUSER_CODE_1 -c -o %t1.o // RUN: %{build} %device_asan_flags -O2 -g -DUSER_CODE_2 -c -o %t2.o // RUN: %clangxx -fsycl %device_asan_flags -O2 -g %t1.o %t2.o -o %t.out -// RUN: env SYCL_PREFER_UR=1 %{run} not %t.out 2>&1 | FileCheck %s +// RUN: %{run} not %t.out 2>&1 | FileCheck %s #include constexpr std::size_t N = 4; diff --git a/sycl/test-e2e/AddressSanitizer/use-after-free/quarantine-no-free.cpp b/sycl/test-e2e/AddressSanitizer/use-after-free/quarantine-no-free.cpp index 983df51ccfac7..c512cfe556957 100644 --- a/sycl/test-e2e/AddressSanitizer/use-after-free/quarantine-no-free.cpp +++ b/sycl/test-e2e/AddressSanitizer/use-after-free/quarantine-no-free.cpp @@ -1,6 +1,6 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 UR_LAYER_ASAN_OPTIONS=quarantine_size_mb:5 UR_LOG_SANITIZER=level:info %{run} not %t 2>&1 | FileCheck %s +// RUN: env UR_LAYER_ASAN_OPTIONS=quarantine_size_mb:5 UR_LOG_SANITIZER=level:info %{run} not %t 2>&1 | FileCheck %s #include /// Quarantine Cache Test diff --git a/sycl/test-e2e/AddressSanitizer/use-after-free/use-after-free.cpp b/sycl/test-e2e/AddressSanitizer/use-after-free/use-after-free.cpp index fe67ab5aae584..59cbb37c88fb9 100644 --- a/sycl/test-e2e/AddressSanitizer/use-after-free/use-after-free.cpp +++ b/sycl/test-e2e/AddressSanitizer/use-after-free/use-after-free.cpp @@ -1,6 +1,6 @@ // REQUIRES: linux // RUN: %{build} %device_asan_flags -O0 -g -o %t -// RUN: env SYCL_PREFER_UR=1 UR_LAYER_ASAN_OPTIONS=quarantine_size_mb:1 %{run} not %t 2>&1 | FileCheck %s +// RUN: env UR_LAYER_ASAN_OPTIONS=quarantine_size_mb:1 %{run} not %t 2>&1 | FileCheck %s #include constexpr size_t N = 1024; diff --git a/sycl/test-e2e/Assert/check_resource_leak.cpp b/sycl/test-e2e/Assert/check_resource_leak.cpp index 48c1b0eb54cf7..316da71eedb26 100644 --- a/sycl/test-e2e/Assert/check_resource_leak.cpp +++ b/sycl/test-e2e/Assert/check_resource_leak.cpp @@ -5,7 +5,8 @@ // UNSUPPORTED: opencl && gpu // TODO: Fails at JIT compilation for some reason. -// UNSUPPORTED: hip +// TODO: Reenable windows/linux, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: hip, windows, linux #define SYCL_FALLBACK_ASSERT 1 #include diff --git a/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp b/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp index 9db687747e25f..1efa8fcf6fb84 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp @@ -1,6 +1,9 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14764 +// UNSUPPORTED: windows + // Currently the feature is supported only on CPU and GPU, natively or by // software emulation. // UNSUPPORTED: accelerator diff --git a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp index 943cced16aa3e..b758527a301d8 100644 --- a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp +++ b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp @@ -1,7 +1,7 @@ // REQUIRES: level_zero || cuda // RUN: %{build} -o %t2.out -// RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t2.out %if level_zero %{ 2>&1 | FileCheck %s %} +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t2.out %if level_zero %{ 2>&1 | FileCheck %s %} // RUN: %{run} %t2.out #include @@ -34,7 +34,6 @@ int main() { } } -// CHECK:---> piMemBufferCreate -// CHECK:---> piMemBufferCreate -// CHECK-NEXT: {{.*}} : {{.*}} -// CHECK-NEXT: {{.*}} : 17 +// CHECK:---> urMemBufferCreate +// CHECK:---> urMemBufferCreate +// CHECK-SAME: UR_MEM_FLAG_ALLOC_HOST_POINTER diff --git a/sycl/test-e2e/Basic/buffer/native_buffer_creation_flags.cpp b/sycl/test-e2e/Basic/buffer/native_buffer_creation_flags.cpp index 639b9839580c8..311740f37a75a 100644 --- a/sycl/test-e2e/Basic/buffer/native_buffer_creation_flags.cpp +++ b/sycl/test-e2e/Basic/buffer/native_buffer_creation_flags.cpp @@ -1,6 +1,6 @@ // REQUIRES: cpu // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s #include @@ -19,10 +19,9 @@ int main() { Q.submit([&](handler &Cgh) { // Now that we have a read-write host allocation, check that the native - // buffer is created with the PI_MEM_FLAGS_HOST_PTR_USE flag. - // CHECK: piMemBufferCreate - // CHECK-NEXT: {{.*}} : {{.*}} - // CHECK-NEXT: {{.*}} : 9 + // buffer is created with the UR_MEM_FLAG_USE_HOST_POINTER flag. + // CHECK: urMemBufferCreate + // CHECK-SAME: UR_MEM_FLAG_USE_HOST_POINTER auto BufAcc = Buf.get_access(Cgh); Cgh.single_task([=]() { int A = BufAcc[0]; }); }); diff --git a/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp b/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp index af296fc3b56f9..89cbed42c25f7 100644 --- a/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp +++ b/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp @@ -1,6 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s #include @@ -16,14 +16,14 @@ int main() { for (auto &e : sycl::host_accessor{b}) e = idx++ % size; - // CHECK: piMemBufferPartition - // CHECK: pi_buffer_region origin/size : 256/64 + // CHECK: urMemBufferPartition + // CHECK: .origin = 256, .size = 64 q.submit([&](sycl::handler &cgh) { sycl::accessor acc{sub1, cgh}; cgh.parallel_for(size, [=](auto id) { acc[id] += 1; }); }); - // CHECK: piMemBufferPartition - // CHECK: pi_buffer_region origin/size : 256/128 + // CHECK: urMemBufferPartition + // CHECK: .origin = 256, .size = 128 q.submit([&](sycl::handler &cgh) { sycl::accessor acc{sub2, cgh}; cgh.parallel_for(size * 2, [=](auto id) { acc[id] -= 1; }); diff --git a/sycl/test-e2e/Basic/enqueue_barrier.cpp b/sycl/test-e2e/Basic/enqueue_barrier.cpp index 0eae48b74f28f..883873d200533 100644 --- a/sycl/test-e2e/Basic/enqueue_barrier.cpp +++ b/sycl/test-e2e/Basic/enqueue_barrier.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // The test is failing sporadically on Windows OpenCL RTs // Disabling on windows until fixed @@ -60,7 +60,7 @@ int main() { return 0; } -// CHECK:---> piEnqueueEventsWaitWithBarrier -// CHECK:---> piEnqueueEventsWaitWithBarrier -// CHECK:---> piEnqueueEventsWaitWithBarrier -// CHECK:---> piEnqueueEventsWaitWithBarrier +// CHECK:---> urEnqueueEventsWaitWithBarrier +// CHECK:---> urEnqueueEventsWaitWithBarrier +// CHECK:---> urEnqueueEventsWaitWithBarrier +// CHECK:---> urEnqueueEventsWaitWithBarrier diff --git a/sycl/test-e2e/Basic/event_release.cpp b/sycl/test-e2e/Basic/event_release.cpp index fd1e9f712db6e..8df5b7f041ba3 100644 --- a/sycl/test-e2e/Basic/event_release.cpp +++ b/sycl/test-e2e/Basic/event_release.cpp @@ -1,11 +1,11 @@ // REQUIRES: cpu // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s #include #include #include -// The test checks that pi_events are released without queue destruction +// The test checks that UR event handles are released without queue destruction // or call to queue::wait, when the corresponding commands are cleaned up. using namespace sycl; @@ -29,8 +29,8 @@ int main() { // Buffer destruction triggers execution graph cleanup, check that both // events (one for launching the kernel and one for memory transfer to host) // are released. - // CHECK: piEventRelease - // CHECK: piEventRelease + // CHECK: urEventRelease + // CHECK: urEventRelease assert(Val == Gold); // CHECK: End of main scope std::cout << "End of main scope" << std::endl; diff --git a/sycl/test-e2e/Basic/fill_accessor_pi.cpp b/sycl/test-e2e/Basic/fill_accessor_ur.cpp similarity index 86% rename from sycl/test-e2e/Basic/fill_accessor_pi.cpp rename to sycl/test-e2e/Basic/fill_accessor_ur.cpp index 820c2fdeb66d7..8faeee9854b7a 100644 --- a/sycl/test-e2e/Basic/fill_accessor_pi.cpp +++ b/sycl/test-e2e/Basic/fill_accessor_ur.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s -// This test merely checks the use of the correct PI call. Its sister test +// This test merely checks the use of the correct UR call. Its sister test // fill_accessor.cpp thoroughly checks the workings of the .fill() call. #include @@ -20,7 +20,7 @@ void testFill_Buffer1D() { std::cout << "start testFill_Buffer1D" << std::endl; q.submit([&](sycl::handler &cgh) { auto acc1D = buffer_1D.get_access(cgh); - // should stage piEnqueueMemBufferFill + // should stage urEnqueueMemBufferFill cgh.fill(acc1D, float{1}); }); q.wait(); @@ -29,7 +29,7 @@ void testFill_Buffer1D() { q.submit([&](sycl::handler &cgh) { auto acc1DOffset = buffer_1D.get_access(cgh, {4}, {2}); - // despite being offset, should stage piEnqueueMemBufferFill + // despite being offset, should stage urEnqueueMemBufferFill cgh.fill(acc1DOffset, float{2}); }); q.wait(); @@ -50,7 +50,7 @@ void testFill_Buffer2D() { std::cout << "start testFill_Buffer2D" << std::endl; q.submit([&](sycl::handler &cgh) { auto acc2D = buffer_2D.get_access(cgh); - // should stage piEnqueueMemBufferFill + // should stage urEnqueueMemBufferFill cgh.fill(acc2D, float{3}); }); q.wait(); @@ -60,7 +60,7 @@ void testFill_Buffer2D() { auto acc2D = buffer_2D.get_access(cgh, {8, 12}, {2, 2}); // "ranged accessor" will have to be handled by custom kernel: - // piEnqueueKernelLaunch + // urEnqueueKernelLaunch cgh.fill(acc2D, float{4}); }); q.wait(); @@ -81,7 +81,7 @@ void testFill_Buffer3D() { std::cout << "start testFill_Buffer3D" << std::endl; q.submit([&](sycl::handler &cgh) { auto acc3D = buffer_3D.get_access(cgh); - // should stage piEnqueueMemBufferFill + // should stage urEnqueueMemBufferFill cgh.fill(acc3D, float{5}); }); q.wait(); @@ -91,7 +91,7 @@ void testFill_Buffer3D() { auto acc3D = buffer_3D.get_access( cgh, {4, 8, 12}, {3, 3, 3}); // "ranged accessor" will have to be handled by custom kernel: - // piEnqueueKernelLaunch + // urEnqueueKernelLaunch cgh.fill(acc3D, float{6}); }); q.wait(); @@ -129,19 +129,19 @@ int main() { } // CHECK: start testFill_Buffer1D -// CHECK: piEnqueueMemBufferFill +// CHECK: urEnqueueMemBufferFill // CHECK: start testFill_Buffer1D -- OFFSET -// CHECK: piEnqueueMemBufferFill +// CHECK: urEnqueueMemBufferFill // CHECK: start testFill_Buffer2D -// CHECK: piEnqueueMemBufferFill +// CHECK: urEnqueueMemBufferFill // CHECK: start testFill_Buffer2D -- OFFSET -// CHECK: piEnqueueKernelLaunch +// CHECK: urEnqueueKernelLaunch // CHECK: start testFill_Buffer3D -// CHECK: piEnqueueMemBufferFill +// CHECK: urEnqueueMemBufferFill // CHECK: start testFill_Buffer3D -- OFFSET -// CHECK: piEnqueueKernelLaunch +// CHECK: urEnqueueKernelLaunch // CHECK: start testFill_ZeroDim -// CHECK: piEnqueueMemBufferFill +// CHECK: urEnqueueMemBufferFill diff --git a/sycl/test-e2e/Basic/host-task-dependency.cpp b/sycl/test-e2e/Basic/host-task-dependency.cpp index 8185eeac6658e..dc8ae18d5f567 100644 --- a/sycl/test-e2e/Basic/host-task-dependency.cpp +++ b/sycl/test-e2e/Basic/host-task-dependency.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out %threads_lib -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // // TODO: Behaviour is unstable for level zero on Windows. Enable when fixed. // TODO: The test is sporadically fails on CUDA. Enable when fixed. @@ -178,15 +178,15 @@ int main() { } // launch of Gen kernel -// CHECK:---> piKernelCreate( +// CHECK:---> urKernelCreate( // CHECK: NameGen -// CHECK:---> piEnqueueKernelLaunch( +// CHECK:---> urEnqueueKernelLaunch( // prepare for host task -// CHECK:---> piEnqueueMemBuffer{{Map|Read}}( +// CHECK:---> urEnqueueMemBuffer{{Map|Read}}( // launch of Copier kernel -// CHECK:---> piKernelCreate( +// CHECK:---> urKernelCreate( // CHECK: Copier -// CHECK:---> piEnqueueKernelLaunch( +// CHECK:---> urEnqueueKernelLaunch( // CHECK:Third buffer [ 0] = 0 // CHECK:Third buffer [ 1] = 1 @@ -209,6 +209,6 @@ int main() { // CHECK:Second buffer [ 8] = 8 // CHECK:Second buffer [ 9] = 9 -// TODO need to check for piEventsWait as "wait on dependencies of host task". -// At the same time this piEventsWait may occur anywhere after -// piEnqueueMemBufferMap ("prepare for host task"). +// TODO need to check for urEventWait as "wait on dependencies of host task". +// At the same time this urEventWait may occur anywhere after +// urEnqueueMemBufferMap ("prepare for host task"). diff --git a/sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp b/sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp index 12879d5c5e30b..c7e8fca902ee3 100644 --- a/sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp +++ b/sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp @@ -5,7 +5,10 @@ // device image is statically linked against fallback libdevice. // RUN: %{build} %if cpu %{ -DSYCL_DISABLE_FALLBACK_ASSERT=1 %} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %if cuda %{ %{run} %t.out %} -// RUN: %if cpu %{ env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s %} +// RUN: %if cpu %{ env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s %} + +// TODO: Reenable, see https://github.com/intel/llvm/issues/14764 +// UNSUPPORTED: windows, linux #include #include @@ -150,46 +153,27 @@ int main() { sycl::kernel_bundle KernelBundleObject1 = sycl::compile(KernelBundleInput1, KernelBundleInput1.get_devices()); - // CHECK:---> piProgramCreate - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: ) ---> pi_result : PI_SUCCESS - // CHECK-NEXT: [out] ** : {{.*}}[ [[PROGRAM_HANDLE1:[0-9a-fA-Fx]]] + // CHECK:---> urProgramCreate + // CHECK-SAME:, .phProgram = {{.*}} ([[PROGRAM_HANDLE1:[0-9a-fA-Fx]+]]) + // CHECK-SAME: -> UR_RESULT_SUCCESS; // - // CHECK:---> piProgramCompile( - // CHECK-Next: : [[PROGRAM_HANDLE1]] + // CHECK:---> urProgramCompile + // CHECK-SAME: .hProgram = [[PROGRAM_HANDLE1]] sycl::kernel_bundle KernelBundleObject2 = sycl::compile(KernelBundleInput2, KernelBundleInput2.get_devices()); - // CHECK:---> piProgramCreate - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: ) ---> pi_result : PI_SUCCESS - // CHECK-NEXT: [out] ** : {{.*}}[ [[PROGRAM_HANDLE2:[0-9a-fA-Fx]]] + // CHECK:---> urProgramCreate + // CHECK-SAME:, .phProgram = {{.*}} ([[PROGRAM_HANDLE2:[0-9a-fA-Fx]+]]) + // CHECK-SAME: -> UR_RESULT_SUCCESS; // - // CHECK:---> piProgramCompile( - // CHECK-Next: : [[PROGRAM_HANDLE2]] + // CHECK:---> urProgramCompile( + // CHECK-SAME: .hProgram = [[PROGRAM_HANDLE2]] sycl::kernel_bundle KernelBundleExecutable = sycl::link({KernelBundleObject1, KernelBundleObject2}, KernelBundleObject1.get_devices()); - // CHECK:---> piProgramLink( - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: - // CHECK-NEXT: - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT:---> pi_result : PI_SUCCESS - // CHECK-NEXT: [out] ** : {{.*}} - // PI tracing doesn't allow checking for all input programs so far. + // CHECK:---> urProgramLink{{.*}} -> UR_RESULT_SUCCESS; + // UR tracing doesn't allow checking for all input programs so far. assert(KernelBundleExecutable.has_kernel(Kernel1ID)); assert(KernelBundleExecutable.has_kernel(Kernel2ID)); @@ -198,20 +182,16 @@ int main() { KernelBundleExecutable2 = sycl::build(KernelBundleInput1, KernelBundleInput1.get_devices()); - // CHECK:---> piProgramCreate - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: ) ---> pi_result : PI_SUCCESS - // CHECK-NEXT: [out] ** : {{.*}}[ [[PROGRAM_HANDLE3:[0-9a-fA-Fx]]] + // CHECK:---> urProgramCreate + // CHECK-SAME:, .phProgram = {{.*}} ([[PROGRAM_HANDLE3:[0-9a-fA-Fx]+]]) + // CHECK-SAME: -> UR_RESULT_SUCCESS; // - // CHECK:---> piProgramBuild( - // CHECK-NEXT: : [[PROGRAM_HANDLE3]] + // CHECK:---> urProgramBuild( + // CHECK-SAME: .hProgram = [[PROGRAM_HANDLE3]] // - // CHECK:---> piProgramRetain( - // CHECK-NEXT: : [[PROGRAM_HANDLE3]] - // CHECK-NEXT:---> pi_result : PI_SUCCESS + // CHECK:---> urProgramRetain( + // CHECK-SAME: .hProgram = [[PROGRAM_HANDLE3]] + // CHECK-SAME:-> UR_RESULT_SUCCESS; // Version of link which finds intersection of associated devices between // input bundles @@ -227,40 +207,33 @@ int main() { sycl::kernel_bundle KernelBundleExecutable = sycl::get_kernel_bundle(Ctx, {Dev}, {Kernel3ID}); - // CHECK:---> piextDeviceSelectBinary - // CHECK:---> piProgramCreate - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: ) ---> pi_result : PI_SUCCESS - // CHECK-NEXT: [out] ** : {{.*}}[ [[PROGRAM_HANDLE4:[0-9a-fA-Fx]]] + // CHECK:---> urProgramCreate + // CHECK-SAME:, .phProgram = {{.*}} ([[PROGRAM_HANDLE4:[0-9a-fA-Fx]+]]) + // CHECK-SAME: -> UR_RESULT_SUCCESS; // - // CHECK:---> piProgramBuild( - // CHECK-NEXT: : [[PROGRAM_HANDLE4]] + // CHECK:---> urProgramBuild( + // CHECK-SAME: .hProgram = [[PROGRAM_HANDLE4]] // - // CHECK:---> piProgramRetain( - // CHECK-NEXT: : [[PROGRAM_HANDLE4]] - // CHECK-NEXT:---> pi_result : PI_SUCCESS + // CHECK:---> urProgramRetain( + // CHECK-SAME: .hProgram = [[PROGRAM_HANDLE4]] + // CHECK-SAME:-> UR_RESULT_SUCCESS; // - // CHECK:---> piKernelCreate( - // CHECK-NEXT: : [[PROGRAM_HANDLE4]] - // CHECK-NEXT:: _ZTS11Kernel3Name - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: ---> pi_result : PI_SUCCESS - // CHECK-NEXT: [out] ** : {{.*}}[ [[KERNEL_HANDLE:[0-9a-fA-Fx]]] + // CHECK:---> urKernelCreate( + // CHECK-SAME: .hProgram = [[PROGRAM_HANDLE4]] + // CHECK-SAME: .pKernelName = {{[0-9a-fA-Fx]+}} (_ZTS11Kernel3Name) + // CHECK-SAME: .phKernel = {{[0-9a-fA-Fx]+}} ([[KERNEL_HANDLE:[0-9a-fA-Fx]+]]) + // CHECK-SAME: -> UR_RESULT_SUCCESS; // - // CHECK:---> piKernelRetain( - // CHECK-NEXT: : [[KERNEL_HANDLE]] - // CHECK-NEXT:---> pi_result : PI_SUCCESS + // CHECK:---> urKernelRetain( + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] + // CHECK-SAME:-> UR_RESULT_SUCCESS; // - // CHECK:---> piEnqueueKernelLaunch( - // CHECK-NEXT: : {{.*}} - // CHECK-NEXT: : [[KERNEL_HANDLE]] + // CHECK:---> urEnqueueKernelLaunch( + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] // - // CHECK:---> piKernelRelease( - // CHECK-NEXT: : [[KERNEL_HANDLE]] - // CHECK-NEXT:---> pi_result : PI_SUCCESS + // CHECK:---> urKernelRelease( + // CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] + // CHECK-SAME:-> UR_RESULT_SUCCESS; sycl::buffer Buf(sycl::range<1>{1}); diff --git a/sycl/test-e2e/Basic/library_loading.cpp b/sycl/test-e2e/Basic/library_loading.cpp deleted file mode 100644 index 59f0434c012da..0000000000000 --- a/sycl/test-e2e/Basic/library_loading.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// REQUIRES: linux -// RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run-unfiltered-devices} %t.out &> %t_trace_no_filter.txt || true -// RUN: FileCheck --input-file=%t_trace_no_filter.txt --check-prefix=CHECK-NO-FILTER %s -dump-input=fail -// Checks pi traces on library loading - -#include - -using namespace sycl; - -int main() { - // CHECK-NO-FILTER-DAG: {{(SYCL_PI_TRACE\[-1\]: dlopen\(.*/libpi_cuda.so\) failed with)|(SYCL_PI_TRACE\[basic\]: Plugin found and successfully loaded: libpi_cuda.so)}} - // CHECK-NO-FILTER-DAG: {{(SYCL_PI_TRACE\[-1\]: dlopen\(.*/libpi_hip.so\) failed with)|(SYCL_PI_TRACE\[basic\]: Plugin found and successfully loaded: libpi_hip.so)}} - queue q; - q.submit([&](handler &cgh) {}); -} diff --git a/sycl/test-e2e/Basic/queue/release.cpp b/sycl/test-e2e/Basic/queue/release.cpp index b56be05dc5308..22840a8e8778a 100644 --- a/sycl/test-e2e/Basic/queue/release.cpp +++ b/sycl/test-e2e/Basic/queue/release.cpp @@ -1,7 +1,8 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s // -// XFAIL: hip_nvidia +// TODO: Reenable on Windows, see https://github.com/intel/llvm/issues/14768 +// XFAIL: hip_nvidia, windows #include int main() { @@ -13,12 +14,12 @@ int main() { return 0; } -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> urEnqueueKernelLaunch( // FIXME the order of these 2 varies between plugins due to a Level Zero // specific queue workaround. -// CHECK-DAG: ---> piEventRelease( -// CHECK-DAG: ---> piQueueRelease( -// CHECK: ---> piContextRelease( -// CHECK: ---> piKernelRelease( -// CHECK: ---> piProgramRelease( -// CHECK: ---> piDeviceRelease( +// CHECK-DAG: ---> urEventRelease( +// CHECK-DAG: ---> urQueueRelease( +// CHECK: ---> urContextRelease( +// CHECK: ---> urKernelRelease( +// CHECK: ---> urProgramRelease( +// CHECK: ---> urDeviceRelease( diff --git a/sycl/test-e2e/Basic/reqd_work_group_size.cpp b/sycl/test-e2e/Basic/reqd_work_group_size.cpp index d3fbe1621c757..f52ab51a4f8d4 100644 --- a/sycl/test-e2e/Basic/reqd_work_group_size.cpp +++ b/sycl/test-e2e/Basic/reqd_work_group_size.cpp @@ -1,6 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: linux, windows + #include #include diff --git a/sycl/test-e2e/Basic/stream/release_resources_test.cpp b/sycl/test-e2e/Basic/stream/release_resources_test.cpp index 74ff7be244bcc..9b8e47c8eb735 100644 --- a/sycl/test-e2e/Basic/stream/release_resources_test.cpp +++ b/sycl/test-e2e/Basic/stream/release_resources_test.cpp @@ -2,7 +2,7 @@ // to fail there. See comments in GlobalHandler::releaseDefaultContexts // UNSUPPORTED: windows // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s // Check that buffer used by a stream object is released. @@ -16,7 +16,7 @@ int main() { { queue Queue; - // CHECK:---> piMemRelease + // CHECK:---> urMemRelease Queue.submit([&](handler &CGH) { stream Out(1024, 80, CGH); CGH.parallel_for( diff --git a/sycl/test-e2e/Basic/subdevice_pi.cpp b/sycl/test-e2e/Basic/subdevice_pi.cpp index 118cebf5fa15b..bdb93d9411536 100644 --- a/sycl/test-e2e/Basic/subdevice_pi.cpp +++ b/sycl/test-e2e/Basic/subdevice_pi.cpp @@ -3,9 +3,9 @@ // REQUIRES: cpu // // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out separate equally | FileCheck %s --check-prefix CHECK-SEPARATE -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out shared equally | FileCheck %s --check-prefix CHECK-SHARED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out fused equally | FileCheck %s --check-prefix CHECK-FUSED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out separate equally | FileCheck %s --check-prefix CHECK-SEPARATE +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out shared equally | FileCheck %s --check-prefix CHECK-SHARED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out fused equally | FileCheck %s --check-prefix CHECK-FUSED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate #include #include @@ -14,7 +14,7 @@ using namespace sycl; -// Log to the same stream as SYCL_PI_TRACE +// Log to the same stream as SYCL_UR_TRACE static void log_pi(const char *msg) { std::cout << msg << std::endl; } static void use_mem(buffer buf, queue q) { @@ -51,7 +51,7 @@ static bool check_separate(device dev, buffer buf, std::vector subdevices = partition(dev); assert(subdevices.size() > 1); // CHECK-SEPARATE: Create sub devices - // CHECK-SEPARATE: ---> piDevicePartition + // CHECK-SEPARATE: ---> urDevicePartition log_pi("Test sub device 0"); { @@ -59,11 +59,11 @@ static bool check_separate(device dev, buffer buf, use_mem(buf, q0); } // CHECK-SEPARATE: Test sub device 0 - // CHECK-SEPARATE: ---> piContextCreate - // CHECK-SEPARATE: ---> piextQueueCreate - // CHECK-SEPARATE: ---> piMemBufferCreate - // CHECK-SEPARATE: ---> piEnqueueKernelLaunch - // CHECK-SEPARATE: ---> piQueueFinish + // CHECK-SEPARATE: ---> urContextCreate + // CHECK-SEPARATE: ---> urQueueCreate + // CHECK-SEPARATE: ---> urMemBufferCreate + // CHECK-SEPARATE: ---> urEnqueueKernelLaunch + // CHECK-SEPARATE: ---> urQueueFinish log_pi("Test sub device 1"); { @@ -71,16 +71,16 @@ static bool check_separate(device dev, buffer buf, use_mem(buf, q1); } // CHECK-SEPARATE: Test sub device 1 - // CHECK-SEPARATE: ---> piContextCreate - // CHECK-SEPARATE: ---> piextQueueCreate - // CHECK-SEPARATE: ---> piMemBufferCreate + // CHECK-SEPARATE: ---> urContextCreate + // CHECK-SEPARATE: ---> urQueueCreate + // CHECK-SEPARATE: ---> urMemBufferCreate // // Verify that we have a memcpy between subdevices in this case - // CHECK-SEPARATE: ---> piEnqueueMemBuffer{{Map|Read}} - // CHECK-SEPARATE: ---> piEnqueueMemBufferWrite + // CHECK-SEPARATE: ---> urEnqueueMemBuffer{{Map|Read}} + // CHECK-SEPARATE: ---> urEnqueueMemBufferWrite // - // CHECK-SEPARATE: ---> piEnqueueKernelLaunch - // CHECK-SEPARATE: ---> piQueueFinish + // CHECK-SEPARATE: ---> urEnqueueKernelLaunch + // CHECK-SEPARATE: ---> urQueueFinish return true; } @@ -91,14 +91,14 @@ static bool check_shared_context(device dev, buffer buf, std::vector subdevices = partition(dev); assert(subdevices.size() > 1); // CHECK-SHARED: Create sub devices - // CHECK-SHARED: ---> piDevicePartition + // CHECK-SHARED: ---> urDevicePartition // Shared context: queues are bound to specific subdevices, but // memory does not migrate log_pi("Create shared context"); context shared_context(subdevices); // CHECK-SHARED: Create shared context - // CHECK-SHARED: ---> piContextCreate + // CHECK-SHARED: ---> urContextCreate // // Make sure that a single context is created: see --implicit-check-not above. @@ -108,14 +108,14 @@ static bool check_shared_context(device dev, buffer buf, use_mem(buf, q0); } // CHECK-SHARED: Test sub device 0 - // CHECK-SHARED: ---> piextQueueCreate - // CHECK-SHARED: ---> piMemBufferCreate + // CHECK-SHARED: ---> urQueueCreate + // CHECK-SHARED: ---> urMemBufferCreate // // Make sure that a single buffer is created (and shared between subdevices): // see --implicit-check-not above. // - // CHECK-SHARED: ---> piEnqueueKernelLaunch - // CHECK-SHARED: ---> piQueueFinish + // CHECK-SHARED: ---> urEnqueueKernelLaunch + // CHECK-SHARED: ---> urQueueFinish log_pi("Test sub device 1"); { @@ -123,10 +123,10 @@ static bool check_shared_context(device dev, buffer buf, use_mem(buf, q1); } // CHECK-SHARED: Test sub device 1 - // CHECK-SHARED: ---> piextQueueCreate - // CHECK-SHARED: ---> piEnqueueKernelLaunch - // CHECK-SHARED: ---> piQueueFinish - // CHECK-SHARED: ---> piEnqueueMemBufferRead + // CHECK-SHARED: ---> urQueueCreate + // CHECK-SHARED: ---> urEnqueueKernelLaunch + // CHECK-SHARED: ---> urQueueFinish + // CHECK-SHARED: ---> urEnqueueMemBufferRead return true; } @@ -137,7 +137,7 @@ static bool check_fused_context(device dev, buffer buf, std::vector subdevices = partition(dev); assert(subdevices.size() > 1); // CHECK-FUSED: Create sub devices - // CHECK-FUSED: ---> piDevicePartition + // CHECK-FUSED: ---> urDevicePartition // Fused context: same as shared context, but also includes the root device log_pi("Create fused context"); @@ -147,7 +147,7 @@ static bool check_fused_context(device dev, buffer buf, devices.push_back(subdevices[1]); context fused_context(devices); // CHECK-FUSED: Create fused context - // CHECK-FUSED: ---> piContextCreate + // CHECK-FUSED: ---> urContextCreate // // Make sure that a single context is created: see --implicit-check-not above. @@ -157,14 +157,14 @@ static bool check_fused_context(device dev, buffer buf, use_mem(buf, q); } // CHECK-FUSED: Test root device - // CHECK-FUSED: ---> piextQueueCreate - // CHECK-FUSED: ---> piMemBufferCreate + // CHECK-FUSED: ---> urQueueCreate + // CHECK-FUSED: ---> urMemBufferCreate // // Make sure that a single buffer is created (and shared between subdevices // *and* the root device): see --implicit-check-not above. // - // CHECK-FUSED: ---> piEnqueueKernelLaunch - // CHECK-FUSED: ---> piQueueFinish + // CHECK-FUSED: ---> urEnqueueKernelLaunch + // CHECK-FUSED: ---> urQueueFinish log_pi("Test sub device 0"); { @@ -172,9 +172,9 @@ static bool check_fused_context(device dev, buffer buf, use_mem(buf, q0); } // CHECK-FUSED: Test sub device 0 - // CHECK-FUSED: ---> piextQueueCreate - // CHECK-FUSED: ---> piEnqueueKernelLaunch - // CHECK-FUSED: ---> piQueueFinish + // CHECK-FUSED: ---> urQueueCreate + // CHECK-FUSED: ---> urEnqueueKernelLaunch + // CHECK-FUSED: ---> urQueueFinish log_pi("Test sub device 1"); { @@ -182,10 +182,10 @@ static bool check_fused_context(device dev, buffer buf, use_mem(buf, q1); } // CHECK-FUSED: Test sub device 1 - // CHECK-FUSED: ---> piextQueueCreate - // CHECK-FUSED: ---> piEnqueueKernelLaunch - // CHECK-FUSED: ---> piQueueFinish - // CHECK-FUSED: ---> piEnqueueMemBufferRead + // CHECK-FUSED: ---> urQueueCreate + // CHECK-FUSED: ---> urEnqueueKernelLaunch + // CHECK-FUSED: ---> urQueueFinish + // CHECK-FUSED: ---> urEnqueueMemBufferRead return true; } diff --git a/sycl/test-e2e/Basic/use_pinned_host_memory.cpp b/sycl/test-e2e/Basic/use_pinned_host_memory.cpp index 75b11c681798d..d73971810ec25 100644 --- a/sycl/test-e2e/Basic/use_pinned_host_memory.cpp +++ b/sycl/test-e2e/Basic/use_pinned_host_memory.cpp @@ -1,6 +1,6 @@ // REQUIRES: cpu // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s #include @@ -43,6 +43,5 @@ int main() { } } -// CHECK:---> piMemBufferCreate -// CHECK-NEXT: {{.*}} : {{.*}} -// CHECK-NEXT: {{.*}} : 17 +// CHECK:---> urMemBufferCreate +// CHECK-SAME: UR_MEM_FLAG_ALLOC_HOST_POINTER diff --git a/sycl/test-e2e/Basic/vector/load_store.cpp b/sycl/test-e2e/Basic/vector/load_store.cpp index 626fd0264fb71..396436ee1ac24 100644 --- a/sycl/test-e2e/Basic/vector/load_store.cpp +++ b/sycl/test-e2e/Basic/vector/load_store.cpp @@ -4,6 +4,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} +// TODO: Reenable, see https://github.com/intel/llvm/issues/14749 +// UNSUPPORTED: windows, linux + // Tests load and store on sycl::vec. #include diff --git a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp index e8bc71f4fc465..8900d10328871 100644 --- a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp +++ b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp @@ -1,5 +1,5 @@ // Checks whether or not event Dependencies are honored by -// piExtEnqueueLaunchKernelCustom +// urEnqueueKernelLaunchCustomExp // REQUIRES: aspect-ext_oneapi_cuda_cluster_group // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Config/select_device.cpp b/sycl/test-e2e/Config/select_device.cpp index 8cab3a707ad44..8ed6221bfeb43 100644 --- a/sycl/test-e2e/Config/select_device.cpp +++ b/sycl/test-e2e/Config/select_device.cpp @@ -529,7 +529,7 @@ int main(int argc, char *argv[]) { } catch (sycl::exception &E) { std::cout << "Caught exception: " << E.what() << std::endl; if (E.what() == - "Key DeviceName of SYCL_DEVICE_ALLOWLIST should have value which starts with {{ -30 (PI_ERROR_INVALID_VALUE)"sv) + "Key DeviceName of SYCL_DEVICE_ALLOWLIST should have value which starts with {{ 4 (UR_RESULT_ERROR_INVALID_VALUE)"sv) return 0; } return 1; @@ -555,7 +555,7 @@ int main(int argc, char *argv[]) { } catch (sycl::exception &E) { std::cout << "Caught exception: " << E.what() << std::endl; if (E.what() == - "Key PlatformName of SYCL_DEVICE_ALLOWLIST should have value which starts with {{ -30 (PI_ERROR_INVALID_VALUE)"sv) + "Key PlatformName of SYCL_DEVICE_ALLOWLIST should have value which starts with {{ 4 (UR_RESULT_ERROR_INVALID_VALUE)"sv) return 0; } return 1; @@ -588,7 +588,7 @@ int main(int argc, char *argv[]) { } catch (sycl::exception &E) { std::cout << "Caught exception: " << E.what() << std::endl; if (E.what() == - "Key DriverVersion of SYCL_DEVICE_ALLOWLIST should have value which starts with {{ -30 (PI_ERROR_INVALID_VALUE)"sv) + "Key DriverVersion of SYCL_DEVICE_ALLOWLIST should have value which starts with {{ 4 (UR_RESULT_ERROR_INVALID_VALUE)"sv) return 0; } return 1; @@ -616,7 +616,7 @@ int main(int argc, char *argv[]) { } catch (sycl::exception &E) { std::cout << "Caught exception: " << E.what() << std::endl; if (E.what() == - "Key PlatformVersion of SYCL_DEVICE_ALLOWLIST should have value which starts with {{ -30 (PI_ERROR_INVALID_VALUE)"sv) + "Key PlatformVersion of SYCL_DEVICE_ALLOWLIST should have value which starts with {{ 4 (UR_RESULT_ERROR_INVALID_VALUE)"sv) return 0; } return 1; diff --git a/sycl/test-e2e/DeprecatedFeatures/sampler_ocl.cpp b/sycl/test-e2e/DeprecatedFeatures/sampler_ocl.cpp index 180d1537ff316..977a7a6967ba1 100644 --- a/sycl/test-e2e/DeprecatedFeatures/sampler_ocl.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/sampler_ocl.cpp @@ -3,6 +3,9 @@ // RUN: %{build} -D__SYCL_INTERNAL_API -o %t.out %opencl_lib // RUN: %{run} %t.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14679 +// UNSUPPORTED: windows, linux + //==--------------- sampler.cpp - SYCL sampler basic test ------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/sycl/test-e2e/DeviceCodeSplit/grf.cpp b/sycl/test-e2e/DeviceCodeSplit/grf.cpp index 60fac498b78ec..e30110c15d92e 100644 --- a/sycl/test-e2e/DeviceCodeSplit/grf.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/grf.cpp @@ -15,17 +15,17 @@ // REQUIRES: arch-intel_gpu_pvc // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR // RUN: %{build} -DUSE_NEW_API=1 -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR // RUN: %{build} -DUSE_AUTO_GRF=1 -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR // RUN: %{build} -DUSE_NEW_API=1 -DUSE_AUTO_GRF=1 -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR #include "../helpers.hpp" #include #include @@ -134,20 +134,16 @@ int main(void) { return 0; } -// CHECK-LABEL: ---> piProgramBuild( -// CHECK-NOT: -ze-opt-large-register-file -// CHECK-WITH-VAR: -g -// CHECK: ) ---> pi_result : PI_SUCCESS -// CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}SingleGRF -// CHECK: ) ---> pi_result : PI_SUCCESS - -// CHECK-LABEL: ---> piProgramBuild( -// CHECK-NO-VAR: -ze-opt-large-register-file -// CHECK-WITH-VAR: -g -ze-opt-large-register-file -// CHECK-AUTO-NO-VAR: -ze-intel-enable-auto-large-GRF-mode -// CHECK-AUTO-WITH-VAR: -g -ze-intel-enable-auto-large-GRF-mode -// CHECK: ) ---> pi_result : PI_SUCCESS -// CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}SpecifiedGRF -// CHECK: ) ---> pi_result : PI_SUCCESS +// CHECK-LABEL: ---> urProgramBuild +// CHECK-WITH-VAR-SAME: -g +// CHECK-SAME: -> UR_RESULT_SUCCESS + +// CHECK: ---> urKernelCreate({{.*}}SingleGRF{{.*}}-> UR_RESULT_SUCCESS + +// CHECK-NO-VAR: urProgramBuild{{.*}}-ze-opt-large-register-file +// CHECK-WITH-VAR: urProgramBuild{{.*}}-g -ze-opt-large-register-file +// CHECK-AUTO-NO-VAR: urProgramBuild{{.*}}-ze-intel-enable-auto-large-GRF-mode +// CHECK-AUTO-WITH-VAR: urProgramBuild{{.*}}-g -ze-intel-enable-auto-large-GRF-mode +// CHECK-SAME: -> UR_RESULT_SUCCESS + +// CHECK: ---> urKernelCreate({{.*}}SpecifiedGRF{{.*}}-> UR_RESULT_SUCCESS diff --git a/sycl/test-e2e/DeviceGlobal/device_global_arrow.cpp b/sycl/test-e2e/DeviceGlobal/device_global_arrow.cpp index ffcadf8667bda..24b0437f2a35c 100644 --- a/sycl/test-e2e/DeviceGlobal/device_global_arrow.cpp +++ b/sycl/test-e2e/DeviceGlobal/device_global_arrow.cpp @@ -3,7 +3,8 @@ // // The OpenCL GPU backends do not currently support device_global backend // calls. -// UNSUPPORTED: opencl && gpu +// TODO: Reenable linux/windows, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: opencl && gpu, linux, windows // // Tests operator-> on device_global. diff --git a/sycl/test-e2e/DeviceGlobal/device_global_device_only.cpp b/sycl/test-e2e/DeviceGlobal/device_global_device_only.cpp index ac2894c13c855..07ea4f0ec94b4 100644 --- a/sycl/test-e2e/DeviceGlobal/device_global_device_only.cpp +++ b/sycl/test-e2e/DeviceGlobal/device_global_device_only.cpp @@ -3,7 +3,8 @@ // // The OpenCL GPU backends do not currently support device_global backend // calls. -// UNSUPPORTED: opencl && gpu +// TODO: Reenable linux/windows, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: opencl && gpu, linux, windows // // Tests basic device_global access through device kernels. diff --git a/sycl/test-e2e/DeviceGlobal/device_global_operator_passthrough.cpp b/sycl/test-e2e/DeviceGlobal/device_global_operator_passthrough.cpp index b687bb4c4365d..c98a22b851df6 100644 --- a/sycl/test-e2e/DeviceGlobal/device_global_operator_passthrough.cpp +++ b/sycl/test-e2e/DeviceGlobal/device_global_operator_passthrough.cpp @@ -3,7 +3,8 @@ // // The OpenCL GPU backends do not currently support device_global backend // calls. -// UNSUPPORTED: opencl && gpu +// TODO: Reenable linux/windows, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: opencl && gpu, linux, windows // // Tests the passthrough of operators on device_global. diff --git a/sycl/test-e2e/DeviceGlobal/device_global_subscript.cpp b/sycl/test-e2e/DeviceGlobal/device_global_subscript.cpp index cec40fafd61f3..e519db2894993 100644 --- a/sycl/test-e2e/DeviceGlobal/device_global_subscript.cpp +++ b/sycl/test-e2e/DeviceGlobal/device_global_subscript.cpp @@ -3,7 +3,8 @@ // // The OpenCL GPU backends do not currently support device_global backend // calls. -// UNSUPPORTED: opencl && gpu +// TODO: Reenable linux/windows, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: opencl && gpu, linux, windows // // Tests operator[] on device_global. diff --git a/sycl/test-e2e/DeviceLib/assert-windows.cpp b/sycl/test-e2e/DeviceLib/assert-windows.cpp index c4bfe6498f9d5..777c486af65e5 100644 --- a/sycl/test-e2e/DeviceLib/assert-windows.cpp +++ b/sycl/test-e2e/DeviceLib/assert-windows.cpp @@ -13,14 +13,14 @@ // explicitly. Since the test is going to crash, we'll have to follow a similar // approach as on Linux - call the test in a subprocess. // -// RUN: env SYCL_PI_TRACE=1 SYCL_DEVICELIB_INHIBIT_NATIVE=1 CL_CONFIG_USE_VECTORIZER=False %{run} %t.out | FileCheck %s --check-prefix=CHECK-FALLBACK +// RUN: env SYCL_UR_TRACE=1 SYCL_DEVICELIB_INHIBIT_NATIVE=1 CL_CONFIG_USE_VECTORIZER=False %{run} %t.out | FileCheck %s --check-prefix=CHECK-FALLBACK // RUN: env SHOULD_CRASH=1 SYCL_DEVICELIB_INHIBIT_NATIVE=1 CL_CONFIG_USE_VECTORIZER=False %{run} %t.out | FileCheck %s --check-prefix=CHECK-MESSAGE // // CHECK-MESSAGE: {{.*}}assert-windows.cpp:{{[0-9]+}}: (null): global id: // [{{[0-3]}},0,0], local id: [{{[0-3]}},0,0] Assertion `accessorC[wiID] == 0 && // "Invalid value"` failed. // -// CHECK-FALLBACK: ---> piProgramLink +// CHECK-FALLBACK: ---> urProgramLink #include #include diff --git a/sycl/test-e2e/DeviceLib/assert.cpp b/sycl/test-e2e/DeviceLib/assert.cpp index 4a7d59899dc84..81bedb8d0350e 100644 --- a/sycl/test-e2e/DeviceLib/assert.cpp +++ b/sycl/test-e2e/DeviceLib/assert.cpp @@ -57,8 +57,8 @@ // // 4. We want to check both compilation flow in (1) and the message in (3), // but these messages can interleave and fail to match. To avoid this, -// first run with SYCL_PI_TRACE and collect a trace, and then with -// SHOULD_CRASH (without SYCL_PI_TRACE) to collect an error message. +// first run with SYCL_UR_TRACE and collect a trace, and then with +// SHOULD_CRASH (without SYCL_UR_TRACE) to collect an error message. // // SYCL_DEVICELIB_INHIBIT_NATIVE=1 environment variable is used to force a mode // in SYCL Runtime, so it doesn't look into a device extensions list and always @@ -69,7 +69,7 @@ // extension is a new feature and may not be supported by the runtime used with // SYCL. // -// RUN: %if cpu %{ env SYCL_PI_TRACE=2 SHOULD_CRASH=1 EXPECTED_SIGNAL=SIGABRT %{run} %t.out 2> %t.stderr.native %} +// RUN: %if cpu %{ env SYCL_UR_TRACE=1 SHOULD_CRASH=1 EXPECTED_SIGNAL=SIGABRT %{run} %t.out 2> %t.stderr.native %} // RUN: %if cpu %{ FileCheck %s --input-file %t.stderr.native --check-prefixes=CHECK-MESSAGE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED %} // RUN: %if gpu %{ env SHOULD_CRASH=1 EXPECTED_SIGNAL=SIGIOT %{run} %t.out 2> %t.stderr.native %} // RUN: %if gpu %{ FileCheck %s --input-file %t.stderr.native --check-prefixes=CHECK-MESSAGE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED %} diff --git a/sycl/test-e2e/DiscardEvents/discard_events_accessors.cpp b/sycl/test-e2e/DiscardEvents/discard_events_accessors.cpp index 4aa1d04cb9d76..e6f298cbb1be6 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_accessors.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_accessors.cpp @@ -1,20 +1,17 @@ // RUN: %{build} -o %t.out // -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt // // The test checks that the last parameter is `nullptr` for -// piEnqueueKernelLaunch for USM kernel using local accessor, but +// urEnqueueKernelLaunch for USM kernel using local accessor, but // is not `nullptr` for kernel using buffer accessor. -// {{0|0000000000000000}} is required for various output on Linux and Windows. // -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urEnqueueKernelLaunch({{.*}}.phEvent = nullptr +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: -> UR_RESULT_SUCCESS // // CHECK: The test passed. diff --git a/sycl/test-e2e/DiscardEvents/discard_events_l0_leak.cpp b/sycl/test-e2e/DiscardEvents/discard_events_l0_leak.cpp index 9a2ab20da8f7b..adbffd60d98b1 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_l0_leak.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_l0_leak.cpp @@ -10,7 +10,7 @@ // The test is to check that there are no leaks reported with the embedded // UR_L0_LEAKS_DEBUG=1 ( %{l0_leak_check} ) testing capability. // In addition to general leak checking, especially for discard_events, the test -// checks that piKernelRelease to be executed for each kernel call, and +// checks that urKernelRelease to be executed for each kernel call, and // EventRelease for events, that are used for dependencies between // command-lists. diff --git a/sycl/test-e2e/DiscardEvents/discard_events_mixed_calls.cpp b/sycl/test-e2e/DiscardEvents/discard_events_mixed_calls.cpp index a4d017f26523c..041519a64d900 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_mixed_calls.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_mixed_calls.cpp @@ -16,7 +16,7 @@ // correctly. // RUN: %{run} %t.out mixed -// The test checks that piEnqueueMemBufferMap and piEnqueueMemUnmap work +// The test checks that urEnqueueMemBufferMap and urEnqueueMemUnmap work // correctly when we alternate between event and eventless kernel calls. // RUN: %{run} %t.out map-unmap @@ -206,7 +206,7 @@ void RunTest_MemBufferMapUnMap(sycl::queue Q) { }); { - // waiting for all queue operations in piEnqueueMemBufferMap and then + // waiting for all queue operations in urEnqueueMemBufferMap and then // checking buffer sycl::host_accessor HostAcc(Buf); for (size_t i = 0; i < BUFFER_SIZE; ++i) { @@ -227,7 +227,7 @@ void RunTest_MemBufferMapUnMap(sycl::queue Q) { }); Q.submit([&](sycl::handler &CGH) { - // waiting for all queue operations in piEnqueueMemUnmap and then + // waiting for all queue operations in urEnqueueMemUnmap and then // using buffer auto Acc = Buf.get_access(CGH); CGH.parallel_for(Range, [=](sycl::item<1> itemID) { diff --git a/sycl/test-e2e/DiscardEvents/discard_events_using_assert.cpp b/sycl/test-e2e/DiscardEvents/discard_events_using_assert.cpp index 5960c4ff3cdba..208ec11ca1746 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_using_assert.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_using_assert.cpp @@ -4,16 +4,14 @@ // UNSUPPORTED: ze_debug // RUN: %{build} -o %t.out // -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt // // The test checks that the last parameter is not `nullptr` for -// piEnqueueKernelLaunch. -// {{0|0000000000000000}} is required for various output on Linux and Windows. +// urEnqueueKernelLaunch. // -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urEnqueueKernelLaunch({{.*}}.phEvent = nullptr +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: -> UR_RESULT_SUCCESS // // CHECK: The test passed. diff --git a/sycl/test-e2e/DiscardEvents/discard_events_using_assert_ndebug.cpp b/sycl/test-e2e/DiscardEvents/discard_events_using_assert_ndebug.cpp index 5706a86e2f722..481e5a93291ce 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_using_assert_ndebug.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_using_assert_ndebug.cpp @@ -1,14 +1,12 @@ // RUN: %{build} -DNDEBUG -o %t.out // -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt // // The test checks that the last parameter is `nullptr` for -// piEnqueueKernelLaunch. -// {{0|0000000000000000}} is required for various output on Linux and Windows. +// urEnqueueKernelLaunch. // -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: .phEvent = nullptr // // CHECK: The test passed. diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp index 111e72d121cfc..b02e751792313 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp @@ -1,111 +1,91 @@ // RUN: %{build} -o %t.out // -// On level_zero Q.fill uses piEnqueueKernelLaunch and not piextUSMEnqueueFill +// On level_zero Q.fill uses urEnqueueKernelLaunch and not urEnqueueUSMFill // due to https://github.com/intel/llvm/issues/13787 // -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} // // REQUIRES: aspect-usm_shared_allocations -// The test checks that the last parameter is `nullptr` for all PI calls that +// The test checks that the last parameter is `nullptr` for all UR calls that // should discard events. // {{0|0000000000000000}} is required for various output on Linux and Windows. -// NOTE: piextUSMEnqueuePrefetch and piextUSMEnqueueMemAdvise in the CUDA and +// NOTE: urEnqueueUSMPrefetch and urEnqueueUSMAdvise in the CUDA and // HIP backends may return a warning result on Windows with error-code -// -996 (PI_ERROR_PLUGIN_SPECIFIC_ERROR) if USM managed memory is not +// 66 (UR_RESULT_ERROR_ADAPTER_SPECIFIC) if USM managed memory is not // supported or if unsupported advice flags are used for the latter API. // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueUSMFill( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piextUSMEnqueueMemcpy( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueUSMMemcpy( +// CHECK-SAME: .phEvent = nullptr // -// Level-zero backend doesn't use piextUSMEnqueueFill -// CHECK-L0: ---> piEnqueueKernelLaunch( -// CHECK-OTHER: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// Level-zero backend doesn't use urEnqueueUSMFill +// CHECK-L0: ---> urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK-OTHER: ---> urEnqueueUSMFill({{.*}} .phEvent = nullptr // -// ---> piEnqueueMemBufferCopy( -// CHECK: ---> piextUSMEnqueueMemcpy( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// ---> urEnqueueUSMMemcpy( +// CHECK: ---> urEnqueueUSMMemcpy( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piextUSMEnqueuePrefetch( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueUSMPrefetch( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piextUSMEnqueueMemAdvise( -// CHECK: ) ---> pi_result : {{PI_SUCCESS|-996}} -// CHECK-NEXT: [out]pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueUSMAdvise( +// CHECK-SAME: .phEvent = nullptr +// CHECK-SAME: ) -> {{UR_RESULT_SUCCESS|UR_RESULT_ERROR_ADAPTER_SPECIFIC}} // -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: .phEvent = nullptr // // RegularQueue -// CHECK: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urEnqueueUSMFill({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMFill( +// CHECK-SAME: -> UR_RESULT_SUCCESS // -// CHECK: ---> piEnqueueEventsWait( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueEventsWait( +// CHECK-SAME: .phEvent = nullptr // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueUSMFill( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piextUSMEnqueueMemcpy( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueUSMMemcpy( +// CHECK-SAME: .phEvent = nullptr // -// Level-zero backend doesn't use piextUSMEnqueueFill -// CHECK-L0: ---> piEnqueueKernelLaunch( -// CHECK-OTHER: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// Level-zero backend doesn't use urEnqueueUSMFill +// CHECK-L0: ---> urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK-OTHER: ---> urEnqueueUSMFill({{.*}} .phEvent = nullptr // -// ---> piEnqueueMemBufferCopy( -// CHECK: ---> piextUSMEnqueueMemcpy( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// ---> urEnqueueUSMMemcpy( +// CHECK: ---> urEnqueueUSMMemcpy( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piextUSMEnqueuePrefetch( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueUSMPrefetch( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piextUSMEnqueueMemAdvise( -// CHECK: ) ---> pi_result : {{PI_SUCCESS|-996}} -// CHECK-NEXT: [out]pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueUSMAdvise( +// CHECK-SAME: .phEvent = nullptr +// CHECK-SAME: ) -> {{UR_RESULT_SUCCESS|UR_RESULT_ERROR_ADAPTER_SPECIFIC}} // -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: .phEvent = nullptr // -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: .phEvent = nullptr // // RegularQueue -// CHECK: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piEnqueueEventsWait( -// CHECK: pi_event * : -// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] +// CHECK-NOT: ---> urEnqueueUSMFill({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMFill( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK: ---> urEnqueueEventsWait( +// CHECK-SAME: .phEvent = nullptr // // CHECK: The test passed. diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp index d27be6ab48abc..edd30f2901d71 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp @@ -1,135 +1,115 @@ // RUN: %{build} -o %t.out // -// On level_zero Q.fill uses piEnqueueKernelLaunch and not piextUSMEnqueueFill +// On level_zero Q.fill uses urEnqueueKernelLaunch and not urEnqueueUSMFill // due to https://github.com/intel/llvm/issues/13787 // -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} // // REQUIRES: aspect-usm_shared_allocations -// The test checks that the last parameter is not `nullptr` for all PI calls +// The test checks that the last parameter is not `nullptr` for all UR calls // that should discard events. // {{0|0000000000000000}} is required for various output on Linux and Windows. -// NOTE: piextUSMEnqueuePrefetch and piextUSMEnqueueMemAdvise in the CUDA and +// NOTE: urEnqueueUSMPrefetch and urEnqueueUSMAdvise in the CUDA and // HIP backends may return a warning result on Windows with error-code -// -996 (PI_ERROR_PLUGIN_SPECIFIC_ERROR) if USM managed memory is not +// 66 (UR_RESULT_ERROR_ADAPTER_SPECIFIC) if USM managed memory is not // supported or if unsupported advice flags are used for the latter API. // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piextUSMEnqueueMemcpy( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// Level-zero backend doesn't use piextUSMEnqueueFill -// CHECK-L0: ---> piEnqueueKernelLaunch( -// CHECK-OTHER: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// ---> piEnqueueMemBufferCopy( -// CHECK: ---> piextUSMEnqueueMemcpy( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piextUSMEnqueuePrefetch( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : {{PI_SUCCESS|-996}} -// -// CHECK: ---> piextUSMEnqueueMemAdvise( -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : {{PI_SUCCESS|-996}} -// -// CHECK: ---> piEnqueueEventsWaitWithBarrier( -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urEnqueueUSMFill({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMFill( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueUSMMemcpy({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMMemcpy( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// Level-zero backend doesn't use urEnqueueUSMFill +// CHECK-L0: ---> urEnqueueKernelLaunch({{.*}} .phEvent = {{[0-9a-f]+}} +// CHECK-OTHER: ---> urEnqueueUSMFill({{.*}} .phEvent = {{[0-9a-f]+}} +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// ---> urEnqueueUSMMemcpy( +// CHECK-NOT: ---> urEnqueueUSMMemcpy({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMMemcpy( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueUSMPrefetch({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMPrefetch( +// CHECK-SAME: ) -> {{UR_RESULT_SUCCESS|UR_RESULT_ERROR_ADAPTER_SPECIFIC}} +// +// CHECK-NOT: ---> urEnqueueUSMAdvise({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMAdvise( +// CHECK-SAME: ) -> {{UR_RESULT_SUCCESS|UR_RESULT_ERROR_ADAPTER_SPECIFIC}} +// +// CHECK-NOT: ---> urEnqueueEventsWaitWithBarrier({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueEventsWaitWithBarrier( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: -> UR_RESULT_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urEnqueueUSMFill({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMFill( +// CHECK-SAME: -> UR_RESULT_SUCCESS // -// CHECK: ---> piEnqueueEventsWait( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urEnqueueEventsWait({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueEventsWait( +// CHECK-SAME: -> UR_RESULT_SUCCESS // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piextUSMEnqueueMemcpy( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// Level-zero backend doesn't use piextUSMEnqueueFill -// CHECK-L0: ---> piEnqueueKernelLaunch( -// CHECK-OTHER: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// ---> piEnqueueMemBufferCopy( -// CHECK: ---> piextUSMEnqueueMemcpy( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piextUSMEnqueuePrefetch( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : {{PI_SUCCESS|-996}} -// -// CHECK: ---> piextUSMEnqueueMemAdvise( -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : {{PI_SUCCESS|-996}} -// -// CHECK: ---> piEnqueueEventsWaitWithBarrier( -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urEnqueueUSMFill({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMFill( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueUSMMemcpy({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMMemcpy( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// Level-zero backend doesn't use urEnqueueUSMFill +// CHECK-L0: ---> urEnqueueKernelLaunch({{.*}} .phEvent = {{[0-9a-f]+}} +// CHECK-OTHER: ---> urEnqueueUSMFill({{.*}} .phEvent = {{[0-9a-f]+}} +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// ---> urEnqueueUSMMemcpy( +// CHECK-NOT: ---> urEnqueueUSMMemcpy({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMMemcpy( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueUSMPrefetch({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMPrefetch( +// CHECK-SAME: ) -> {{UR_RESULT_SUCCESS|UR_RESULT_ERROR_ADAPTER_SPECIFIC}} +// +// CHECK-NOT: ---> urEnqueueUSMAdvise({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMAdvise( +// CHECK-SAME: ) -> {{UR_RESULT_SUCCESS|UR_RESULT_ERROR_ADAPTER_SPECIFIC}} +// +// CHECK-NOT: ---> urEnqueueEventsWaitWithBarrier({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueEventsWaitWithBarrier( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueKernelLaunch( +// CHECK-SAME: -> UR_RESULT_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueFill( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS -// -// CHECK: ---> piEnqueueEventsWait( -// CHECK: pi_event * : -// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] -// CHECK: ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urEnqueueUSMFill({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueUSMFill( +// CHECK-SAME: -> UR_RESULT_SUCCESS +// +// CHECK-NOT: ---> urEnqueueEventsWait({{.*}} .phEvent = nullptr +// CHECK: ---> urEnqueueEventsWait( +// CHECK-SAME: -> UR_RESULT_SUCCESS // // CHECK: The test passed. diff --git a/sycl/test-e2e/DiscardEvents/invalid_event.cpp b/sycl/test-e2e/DiscardEvents/invalid_event.cpp index c7a90a9b181a0..9f08bcdc28a31 100644 --- a/sycl/test-e2e/DiscardEvents/invalid_event.cpp +++ b/sycl/test-e2e/DiscardEvents/invalid_event.cpp @@ -4,7 +4,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// The test checks that each PI call to the queue returns a discarded event +// The test checks that each queue method call returns a discarded event // with the status "ext_oneapi_unknown" #include diff --git a/sycl/test-e2e/ESIMD/esimd_check_vc_codegen.cpp b/sycl/test-e2e/ESIMD/esimd_check_vc_codegen.cpp index 4aa984667fb5e..3267e787c2d79 100644 --- a/sycl/test-e2e/ESIMD/esimd_check_vc_codegen.cpp +++ b/sycl/test-e2e/ESIMD/esimd_check_vc_codegen.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s #include "esimd_test_utils.hpp" @@ -91,6 +91,7 @@ int main(void) { return err_cnt > 0 ? 1 : 0; } -// CHECK: ---> piProgramBuild( -// CHECK: : {{.*}}-vc-codegen -// CHECK: ) ---> pi_result : PI_SUCCESS +// Don't use -NEXT here to split the line because we need to allow for the +// possbility of a BuildExp( that fails with UNSUPPORTED followed by a Build( +// that results in SUCCESS +// CHECK: ---> urProgramBuild{{(Exp)?}}({{.*}}-vc-codegen{{.*}} -> UR_RESULT_SUCCESS diff --git a/sycl/test-e2e/ESIMD/grf.cpp b/sycl/test-e2e/ESIMD/grf.cpp index 6b12eaa78e32f..d305c1d3a0007 100644 --- a/sycl/test-e2e/ESIMD/grf.cpp +++ b/sycl/test-e2e/ESIMD/grf.cpp @@ -14,16 +14,16 @@ // compiler option // REQUIRES: arch-intel_gpu_pvc -// invokes 'piProgramBuild'/'piKernelCreate' +// invokes 'urProgramBuild'/'urKernelCreate' // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR // RUN: %{build} -DUSE_NEW_API=1 -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR // RUN: %{build} -DUSE_AUTO -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR #include "esimd_test_utils.hpp" #if defined(USE_NEW_API) || defined(USE_AUTO) @@ -154,35 +154,24 @@ int main(void) { // Regular SYCL kernel is compiled without -vc-codegen option -// CHECK-LABEL: ---> piProgramBuild( -// CHECK-NOT: -vc-codegen -// CHECK-WITH-VAR: -g -// CHECK-NOT: -vc-codegen -// CHECK: ) ---> pi_result : PI_SUCCESS -// CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}SyclKernel -// CHECK: ) ---> pi_result : PI_SUCCESS +// CHECK-NOT: ---> urProgramBuild{{.*}}-vc-codegen +// CHECK-WITH-VAR: ---> urProgramBuild{{.*}}-g +// CHECK: ---> urKernelCreate({{.*}}{{.*}}SyclKernel // For ESIMD kernels, -vc-codegen option is always preserved, // regardless of SYCL_PROGRAM_COMPILE_OPTIONS value. -// CHECK-LABEL: ---> piProgramBuild( -// CHECK-NO-VAR: -vc-codegen -disable-finalizer-msg +// CHECK-NO-VAR-LABEL: -vc-codegen -disable-finalizer-msg // CHECK-WITH-VAR: -g -vc-codegen -disable-finalizer-msg -// CHECK: ) ---> pi_result : PI_SUCCESS -// CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}EsimdKernel -// CHECK: ) ---> pi_result : PI_SUCCESS +// CHECK-LABEL: ---> urKernelCreate({{.*}}EsimdKernel{{.*}}-> UR_RESULT_SUCCESS // Kernels requesting GRF are grouped into separate module and compiled // with the respective option regardless of SYCL_PROGRAM_COMPILE_OPTIONS value. -// CHECK-LABEL: ---> piProgramBuild( // CHECK-NO-VAR: -vc-codegen -disable-finalizer-msg -doubleGRF // CHECK-WITH-VAR: -g -vc-codegen -disable-finalizer-msg -doubleGRF // CHECK-AUTO-NO-VAR: -vc-codegen -disable-finalizer-msg -ze-intel-enable-auto-large-GRF-mode // CHECK-AUTO-WITH-VAR: -g -vc-codegen -disable-finalizer-msg -ze-intel-enable-auto-large-GRF-mode -// CHECK: ) ---> pi_result : PI_SUCCESS -// CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}EsimdKernelSpecifiedGRF -// CHECK: ) ---> pi_result : PI_SUCCESS +// CHECK-LABEL: ---> urKernelCreate( +// CHECK-SAME: EsimdKernelSpecifiedGRF +// CHECK-SAME: -> UR_RESULT_SUCCESS diff --git a/sycl/test-e2e/ESIMD/spec_const/spec_const_redefine.cpp b/sycl/test-e2e/ESIMD/spec_const/spec_const_redefine.cpp index ee0c64e396ac0..7aef5acf4031a 100644 --- a/sycl/test-e2e/ESIMD/spec_const/spec_const_redefine.cpp +++ b/sycl/test-e2e/ESIMD/spec_const/spec_const_redefine.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s //==----------- spec_const_redefine.cpp ------------------------------==// // @@ -91,7 +91,7 @@ int main(int argc, char **argv) { } // --- Check that only two JIT compilation happened: -// CHECK-COUNT-2: ---> piProgramBuild -// CHECK-NOT: ---> piProgramBuild +// CHECK-COUNT-2: ---> urProgramBuildExp +// CHECK-NOT: ---> urProgramBuildExp // --- Check that the test completed with expected results: // CHECK: passed diff --git a/sycl/test-e2e/ESIMD/sycl_esimd_mix.cpp b/sycl/test-e2e/ESIMD/sycl_esimd_mix.cpp index 0a252fb0dff35..a8c3a964e4e3e 100644 --- a/sycl/test-e2e/ESIMD/sycl_esimd_mix.cpp +++ b/sycl/test-e2e/ESIMD/sycl_esimd_mix.cpp @@ -9,8 +9,8 @@ // in the same program . // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR #include "esimd_test_utils.hpp" @@ -119,22 +119,25 @@ int main(void) { // Regular SYCL kernel is compiled without -vc-codegen option -// CHECK-LABEL: ---> piProgramBuild( +// Some backends will call urProgramBuild and some will call +// urProgramBuildExp depending on urProgramBuildExp support. + +// CHECK-LABEL: ---> urProgramBuild{{(Exp)?}}( // CHECK-NOT: -vc-codegen // CHECK-WITH-VAR: -g // CHECK-NOT: -vc-codegen -// CHECK: ) ---> pi_result : PI_SUCCESS -// CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}SyclKernel -// CHECK: ) ---> pi_result : PI_SUCCESS +// CHECK: {{.*}}-> UR_RESULT_SUCCESS +// CHECK-LABEL: ---> urKernelCreate( +// CHECK: {{.*}}SyclKernel +// CHECK: {{.*}}-> UR_RESULT_SUCCESS // For ESIMD kernels, -vc-codegen option is always preserved, // regardless of SYCL_PROGRAM_COMPILE_OPTIONS value. -// CHECK-LABEL: ---> piProgramBuild( +// CHECK-LABEL: ---> urProgramBuild{{(Exp)?}}( // CHECK-NO-VAR: -vc-codegen // CHECK-WITH-VAR: -g -vc-codegen -// CHECK: ) ---> pi_result : PI_SUCCESS -// CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}EsimdKernel -// CHECK: ) ---> pi_result : PI_SUCCESS +// CHECK: {{.*}}-> UR_RESULT_SUCCESS +// CHECK-LABEL: ---> urKernelCreate( +// CHECK: {{.*}}EsimdKernel +// CHECK: {{.*}}-> UR_RESULT_SUCCESS \ No newline at end of file diff --git a/sycl/test-e2e/EnqueueFunctions/barrier.cpp b/sycl/test-e2e/EnqueueFunctions/barrier.cpp index c6bc8bb8abc6b..887fca484ed52 100644 --- a/sycl/test-e2e/EnqueueFunctions/barrier.cpp +++ b/sycl/test-e2e/EnqueueFunctions/barrier.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // Tests the enqueue free function barriers. @@ -50,5 +50,5 @@ int main() { return 0; } -// CHECK-COUNT-4:---> piEnqueueEventsWaitWithBarrier -// CHECK-NOT:---> piEnqueueEventsWaitWithBarrier +// CHECK-COUNT-4:---> urEnqueueEventsWaitWithBarrier +// CHECK-NOT:---> urEnqueueEventsWaitWithBarrier diff --git a/sycl/test-e2e/EnqueueFunctions/mem_advise.cpp b/sycl/test-e2e/EnqueueFunctions/mem_advise.cpp index 5900bdb51ad05..218f15570c2ee 100644 --- a/sycl/test-e2e/EnqueueFunctions/mem_advise.cpp +++ b/sycl/test-e2e/EnqueueFunctions/mem_advise.cpp @@ -1,6 +1,6 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // Tests the enqueue free function mem_advise. @@ -36,5 +36,5 @@ int main() { return 0; } -// CHECK-COUNT-3:---> piextUSMEnqueueMemAdvise -// CHECK-NOT:---> piextUSMEnqueueMemAdvise +// CHECK-COUNT-3:---> urEnqueueUSMAdvise +// CHECK-NOT:---> urEnqueueUSMAdvise diff --git a/sycl/test-e2e/EnqueueFunctions/prefetch.cpp b/sycl/test-e2e/EnqueueFunctions/prefetch.cpp index 940af1307a82b..941d6ee993446 100644 --- a/sycl/test-e2e/EnqueueFunctions/prefetch.cpp +++ b/sycl/test-e2e/EnqueueFunctions/prefetch.cpp @@ -1,6 +1,6 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // Tests the enqueue free function prefetch. @@ -35,5 +35,5 @@ int main() { return 0; } -// CHECK-COUNT-3:---> piextUSMEnqueuePrefetch -// CHECK-NOT:---> piextUSMEnqueuePrefetch +// CHECK-COUNT-3:---> urEnqueueUSMPrefetch +// CHECK-NOT:---> urEnqueueUSMPrefetch diff --git a/sycl/test-e2e/EnqueueNativeCommand/custom-command-cuda.cpp b/sycl/test-e2e/EnqueueNativeCommand/custom-command-cuda.cpp index c3e2c0f8f4c62..2aa8d9a4c590f 100644 --- a/sycl/test-e2e/EnqueueNativeCommand/custom-command-cuda.cpp +++ b/sycl/test-e2e/EnqueueNativeCommand/custom-command-cuda.cpp @@ -2,6 +2,9 @@ // RUN: %{run} %t.out // REQUIRES: cuda, cuda_dev_kit +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: windows, linux + #include #include diff --git a/sycl/test-e2e/EnqueueNativeCommand/custom-command-multiple-dev-cuda.cpp b/sycl/test-e2e/EnqueueNativeCommand/custom-command-multiple-dev-cuda.cpp index b5a0183865b47..6e409113aa26b 100644 --- a/sycl/test-e2e/EnqueueNativeCommand/custom-command-multiple-dev-cuda.cpp +++ b/sycl/test-e2e/EnqueueNativeCommand/custom-command-multiple-dev-cuda.cpp @@ -1,6 +1,10 @@ // REQUIRES: cuda, cuda_dev_kit // RUN: %{build} -o %t.out %cuda_options // RUN: %{run} %t.out + +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: windows, linux + #include #include diff --git a/sycl/test-e2e/External/RSBench/acc.test b/sycl/test-e2e/External/RSBench/acc.test index 448c5909f7975..0ccf6ccb58b7b 100644 --- a/sycl/test-e2e/External/RSBench/acc.test +++ b/sycl/test-e2e/External/RSBench/acc.test @@ -1,3 +1,3 @@ REQUIRES: accelerator -RUN: env SYCL_PI_TRACE=1 %{run} %T/rsbench -s large -l 34 -p 300000 -P 1000 -W 100 -m event +RUN: env SYCL_UR_TRACE=1 %{run} %T/rsbench -s large -l 34 -p 300000 -P 1000 -W 100 -m event XFAIL: * diff --git a/sycl/test-e2e/External/RSBench/cpu.test b/sycl/test-e2e/External/RSBench/cpu.test index 4bbe988fad62f..49f3213456b6a 100644 --- a/sycl/test-e2e/External/RSBench/cpu.test +++ b/sycl/test-e2e/External/RSBench/cpu.test @@ -1,3 +1,3 @@ REQUIRES: cpu -RUN: env SYCL_PI_TRACE=1 %{run} %T/rsbench -s large -l 34 -p 300000 -P 1000 -W 100 -m event +RUN: env SYCL_UR_TRACE=1 %{run} %T/rsbench -s large -l 34 -p 300000 -P 1000 -W 100 -m event XFAIL: * diff --git a/sycl/test-e2e/External/RSBench/gpu.test b/sycl/test-e2e/External/RSBench/gpu.test index 5040a6b6b8ca0..6f803d8bf9cf1 100644 --- a/sycl/test-e2e/External/RSBench/gpu.test +++ b/sycl/test-e2e/External/RSBench/gpu.test @@ -1,3 +1,3 @@ REQUIRES: gpu -RUN: env SYCL_PI_TRACE=1 %{run} %T/rsbench -s large -l 34 -p 300000 -P 1000 -W 100 -m event +RUN: env SYCL_UR_TRACE=1 %{run} %T/rsbench -s large -l 34 -p 300000 -P 1000 -W 100 -m event XFAIL: * diff --git a/sycl/test-e2e/Graph/Explicit/kernel_bundle.cpp b/sycl/test-e2e/Graph/Explicit/kernel_bundle.cpp index 28da376967dbf..e2dbce2b93aa2 100644 --- a/sycl/test-e2e/Graph/Explicit/kernel_bundle.cpp +++ b/sycl/test-e2e/Graph/Explicit/kernel_bundle.cpp @@ -1,43 +1,31 @@ // RUN: %{build} -o %t.out // RUN: %if cuda %{ %{run} %t.out %} -// RUN: %if level_zero %{env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s --implicit-check-not=LEAK %} +// RUN: %if level_zero %{env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s --implicit-check-not=LEAK %} -// Checks the PI call trace to ensure that the bundle kernel of the single task +// Checks the UR call trace to ensure that the bundle kernel of the single task // is used. -// CHECK:---> piProgramCreate -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS -// CHECK-NEXT: [out] ** : {{.*}}[ [[PROGRAM_HANDLE1:[0-9a-fA-Fx]]] +// CHECK:---> urProgramCreate +// CHECK-SAME: .phProgram = {{.*}} ([[PROGRAM_HANDLE1:[0-9a-fA-Fx]+]]) + // -// CHECK:---> piProgramBuild( -// CHECK-NEXT: : [[PROGRAM_HANDLE1]] +// CHECK:---> urProgramBuild +// CHECK-SAME: .hProgram = [[PROGRAM_HANDLE1]] // -// CHECK:---> piProgramRetain( -// CHECK-NEXT: : [[PROGRAM_HANDLE1]] -// CHECK-NEXT:---> pi_result : PI_SUCCESS +// CHECK:---> urProgramRetain(.hProgram = [[PROGRAM_HANDLE1]]) -> UR_RESULT_SUCCESS -// CHECK:---> piKernelCreate( -// CHECK-NEXT: : [[PROGRAM_HANDLE1]] -// CHECK-NEXT:: _ZTS11Kernel1Name -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: ---> pi_result : PI_SUCCESS -// CHECK-NEXT: [out] ** : {{.*}}[ [[KERNEL_HANDLE:[0-9a-fA-Fx]]] +// CHECK:---> urKernelCreate( +// CHECK-SAME: .hProgram = [[PROGRAM_HANDLE1]] +// CHECK-SAME: .pKernelName = {{.*}} (_ZTS11Kernel1Name) +// CHECK-SAME: .phKernel = {{.*}} ([[KERNEL_HANDLE:[0-9a-fA-Fx]+]]) +// CHECK-SAME: -> UR_RESULT_SUCCESS // -// CHECK:---> piKernelRetain( -// CHECK-NEXT: : [[KERNEL_HANDLE]] -// CHECK-NEXT:---> pi_result : PI_SUCCESS +// CHECK:---> urKernelRetain(.hKernel = [[KERNEL_HANDLE]]) -> UR_RESULT_SUCCESS // -// CHECK:---> piextCommandBufferNDRangeKernel( -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: : [[KERNEL_HANDLE]] +// CHECK:---> urCommandBufferAppendKernelLaunchExp( +// CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] // -// CHECK:---> piKernelRelease( -// CHECK-NEXT: : [[KERNEL_HANDLE]] -// CHECK-NEXT:---> pi_result : PI_SUCCESS +// CHECK:---> urKernelRelease(.hKernel = [[KERNEL_HANDLE]]) -> UR_RESULT_SUCCESS #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/memadvise.cpp b/sycl/test-e2e/Graph/Explicit/memadvise.cpp index 1aa8f17eb290a..9bd335cef3a49 100644 --- a/sycl/test-e2e/Graph/Explicit/memadvise.cpp +++ b/sycl/test-e2e/Graph/Explicit/memadvise.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: %if linux && (level_zero || cuda) %{ env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 FileCheck %s %} %else %{ %{run} %t.out %} +// RUN: %if linux && (level_zero || cuda) %{ env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 FileCheck %s %} %else %{ %{run} %t.out %} // REQUIRES: aspect-usm_shared_allocations @@ -8,29 +8,24 @@ // Since Mem advise is only a memory hint that doesn't // impact results but only performances, we verify -// that a node is correctly added by checking PI function calls. +// that a node is correctly added by checking UR function calls. -// CHECK: piextCommandBufferAdviseUSM -// CHECK-NEXT: : 0x[[#%x,COMMAND_BUFFER:]] -// CHECK-NEXT: : 0x[[#%x,PTR:]] -// CHECK-NEXT: : 400 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0x[[#%x,ADVISE_SYNC_POINT:]] -// CHECK: pi_result : PI_SUCCESS +// CHECK: urCommandBufferAppendUSMAdviseExp +// CHECK-SAME: .hCommandBuffer = 0x[[#%x,COMMAND_BUFFER:]] +// CHECK-SAME: .pMemory = 0x[[#%x,PTR:]] +// CHECK-SAME: .size = 400 +// CHECK-SAME: .pSyncPoint = {{.*}} (0x[[#%x,ADVISE_SYNC_POINT:]]) +// CHECK-SAME: -> UR_RESULT_SUCCESS -// CHECK: piextCommandBufferNDRangeKernel( -// CHECK-NEXT: : 0x[[#COMMAND_BUFFER]] -// CHECK-NEXT: : 0x[[#%x,KERNEL:]] -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 0x[[#%x,GLOBAL_WORK_OFFSET:]] -// CHECK-NEXT: : 0x[[#%x,GLOBAL_WORK_SIZE:]] -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 0x[[#%x,SYNC_POINT_WAIT_LIST:]] -// CHECK-NEXT: : 0x[[#%x,KERNEL_SYNC_POINT:]] -// CHECK: pi_result : PI_SUCCESS +// CHECK: urCommandBufferAppendKernelLaunchExp( +// CHECK-SAME: .hCommandBuffer = 0x[[#%x,COMMAND_BUFFER]] +// CHECK-SAME: .hKernel = 0x[[#%x,KERNEL:]] +// CHECK-SAME: .workDim = 1 +// CHECK-SAME: .pGlobalWorkOffset = 0x[[#%x,GLOBAL_WORK_OFFSET:]] +// CHECK-SAME: .pGlobalWorkSize = 0x[[#%x,GLOBAL_WORK_SIZE:]] +// CHECK-SAME: .pSyncPointWaitList = 0x[[#%x,SYNC_POINT_WAIT_LIST:]] +// CHECK-SAME: .pSyncPoint = 0x[[#%x,KERNEL_SYNC_POINT:]] +// CHECK-SAME: -> UR_RESULT_SUCCESS #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/prefetch.cpp b/sycl/test-e2e/Graph/Explicit/prefetch.cpp index dcf1c159fbae8..a39e148ccaaed 100644 --- a/sycl/test-e2e/Graph/Explicit/prefetch.cpp +++ b/sycl/test-e2e/Graph/Explicit/prefetch.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: %if linux && (level_zero || cuda) %{ env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 FileCheck %s %} %else %{ %{run} %t.out %} +// RUN: %if linux && (level_zero || cuda) %{ env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 FileCheck %s %} %else %{ %{run} %t.out %} // REQUIRES: aspect-usm_shared_allocations @@ -8,29 +8,23 @@ // Since Prefetch is only a memory hint that doesn't // impact results but only performances, we verify -// that a node is correctly added by checking PI function calls +// that a node is correctly added by checking UR function calls -// CHECK: piextCommandBufferPrefetchUSM( -// CHECK-NEXT: : 0x[[#%x,COMMAND_BUFFER:]] -// CHECK-NEXT: : 0x[[#%x,PTR:]] -// CHECK-NEXT: : 400 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0x[[#%x,PREFETCH_SYNC_POINT:]] -// CHECK: pi_result : PI_SUCCESS +// CHECK: urCommandBufferAppendUSMPrefetchExp( +// CHECK-SAME: .hCommandBuffer = 0x[[#%x,COMMAND_BUFFER:]] +// CHECK-SAME: .pMemory = 0x[[#%x,PTR:]] +// CHECK-SAME: .size = 400 +// CHECK-SAME: .pSyncPoint = {{.*}} (0x[[#%x,PREFETCH_SYNC_POINT:]]) -// CHECK: piextCommandBufferNDRangeKernel( -// CHECK-NEXT: : 0x[[#COMMAND_BUFFER]] -// CHECK-NEXT: : 0x[[#%x,KERNEL:]] -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 0x[[#%x,GLOBAL_WORK_OFFSET:]] -// CHECK-NEXT: : 0x[[#%x,GLOBAL_WORK_SIZE:]] -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 0x[[#%x,SYNC_POINT_WAIT_LIST:]] -// CHECK-NEXT: : 0x[[#%x,KERNEL_SYNC_POINT:]] -// CHECK: pi_result : PI_SUCCESS +// CHECK: urCommandBufferAppendKernelLaunchExp( +// CHECK-SAME: .hCommandBuffer = 0x[[#%x,COMMAND_BUFFER]] +// CHECK-SAME: .hKernel = 0x[[#%x,KERNEL:]] +// CHECK-SAME: .workDim = 1 +// CHECK-SAME: .pGlobalWorkOffset = 0x[[#%x,GLOBAL_WORK_OFFSET:]] +// CHECK-SAME: .pGlobalWorkSize = 0x[[#%x,GLOBAL_WORK_SIZE:]] +// CHECK-SAME: .pSyncPointWaitList = 0x[[#%x,SYNC_POINT_WAIT_LIST:]] +// CHECK-SAME: .pSyncPoint = 0x[[#%x,KERNEL_SYNC_POINT:]] +// CHECK-SAME: -> UR_RESULT_SUCCESS #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/RecordReplay/kernel_bundle.cpp b/sycl/test-e2e/Graph/RecordReplay/kernel_bundle.cpp index 790822cf46662..cfd0ed283e84a 100644 --- a/sycl/test-e2e/Graph/RecordReplay/kernel_bundle.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/kernel_bundle.cpp @@ -1,43 +1,40 @@ // RUN: %{build} -o %t.out // RUN: %if cuda %{ %{run} %t.out %} -// RUN: %if level_zero %{env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s --implicit-check-not=LEAK %} +// RUN: %if level_zero %{env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s --implicit-check-not=LEAK %} -// Checks the PI call trace to ensure that the bundle kernel of the single task +// TODO: Reenable, see https://github.com/intel/llvm/issues/14763 +// UNSUPPORTED: windows, linux + +// Checks the UR call trace to ensure that the bundle kernel of the single task // is used. -// CHECK:---> piProgramCreate -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS -// CHECK-NEXT: [out] ** : {{.*}}[ [[PROGRAM_HANDLE1:[0-9a-fA-Fx]]] +// CHECK:---> urProgramCreate +// CHECK-SAME:, .phProgram = {{.*}} ([[PROGRAM_HANDLE1:[0-9a-fA-Fx]+]]) +// CHECK-SAME: -> UR_RESULT_SUCCESS; // -// CHECK:---> piProgramBuild( -// CHECK-NEXT: : [[PROGRAM_HANDLE1]] +// CHECK:---> urProgramBuild( +// CHECK-SAME: .hProgram = [[PROGRAM_HANDLE1]] // -// CHECK:---> piProgramRetain( -// CHECK-NEXT: : [[PROGRAM_HANDLE1]] -// CHECK-NEXT:---> pi_result : PI_SUCCESS +// CHECK:---> urProgramRetain( +// CHECK-SAME: .hProgram = [[PROGRAM_HANDLE1]] +// CHECK-SAME: -> UR_RESULT_SUCCESS; -// CHECK:---> piKernelCreate( -// CHECK-NEXT: : [[PROGRAM_HANDLE1]] -// CHECK-NEXT:: _ZTS11Kernel1Name -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: ---> pi_result : PI_SUCCESS -// CHECK-NEXT: [out] ** : {{.*}}[ [[KERNEL_HANDLE:[0-9a-fA-Fx]]] +// CHECK:---> urKernelCreate( +// CHECK-SAME: .hProgram = [[PROGRAM_HANDLE1]] +// CHECK-SAME: .pKernelName = {{[0-9a-fA-Fx]+}} (_ZTS11Kernel1Name) +// CHECK-SAME: .phKernel = {{[0-9a-fA-Fx]+}} ([[KERNEL_HANDLE:[0-9a-fA-Fx]+]]) +// CHECK-SAME: -> UR_RESULT_SUCCESS; // -// CHECK:---> piKernelRetain( -// CHECK-NEXT: : [[KERNEL_HANDLE]] -// CHECK-NEXT:---> pi_result : PI_SUCCESS +// CHECK:---> urKernelRetain( +// CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] +// CHECK-SAME: -> UR_RESULT_SUCCESS; // -// CHECK:---> piextCommandBufferNDRangeKernel( -// CHECK-NEXT: : {{.*}} -// CHECK-NEXT: : [[KERNEL_HANDLE]] +// CHECK:---> urCommandBufferAppendKernelLaunchExp( +// CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] // -// CHECK:---> piKernelRelease( -// CHECK-NEXT: : [[KERNEL_HANDLE]] -// CHECK-NEXT:---> pi_result : PI_SUCCESS +// CHECK:---> urKernelRelease( +// CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] +// CHECK-SAME: -> UR_RESULT_SUCCESS; #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/memadvise.cpp b/sycl/test-e2e/Graph/RecordReplay/memadvise.cpp index 1c801b0632cfe..7a4c0791a7811 100644 --- a/sycl/test-e2e/Graph/RecordReplay/memadvise.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/memadvise.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: %if linux && (level_zero || cuda) %{ env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 FileCheck %s %} %else %{ %{run} %t.out %} +// RUN: %if linux && (level_zero || cuda) %{ env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 FileCheck %s %} %else %{ %{run} %t.out %} // REQUIRES: aspect-usm_shared_allocations @@ -8,29 +8,23 @@ // Since Mem advise is only a memory hint that doesn't // impact results but only performances, we verify -// that a node is correctly added by checking PI function calls. +// that a node is correctly added by checking UR function calls. -// CHECK: piextCommandBufferAdviseUSM -// CHECK-NEXT: : 0x[[#%x,COMMAND_BUFFER:]] -// CHECK-NEXT: : 0x[[#%x,PTR:]] -// CHECK-NEXT: : 400 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0x[[#%x,ADVISE_SYNC_POINT:]] -// CHECK: pi_result : PI_SUCCESS +// CHECK: urCommandBufferAppendUSMAdviseExp +// CHECK-SAME: .hCommandBuffer = 0x[[#%x,COMMAND_BUFFER:]] +// CHECK-SAME: .pMemory = 0x[[#%x,PTR:]] +// CHECK-SAME: .size = 400 +// CHECK-SAME: .pSyncPoint = {{.*}} (0x[[#%x,ADVISE_SYNC_POINT:]]) -// CHECK: piextCommandBufferNDRangeKernel( -// CHECK-NEXT: : 0x[[#COMMAND_BUFFER]] -// CHECK-NEXT: : 0x[[#%x,KERNEL:]] -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 0x[[#%x,GLOBAL_WORK_OFFSET:]] -// CHECK-NEXT: : 0x[[#%x,GLOBAL_WORK_SIZE:]] -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 0x[[#%x,SYNC_POINT_WAIT_LIST:]] -// CHECK-NEXT: : 0x[[#%x,KERNEL_SYNC_POINT:]] -// CHECK: pi_result : PI_SUCCESS +// CHECK: urCommandBufferAppendKernelLaunchExp( +// CHECK-SAME: .hCommandBuffer = 0x[[#%x,COMMAND_BUFFER]] +// CHECK-SAME: .hKernel = 0x[[#%x,KERNEL:]] +// CHECK-SAME: .workDim = 1 +// CHECK-SAME: .pGlobalWorkOffset = 0x[[#%x,GLOBAL_WORK_OFFSET:]] +// CHECK-SAME: .pGlobalWorkSize = 0x[[#%x,GLOBAL_WORK_SIZE:]] +// CHECK-SAME: .pSyncPointWaitList = 0x[[#%x,SYNC_POINT_WAIT_LIST:]] +// CHECK-SAME: .pSyncPoint = 0x[[#%x,KERNEL_SYNC_POINT:]] +// CHECK-SAME: -> UR_RESULT_SUCCESS #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/prefetch.cpp b/sycl/test-e2e/Graph/RecordReplay/prefetch.cpp index 07e77d769404f..68d78ee23c0da 100644 --- a/sycl/test-e2e/Graph/RecordReplay/prefetch.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/prefetch.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: %if linux && (level_zero || cuda) %{ env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 FileCheck %s %} %else %{ %{run} %t.out %} +// RUN: %if linux && (level_zero || cuda) %{ env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 FileCheck %s %} %else %{ %{run} %t.out %} // REQUIRES: aspect-usm_shared_allocations @@ -8,29 +8,23 @@ // Since Prefetch is only a memory hint that doesn't // impact results but only performances, we verify -// that a node is correctly added by checking PI function calls +// that a node is correctly added by checking UR function calls -// CHECK: piextCommandBufferPrefetchUSM( -// CHECK-NEXT: : 0x[[#%x,COMMAND_BUFFER:]] -// CHECK-NEXT: : 0x[[#%x,PTR:]] -// CHECK-NEXT: : 400 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0x[[#%x,PREFETCH_SYNC_POINT:]] -// CHECK: pi_result : PI_SUCCESS +// CHECK: urCommandBufferAppendUSMPrefetchExp( +// CHECK-SAME: .hCommandBuffer = 0x[[#%x,COMMAND_BUFFER:]] +// CHECK-SAME: .pMemory = 0x[[#%x,PTR:]] +// CHECK-SAME: .size = 400 +// CHECK-SAME: .pSyncPoint = {{.*}} (0x[[#%x,PREFETCH_SYNC_POINT:]]) -// CHECK: piextCommandBufferNDRangeKernel( -// CHECK-NEXT: : 0x[[#COMMAND_BUFFER]] -// CHECK-NEXT: : 0x[[#%x,KERNEL:]] -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 0x[[#%x,GLOBAL_WORK_OFFSET:]] -// CHECK-NEXT: : 0x[[#%x,GLOBAL_WORK_SIZE:]] -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 0x[[#%x,SYNC_POINT_WAIT_LIST:]] -// CHECK-NEXT: : 0x[[#%x,KERNEL_SYNC_POINT:]] -// CHECK: pi_result : PI_SUCCESS +// CHECK: urCommandBufferAppendKernelLaunchExp( +// CHECK-SAME: .hCommandBuffer = 0x[[#%x,COMMAND_BUFFER]] +// CHECK-SAME: .hKernel = 0x[[#%x,KERNEL:]] +// CHECK-SAME: .workDim = 1 +// CHECK-SAME: .pGlobalWorkOffset = 0x[[#%x,GLOBAL_WORK_OFFSET:]] +// CHECK-SAME: .pGlobalWorkSize = 0x[[#%x,GLOBAL_WORK_SIZE:]] +// CHECK-SAME: .pSyncPointWaitList = 0x[[#%x,SYNC_POINT_WAIT_LIST:]] +// CHECK-SAME: .pSyncPoint = 0x[[#%x,KERNEL_SYNC_POINT:]] +// CHECK-SAME: -> UR_RESULT_SUCCESS #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp index c59c63637aace..b6ac1f96f90e1 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp @@ -8,7 +8,6 @@ // // Make sure that memory migration works for buffers across devices in a context // when using host tasks. -// #include #include diff --git a/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp b/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp index 05d4edf1d2d19..7682f4ad25160 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp index 0ff0359149469..2fb2739c69958 100644 --- a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp +++ b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // Test to check that we don't insert unnecessary urEnqueueEventsWaitWithBarrier // calls if queue is in-order and wait list is empty. diff --git a/sycl/test-e2e/InorderQueue/in_order_usm_implicit.cpp b/sycl/test-e2e/InorderQueue/in_order_usm_implicit.cpp index 8245921ce18d6..bcd75f65f7953 100644 --- a/sycl/test-e2e/InorderQueue/in_order_usm_implicit.cpp +++ b/sycl/test-e2e/InorderQueue/in_order_usm_implicit.cpp @@ -28,7 +28,7 @@ int main() { { queue Queue{property::queue::in_order()}; - const int mem_advice = PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY; + const int mem_advice = UR_USM_ADVICE_FLAG_SET_READ_MOSTLY; const int dataSize = 32; const size_t numBytes = static_cast(dataSize) * sizeof(int); diff --git a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp index c1c084a1c7671..7ebf391a631b5 100644 --- a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp +++ b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp @@ -41,7 +41,7 @@ void test() { } else { // Exception constantly adds info on its error code in the message assert(Msg.find_first_of(e.what()) == 0 && - "PI_ERROR_BUILD_PROGRAM_FAILURE"); + "UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE"); } } catch (...) { assert(false && "Caught exception was not a compilation error"); diff --git a/sycl/test-e2e/KernelAndProgram/cache_env_vars.cpp b/sycl/test-e2e/KernelAndProgram/cache_env_vars.cpp index 6162a74f85ea2..ddbd9eceaead1 100644 --- a/sycl/test-e2e/KernelAndProgram/cache_env_vars.cpp +++ b/sycl/test-e2e/KernelAndProgram/cache_env_vars.cpp @@ -3,15 +3,15 @@ // RUN: rm -rf %t/cache_dir // RUN: %{build} -o %t.out -DTARGET_IMAGE=INC100 // Build program and add item to cache -// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD // Ignore caching because image size is less than threshold -// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE=1000000 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE=1000000 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD // Ignore caching because image size is more than threshold -// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE=1000 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE=1000 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD // Use cache -// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-CACHE // Ignore cache because of environment variable -// RUN: env SYCL_CACHE_PERSISTENT=0 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=0 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD // // The test checks environment variables which may disable caching. // Also it can be used for benchmarking cache: @@ -22,12 +22,16 @@ // CPU OCL JIT 0.12 0.12 0.16 1.1 16 // CPU OCL Cache 0.01 0.01 0.01 0.02 0.08 -// CHECK-BUILD-NOT: piProgramCreateWithBinary( -// CHECK-BUILD: piProgramCreate( -// CHECK-BUILD: piProgramBuild( -// CHECK-CACHE-NOT: piProgramCreate( -// CHECK-CACHE: piProgramCreateWithBinary( -// CHECK-CACHE: piProgramBuild( +// Some backends will call urProgramBuild and some will call +// urProgramBuildExp depending on urProgramBuildExp support. + +// CHECK-BUILD-NOT: urProgramCreateWithBinary( +// CHECK-BUILD: urProgramCreateWithIL( +// CHECK-BUILD: urProgramBuild{{(Exp)?}}( + +// CHECK-CACHE-NOT: urProgramCreateWithIL( +// CHECK-CACHE: urProgramCreateWithBinary( +// CHECK-CACHE: urProgramBuild{{(Exp)?}}( #include "cache_env_vars.hpp" diff --git a/sycl/test-e2e/KernelAndProgram/cache_env_vars_lin.cpp b/sycl/test-e2e/KernelAndProgram/cache_env_vars_lin.cpp index b1654acfb7a86..8046b1af7d653 100644 --- a/sycl/test-e2e/KernelAndProgram/cache_env_vars_lin.cpp +++ b/sycl/test-e2e/KernelAndProgram/cache_env_vars_lin.cpp @@ -7,27 +7,29 @@ // When no environment variables pointing cache directory are set the cache is // disabled -// RUN: env SYCL_CACHE_PERSISTENT=1 env -u SYCL_CACHE_DIR env -u HOME env -u XDG_CACHE_HOME SYCL_PI_TRACE=-1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD -// RUN: env SYCL_CACHE_PERSISTENT=1 env -u SYCL_CACHE_DIR env -u HOME env -u XDG_CACHE_HOME SYCL_PI_TRACE=-1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 env -u SYCL_CACHE_DIR env -u HOME env -u XDG_CACHE_HOME SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 env -u SYCL_CACHE_DIR env -u HOME env -u XDG_CACHE_HOME SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD // When any of environment variables pointing to cache root is present cache is // enabled // RUN: rm -rf %t/cache_dir -// RUN: env SYCL_CACHE_PERSISTENT=1 XDG_CACHE_HOME=%t/cache_dir SYCL_PI_TRACE=-1 env -u SYCL_CACHE_DIR env -u HOME %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD -// RUN: env SYCL_CACHE_PERSISTENT=1 XDG_CACHE_HOME=%t/cache_dir SYCL_PI_TRACE=-1 env -u SYCL_CACHE_DIR env -u HOME %{run} %t.out | FileCheck %s --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_PERSISTENT=1 XDG_CACHE_HOME=%t/cache_dir SYCL_UR_TRACE=1 env -u SYCL_CACHE_DIR env -u HOME %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 XDG_CACHE_HOME=%t/cache_dir SYCL_UR_TRACE=1 env -u SYCL_CACHE_DIR env -u HOME %{run} %t.out | FileCheck %s --check-prefixes=CHECK-CACHE // RUN: rm -rf %t/cache_dir -// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 env -u XDG_CACHE_HOME env -u HOME %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD -// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 env -u XDG_CACHE_HOME env -u HOME %{run} %t.out %t.out | FileCheck %s --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 env -u XDG_CACHE_HOME env -u HOME %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 env -u XDG_CACHE_HOME env -u HOME %{run} %t.out | FileCheck %s --check-prefixes=CHECK-CACHE // RUN: rm -rf %t/cache_dir -// RUN: env SYCL_CACHE_PERSISTENT=1 HOME=%t/cache_dir SYCL_PI_TRACE=-1 env -u XDG_CACHE_HOME env -u SYCL_CACHE_DIR %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD -// RUN: env SYCL_CACHE_PERSISTENT=1 HOME=%t/cache_dir SYCL_PI_TRACE=-1 env -u XDG_CACHE_HOME env -u SYCL_CACHE_DIR %{run} %t.out | FileCheck %s --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_PERSISTENT=1 HOME=%t/cache_dir SYCL_UR_TRACE=1 env -u XDG_CACHE_HOME env -u SYCL_CACHE_DIR %{run} %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 HOME=%t/cache_dir SYCL_UR_TRACE=1 env -u XDG_CACHE_HOME env -u SYCL_CACHE_DIR %{run} %t.out | FileCheck %s --check-prefixes=CHECK-CACHE -// CHECK-BUILD-NOT: piProgramCreateWithBinary( -// CHECK-BUILD: piProgramCreate( -// CHECK-BUILD: piProgramBuild( +// Some backends will call urProgramBuild and some will call urProgramBuildExp depending on urProgramBuildExp support. -// CHECK-CACHE-NOT: piProgramCreate( -// CHECK-CACHE: piProgramCreateWithBinary( -// CHECK-CACHE: piProgramBuild( +// CHECK-BUILD-NOT: urProgramCreateWithBinary( +// CHECK-BUILD: urProgramCreateWithIL( +// CHECK-BUILD: urProgramBuild{{(Exp)?}}( + +// CHECK-CACHE-NOT: urProgramCreateWithIL( +// CHECK-CACHE: urProgramCreateWithBinary( +// CHECK-CACHE: urProgramBuild{{(Exp)?}}( #include "cache_env_vars.hpp" diff --git a/sycl/test-e2e/KernelAndProgram/cache_env_vars_win.cpp b/sycl/test-e2e/KernelAndProgram/cache_env_vars_win.cpp index 0d2823dd3cdce..438cf9646321f 100644 --- a/sycl/test-e2e/KernelAndProgram/cache_env_vars_win.cpp +++ b/sycl/test-e2e/KernelAndProgram/cache_env_vars_win.cpp @@ -7,24 +7,24 @@ // When no environment variables pointing cache directory are set the cache is // disabled -// RUN: env SYCL_CACHE_PERSISTENT=1 env -u SYCL_CACHE_DIR env -u AppData SYCL_PI_TRACE=-1 %t.out | FileCheck %s --check-prefixes=CHECK-BUILD -// RUN: env SYCL_CACHE_PERSISTENT=1 env -u SYCL_CACHE_DIR env -u AppData SYCL_PI_TRACE=-1 %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 env -u SYCL_CACHE_DIR env -u AppData SYCL_UR_TRACE=1 %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 env -u SYCL_CACHE_DIR env -u AppData SYCL_UR_TRACE=1 %t.out | FileCheck %s --check-prefixes=CHECK-BUILD // When any of environment variables pointing to cache root is present cache is // enabled // RUN: rm -rf %t/cache_dir -// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 env -u AppData %t.out | FileCheck %s --check-prefixes=CHECK-BUILD -// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_PI_TRACE=-1 env -u AppData %t.out %t.out | FileCheck %s --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 env -u AppData %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_DIR=%t/cache_dir SYCL_UR_TRACE=1 env -u AppData %t.out %t.out | FileCheck %s --check-prefixes=CHECK-CACHE // RUN: rm -rf %t/cache_dir -// RUN: env SYCL_CACHE_PERSISTENT=1 AppData=%t/cache_dir SYCL_PI_TRACE=-1 env -u SYCL_CACHE_DIR %t.out | FileCheck %s --check-prefixes=CHECK-BUILD -// RUN: env SYCL_CACHE_PERSISTENT=1 AppData=%t/cache_dir SYCL_PI_TRACE=-1 env -u SYCL_CACHE_DIR %t.out | FileCheck %s --check-prefixes=CHECK-CACHE +// RUN: env SYCL_CACHE_PERSISTENT=1 AppData=%t/cache_dir SYCL_UR_TRACE=1 env -u SYCL_CACHE_DIR %t.out | FileCheck %s --check-prefixes=CHECK-BUILD +// RUN: env SYCL_CACHE_PERSISTENT=1 AppData=%t/cache_dir SYCL_UR_TRACE=1 env -u SYCL_CACHE_DIR %t.out | FileCheck %s --check-prefixes=CHECK-CACHE -// CHECK-BUILD-NOT: piProgramCreateWithBinary( -// CHECK-BUILD: piProgramCreate( -// CHECK-BUILD: piProgramBuild( +// CHECK-BUILD-NOT: urProgramCreateWithBinary( +// CHECK-BUILD: urProgramCreateWithIL( +// CHECK-BUILD: urProgramBuild{{(Exp)?}}( -// CHECK-CACHE-NOT: piProgramCreate( -// CHECK-CACHE: piProgramCreateWithBinary( -// CHECK-CACHE: piProgramBuild( +// CHECK-CACHE-NOT: urProgramCreateWithIL( +// CHECK-CACHE: urProgramCreateWithBinary( +// CHECK-CACHE: urProgramBuild{{(Exp)?}}( #include "cache_env_vars.hpp" diff --git a/sycl/test-e2e/KernelAndProgram/disable-caching.cpp b/sycl/test-e2e/KernelAndProgram/disable-caching.cpp index 772fc54ae4f6e..a84fde4d39d42 100644 --- a/sycl/test-e2e/KernelAndProgram/disable-caching.cpp +++ b/sycl/test-e2e/KernelAndProgram/disable-caching.cpp @@ -2,11 +2,14 @@ // if and only if caching is disabled. // RUN: %{build} -o %t.out -// RUN: env ZE_DEBUG=-6 SYCL_PI_TRACE=-1 SYCL_CACHE_IN_MEM=0 %{run} %t.out \ +// RUN: env ZE_DEBUG=-6 SYCL_UR_TRACE=1 SYCL_CACHE_IN_MEM=0 %{run} %t.out \ // RUN: | FileCheck %s -// RUN: env ZE_DEBUG=-6 SYCL_PI_TRACE=-1 %{run} %t.out \ +// RUN: env ZE_DEBUG=-6 SYCL_UR_TRACE=1 %{run} %t.out \ // RUN: | FileCheck %s --check-prefixes=CHECK-CACHE +// TODO: Reenable on Windows, see https://github.com/intel/llvm/issues/14768 +// XFAIL: windows + #include #include @@ -18,66 +21,66 @@ constexpr specialization_id spec_id; int main() { queue q; - // CHECK: piProgramCreate - // CHECK-NOT: piProgramRetain - // CHECK: piKernelCreate - // CHECK-NOT: piKernelRetain - // CHECK: piEnqueueKernelLaunch - // CHECK: piKernelRelease - // CHECK: piProgramRelease - // CHECK: piEventsWait + // CHECK: urProgramCreate + // CHECK-NOT: urProgramRetain + // CHECK: urKernelCreate + // CHECK-NOT: urKernelRetain + // CHECK: urEnqueueKernelLaunch + // CHECK: urKernelRelease + // CHECK: urProgramRelease + // CHECK: urEventWait - // CHECK-CACHE: piProgramCreate - // CHECK-CACHE: piProgramRetain - // CHECK-CACHE-NOT: piProgramRetain - // CHECK-CACHE: piKernelCreate - // CHECK-CACHE: piKernelRetain - // CHECK-CACHE-NOT: piKernelCreate - // CHECK-CACHE: piEnqueueKernelLaunch - // CHECK-CACHE: piKernelRelease - // CHECK-CACHE: piProgramRelease - // CHECK-CACHE: piEventsWait + // CHECK-CACHE: urProgramCreate + // CHECK-CACHE: urProgramRetain + // CHECK-CACHE-NOT: urProgramRetain + // CHECK-CACHE: urKernelCreate + // CHECK-CACHE: urKernelRetain + // CHECK-CACHE-NOT: urKernelCreate + // CHECK-CACHE: urEnqueueKernelLaunch + // CHECK-CACHE: urKernelRelease + // CHECK-CACHE: urProgramRelease + // CHECK-CACHE: urEventWait q.single_task([] {}).wait(); - // CHECK: piProgramCreate - // CHECK-NOT: piProgramRetain - // CHECK: piKernelCreate - // CHECK-NOT: piKernelRetain - // CHECK: piEnqueueKernelLaunch - // CHECK: piKernelRelease - // CHECK: piProgramRelease - // CHECK: piEventsWait + // CHECK: urProgramCreate + // CHECK-NOT: urProgramRetain + // CHECK: urKernelCreate + // CHECK-NOT: urKernelRetain + // CHECK: urEnqueueKernelLaunch + // CHECK: urKernelRelease + // CHECK: urProgramRelease + // CHECK: urEventWait - // CHECK-CACHE: piProgramCreate - // CHECK-CACHE: piProgramRetain - // CHECK-CACHE-NOT: piProgramRetain - // CHECK-CACHE: piKernelCreate - // CHECK-CACHE: piKernelRetain - // CHECK-CACHE-NOT: piKernelCreate - // CHECK-CACHE: piEnqueueKernelLaunch - // CHECK-CACHE: piKernelRelease - // CHECK-CACHE: piProgramRelease - // CHECK-CACHE: piEventsWait + // CHECK-CACHE: urProgramCreate + // CHECK-CACHE: urProgramRetain + // CHECK-CACHE-NOT: urProgramRetain + // CHECK-CACHE: urKernelCreate + // CHECK-CACHE: urKernelRetain + // CHECK-CACHE-NOT: urKernelCreate + // CHECK-CACHE: urEnqueueKernelLaunch + // CHECK-CACHE: urKernelRelease + // CHECK-CACHE: urProgramRelease + // CHECK-CACHE: urEventWait - // CHECK: piProgramCreate - // CHECK-NOT: piProgramRetain - // CHECK: piKernelCreate - // CHECK-NOT: piKernelRetain - // CHECK: piEnqueueKernelLaunch - // CHECK: piKernelRelease - // CHECK: piProgramRelease - // CHECK: piEventsWait + // CHECK: urProgramCreate + // CHECK-NOT: urProgramRetain + // CHECK: urKernelCreate + // CHECK-NOT: urKernelRetain + // CHECK: urEnqueueKernelLaunch + // CHECK: urKernelRelease + // CHECK: urProgramRelease + // CHECK: urEventWait - // CHECK-CACHE: piProgramCreate - // CHECK-CACHE: piProgramRetain - // CHECK-CACHE-NOT: piProgramRetain - // CHECK-CACHE: piKernelCreate - // CHECK-CACHE: piKernelRetain - // CHECK-CACHE-NOT: piKernelCreate - // CHECK-CACHE: piEnqueueKernelLaunch - // CHECK-CACHE: piKernelRelease - // CHECK-CACHE: piProgramRelease - // CHECK-CACHE: piEventsWait + // CHECK-CACHE: urProgramCreate + // CHECK-CACHE: urProgramRetain + // CHECK-CACHE-NOT: urProgramRetain + // CHECK-CACHE: urKernelCreate + // CHECK-CACHE: urKernelRetain + // CHECK-CACHE-NOT: urKernelCreate + // CHECK-CACHE: urEnqueueKernelLaunch + // CHECK-CACHE: urKernelRelease + // CHECK-CACHE: urProgramRelease + // CHECK-CACHE: urEventWait auto *p = malloc_device(1, q); for (int i = 0; i < 2; ++i) q.submit([&](handler &cgh) { @@ -91,9 +94,9 @@ int main() { } // (Program cache releases) -// CHECK-CACHE: piKernelRelease -// CHECK-CACHE: piKernelRelease -// CHECK-CACHE: piKernelRelease -// CHECK-CACHE: piProgramRelease -// CHECK-CACHE: piProgramRelease -// CHECK-CACHE: piProgramRelease +// CHECK-CACHE: urKernelRelease +// CHECK-CACHE: urKernelRelease +// CHECK-CACHE: urKernelRelease +// CHECK-CACHE: urProgramRelease +// CHECK-CACHE: urProgramRelease +// CHECK-CACHE: urProgramRelease diff --git a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options-env.cpp b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options-env.cpp index 37edd8f47ebfd..c2589dcd42f88 100644 --- a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options-env.cpp +++ b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options-env.cpp @@ -1,28 +1,16 @@ // REQUIRES: gpu // Disable fallback assert here so, that build process isn't affected // RUN: %{build} -DSYCL_DISABLE_FALLBACK_ASSERT=1 -o %t.out %debug_option -// RUN: env SYCL_PI_TRACE=-1 SYCL_PROGRAM_COMPILE_OPTIONS=-DENV_COMPILE_OPTS SYCL_PROGRAM_LINK_OPTIONS=-DENV_LINK_OPTS SYCL_PROGRAM_APPEND_COMPILE_OPTIONS=-DENV_APPEND_COMPILE_OPTS SYCL_PROGRAM_APPEND_LINK_OPTIONS=-DENV_APPEND_LINK_OPTS %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 SYCL_PROGRAM_COMPILE_OPTIONS=-DENV_COMPILE_OPTS SYCL_PROGRAM_LINK_OPTIONS=-DENV_LINK_OPTS SYCL_PROGRAM_APPEND_COMPILE_OPTIONS=-DENV_APPEND_COMPILE_OPTS SYCL_PROGRAM_APPEND_LINK_OPTIONS=-DENV_APPEND_LINK_OPTS %{run} %t.out | FileCheck %s // Check that options are overrided // RUN: %{build} -DSYCL_DISABLE_FALLBACK_ASSERT=1 -Xsycl-target-linker=spir64 -DBAR -Xsycl-target-frontend=spir64 -DBAR_COMPILE -o %t.out -// RUN: env SYCL_PI_TRACE=-1 SYCL_PROGRAM_COMPILE_OPTIONS=-DENV_COMPILE_OPTS SYCL_PROGRAM_LINK_OPTIONS=-DENV_LINK_OPTS SYCL_PROGRAM_APPEND_COMPILE_OPTIONS=-DENV_APPEND_COMPILE_OPTS SYCL_PROGRAM_APPEND_LINK_OPTIONS=-DENV_APPEND_LINK_OPTS %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 SYCL_PROGRAM_COMPILE_OPTIONS=-DENV_COMPILE_OPTS SYCL_PROGRAM_LINK_OPTIONS=-DENV_LINK_OPTS SYCL_PROGRAM_APPEND_COMPILE_OPTIONS=-DENV_APPEND_COMPILE_OPTS SYCL_PROGRAM_APPEND_LINK_OPTIONS=-DENV_APPEND_LINK_OPTS %{run} %t.out | FileCheck %s // UNSUPPORTED: hip #include "kernel-bundle-merge-options.hpp" -// CHECK: piProgramBuild -// CHECK-NEXT: -// CHECK-NEXT: -// CHECK-NEXT: -// CHECK: :{{[^bar]*}}-DENV_COMPILE_OPTS -DENV_APPEND_COMPILE_OPTS{{[^bar]*}}-DENV_LINK_OPTS -DENV_APPEND_LINK_OPTS{{[^bar]*}} +// CHECK: urProgramBuild{{.*}}{{[^bar]*}}-DENV_COMPILE_OPTS -DENV_APPEND_COMPILE_OPTS{{[^bar]*}}-DENV_LINK_OPTS -DENV_APPEND_LINK_OPTS{{[^bar]*}} -// CHECK: piProgramCompile( -// CHECK-NEXT: -// CHECK-NEXT: -// CHECK-NEXT: -// CHECK: :{{[^bar]*}}-DENV_COMPILE_OPTS -DENV_APPEND_COMPILE_OPTS{{[^bar]*}} +// CHECK: urProgramCompile{{.*}}{{[^bar]*}}-DENV_COMPILE_OPTS -DENV_APPEND_COMPILE_OPTS{{[^bar]*}} -// CHECK: piProgramLink( -// CHECK-NEXT: -// CHECK-NEXT: -// CHECK-NEXT: -// CHECK: :{{[^bar]*}}-DENV_LINK_OPTS -DENV_APPEND_LINK_OPTS{{[^bar]*}} +// CHECK: urProgramLink{{.*}}{{[^bar]*}}-DENV_LINK_OPTS -DENV_APPEND_LINK_OPTS{{[^bar]*}} diff --git a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp index 42fa384ca519f..2662323d365e7 100644 --- a/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp +++ b/sycl/test-e2e/KernelAndProgram/kernel-bundle-merge-options.cpp @@ -1,6 +1,6 @@ // REQUIRES: gpu // RUN: %{build} -o %t.out %debug_option -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s // UNSUPPORTED: hip // Debug option -g is not passed to device code compiler when CL-style driver @@ -9,21 +9,12 @@ #include "kernel-bundle-merge-options.hpp" -// CHECK: piProgramBuild -// CHECK-NEXT: -// CHECK-NEXT: -// CHECK-NEXT: -// CHECK-NEXT: :{{.*}}-g +// CHECK: urProgramBuild +// CHECK-SAME: -g // TODO: Uncomment when build options are properly passed to compile and link // commands for kernel_bundle -// xCHECK: piProgramCompile( -// xCHECK-NEXT: -// xCHECK-NEXT: -// xCHECK-NEXT: -// xCHECK-NEXT: :{{.*}}-g -// xCHECK: piProgramLink( -// xCHECK-NEXT: -// xCHECK-NEXT: -// xCHECK-NEXT: -// xCHECK-NEXT: :{{.*}}-g +// xCHECK: urProgramCompile( +// xCHECK-SAME: -g +// xCHECK: urProgramLink( +// xCHECK-SAME: -g diff --git a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp index 8be17e24d229d..c13cf39e41847 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp @@ -1,7 +1,7 @@ // REQUIRES: level_zero // UNSUPPORTED: ze_debug // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s // //==--- level-zero-static-link-flow.cpp.cpp - Check L0 static link flow --==// // @@ -16,13 +16,13 @@ //===--------------------------------------------------------------===// // The key thing we check here is that the call to "zeModuleCreate" does not -// happen from "piProgramCompile". Instead, we expect it to be delayed and -// called from "piProgramLink". +// happen from "urProgramCompile". Instead, we expect it to be delayed and +// called from "urProgramLink". // -// CHECK: ---> piProgramCreate -// CHECK: ---> piProgramCompile +// CHECK: ---> urProgramCreate +// CHECK: ---> urProgramCompile // CHECK-NOT: ZE ---> zeModuleCreate -// CHECK: ---> piProgramLink +// CHECK: ---> urProgramLink // CHECK: ZE ---> zeModuleCreate #include diff --git a/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp b/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp index 70c5ce2bc64a5..2501ca0b6e2df 100644 --- a/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp +++ b/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp @@ -1,15 +1,14 @@ // RUN: %{build} -ftarget-compile-fast -o %t_with.out // RUN: %{build} -o %t_without.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t_with.out 2>&1 | FileCheck %if !gpu || hip || cuda %{ --check-prefix=CHECK-WITHOUT %} %else %{ --check-prefix=CHECK-INTEL-GPU-WITH %} %s -// RUN: env SYCL_PI_TRACE=-1 %{run} %t_without.out 2>&1 | FileCheck --implicit-check-not=-igc_opts %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t_with.out 2>&1 | FileCheck %if !gpu || hip || cuda %{ --check-prefix=CHECK-WITHOUT %} %else %{ --check-prefix=CHECK-INTEL-GPU-WITH %} %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t_without.out 2>&1 | FileCheck --implicit-check-not=-igc_opts %s -// CHECK-INTEL-GPU-WITH: ---> piProgramBuild( -// CHECK-INTEL-GPU-WITH: -igc_opts 'PartitionUnit=1,SubroutineThreshold=50000' +// CHECK-INTEL-GPU-WITH: ---> urProgramBuild +// CHECK-INTEL-GPU-WITH-SAME: -igc_opts 'PartitionUnit=1,SubroutineThreshold=50000' -// CHECK-WITHOUT: ---> piProgramBuild( -// CHECK-WITHOUT-NOT: -igc_opts -// CHECK-WITHOUT: ) ---> pi_result : PI_SUCCESS +// CHECK-WITHOUT-NOT: ---> urProgramBuild{{.*}}-igc_opts{{.*}} -> UR_RESULT_SUCCESS +// CHECK-WITHOUT: ---> urProgramBuild{{.*}} -> UR_RESULT_SUCCESS #include diff --git a/sycl/test-e2e/KernelAndProgram/target_register_alloc_mode.cpp b/sycl/test-e2e/KernelAndProgram/target_register_alloc_mode.cpp index 21d6a668a9a5e..f6e2d00e15af8 100644 --- a/sycl/test-e2e/KernelAndProgram/target_register_alloc_mode.cpp +++ b/sycl/test-e2e/KernelAndProgram/target_register_alloc_mode.cpp @@ -4,12 +4,12 @@ // RUN: %{build} -o %t_without.out // RUN: %{build} -ftarget-register-alloc-mode=pvc:default -o %t_default.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t_with.out 2>&1 | FileCheck --check-prefix=CHECK-OPT %s -// RUN: env SYCL_PI_TRACE=-1 %{run} %t_without.out 2>&1 | FileCheck %if system-windows %{ --implicit-check-not=-ze-intel-enable-auto-large-GRF-mode %} %else %{ --check-prefix=CHECK-OPT %} %s -// RUN: env SYCL_PI_TRACE=-1 %{run} %t_default.out 2>&1 | FileCheck --implicit-check-not=-ze-intel-enable-auto-large-GRF-mode %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t_with.out 2>&1 | FileCheck --check-prefix=CHECK-OPT %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t_without.out 2>&1 | FileCheck %if system-windows %{ --implicit-check-not=-ze-intel-enable-auto-large-GRF-mode %} %else %{ --check-prefix=CHECK-OPT %} %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t_default.out 2>&1 | FileCheck --implicit-check-not=-ze-intel-enable-auto-large-GRF-mode %s -// CHECK-OPT: ---> piProgramBuild( -// CHECK-OPT: -ze-intel-enable-auto-large-GRF-mode +// CHECK-OPT: ---> urProgramBuild( +// CHECK-SAME-OPT: -ze-intel-enable-auto-large-GRF-mode #include diff --git a/sycl/test-e2e/KernelFusion/cancel_fusion.cpp b/sycl/test-e2e/KernelFusion/cancel_fusion.cpp index 8141d22534fbb..763976198bfc2 100644 --- a/sycl/test-e2e/KernelFusion/cancel_fusion.cpp +++ b/sycl/test-e2e/KernelFusion/cancel_fusion.cpp @@ -1,11 +1,11 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // Test cancel fusion // As fusion is cancelled, this test launches two kernels. -// CHECK-COUNT-2: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-2: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include #include diff --git a/sycl/test-e2e/KernelFusion/complete_fusion.cpp b/sycl/test-e2e/KernelFusion/complete_fusion.cpp index e9c79048c77cf..cd58ddc869ecf 100644 --- a/sycl/test-e2e/KernelFusion/complete_fusion.cpp +++ b/sycl/test-e2e/KernelFusion/complete_fusion.cpp @@ -1,11 +1,11 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s // Test complete fusion without any internalization // The two kernels are fused, so only a single, fused kernel is launched. -// CHECK-COUNT-1: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-1: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include #include diff --git a/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp b/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp index ce7cbd0432b34..ba4fd3d5a6506 100644 --- a/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp +++ b/sycl/test-e2e/KernelFusion/different_nd_ranges.cpp @@ -1,13 +1,13 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: env SYCL_PI_TRACE=2 env SYCL_RT_WARNING_LEVEL=1 \ +// RUN: env SYCL_UR_TRACE=2 env SYCL_RT_WARNING_LEVEL=1 \ // RUN: SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 %{run} %t.out 2>&1 \ // RUN: | FileCheck %s --implicit-check-not "ERROR: JIT compilation for kernel fusion failed with message:" // Test complete fusion of kernels with different ND-ranges. // Kernels with different ND-ranges should be fused. -// CHECK-COUNT-26: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-26: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include diff --git a/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp b/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp index 67285414e2a26..a82f849d429ec 100644 --- a/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp +++ b/sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp @@ -1,13 +1,13 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion where one kernel in the fusion list specifies an // explicit dependency (via events) on another kernel in the fusion list. // The two kernels are fused, so only a single, fused kernel is launched. -// CHECK-COUNT-1: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-1: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include "fusion_event_test_common.h" diff --git a/sycl/test-e2e/KernelFusion/lit.local.cfg b/sycl/test-e2e/KernelFusion/lit.local.cfg index 1d0db3020f754..cc77315a316ef 100644 --- a/sycl/test-e2e/KernelFusion/lit.local.cfg +++ b/sycl/test-e2e/KernelFusion/lit.local.cfg @@ -1,7 +1,8 @@ import platform config.required_features += ['fusion'] -config.unsupported_features += ['accelerator'] +# TODO: Reenable hip, see https://github.com/intel/llvm/issues/14598 +config.unsupported_features += ['accelerator', 'hip'] # TODO: enable on Windows once kernel fusion is supported on Windows. if platform.system() != "Linux": diff --git a/sycl/test-e2e/KernelFusion/math_function.cpp b/sycl/test-e2e/KernelFusion/math_function.cpp index bb4a76f9fbfb7..2b9c17e7d9a21 100644 --- a/sycl/test-e2e/KernelFusion/math_function.cpp +++ b/sycl/test-e2e/KernelFusion/math_function.cpp @@ -1,11 +1,11 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s // Test fusion of a kernel using a math function. // The two kernels are fused, so only a single, fused kernel is launched. -// CHECK-COUNT-1: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-1: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include diff --git a/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp b/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp index 1cc03bc7ad0c3..9230e8b9fab78 100644 --- a/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp +++ b/sycl/test-e2e/KernelFusion/non_unit_local_size.cpp @@ -1,12 +1,12 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion with local internalization specified on the // accessors, where each work-item processes multiple data-items. // The two kernels are fused, so only a single, fused kernel is launched. -// CHECK-COUNT-1: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-1: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include #include diff --git a/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp b/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp index 6adcd29ab3071..0a01a25a4070b 100644 --- a/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp +++ b/sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp @@ -76,7 +76,7 @@ template void test() { int main() { std::cerr << "FusionStartPoint = 0:\n"; // COM: memcpy leads to a CG being created as it depends on CGs not producing - // a PI event (coming from the CGs to be fused), so not safe to bypass. Fusion + // a UR event (coming from the CGs to be fused), so not safe to bypass. Fusion // should be cancelled as a dependency with an event to be fused is found. // CHECK: FusionStartPoint = 0: diff --git a/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp b/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp index 6ad81d579226d..90f76d1654fc1 100644 --- a/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp +++ b/sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp @@ -1,12 +1,12 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion using USM pointers. // The two kernels are fused, so only a single, fused kernel is launched. -// CHECK-COUNT-1: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-1: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include #include diff --git a/sycl/test-e2e/KernelFusion/work_group_barrier.cpp b/sycl/test-e2e/KernelFusion/work_group_barrier.cpp index fe09e4fb3050e..8f8dcc1f67c9c 100644 --- a/sycl/test-e2e/KernelFusion/work_group_barrier.cpp +++ b/sycl/test-e2e/KernelFusion/work_group_barrier.cpp @@ -1,12 +1,12 @@ // RUN: %{build} %{embed-ir} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion with a combination of kernels that require a work-group // barrier to be inserted by fusion. // The two kernels are fused, so only a single, fused kernel is launched. -// CHECK-COUNT-1: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-1: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include #include diff --git a/sycl/test-e2e/KernelFusion/wrapped_usm.cpp b/sycl/test-e2e/KernelFusion/wrapped_usm.cpp index f011ad3b297be..f61c054ae70bf 100644 --- a/sycl/test-e2e/KernelFusion/wrapped_usm.cpp +++ b/sycl/test-e2e/KernelFusion/wrapped_usm.cpp @@ -1,12 +1,12 @@ // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} %{embed-ir} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s // Test complete fusion using an wrapped USM pointer as kernel functor argument. // The two kernels are fused, so only a single, fused kernel is launched. -// CHECK-COUNT-1: piEnqueueKernelLaunch -// CHECK-NOT: piEnqueueKernelLaunch +// CHECK-COUNT-1: urEnqueueKernelLaunch +// CHECK-NOT: urEnqueueKernelLaunch #include #include diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops.cpp b/sycl/test-e2e/Matrix/element_wise_all_ops.cpp index 4d6493c23e18e..d14240b145e81 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops.cpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops.cpp @@ -10,5 +10,8 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: windows, linux + #include "common.hpp" #include "element_wise_all_ops_impl.hpp" diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_1d.cpp b/sycl/test-e2e/Matrix/element_wise_all_ops_1d.cpp index d3d0396593bb0..ad6bd117044a3 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_1d.cpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_1d.cpp @@ -10,5 +10,8 @@ // RUN: %{build} -o %t.out // RUN: env IGC_JointMatrixLoadStoreOpt=1 %{run} %t.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: windows, linux + #include "common.hpp" #include "element_wise_all_ops_impl.hpp" diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_1d_cont.cpp b/sycl/test-e2e/Matrix/element_wise_all_ops_1d_cont.cpp index 1529e60c4165c..46e0771238de6 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_1d_cont.cpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_1d_cont.cpp @@ -10,5 +10,8 @@ // RUN: %{build} -o %t.out // RUN: env IGC_JointMatrixLoadStoreOpt=2 %{run} %t.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: windows, linux + #include "common.hpp" #include "element_wise_all_ops_impl.hpp" diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_scalar.cpp b/sycl/test-e2e/Matrix/element_wise_all_ops_scalar.cpp index 0604a7f345963..93432bca4a03e 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_scalar.cpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_scalar.cpp @@ -10,5 +10,8 @@ // RUN: %{build} -o %t.out // RUN: env IGC_JointMatrixLoadStoreOpt=0 %{run} %t.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: windows, linux + #include "common.hpp" #include "element_wise_all_ops_impl.hpp" diff --git a/sycl/test-e2e/Matrix/element_wise_all_sizes.cpp b/sycl/test-e2e/Matrix/element_wise_all_sizes.cpp index 22ec9b98a66c1..83db076d2be20 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_sizes.cpp +++ b/sycl/test-e2e/Matrix/element_wise_all_sizes.cpp @@ -8,6 +8,9 @@ // REQUIRES: aspect-ext_intel_matrix // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943 +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: windows, linux + // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/NewOffloadDriver/diamond_shape.cpp b/sycl/test-e2e/NewOffloadDriver/diamond_shape.cpp index d3fb670b6bb75..af760cb13c605 100644 --- a/sycl/test-e2e/NewOffloadDriver/diamond_shape.cpp +++ b/sycl/test-e2e/NewOffloadDriver/diamond_shape.cpp @@ -1,4 +1,6 @@ // REQUIRES: fusion +// TODO: Reenable, see https://github.com/intel/llvm/issues/14598 +// UNSUPPORTED: hip // RUN: %{build} %{embed-ir} -O2 --offload-new-driver -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Plugin/adapter-release.cpp b/sycl/test-e2e/Plugin/adapter-release.cpp new file mode 100644 index 0000000000000..265c2d7af0eba --- /dev/null +++ b/sycl/test-e2e/Plugin/adapter-release.cpp @@ -0,0 +1,4 @@ +// ensure that urAdapterRelease is called + +// RUN: env SYCL_UR_TRACE=1 sycl-ls | FileCheck %s +// CHECK: ---> urAdapterRelease diff --git a/sycl/test-e2e/Plugin/dll-detach-order.cpp b/sycl/test-e2e/Plugin/dll-detach-order.cpp index 16d12d1949e8b..bdc9788064cfb 100644 --- a/sycl/test-e2e/Plugin/dll-detach-order.cpp +++ b/sycl/test-e2e/Plugin/dll-detach-order.cpp @@ -1,12 +1,15 @@ // REQUIRES: windows -// RUN: env SYCL_PI_TRACE=2 sycl-ls | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 sycl-ls | FileCheck %s -// ensure that the plugins are detached AFTER piTearDown is done executing +// TODO: Reenable on Windows, see https://github.com/intel/llvm/issues/14768 +// UNSUPPORTED: windows + +// ensure that the plugins are detached AFTER urLoaderTearDown is done executing // CHECK: ---> DLL_PROCESS_DETACH syclx.dll -// CHECK: ---> piTearDown( -// whatever plugin THIS is -// CHECK: ---> DLL_PROCESS_DETACH +// whatever adapter THIS is +// CHECK: ---> urAdapterRelease( +// CHECK: [INFO]: unloaded adapter -// CHECK: ---> DLL_PROCESS_DETACH pi_win_proxy_loader.dll +// CHECK: ---> urLoaderTearDown( diff --git a/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp b/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp index aec4cc5297c8f..2a7d0db62df94 100644 --- a/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp +++ b/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: hip_nvidia // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s #include @@ -220,8 +220,8 @@ void testcopyD2HBuffer() { } void testcopyH2DBuffer() { - // copy between two queues triggers a piEnqueueMemBufferMap followed by - // copyH2D, followed by a copyD2H, followed by a piEnqueueMemUnmap + // copy between two queues triggers a urEnqueueMemBufferMap followed by + // copyH2D, followed by a copyD2H, followed by a urEnqueueMemUnmap // Here we only care about checking copyH2D std::cout << "start copyH2D-buffer" << std::endl; @@ -408,52 +408,23 @@ int main() { // ----------- BUFFERS // CHECK-LABEL: start copyD2H-buffer -// CHECK: ---> piEnqueueMemBufferRead( -// CHECK: : 64 -// CHECK: ---> piEnqueueMemBufferReadRect( -// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/1 -// CHECK-NEXT: : 64 -// CHECK: ---> piEnqueueMemBufferReadRect( -// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/3 -// CHECK-NEXT: : 64 -// CHECK-NEXT: : 320 +// CHECK: ---> urEnqueueMemBufferRead({{.*}} .size = 64, +// CHECK: ---> urEnqueueMemBufferReadRect({{.*}} .region = (struct ur_rect_region_t){.width = 64, .height = 5, .depth = 1}, .bufferRowPitch = 64, +// CHECK: ---> urEnqueueMemBufferReadRect({{.*}} .region = (struct ur_rect_region_t){.width = 64, .height = 5, .depth = 3}, .bufferRowPitch = 64, .bufferSlicePitch = 320, // CHECK: end copyD2H-buffer // CHECK-LABEL: start copyH2D-buffer -// CHECK: ---> piEnqueueMemBufferWrite( -// CHECK: : 64 -// CHECK: ---> piEnqueueMemBufferWriteRect( -// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/1 -// CHECK-NEXT: : 64 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 64 -// CHECK: ---> piEnqueueMemBufferWriteRect( -// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/3 -// CHECK-NEXT: : 64 -// CHECK-NEXT: : 320 -// CHECK-NEXT: : 64 -// CHECK-NEXT: : 320 +// CHECK: ---> urEnqueueMemBufferWrite({{.*}} .size = 64, +// CHECK: ---> urEnqueueMemBufferWriteRect({{.*}} .region = (struct ur_rect_region_t){.width = 64, .height = 5, .depth = 1}, .bufferRowPitch = 64, .bufferSlicePitch = 0, .hostRowPitch = 64, +// CHECK: ---> urEnqueueMemBufferWriteRect({{.*}} .region = (struct ur_rect_region_t){.width = 64, .height = 5, .depth = 3}, .bufferRowPitch = 64, .bufferSlicePitch = 320, .hostRowPitch = 64, .hostSlicePitch = 320, // CHECK: end copyH2D-buffer // CHECK-LABEL: start copyD2D-buffer -// CHECK: ---> piEnqueueMemBufferCopy( -// CHECK: : 64 -// CHECK: ---> piEnqueueMemBufferCopyRect( -// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/1 -// CHECK-NEXT: : 64 -// CHECK-NEXT: : 320 -// CHECK-NEXT: : 64 -// CHECK-NEXT: : 320 -// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/3 -// CHECK-NEXT: : 64 -// CHECK-NEXT: : 320 -// CHECK-NEXT: : 64 -// CHECK-NEXT: : 320 +// CHECK: ---> urEnqueueMemBufferCopy({{.*}} .size = 64 +// CHECK: ---> urEnqueueMemBufferCopyRect({{.*}} .region = (struct ur_rect_region_t){.width = 64, .height = 5, .depth = 1}, .srcRowPitch = 64, .srcSlicePitch = 320, .dstRowPitch = 64, .dstSlicePitch = 320 +// CHECK: .region = (struct ur_rect_region_t){.width = 64, .height = 5, .depth = 3}, .srcRowPitch = 64, .srcSlicePitch = 320, .dstRowPitch = 64, .dstSlicePitch = 320 // CHECK: end copyD2D-buffer // CHECK-LABEL: start testFill Buffer -// CHECK: ---> piEnqueueMemBufferFill( -// CHECK: : 4 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 64 +// CHECK :---> urEnqueueMemBufferFill({{.*}} .patternSize = 4, .offset = 0, .size = 64, // CHECK: end testFill Buffer diff --git a/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp b/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp index 96863be242550..85eb7aa5d3600 100644 --- a/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp +++ b/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp @@ -10,7 +10,7 @@ // RUN: %{build} -o %t.out // Native images are created with host pointers only with host unified memory // support, enforce it for this test. -// RUN: env SYCL_HOST_UNIFIED_MEMORY=1 SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_HOST_UNIFIED_MEMORY=1 SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s #include @@ -306,108 +306,65 @@ int main() { // clang-format off //CHECK: start copyD2H-Image //CHECK: -- 1D -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 1 / 1 -- arrSz/row/slice : 0 / 256 / 256 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4340 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 1 / 1 -- arrSz/row/slice : 0 / 256 / 256 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4340 +//CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE1D, .width = 16, .height = 1, .depth = 1, .arraySize = 0, .rowPitch = 256, .slicePitch = 256, .numMipLevel = 0, .numSamples = 0 +//CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE1D, .width = 16, .height = 1, .depth = 1, .arraySize = 0, .rowPitch = 256, .slicePitch = 256, .numMipLevel = 0, .numSamples = 0 //CHECK: about to destruct 1D -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/1/1 +//CHECK: ---> urEnqueueMemImageRead({{.*}} .region = (struct ur_rect_region_t){.width = 16, .height = 1, .depth = 1} //CHECK: -- 2D -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 1 -- arrSz/row/slice : 0 / 256 / 1280 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4337 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 1 -- arrSz/row/slice : 0 / 256 / 1280 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4337 +//CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE2D, .width = 16, .height = 5, .depth = 1, .arraySize = 0, .rowPitch = 256, .slicePitch = 1280, .numMipLevel = 0, .numSamples = 0 +//CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE2D, .width = 16, .height = 5, .depth = 1, .arraySize = 0, .rowPitch = 256, .slicePitch = 1280, .numMipLevel = 0, .numSamples = 0 //CHECK: about to destruct 2D -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/5/1 -// CHECK-NEXT: : 256 +//CHECK: ---> urEnqueueMemImageRead({{.*}} .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 1}, .rowPitch = 256 //CHECK: -- 3D -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 3 -- arrSz/row/slice : 0 / 256 / 1280 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4338 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 3 -- arrSz/row/slice : 0 / 256 / 1280 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4338 +//CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE3D, .width = 16, .height = 5, .depth = 3, .arraySize = 0, .rowPitch = 256, .slicePitch = 1280, .numMipLevel = 0, .numSamples = +//CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE3D, .width = 16, .height = 5, .depth = 3, .arraySize = 0, .rowPitch = 256, .slicePitch = 1280, .numMipLevel = 0, .numSamples = //CHECK: about to destruct 3D -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/5/3 -// CHECK-NEXT: : 256 -// CHECK-NEXT: : 1280 +//CHECK: ---> urEnqueueMemImageRead({{.*}}.region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 3}, .rowPitch = 256, .slicePitch = 1280 //CHECK: end copyD2H-Image -//CHECK: start copyH2D-image -//CHECK: -- 1D -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 1 / 1 -- arrSz/row/slice : 0 / 256 / 256 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4340 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 1 / 1 -- arrSz/row/slice : 0 / 256 / 256 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4340 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 1 / 1 -- arrSz/row/slice : 0 / 0 / 0 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4340 -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/1/1 +// CHECK: start copyH2D-image +// CHECK: -- 1D +// CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE1D, .width = 16, .height = 1, .depth = 1, .arraySize = 0, .rowPitch = 256, .slicePitch = 256, .numMipLevel = 0, .numSamples = 0 +// CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE1D, .width = 16, .height = 1, .depth = 1, .arraySize = 0, .rowPitch = 256, .slicePitch = 256, .numMipLevel = 0, .numSamples = 0 +// CHECK: ---> urMemImageCreate({{.*}} .type = UR_MEM_TYPE_IMAGE1D, .width = 16, .height = 1, .depth = 1, .arraySize = 0, .rowPitch = 0, .slicePitch = 0, .numMipLevel = 0, .numSamples = 0 +// CHECK: ---> urEnqueueMemImageRead({{.*}} .region = (struct ur_rect_region_t){.width = 16, .height = 1, .depth = 1} // The order of the following calls may vary since some of them are made by a -// host task (in a separate thread). -//CHECK-DAG: ---> piMemImageCreate( -//CHECK-DAG: image_desc w/h/d : 16 / 1 / 1 -- arrSz/row/slice : 0 / 0 / 0 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4340 -//CHECK-DAG: ---> piEnqueueMemImageRead( -//CHECK-DAG: pi_image_region width/height/depth : 16/1/1 -//CHECK-DAG: ---> piEnqueueMemImageWrite( -//CHECK-DAG: pi_image_region width/height/depth : 16/1/1 -//CHECK-DAG: ---> piEnqueueMemImageWrite( -//CHECK-DAG: pi_image_region width/height/depth : 16/1/1 -//CHECK: about to destruct 1D -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/1/1 -//CHECK: -- 2D -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 1 -- arrSz/row/slice : 0 / 256 / 1280 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4337 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 1 -- arrSz/row/slice : 0 / 256 / 1280 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4337 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 1 -- arrSz/row/slice : 0 / 0 / 0 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4337 -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/5/1 +// host task (in a separate thread). Don't check for the actual function name +// as it may be interleaved with other tracing output. +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE1D, .width = 16, .height = 1, .depth = 1, .arraySize = 0, .rowPitch = 0, .slicePitch = 0, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 1, .depth = 1} +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 1, .depth = 1} +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 1, .depth = 1} +// CHECK: about to destruct 1D +// CHECK: ---> urEnqueueMemImageRead({{.*}}.region = (struct ur_rect_region_t){.width = 16, .height = 1, .depth = 1} +// CHECK: -- 2D // The order of the following calls may vary since some of them are made by a -// host task (in a separate thread). -//CHECK-DAG: ---> piMemImageCreate( -//CHECK-DAG: image_desc w/h/d : 16 / 5 / 1 -- arrSz/row/slice : 0 / 0 / 0 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4337 -//CHECK-DAG: ---> piEnqueueMemImageRead( -//CHECK-DAG: pi_image_region width/height/depth : 16/5/1 -//CHECK-DAG: ---> piEnqueueMemImageWrite( -//CHECK-DAG: pi_image_region width/height/depth : 16/5/1 -//CHECK-DAG: : 256 -//CHECK-DAG: ---> piEnqueueMemImageWrite( -//CHECK-DAG: pi_image_region width/height/depth : 16/5/1 -//CHECK-DAG: : 256 -//CHECK: about to destruct 2D -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/5/1 -//CHECK: -- 3D -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 3 -- arrSz/row/slice : 0 / 256 / 1280 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4338 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 3 -- arrSz/row/slice : 0 / 256 / 1280 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4338 -//CHECK: ---> piMemImageCreate( -//CHECK: image_desc w/h/d : 16 / 5 / 3 -- arrSz/row/slice : 0 / 0 / 0 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4338 -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/5/3 +// host task (in a separate thread). Don't check for the actual function name +// as it may be interleaved with other tracing output. +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE2D, .width = 16, .height = 5, .depth = 1, .arraySize = 0, .rowPitch = 256, .slicePitch = 1280, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE2D, .width = 16, .height = 5, .depth = 1, .arraySize = 0, .rowPitch = 256, .slicePitch = 1280, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE2D, .width = 16, .height = 5, .depth = 1, .arraySize = 0, .rowPitch = 0, .slicePitch = 0, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 1} +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE2D, .width = 16, .height = 5, .depth = 1, .arraySize = 0, .rowPitch = 0, .slicePitch = 0, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 1} +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 1} +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 1} +// CHECK: about to destruct 2D +// CHECK: ---> urEnqueueMemImageRead({{.*}}.region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 1} +// CHECK: -- 3D +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE3D, .width = 16, .height = 5, .depth = 3, .arraySize = 0, .rowPitch = 256, .slicePitch = 1280, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE3D, .width = 16, .height = 5, .depth = 3, .arraySize = 0, .rowPitch = 256, .slicePitch = 1280, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE3D, .width = 16, .height = 5, .depth = 3, .arraySize = 0, .rowPitch = 0, .slicePitch = 0, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 3} // The order of the following calls may vary since some of them are made by a -// host task (in a separate thread). -//CHECK-DAG: ---> piMemImageCreate( -//CHECK-DAG: image_desc w/h/d : 16 / 5 / 3 -- arrSz/row/slice : 0 / 0 / 0 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4338 -//CHECK-DAG: ---> piEnqueueMemImageRead( -//CHECK-DAG: pi_image_region width/height/depth : 16/5/3 -//CHECK-DAG: ---> piEnqueueMemImageWrite( -//CHECK-DAG: pi_image_region width/height/depth : 16/5/3 -//CHECK-DAG: : 256 -//CHECK-DAG: : 1280 -//CHECK-DAG: ---> piEnqueueMemImageWrite( -//CHECK-DAG: pi_image_region width/height/depth : 16/5/3 -//CHECK-DAG: : 256 -//CHECK-DAG: : 1280 -//CHECK: about to destruct 3D -//CHECK: ---> piEnqueueMemImageRead( -//CHECK: pi_image_region width/height/depth : 16/5/3 -// CHECK-NEXT: : 256 -// CHECK-NEXT: : 1280 -//CHECK: end copyH2D-image +// host task (in a separate thread). Don't check for the actual function name +// as it may be interleaved with other tracing output. +// CHECK-DAG: .type = UR_MEM_TYPE_IMAGE3D, .width = 16, .height = 5, .depth = 3, .arraySize = 0, .rowPitch = 0, .slicePitch = 0, .numMipLevel = 0, .numSamples = 0 +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 3} +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 3}, .rowPitch = 256, .slicePitch = 1280 +// CHECK-DAG: .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 3}, .rowPitch = 256, .slicePitch = 1280 +// CHECK: about to destruct 3D +// CHECK: ---> urEnqueueMemImageRead({{.*}} .region = (struct ur_rect_region_t){.width = 16, .height = 5, .depth = 3}, .rowPitch = 256, .slicePitch = 1280 + +// CHECK: end copyH2D-image // clang-format on diff --git a/sycl/test-e2e/Plugin/level_zero_barrier_optimization.cpp b/sycl/test-e2e/Plugin/level_zero_barrier_optimization.cpp index 14cd05c9641ac..284227ee0a630 100644 --- a/sycl/test-e2e/Plugin/level_zero_barrier_optimization.cpp +++ b/sycl/test-e2e/Plugin/level_zero_barrier_optimization.cpp @@ -1,6 +1,6 @@ // REQUIRES: level_zero // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s // Test to check that we don't insert unnecessary L0 commands for // queue::ext_oneapi_submit_barrier() when we have in-order queue. @@ -34,11 +34,11 @@ int main() { auto EventB = submitKernel(Q2); // CHECK: Test1 - // CHECK: ---> piEnqueueEventsWaitWithBarrier( + // CHECK: ---> urEnqueueEventsWaitWithBarrier // CHECK: ZE ---> zeEventCreate // CHECK: ZE ---> zeCommandListAppendWaitOnEvents // CHECK: ZE ---> zeCommandListAppendSignalEvent - // CHECK: ) ---> pi_result : PI_SUCCESS + // CHECK: ) -> UR_RESULT_SUCCESS auto BarrierEvent = Q2.ext_oneapi_submit_barrier({EventA, EventB}); BarrierEvent.wait(); @@ -54,11 +54,11 @@ int main() { auto EventB = submitKernel(Q2); // CHECK: Test2 - // CHECK: ---> piEnqueueEventsWaitWithBarrier( + // CHECK: ---> urEnqueueEventsWaitWithBarrier // CHECK: ZE ---> {{zeEventCreate|zeEventHostReset}} // CHECK: ZE ---> zeCommandListAppendWaitOnEvents // CHECK: ZE ---> zeCommandListAppendSignalEvent - // CHECK: ) ---> pi_result : PI_SUCCESS + // CHECK: ) -> UR_RESULT_SUCCESS auto BarrierEvent = Q1.ext_oneapi_submit_barrier({EventA, EventB}); BarrierEvent.wait(); @@ -74,12 +74,12 @@ int main() { Q2.wait(); Q3.wait(); // CHECK: Test3 - // CHECK: ---> piEnqueueEventsWaitWithBarrier( + // CHECK: ---> urEnqueueEventsWaitWithBarrier // CHECK: ZE ---> zeEventCreate // CHECK-NOT: ZE ---> zeCommandListAppendWaitOnEvents // CHECK-NOT: ZE ---> zeCommandListAppendSignalEvent // CHECK: ZE ---> zeCommandListAppendBarrier - // CHECK: ) ---> pi_result : PI_SUCCESS + // CHECK: ) -> UR_RESULT_SUCCESS auto BarrierEvent = Q3.ext_oneapi_submit_barrier({EventA, EventB}); BarrierEvent.wait(); diff --git a/sycl/test-e2e/Plugin/level_zero_batch_barrier.cpp b/sycl/test-e2e/Plugin/level_zero_batch_barrier.cpp index 487ec67fc3fb7..fded93f810a72 100644 --- a/sycl/test-e2e/Plugin/level_zero_batch_barrier.cpp +++ b/sycl/test-e2e/Plugin/level_zero_batch_barrier.cpp @@ -1,7 +1,10 @@ // REQUIRES: gpu, level_zero, level_zero_dev_kit +// TODO: Reenable, see https://github.com/intel/llvm/issues/14704 +// UNSUPPORTED: windows, linux + // RUN: %{build} %level_zero_options -o %t.out -// RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.out 2>&1 | FileCheck %s // Test that the wait with a barrier is fully batched, i.e. it doesn't cause // extra submissions. @@ -22,26 +25,26 @@ int main(int argc, char *argv[]) { queue q; submit_kernel(q); // starts a batch - // CHECK: ---> piEnqueueKernelLaunch + // CHECK: ---> urEnqueueKernelLaunch // CHECK-NOT: ZE ---> zeCommandQueueExecuteCommandLists // continue the batch event barrier = q.ext_oneapi_submit_barrier(); - // CHECK: ---> piEnqueueEventsWaitWithBarrier + // CHECK: ---> urEnqueueEventsWaitWithBarrier // CHECK-NOT: ZE ---> zeCommandQueueExecuteCommandLists submit_kernel(q); - // CHECK: ---> piEnqueueKernelLaunch + // CHECK: ---> urEnqueueKernelLaunch // CHECK-NOT: ZE ---> zeCommandQueueExecuteCommandLists // interop should close the batch ze_event_handle_t ze_event = get_native(barrier); - // CHECK: ---> piextEventGetNativeHandle + // CHECK: ---> urEventGetNativeHandle // CHECK: ZE ---> zeCommandQueueExecuteCommandLists zeEventHostSynchronize(ze_event, UINT64_MAX); - // CHECK: ---> piQueueFinish + // CHECK: ---> urQueueFinish q.wait_and_throw(); return 0; } diff --git a/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp b/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp index d4792c9177a28..3e8a6fc2e4e3c 100644 --- a/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp +++ b/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp @@ -4,36 +4,36 @@ // RUN: %{build} -o %t.out // Set batching to 4 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s // level_zero_batch_test.cpp // // This tests the level zero plugin's kernel batching code. It specifically // tests that the current batch is submitted when an Event execution status // request is made. This test uses explicit SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 -// to make sure that the batching is submitted when the piEventGetInfo is +// to make sure that the batching is submitted when the urEventGetInfo is // done, rather than some other dynamic batching criteria. // -// CHECK: ---> piEnqueueKernelLaunch +// CHECK: ---> urEnqueueKernelLaunch // CHECK: ZE ---> zeCommandListAppendLaunchKernel -// Shouldn't have closed until we see a piEventGetInfo +// Shouldn't have closed until we see a urEventGetInfo // CHECK-NOT: ZE ---> zeCommandListClose // CHECK-NOT: ZE ---> zeCommandQueueExecuteCommandLists -// CHECK: ---> piEventGetInfo -// Shouldn't see another piGetEventInfo until after closing command list -// CHECK-NOT: ---> piEventGetInfo -// Look for close and Execute after piEventGetInfo +// CHECK: ---> urEventGetInfo +// Shouldn't see another urGetEventInfo until after closing command list +// CHECK-NOT: ---> urEventGetInfo +// Look for close and Execute after urEventGetInfo // CHECK: ZE ---> zeCommandListClose // CHECK: ZE ---> zeCommandQueueExecuteCommandLists -// CHECK: ---> piEventGetInfo -// CHECK-NOT: piEventsWait -// CHECK: ---> piEnqueueKernelLaunch +// CHECK: ---> urEventGetInfo +// CHECK-NOT: urEventsWait +// CHECK: ---> urEnqueueKernelLaunch // CHECK: ZE ---> zeCommandListAppendLaunchKernel -// CHECK: ---> piQueueFinish -// Look for close and Execute after piQueueFinish +// CHECK: ---> urQueueFinish +// Look for close and Execute after urQueueFinish // CHECK: ZE ---> zeCommandListClose // CHECK: ZE ---> zeCommandQueueExecuteCommandLists -// CHECK: ---> piEventGetInfo +// CHECK: ---> urEventGetInfo // No close and execute here, should already have happened. // CHECK-NOT: ZE ---> zeCommandListClose // CHECK-NOT: ZE ---> zeCommandQueueExecuteCommandLists diff --git a/sycl/test-e2e/Plugin/level_zero_batch_test.cpp b/sycl/test-e2e/Plugin/level_zero_batch_test.cpp index ab96887f4aa27..4c384cf78c5d4 100644 --- a/sycl/test-e2e/Plugin/level_zero_batch_test.cpp +++ b/sycl/test-e2e/Plugin/level_zero_batch_test.cpp @@ -7,69 +7,69 @@ // To test batching on out-of-order queue: // Set batching to 4 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB4 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB4 %s // Set batching to 1 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB1 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB1 %s // Set batching to 3 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB3 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB3 %s // Set batching to 5 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=5 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB5 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=5 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB5 %s // Set batching to 7 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=7 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB7 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=7 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB7 %s // Set batching to 8 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=8 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB8 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=8 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB8 %s // Set batching to 9 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=9 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB9 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=9 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB9 %s // To test batching on in-order queue: // Set batching to 4 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB4 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB4 %s // Set batching to 1 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB1 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB1 %s // Set batching to 3 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB3 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB3 %s // Set batching to 5 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=5 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB5 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=5 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB5 %s // Set batching to 7 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=7 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB7 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=7 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB7 %s // Set batching to 8 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=8 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB8 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=8 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB8 %s // Set batching to 9 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=9 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB9 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=9 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB9 %s // To test batching on in-order queue with discard_events: // Set batching to 4 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB4 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB4 %s // Set batching to 1 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB1 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB1 %s // Set batching to 3 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB3 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB3 %s // Set batching to 5 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=5 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB5 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=5 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB5 %s // Set batching to 7 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=7 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB7 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=7 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB7 %s // Set batching to 8 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=8 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB8 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=8 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB8 %s // Set batching to 9 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=9 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB9 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=9 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB9 %s // level_zero_batch_test.cpp // @@ -78,55 +78,55 @@ // variable SYCL_PI_LEVEL_ZEOR+BATCH_SIZE=N. // This test enqueues 8 kernels and then does a wait. And it does this 3 times. // Expected output is that for batching =1 you will see zeCommandListClose, -// and zeCommandQueueExecuteCommandLists after every piEnqueueKernelLaunch. +// and zeCommandQueueExecuteCommandLists after every urEnqueueKernelLaunch. // For batching=3 you will see that after 3rd and 6th enqueues, and then after -// piQueueFinish. For 5, after 5th piEnqueue, and then after piQueueFinish. For +// urQueueFinish. For 5, after 5th urEnqueue, and then after urQueueFinish. For // 4 you will see these after 4th and 8th Enqueue, and for 8, only after the // 8th enqueue. And lastly for 9, you will see the Close and Execute calls -// only after the piQueueFinish. +// only after the urQueueFinish. // Since the test does this 3 times, this pattern will repeat 2 more times, // and then the test will print Test Passed 8 times, once for each kernel // validation check. // Pattern starts first set of kernel executions. -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB4: ZE ---> zeCommandListClose( // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( // CKB5: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB7: ZE ---> zeCommandListClose( // CKB7: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( @@ -134,7 +134,7 @@ // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( // CKB8: ZE ---> zeCommandListClose( // CKB8: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piQueueFinish( +// CKALL: ---> urQueueFinish // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( @@ -144,45 +144,45 @@ // CKB9: ZE ---> zeCommandListClose( // CKB9: ZE ---> zeCommandQueueExecuteCommandLists( // Pattern starts 2nd set of kernel executions -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB4: ZE ---> zeCommandListClose( // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( // CKB5: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB7: ZE ---> zeCommandListClose( // CKB7: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( @@ -190,7 +190,7 @@ // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( // CKB8: ZE ---> zeCommandListClose( // CKB8: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piQueueFinish( +// CKALL: ---> urQueueFinish // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( @@ -200,45 +200,45 @@ // CKB9: ZE ---> zeCommandListClose( // CKB9: ZE ---> zeCommandQueueExecuteCommandLists( // Pattern starts 3rd set of kernel executions -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB4: ZE ---> zeCommandListClose( // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( // CKB5: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB7: ZE ---> zeCommandListClose( // CKB7: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( @@ -246,7 +246,7 @@ // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( // CKB8: ZE ---> zeCommandListClose( // CKB8: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piQueueFinish( +// CKALL: ---> urQueueFinish // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( diff --git a/sycl/test-e2e/Plugin/level_zero_batch_test_copy_with_compute.cpp b/sycl/test-e2e/Plugin/level_zero_batch_test_copy_with_compute.cpp index ca6f2a1dd7e3d..72a6cb44379fb 100644 --- a/sycl/test-e2e/Plugin/level_zero_batch_test_copy_with_compute.cpp +++ b/sycl/test-e2e/Plugin/level_zero_batch_test_copy_with_compute.cpp @@ -4,25 +4,25 @@ // RUN: %{build} -o %t.out // Set batching to 4 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB4 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB4 %s // Set batching to 1 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB1 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB1 %s // Set batching to 3 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB3 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB3 %s // Set batching to 5 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=5 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB5 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=5 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB5 %s // Set batching to 7 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=7 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB7 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=7 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB7 %s // Set batching to 8 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=8 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB8 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=8 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB8 %s // Set batching to 9 explicitly -// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=9 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB9 %s +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=9 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKB9 %s // level_zero_batch_test_copy_with_compute.cpp // @@ -31,55 +31,55 @@ // variable SYCL_PI_LEVEL_ZERO_{COPY_}BATCH_SIZE=N. // This test enqueues 8 kernels and then does a wait. And it does this 3 times. // Expected output is that for batching =1 you will see zeCommandListClose, -// and zeCommandQueueExecuteCommandLists after every piEnqueueKernelLaunch. +// and zeCommandQueueExecuteCommandLists after every urEnqueueKernelLaunch. // For batching=3 you will see that after 3rd and 6th enqueues, and then after -// piEventsWait. For 5, after 5th piEnqueue, and then after piEventsWait. For +// urEventWait. For 5, after 5th urEnqueue, and then after urEventWait. For // 4 you will see these after 4th and 8th Enqueue, and for 8, only after the // 8th enqueue. And lastly for 9, you will see the Close and Execute calls -// only after the piEventsWait. +// only after the urEventWait. // Since the test does this 3 times, this pattern will repeat 2 more times, // and then the test will print Test Passed 8 times, once for each kernel // validation check. // Pattern starts first set of kernel executions. -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB4: ZE ---> zeCommandListClose( // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( // CKB5: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB7: ZE ---> zeCommandListClose( // CKB7: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( @@ -87,7 +87,7 @@ // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( // CKB8: ZE ---> zeCommandListClose( // CKB8: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piQueueFinish( +// CKALL: ---> urQueueFinish // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( @@ -97,45 +97,45 @@ // CKB9: ZE ---> zeCommandListClose( // CKB9: ZE ---> zeCommandQueueExecuteCommandLists( // Pattern starts 2nd set of kernel executions -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB4: ZE ---> zeCommandListClose( // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( // CKB5: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB7: ZE ---> zeCommandListClose( // CKB7: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( @@ -143,7 +143,7 @@ // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( // CKB8: ZE ---> zeCommandListClose( // CKB8: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piQueueFinish( +// CKALL: ---> urQueueFinish // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( @@ -153,45 +153,45 @@ // CKB9: ZE ---> zeCommandListClose( // CKB9: ZE ---> zeCommandQueueExecuteCommandLists( // Pattern starts 3rd set of kernel executions -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB4: ZE ---> zeCommandListClose( // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( // CKB5: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( // CKB7: ZE ---> zeCommandListClose( // CKB7: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piEnqueueKernelLaunch( +// CKALL: ---> urEnqueueKernelLaunch // CKALL: ZE ---> zeCommandListAppendLaunchKernel( // CKB1: ZE ---> zeCommandListClose( // CKB1: ZE ---> zeCommandQueueExecuteCommandLists( @@ -199,7 +199,7 @@ // CKB4: ZE ---> zeCommandQueueExecuteCommandLists( // CKB8: ZE ---> zeCommandListClose( // CKB8: ZE ---> zeCommandQueueExecuteCommandLists( -// CKALL: ---> piQueueFinish( +// CKALL: ---> urQueueFinish // CKB3: ZE ---> zeCommandListClose( // CKB3: ZE ---> zeCommandQueueExecuteCommandLists( // CKB5: ZE ---> zeCommandListClose( diff --git a/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp b/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp index b1aa94d280682..1d9f0850cd3b8 100644 --- a/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp +++ b/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp @@ -1,8 +1,8 @@ // REQUIRES: gpu, level_zero // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 SYCL_PI_TRACE=-1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=MODE1 %s -// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_TRACE=-1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=MODE2 %s +// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=MODE1 %s +// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck --check-prefixes=MODE2 %s // UNSUPPORTED: ze_debug // Checks that with L0 device-scope events enabled the only host-visible L0 @@ -11,8 +11,7 @@ // // clang-format off // MODE1-LABEL: Submitted all kernels -// MODE1: ---> piEventsWait( -// MODE1-NEXT: : 1 +// MODE1: ---> urEventWait // MODE1: ze_event_pool_desc_t flags set to: 1 // MODE1: ZE ---> zeEventCreate(ZeEventPool, &ZeEventDesc, &ZeEvent) // MODE1: ZE ---> zeCommandListAppendWaitOnEvents(CommandList->first, 1, &ZeEvent) diff --git a/sycl/test-e2e/Plugin/level_zero_dynamic_batch_test.cpp b/sycl/test-e2e/Plugin/level_zero_dynamic_batch_test.cpp index 722e1fe887f61..a028eced5cf91 100644 --- a/sycl/test-e2e/Plugin/level_zero_dynamic_batch_test.cpp +++ b/sycl/test-e2e/Plugin/level_zero_dynamic_batch_test.cpp @@ -1,14 +1,15 @@ // REQUIRES: gpu, level_zero -// UNSUPPORTED: ze_debug +// TODO: Reenable, see https://github.com/intel/llvm/issues/14721 +// UNSUPPORTED: ze_debug, windows, linux // RUN: %{build} -o %t.ooo.out // RUN: %{build} -DUSING_INORDER -o %t.ino.out // RUN: %{build} -DUSING_DISCARD_EVENTS -o %t.discard_events.out // Check that dynamic batching raises/lowers batch size -// RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKDYN %s -// RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKDYN %s -// RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKDYN %s +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.ooo.out 2>&1 | FileCheck --check-prefixes=CKALL,CKDYN %s +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.ino.out 2>&1 | FileCheck --check-prefixes=CKALL,CKDYN %s +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.discard_events.out 2>&1 | FileCheck --check-prefixes=CKALL,CKDYN %s // level_zero_dynamic_batch_test.cpp // diff --git a/sycl/test-e2e/Plugin/level_zero_usm_device_read_only.cpp b/sycl/test-e2e/Plugin/level_zero_usm_device_read_only.cpp index 463c1425d914e..00ab16ae7c40f 100644 --- a/sycl/test-e2e/Plugin/level_zero_usm_device_read_only.cpp +++ b/sycl/test-e2e/Plugin/level_zero_usm_device_read_only.cpp @@ -2,7 +2,7 @@ // UNSUPPORTED: ze_debug // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s // Test that "device_read_only" shared USM allocations are pooled. @@ -16,12 +16,12 @@ int main(int argc, char *argv[]) { auto ptr1 = malloc_shared(1, Q, ext::oneapi::property::usm::device_read_only()); - // CHECK: ---> piextUSMSharedAlloc - // CHECK: ZE ---> zeMemAllocShared + // CHECK: ---> urUSMSharedAlloc + // CHECK-SAME:ZE ---> zeMemAllocShared auto ptr2 = aligned_alloc_shared( 1, 1, Q, ext::oneapi::property::usm::device_read_only()); - // CHECK: ---> piextUSMSharedAlloc + // CHECK: ---> urUSMSharedAlloc // CHECK-NOT: ZE ---> zeMemAllocShared free(ptr1, Q); diff --git a/sycl/test-e2e/Plugin/level_zero_usm_residency.cpp b/sycl/test-e2e/Plugin/level_zero_usm_residency.cpp index 3626d3c80e9ff..60dacb11a5fce 100644 --- a/sycl/test-e2e/Plugin/level_zero_usm_residency.cpp +++ b/sycl/test-e2e/Plugin/level_zero_usm_residency.cpp @@ -1,10 +1,10 @@ // REQUIRES: gpu, level_zero // RUN: %{build} %level_zero_options -o %t.out -// RUN: env SYCL_PI_TRACE=-1 UR_L0_DEBUG=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck --check-prefixes=DEVICE %s -// RUN: env SYCL_PI_LEVEL_ZERO_USM_RESIDENT=0x001 SYCL_PI_TRACE=-1 UR_L0_DEBUG=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck --check-prefixes=DEVICE %s -// RUN: env SYCL_PI_LEVEL_ZERO_USM_RESIDENT=0x010 SYCL_PI_TRACE=-1 UR_L0_DEBUG=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck --check-prefixes=SHARED %s -// RUN: env SYCL_PI_LEVEL_ZERO_USM_RESIDENT=0x100 SYCL_PI_TRACE=-1 UR_L0_DEBUG=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck --check-prefixes=HOST %s +// RUN: env SYCL_UR_TRACE=1 UR_L0_DEBUG=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck --check-prefixes=DEVICE %s +// RUN: env SYCL_PI_LEVEL_ZERO_USM_RESIDENT=0x001 SYCL_UR_TRACE=1 UR_L0_DEBUG=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck --check-prefixes=DEVICE %s +// RUN: env SYCL_PI_LEVEL_ZERO_USM_RESIDENT=0x010 SYCL_UR_TRACE=1 UR_L0_DEBUG=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck --check-prefixes=SHARED %s +// RUN: env SYCL_PI_LEVEL_ZERO_USM_RESIDENT=0x100 SYCL_UR_TRACE=1 UR_L0_DEBUG=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck --check-prefixes=HOST %s // Test that USM is made resident at allocation as requested. @@ -17,18 +17,18 @@ int main(int argc, char *argv[]) { queue Q; auto ptr1 = malloc_device(1, Q); - // DEVICE: ---> piextUSMDeviceAlloc + // DEVICE: ---> urUSMDeviceAlloc // DEVICE: ZE ---> zeMemAllocDevice // DEVICE: ZE ---> zeContextMakeMemoryResident auto ptr2 = malloc_shared(1, Q); - // SHARED: ---> piextUSMSharedAlloc + // SHARED: ---> urUSMSharedAlloc // SHARED: ZE ---> zeMemAllocShared // SHARED: ZE ---> zeContextMakeMemoryResident // SHARED-NOT: ZE ---> zeContextMakeMemoryResident auto ptr3 = malloc_host(1, Q); - // HOST: ---> piextUSMHostAlloc + // HOST: ---> urUSMHostAlloc // HOST: ZE ---> zeMemAllocHost // HOST: ZE ---> zeContextMakeMemoryResident diff --git a/sycl/test-e2e/Plugin/pi-teardown.cpp b/sycl/test-e2e/Plugin/pi-teardown.cpp deleted file mode 100644 index c4ea7e3cff31e..0000000000000 --- a/sycl/test-e2e/Plugin/pi-teardown.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// ensure that piTearDown is called - -// RUN: env SYCL_PI_TRACE=2 sycl-ls | FileCheck %s -// CHECK: ---> piTearDown diff --git a/sycl/test-e2e/Plugin/sycl-ls-unified-runtime.cpp b/sycl/test-e2e/Plugin/sycl-ls-unified-runtime.cpp deleted file mode 100644 index 73d0cb6dd3edb..0000000000000 --- a/sycl/test-e2e/Plugin/sycl-ls-unified-runtime.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// REQUIRES: gpu, level_zero -// RUN: env ONEAPI_DEVICE_SELECTOR="level_zero:*" sycl-ls 2>&1 | FileCheck --check-prefixes=CHECK-PI %s -// RUN: env SYCL_PREFER_UR=0 ONEAPI_DEVICE_SELECTOR="level_zero:*" sycl-ls 2>&1 | FileCheck --check-prefixes=CHECK-PI %s -// RUN: env SYCL_PI_TRACE=-1 SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR="level_zero:*" sycl-ls 2>&1 | FileCheck --check-prefixes=CHECK-UR %s - -// CHECK-PI: Intel(R) Level-Zero -// CHECK-UR: Intel(R) oneAPI Unified Runtime over Level-Zero - -//==-- sycl-ls-unified-runtime.cpp ----- Test Unified Runtime platform ----==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// diff --git a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp index d7b5d55161107..f8884a4982c24 100644 --- a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp +++ b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp @@ -1,13 +1,13 @@ // REQUIRES: level_zero // RUN: %{build} %if cl_options %{/Od%} %else %{-O0%} -o %t0.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t0.out 2>&1 | FileCheck %s --check-prefixes=CHECK0 +// RUN: env SYCL_UR_TRACE=1 %{run} %t0.out 2>&1 | FileCheck %s --check-prefixes=CHECK0 // RUN: %{build} -O1 -o %t1.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t1.out 2>&1 | FileCheck %s --check-prefixes=CHECK1 +// RUN: env SYCL_UR_TRACE=1 %{run} %t1.out 2>&1 | FileCheck %s --check-prefixes=CHECK1 // RUN: %{build} -O2 -o %t2.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t2.out 2>&1 | FileCheck %s --check-prefixes=CHECK1 +// RUN: env SYCL_UR_TRACE=1 %{run} %t2.out 2>&1 | FileCheck %s --check-prefixes=CHECK1 // RUN: %{build} -O3 -o %t3.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes=CHECK1 +// RUN: env SYCL_UR_TRACE=1 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes=CHECK1 // This test verifies the propagation of front-end compiler optimization // option to the backend. @@ -28,7 +28,5 @@ int main() { return 0; } -// CHECK-LABEL: ---> piProgramBuild( -// CHECK0: -ze-opt-disable -// CHECK1: -ze-opt-level=2 -// CHECK: ) ---> pi_result : PI_SUCCESS +// CHECK0: ---> urProgramBuild{{.*}}-ze-opt-disable{{.*}}-> UR_RESULT_SUCCESS +// CHECK1: ---> urProgramBuild{{.*}}-ze-opt-level=2{{.*}}-> UR_RESULT_SUCCESS diff --git a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp index c5c9fc6074dab..13f88128ccc27 100644 --- a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp +++ b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp @@ -1,13 +1,13 @@ // REQUIRES: opencl // RUN: %{build} %if cl_options %{/Od%} %else %{-O0%} -o %t0.out -// RUN: %if !acc %{ env SYCL_PI_TRACE=-1 %{run} %t0.out 2>&1 | FileCheck %s --check-prefixes=CHECKOCL0 %} +// RUN: %if !acc %{ env SYCL_UR_TRACE=1 %{run} %t0.out 2>&1 | FileCheck %s --check-prefixes=CHECKOCL0 %} // RUN: %{build} -O1 -o %t1.out -// RUN: %if !acc %{ env SYCL_PI_TRACE=-1 %{run} %t1.out 2>&1 | FileCheck %s --check-prefixes=CHECKOCL1 %} +// RUN: %if !acc %{ env SYCL_UR_TRACE=1 %{run} %t1.out 2>&1 | FileCheck %s --check-prefixes=CHECKOCL1 %} // RUN: %{build} -O2 -o %t2.out -// RUN: %if !acc %{ env SYCL_PI_TRACE=-1 %{run} %t2.out 2>&1 | FileCheck %s --check-prefixes=CHECKOCL2 %} +// RUN: %if !acc %{ env SYCL_UR_TRACE=1 %{run} %t2.out 2>&1 | FileCheck %s --check-prefixes=CHECKOCL2 %} // RUN: %{build} -O3 -o %t3.out -// RUN: %if !acc %{ env SYCL_PI_TRACE=-1 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes=CHECKOCL3 %} +// RUN: %if !acc %{ env SYCL_UR_TRACE=1 %{run} %t3.out 2>&1 | FileCheck %s --check-prefixes=CHECKOCL3 %} // RUN: %{build} -O0 -o %t.out // RUN: %{run} %t.out @@ -31,9 +31,7 @@ int main() { return 0; } -// CHECK-LABEL: ---> piProgramBuild( -// CHECKOCL0: -cl-opt-disable -// CHECKOCL1-NOT: -cl-opt-disable -// CHECKOCL2-NOT: -cl-opt-disable -// CHECKOCL3-NOT: -cl-opt-disable -// CHECK: ) ---> pi_result : PI_SUCCESS +// CHECKOCL0: urProgramBuild{{.*}}-cl-opt-disable +// CHECKOCL1-NOT: urProgramBuild{{.*}}-cl-opt-disable +// CHECKOCL2-NOT: urProgramBuild{{.*}}-cl-opt-disable +// CHECKOCL3-NOT: urProgramBuild{{.*}}-cl-opt-disable diff --git a/sycl/test-e2e/Regression/context_is_destroyed_after_exception.cpp b/sycl/test-e2e/Regression/context_is_destroyed_after_exception.cpp index a9a23157bb5e8..b662101db63ca 100644 --- a/sycl/test-e2e/Regression/context_is_destroyed_after_exception.cpp +++ b/sycl/test-e2e/Regression/context_is_destroyed_after_exception.cpp @@ -1,9 +1,10 @@ // REQUIRES: gpu // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // -// XFAIL: hip_nvidia +// TODO: Reenable on Windows, see https://github.com/intel/llvm/issues/14768 +// XFAIL: hip_nvidia, windows #include @@ -31,4 +32,4 @@ int main() { return 0; } -// CHECK:---> piContextRelease( +// CHECK:---> urContextRelease( diff --git a/sycl/test-e2e/Regression/image_access.cpp b/sycl/test-e2e/Regression/image_access.cpp index 7ade1b4fb3b4b..c22907934c397 100644 --- a/sycl/test-e2e/Regression/image_access.cpp +++ b/sycl/test-e2e/Regression/image_access.cpp @@ -1,6 +1,6 @@ // REQUIRES: aspect-ext_intel_legacy_image // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // // UNSUPPORTED: hip // CUDA doesn't fully support OpenCL spec conform images. @@ -45,5 +45,5 @@ int main() { return 0; } -// CHECK:---> piMemImageCreate -// CHECK:---> piEnqueueMemImageRead +// CHECK:---> urMemImageCreate +// CHECK:---> urEnqueueMemImageRead diff --git a/sycl/test-e2e/Regression/implicit_kernel_bundle_image_filtering.cpp b/sycl/test-e2e/Regression/implicit_kernel_bundle_image_filtering.cpp index 200ef29888134..cfe997c8b3f93 100644 --- a/sycl/test-e2e/Regression/implicit_kernel_bundle_image_filtering.cpp +++ b/sycl/test-e2e/Regression/implicit_kernel_bundle_image_filtering.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // This tests checks that implicitly created kernel_bundles (i.e. through // setting a specialization ID from host) only builds the device image @@ -43,7 +43,7 @@ int main() { } // --- Check that only a single program is built: -// CHECK: ---> piProgramBuild -// CHECK-NOT: ---> piProgramBuild +// CHECK: ---> urProgramBuildExp +// CHECK-NOT: ---> urProgramBuildExp // --- Check that the test completed with expected results: // CHECK: passed diff --git a/sycl/test-e2e/Regression/pi_release.cpp b/sycl/test-e2e/Regression/pi_release.cpp index 84fbee6417b8e..672157024259b 100644 --- a/sycl/test-e2e/Regression/pi_release.cpp +++ b/sycl/test-e2e/Regression/pi_release.cpp @@ -1,6 +1,9 @@ // REQUIRES: opencl || level_zero || cuda // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s +// +// TODO: Reenable on Windows, see https://github.com/intel/llvm/issues/14768 +// XFAIL: windows #include @@ -9,5 +12,5 @@ int main() { return 0; } -// CHECK: piQueueRelease -// CHECK: piContextRelease +// CHECK: urQueueRelease +// CHECK: urContextRelease diff --git a/sycl/test-e2e/Scheduler/HostAccDestruction.cpp b/sycl/test-e2e/Scheduler/HostAccDestruction.cpp index 9c87e903c6b1f..99d93bd85bf52 100644 --- a/sycl/test-e2e/Scheduler/HostAccDestruction.cpp +++ b/sycl/test-e2e/Scheduler/HostAccDestruction.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -fsycl-dead-args-optimization -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows @@ -35,5 +35,5 @@ int main() { } // CHECK:host acc destructor call -// CHECK:---> piEnqueueKernelLaunch( +// CHECK:---> urEnqueueKernelLaunch( // CHECK:end of scope diff --git a/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp b/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp index 69f582fb6a1e4..af35567e09ff0 100644 --- a/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp +++ b/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // // XFAIL: hip_nvidia @@ -38,34 +38,16 @@ int main() { // Sequential submissions to the same in-order queue should not result in any // event dependencies. - // CHECK: piEnqueueKernelLaunch - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : 0 + // CHECK: urEnqueueKernelLaunch + // CHECK-SAME: .numEventsInWaitList = 0 submitKernel(InOrderQueueA, Buf); - // CHECK: piEnqueueKernelLaunch - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : 0 + // CHECK: urEnqueueKernelLaunch + // CHECK-SAME: .numEventsInWaitList = 0 submitKernel(InOrderQueueA, Buf); // Submisssion to a different in-order queue should explicitly depend on the // previous command group. - // CHECK: piEnqueueKernelLaunch - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : 1 + // CHECK: urEnqueueKernelLaunch + // CHECK-SAME: .numEventsInWaitList = 1 submitKernel(InOrderQueueB, Buf); return 0; diff --git a/sycl/test-e2e/Scheduler/MemObjRemapping.cpp b/sycl/test-e2e/Scheduler/MemObjRemapping.cpp index d12c58d7815da..0b681149c2999 100644 --- a/sycl/test-e2e/Scheduler/MemObjRemapping.cpp +++ b/sycl/test-e2e/Scheduler/MemObjRemapping.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_HOST_UNIFIED_MEMORY=1 SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_HOST_UNIFIED_MEMORY=1 SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // // XFAIL: hip_nvidia #include @@ -28,30 +28,24 @@ int main() { { // Check access mode flags - // CHECK: piEnqueueMemBufferMap - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : 1 - host_accessor AccA(BufA, read_only); + // CHECK: urEnqueueMemBufferMap + // CHECK-SAME: .mapFlags = UR_MAP_FLAG_READ + auto AccA = BufA.get_access(); for (std::size_t I = 0; I < Size; ++I) { assert(AccA[I] == I); } } { - // CHECK: piEnqueueMemUnmap - // CHECK: piEnqueueMemBufferMap - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : - // CHECK-NEXT: : 3 - host_accessor AccA(BufA, write_only); + // CHECK: urEnqueueMemUnmap + // CHECK: urEnqueueMemBufferMap + // CHECK-SAME: .mapFlags = UR_MAP_FLAG_READ | UR_MAP_FLAG_WRITE + auto AccA = BufA.get_access(); for (std::size_t I = 0; I < Size; ++I) AccA[I] = 2 * I; } - // CHECK-NOT: piEnqueueMemBufferMap - host_accessor AccA(BufA, read_only); + // CHECK-NOT: urEnqueueMemBufferMap + auto AccA = BufA.get_access(); for (std::size_t I = 0; I < Size; ++I) { assert(AccA[I] == 2 * I); } diff --git a/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp b/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp index eb8fd2c9f5eba..ba6b91f5acc1b 100644 --- a/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp +++ b/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp @@ -1,7 +1,8 @@ // RUN: %{build} -fsycl-dead-args-optimization -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s // -// XFAIL: hip_nvidia +// TODO: Reenable on Windows, see https://github.com/intel/llvm/issues/14768 +// XFAIL: hip_nvidia, windows //==------------------- ReleaseResourcesTests.cpp --------------------------==// // @@ -45,11 +46,11 @@ int main() { return Failed; } -// CHECK:---> piContextCreate -// CHECK:---> piextQueueCreate -// CHECK:---> piProgramCreate -// CHECK:---> piKernelCreate -// CHECK:---> piQueueRelease -// CHECK:---> piContextRelease -// CHECK:---> piKernelRelease -// CHECK:---> piProgramRelease +// CHECK:---> urContextCreate +// CHECK:---> urQueueCreate +// CHECK:---> urProgramCreate +// CHECK:---> urKernelCreate +// CHECK:---> urQueueRelease +// CHECK:---> urContextRelease +// CHECK:---> urKernelRelease +// CHECK:---> urProgramRelease diff --git a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp index 22e742e0d6c25..f901ac168cf17 100644 --- a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp +++ b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp @@ -1,24 +1,18 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_HOST_UNIFIED_MEMORY=1 env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_HOST_UNIFIED_MEMORY=1 env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s // sub-buffer host alloca are not mated with device alloca. That linkage occurs // in the parent alloca. this test ensures that any map operations are using the // correct alloca, even in the case of sub-buffer accessors in host tasks. // CHECK: == fills completed -// CHECK: piEnqueueMemBufferMap -// CHECK: piEnqueueMemBufferMap -// CHECK-NEXT: : -// CHECK-NEXT: pi_mem : -// CHECK-NEXT: : -// CHECK-NEXT: : 3 +// CHECK: urEnqueueMemBufferMap +// CHECK: urEnqueueMemBufferMap +// CHECK-SAME: .mapFlags = UR_MAP_FLAG_READ // CHECK: == between host accesses -// CHECK: piEnqueueMemBufferMap -// CHECK-NEXT: : -// CHECK-NEXT: pi_mem : -// CHECK-NEXT: : -// CHECK-NEXT: : 3 +// CHECK: urEnqueueMemBufferMap +// CHECK-SAME: .mapFlags = UR_MAP_FLAG_READ #include diff --git a/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp b/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp index 04f67fe9fad3c..d58e9801e9cf2 100644 --- a/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp +++ b/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp @@ -3,10 +3,10 @@ // RUN: %{build} -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t.so // RUN: %{build} -DFOO_FIRST -L%T -o %t.out -l%basename_t -Wl,-rpath=%T -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-FIRST,CHECK --implicit-check-not=piProgramBuild +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-FIRST,CHECK --implicit-check-not=piProgramBuild // RUN: %{build} -L%T -o %t.out -l%basename_t -Wl,-rpath=%T -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild #include @@ -50,19 +50,19 @@ void run() { } int main() { #ifdef FOO_FIRST - // CHECK-FIRST: piProgramBuild + // CHECK-FIRST: urProgramBuild // CHECK-FIRST: Foo: 1 // CHECK-FIRST: Foo: 1 assert(foo() == 1); assert(foo() == 1); #endif - // CHECK: piProgramBuild + // CHECK: urProgramBuild // CHECK: Main: 2 // CHECK: Main: 2 run(); run(); #ifndef FOO_FIRST - // CHECK-LAST: piProgramBuild + // CHECK-LAST: urProgramBuild // CHECK-LAST: Foo: 1 // CHECK-LAST: Foo: 1 assert(foo() == 1); diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp index de7dc3dd3897b..b4c6820f3dc84 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp @@ -5,18 +5,18 @@ // DEFINE: %{compile} = %{build} -DFNAME=%basename_t -o %t.out -ldl -Wl,-rpath=%T // RUN: %{compile} -DRUN_FIRST -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-FIRST,CHECK --implicit-check-not=piProgramBuild +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-FIRST,CHECK --implicit-check-not=piProgramBuild // RUN: %{compile} -DRUN_MIDDLE_BEFORE -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-MIDDLE-BEFORE,CHECK --implicit-check-not=piProgramBuild +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-MIDDLE-BEFORE,CHECK --implicit-check-not=piProgramBuild // RUN: %{compile} -DRUN_MIDDLE_AFTER -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-MIDDLE-AFTER,CHECK --implicit-check-not=piProgramBuild +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-MIDDLE-AFTER,CHECK --implicit-check-not=piProgramBuild // clang-format off // This causes SEG. FAULT. // RUNx: %{compile} -DRUN_LAST -// RUNx: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild +// RUNx: env SYCL_UR_TRACE=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild // clang-format on #include @@ -61,7 +61,7 @@ void run() { } int main() { #ifdef RUN_FIRST - // CHECK-FIRST: piProgramBuild + // CHECK-FIRST: urProgramBuild // CHECK-FIRST: Main: 2 // CHECK-FIRST: Main: 2 run(); @@ -77,21 +77,21 @@ int main() { *(void **)(&func) = dlsym(handle, "_Z3foov"); #ifdef RUN_MIDDLE_BEFORE - // CHECK-MIDDLE-BEFORE: piProgramBuild + // CHECK-MIDDLE-BEFORE: urProgramBuild // CHECK-MIDDLE-BEFORE: Main: 2 // CHECK-MIDDLE-BEFORE: Main: 2 run(); run(); #endif - // CHECK: piProgramBuild + // CHECK: urProgramBuild // CHECK: Foo: 1 // CHECK: Foo: 1 assert(func() == 1); assert(func() == 1); #ifdef RUN_MIDDLE_AFTER - // CHECK-MIDDLE-AFTER: piProgramBuild + // CHECK-MIDDLE-AFTER: urProgramBuild // CHECK-MIDDLE-AFTER: Main: 2 // CHECK-MIDDLE-AFTER: Main: 2 run(); @@ -101,7 +101,7 @@ int main() { dlclose(handle); #ifdef RUN_LAST - // CHECK-LAST: piProgramBuild + // CHECK-LAST: urProgramBuild // CHECK-LAST: Main: 2 // CHECK-LAST: Main: 2 run(); diff --git a/sycl/test-e2e/SpecConstants/2020/image_selection.cpp b/sycl/test-e2e/SpecConstants/2020/image_selection.cpp index 59528a05f0c8d..1709185896571 100644 --- a/sycl/test-e2e/SpecConstants/2020/image_selection.cpp +++ b/sycl/test-e2e/SpecConstants/2020/image_selection.cpp @@ -6,21 +6,21 @@ // clang-format off // RUN: %clangxx -fsycl-add-default-spec-consts-image -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %s -o %t1.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t1.out | FileCheck --match-full-lines --check-prefix=CHECK-ENABLED %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t1.out | FileCheck --match-full-lines --check-prefix=CHECK-ENABLED %s // clang-format on // Check the behaviour when -fsycl-add-default-spec-consts-image option is not // used. // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %s -o %t2.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t2.out | FileCheck --match-full-lines --check-prefix=CHECK-DEFAULT %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t2.out | FileCheck --match-full-lines --check-prefix=CHECK-DEFAULT %s // Check the behaviour when -fsycl-add-default-spec-consts-image option is used // and we have spirv image in addition to AOT. // clang-format off // RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %s -o %t3.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t3.out | FileCheck --match-full-lines --check-prefix=CHECK-MIX %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t3.out | FileCheck --match-full-lines --check-prefix=CHECK-MIX %s // clang-format on // Check the behaviour when -fsycl-add-default-spec-consts-image option is used @@ -29,7 +29,7 @@ // clang-format off // RUN: %clangxx -fsycl-add-default-spec-consts-image -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %s -o %t3.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t3.out | FileCheck --match-full-lines --check-prefix=CHECK-DEFAULT-EXPLICIT-SET %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t3.out | FileCheck --match-full-lines --check-prefix=CHECK-DEFAULT-EXPLICIT-SET %s // clang-format on // Check the behaviour when -fsycl-add-default-spec-consts-image option is used @@ -39,7 +39,7 @@ // clang-format off // RUN: %clangxx -fsycl-add-default-spec-consts-image -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %s -o %t3.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t3.out | FileCheck --match-full-lines --check-prefix=CHECK-DEFAULT-BACK-TO-DEFAULT %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t3.out | FileCheck --match-full-lines --check-prefix=CHECK-DEFAULT-BACK-TO-DEFAULT %s // clang-format on #include @@ -64,80 +64,40 @@ int main() { // submission depending on whether spec const value was set or not. a. In the // case when we select image where specialization constants are replaced with // default value - specialization constant buffer is not created and we set - // nullptr in piextKernelSetArgMemObj (4th parameter) b. In the case when we + // nullptr in urKernelSetArgMemObj (4th parameter) b. In the case when we // select regular image - specialization constant buffer is created and we set - // a real pointer in piextKernelSetArgMemObj. + // a real pointer in urKernelSetArgMemObj. // CHECK-DEFAULT: Submission 0 - // CHECK-DEFAULT: ---> piextKernelSetArgMemObj( - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{(0x)?[0-9,a-f,A-F]+}} - // CHECK-DEFAULT-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-DEFAULT: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: Default value of specialization constant was used. // CHECK-DEFAULT: Submission 1 - // CHECK-DEFAULT: ---> piextKernelSetArgMemObj( - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{(0x)?[0-9,a-f,A-F]+}} - // CHECK-DEFAULT-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-DEFAULT: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: New specialization constant value was set. // CHECK-DEFAULT: Submission 2 - // CHECK-DEFAULT: ---> piextKernelSetArgMemObj( - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{(0x)?[0-9,a-f,A-F]+}} - // CHECK-DEFAULT-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-DEFAULT: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: Default value of specialization constant was used. // CHECK-DEFAULT: Submission 3 - // CHECK-DEFAULT: ---> piextKernelSetArgMemObj( - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{(0x)?[0-9,a-f,A-F]+}} - // CHECK-DEFAULT-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-DEFAULT: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: New specialization constant value was set. // CHECK-ENABLED: Submission 0 - // CHECK-ENABLED: ---> piextKernelSetArgMemObj( - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{0+}} - // CHECK-ENABLED-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-ENABLED: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: Default value of specialization constant was used. // CHECK-ENABLED: Submission 1 - // CHECK-ENABLED: ---> piextKernelSetArgMemObj( - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{(0x)?[0-9,a-f,A-F]+}} - // CHECK-ENABLED-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-ENABLED: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: New specialization constant value was set. // CHECK-ENABLED: Submission 2 - // CHECK-ENABLED: ---> piextKernelSetArgMemObj( - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{0+}} - // CHECK-ENABLED-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-ENABLED: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: Default value of specialization constant was used. // CHECK-ENABLED: Submission 3 - // CHECK-ENABLED: ---> piextKernelSetArgMemObj( - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{(0x)?[0-9,a-f,A-F]+}} - // CHECK-ENABLED-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-ENABLED: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: New specialization constant value was set. // CHECK-MIX: Submission 0 @@ -175,23 +135,13 @@ int main() { // In this we don't set specialization constant value for bundle, so default // value is used and SYCL RT selects image where values are replaced with // default, that's why nullptr is set as 4th parameter of - // piextKernelSetArgMemObj. + // urKernelSetArgMemObj. // CHECK-DEFAULT: Kernel bundle - // CHECK-DEFAULT: ---> piextKernelSetArgMemObj( - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-NEXT: : {{(0x)?[0-9,a-f,A-F]+}} - // CHECK-DEFAULT-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-DEFAULT: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: Default value of specialization constant was used. // CHECK-ENABLED: Kernel bundle - // CHECK-ENABLED: ---> piextKernelSetArgMemObj( - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{.*}} - // CHECK-ENABLED-NEXT: : {{0+}} - // CHECK-ENABLED-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-ENABLED: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: Default value of specialization constant was used. // CHECK-MIX: Kernel bundle @@ -219,12 +169,7 @@ int main() { // constants. We are verifying that by checking the 4th parameter is set to // zero. // CHECK-DEFAULT-EXPLICIT-SET: Default value was explicitly set - // CHECK-DEFAULT-EXPLICIT-SET: ---> piextKernelSetArgMemObj( - // CHECK-DEFAULT-EXPLICIT-SET-NEXT: : {{.*}} - // CHECK-DEFAULT-EXPLICIT-SET-NEXT: : {{.*}} - // CHECK-DEFAULT-EXPLICIT-SET-NEXT: : {{.*}} - // CHECK-DEFAULT-EXPLICIT-SET-NEXT: : {{0+}} - // CHECK-DEFAULT-EXPLICIT-SET-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-DEFAULT-EXPLICIT-SET: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT-EXPLICIT-SET: Default value of specialization constant was used. std::cout << "Default value was explicitly set" << std::endl; Q.submit([&](sycl::handler &cgh) { @@ -247,12 +192,7 @@ int main() { // values of specialization constants. We are verifying that by checking the // 4th parameter is set to zero. // CHECK-DEFAULT-BACK-TO-DEFAULT: Changed to new value and then default value was explicitly set - // CHECK-DEFAULT-BACK-TO-DEFAULT: ---> piextKernelSetArgMemObj( - // CHECK-DEFAULT-BACK-TO-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-BACK-TO-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-BACK-TO-DEFAULT-NEXT: : {{.*}} - // CHECK-DEFAULT-BACK-TO-DEFAULT-NEXT: : {{0+}} - // CHECK-DEFAULT-BACK-TO-DEFAULT-NEXT: ) ---> pi_result : PI_SUCCESS + // CHECK-DEFAULT-BACK-TO-DEFAULT: ---> urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT-BACK-TO-DEFAULT: Default value of specialization constant was used. std::cout << "Changed to new value and then default value was explicitly set" << std::endl; diff --git a/sycl/test-e2e/SpecConstants/2020/non_native/SpecConstBuffer.cpp b/sycl/test-e2e/SpecConstants/2020/non_native/SpecConstBuffer.cpp index 68d0e5dd5a396..d8798caf04f14 100644 --- a/sycl/test-e2e/SpecConstants/2020/non_native/SpecConstBuffer.cpp +++ b/sycl/test-e2e/SpecConstants/2020/non_native/SpecConstBuffer.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 %S/Inputs/common.cpp -o %t.out \ // RUN: -fsycl-dead-args-optimization -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s #include @@ -20,5 +20,5 @@ int main() { }); Q.wait(); return 0; - // CHECK: piMemRelease + // CHECK: urMemRelease } diff --git a/sycl/test-e2e/SubGroup/load_store.cpp b/sycl/test-e2e/SubGroup/load_store.cpp index 76bebe3b33f34..61014d152fcbe 100644 --- a/sycl/test-e2e/SubGroup/load_store.cpp +++ b/sycl/test-e2e/SubGroup/load_store.cpp @@ -1,6 +1,9 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out // +// TODO: Flaky reenable, see https://github.com/intel/llvm/issues/14765 +// UNSUPPORTED: windows, linux +// //==----------- load_store.cpp - SYCL sub_group load/store test ------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/sycl/test-e2e/Tracing/buffer_printers.cpp b/sycl/test-e2e/Tracing/buffer_printers.cpp index 3f3a3c9aea858..1266541a57cbf 100644 --- a/sycl/test-e2e/Tracing/buffer_printers.cpp +++ b/sycl/test-e2e/Tracing/buffer_printers.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 %{run} %t.out | FileCheck %s // // XFAIL: hip_nvidia @@ -8,11 +8,10 @@ // Test image-specific printers of the Plugin Interace // -//CHECK: ---> piEnqueueMemBufferCopyRect( -//CHECK: pi_buff_rect_offset x_bytes/y/z : 64/5/0 -//CHECK: pi_buff_rect_offset x_bytes/y/z : 0/0/0 -//CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/1 -//CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/1 +//CHECK: ---> urEnqueueMemBufferCopyRect( +//CHECK-SAME: .srcOrigin = (struct ur_rect_offset_t){.x = 64, .y = 5, .z = 0} +//CHECK-SAME: .dstOrigin = (struct ur_rect_offset_t){.x = 0, .y = 0, .z = 0} +//CHECK-SAME: .region = (struct ur_rect_region_t){.width = 64, .height = 5, .depth = 1} using namespace sycl; @@ -37,8 +36,8 @@ int main() { }); } - // CHECK: ---> piMemBufferPartition( - // CHECK: pi_buffer_region origin/size : 128/32 + // CHECK: ---> urMemBufferPartition( + // CHECK-SAME: .origin = 128, .size = 32 constexpr unsigned Size = 64; std::vector Data(Size); diff --git a/sycl/test-e2e/Tracing/image_printers.cpp b/sycl/test-e2e/Tracing/image_printers.cpp index 837707d327ca1..6e6c81b17ad98 100644 --- a/sycl/test-e2e/Tracing/image_printers.cpp +++ b/sycl/test-e2e/Tracing/image_printers.cpp @@ -5,11 +5,11 @@ // Test image-specific printers of the Plugin Interace // -// CHECK: ---> piMemImageCreate( -// CHECK: image_desc w/h/d : 4 / 4 / 1 -- arrSz/row/slice : 0 / 64 / 256 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4337 -// CHECK: ---> piEnqueueMemImageRead( -// CHECK: pi_image_offset x/y/z : 0/0/0 -// CHECK: pi_image_region width/height/depth : 4/4/1 +// CHECK: ---> urMemImageCreate( +// CHECK-SAME: image_desc w/h/d : 4 / 4 / 1 -- arrSz/row/slice : 0 / 64 / 256 -- num_mip_lvls/num_smpls/image_type : 0 / 0 / 4337 +// CHECK: ---> urEnqueueMemBufferReadRect( +// CHECK-SAME: ur_rect_offset_t x/y/z : 0/0/0 +// CHECK-SAME: ur_rect_region_t width/height/depth : 4/4/1 #include #include diff --git a/sycl/test-e2e/Tracing/pi_tracing_test.cpp b/sycl/test-e2e/Tracing/pi_tracing_test.cpp deleted file mode 100644 index 78bb0c31c1d9d..0000000000000 --- a/sycl/test-e2e/Tracing/pi_tracing_test.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out | FileCheck %s -// -// XFAIL: hip_nvidia - -// Test tracing of the Plugin Interface - -// CHECK: ---> piPlatformGetInfo( -// CHECK: pi_platform : {{0[xX]?[0-9a-fA-F]*}} -// CHECK: : {{0[xX]?[0-9a-fA-F]*}} -// CHECK: -// CHECK: ---> piMemBufferCreate( -// CHECK-NEXT: : {{0[xX]?[0-9a-fA-F]*}} -// CHECK-NEXT: : 1 -// CHECK-NEXT: : 40 -// CHECK-NEXT: : 0 -// CHECK-NEXT: : {{0[xX]?[0-9a-fA-F]*}} -// CHECK: {{|0}} -// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS -// CHECK-NEXT: [out]void * : {{0+}} -// CHECK-NEXT: [out]pi_mem * : {{0[xX]?[0-9a-fA-F]*}}[ {{0[xX]?[0-9a-fA-F]*}} -// CHECK: ---> piKernelCreate( -// CHECK: : {{.*}} -// CHECK: ---> piEnqueueKernelLaunch( -// CHECK-NEXT: : {{0[xX]?[0-9a-fA-F]*}} -// CHECK-NEXT: : {{0[xX]?[0-9a-fA-F]*}} -// CHECK-NEXT: : 1 -// CHECK-NEXT: : {{0[xX]?[0-9a-fA-F]*}} -// CHECK-NEXT: : {{0[xX]?[0-9a-fA-F]*}} -// CHECK-NEXT: : 0 -// CHECK-NEXT: : 0 -// CHECK-NEXT: pi_event * : {{0+}}[ nullptr ] -// CHECK-NEXT: pi_event * : {{0[xX]?[0-9a-fA-F]*}}[ {{0+}} ... ] -// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS -// CHECK-NEXT: [out]pi_event * : {{0+}}[ nullptr ] -// CHECK-NEXT: [out]pi_event * : {{0[xX]?[0-9a-fA-F]*}} -// CHECK-SAME: [ {{0[xX]?[0-9a-fA-F]*}} ... ] -// -// CHECK: ---> piEventsWait( -// CHECK-NEXT: : 1 -// CHECK-NEXT: {{(const |\[out\])?}}pi_event * : {{0[xX]?[0-9a-fA-F]*}} -// CHECK-SAME: [ {{0[xX]?[0-9a-fA-F]*}} ... ] -// CHECK-NEXT: ) ---> pi_result : PI_SUCCESS - -#include -int main() { - sycl::queue Queue; - sycl::buffer Buf(10); - sycl::event E = Queue.submit([&](sycl::handler &cgh) { - auto Acc = Buf.template get_access(cgh); - - cgh.parallel_for(10, - [=](sycl::id<1> ID) { Acc[ID] = 5; }); - }); - E.wait(); - return 0; -} diff --git a/sycl/test-e2e/USM/memadvise_flags.cpp b/sycl/test-e2e/USM/memadvise_flags.cpp index f4079d88a4997..7de2a8a931f30 100755 --- a/sycl/test-e2e/USM/memadvise_flags.cpp +++ b/sycl/test-e2e/USM/memadvise_flags.cpp @@ -37,36 +37,28 @@ int main() { bool isHip = dev.get_backend() == sycl::backend::ext_oneapi_hip; std::vector valid_advices; - if (isCuda) { - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY); - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY); - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION); - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION); - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY); - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY); - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION_HOST); + if (isCuda || isHip) { + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST); valid_advices.emplace_back( - PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST); - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST); - valid_advices.emplace_back(PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST); - } else if (isHip) { - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_READ_MOSTLY); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_READ_MOSTLY); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_PREFERRED_LOCATION); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_ACCESSED_BY); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_PREFERRED_LOCATION_HOST); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION_HOST); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_ACCESSED_BY_HOST); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY_HOST); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED); - valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED); + UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST); } else { // Skip return 0; } + if (isHip) { + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY); + valid_advices.emplace_back(UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY); + } + for (int advice : valid_advices) { q.mem_advise(ptr, size, advice); } diff --git a/sycl/test-e2e/USM/memory_coherency_hip.cpp b/sycl/test-e2e/USM/memory_coherency_hip.cpp index 4003a3e25374c..f411392e257bd 100644 --- a/sycl/test-e2e/USM/memory_coherency_hip.cpp +++ b/sycl/test-e2e/USM/memory_coherency_hip.cpp @@ -2,6 +2,9 @@ // REQUIRES: hip_amd // RUN: %{run} %t1.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14742 +// UNSUPPORTED: windows, linux + //==---- memory_coherency_hip.cpp -----------------------------------------==// // USM coarse/fine grain memory coherency test for the HIP-AMD backend. // @@ -74,7 +77,7 @@ int main() { // Coherency test 1 // // The following test validates if memory access is fine with memory allocated - // using malloc_managed() and COARSE_GRAINED advice set via mem_advise(). + // using malloc_managed() and NON_COHERENT advice set via mem_advise(). // // Coarse grained memory is only guaranteed to be coherent outside of GPU // kernels that modify it. Changes applied to coarse-grained memory by a GPU @@ -84,7 +87,8 @@ int main() { // GPUs) if those changes were made before the kernel launched. // Hint to use coarse-grain memory. - q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED}); + q.mem_advise(ptr, sizeof(int), + int{UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY}); int init_val{9}; int expected{init_val * init_val}; @@ -112,7 +116,8 @@ int main() { // coherently communicate with each other while the GPU kernel is running. // Hint to use fine-grain memory. - q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED}); + q.mem_advise(ptr, sizeof(int), + int{UR_USM_ADVICE_FLAG_UNSET_NON_COHERENT_MEMORY}); init_val = 1; expected = 4; diff --git a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp index be75f61137ea3..17f838f79b11a 100644 --- a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp +++ b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp @@ -9,7 +9,7 @@ std::mutex GMutex; XPTI_CALLBACK_API void syclCallback(uint16_t, xpti::trace_event_data_t *, xpti::trace_event_data_t *, uint64_t, const void *); -XPTI_CALLBACK_API void syclPiCallback(uint16_t, xpti::trace_event_data_t *, +XPTI_CALLBACK_API void syclUrCallback(uint16_t, xpti::trace_event_data_t *, xpti::trace_event_data_t *, uint64_t, const void *); @@ -21,11 +21,10 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int MajorVersion, std::string_view NameView{StreamName}; using type = xpti::trace_point_type_t; - if (NameView == "sycl.pi") { + if (NameView == "ur") { uint8_t StreamID = xptiRegisterStream(StreamName); - for (type t : std::initializer_list{type::function_begin, - type::function_with_args_end}) - xptiRegisterCallback(StreamID, static_cast(t), syclPiCallback); + for (type t : std::initializer_list{type::function_with_args_begin}) + xptiRegisterCallback(StreamID, static_cast(t), syclUrCallback); } if (NameView == "sycl") { uint8_t StreamID = xptiRegisterStream(StreamName); @@ -43,19 +42,19 @@ XPTI_CALLBACK_API void xptiTraceFinish(const char *streamName) { std::cout << "xptiTraceFinish: Stream Name = " << streamName << "\n"; } -XPTI_CALLBACK_API void syclPiCallback(uint16_t TraceType, +XPTI_CALLBACK_API void syclUrCallback(uint16_t TraceType, xpti::trace_event_data_t *, xpti::trace_event_data_t *, uint64_t, const void *UserData) { std::lock_guard Lock{GMutex}; auto Type = static_cast(TraceType); - const char *funcName = static_cast(UserData); - if (Type == xpti::trace_point_type_t::function_begin) { - std::cout << "PI Call Begin : "; - } else if (Type == xpti::trace_point_type_t::function_end) { - std::cout << "PI Call End : "; + auto *args = static_cast(UserData); + if (Type == xpti::trace_point_type_t::function_with_args_begin) { + std::cout << "UR Call Begin : "; + } else if (Type == xpti::trace_point_type_t::function_with_args_end) { + std::cout << "UR Call End : "; } - std::cout << funcName << "\n"; + std::cout << args->function_name << "\n"; } XPTI_CALLBACK_API void syclCallback(uint16_t TraceType, diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp index 75fc70a127ad9..020d263498635 100644 --- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp +++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp @@ -1,32 +1,32 @@ -// Test is disabled to allow a few output format changes to pass pre-commit -// testing. // REQUIRES: xptifw, opencl, cpu, linux // RUN: %clangxx %s -DXPTI_COLLECTOR -DXPTI_CALLBACK_API_EXPORTS %xptifw_lib -shared -fPIC -std=c++17 -o %t_collector.so // RUN: %{build} -o %t.out -// RUN: env XPTI_TRACE_ENABLE=1 env XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher env XPTI_SUBSCRIBERS=%t_collector.so %{run} %t.out | FileCheck %s +// RUN: env UR_ENABLE_LAYERS=UR_LAYER_TRACING env XPTI_TRACE_ENABLE=1 env XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher env XPTI_SUBSCRIBERS=%t_collector.so %{run} %t.out | FileCheck %s + +// TODO: Reenable, see https://github.com/intel/llvm/issues/14744 +// UNSUPPORTED: windows, linux #include "basic_event_collection.inc" // +// CHECK: xptiTraceInit: Stream Name = ur // CHECK: xptiTraceInit: Stream Name = sycl.experimental.mem_alloc // CHECK: xptiTraceInit: Stream Name = sycl // CHECK-NEXT: Graph create -// CHECK-NEXT: xptiTraceInit: Stream Name = sycl.pi -// CHECK-NEXT: xptiTraceInit: Stream Name = sycl.pi.debug -// CHECK: PI Call Begin : piPlatformsGet -// CHECK: PI Call Begin : piContextCreate -// CHECK: PI Call Begin : piextQueueCreate -// CHECK: PI Call Begin : piextDeviceSelectBinary -// CHECK: PI Call Begin : piKernelCreate -// CHECK-NEXT: PI Call Begin : piPlatformGetInfo -// CHECK-NEXT: PI Call Begin : piPlatformGetInfo -// CHECK-NEXT: PI Call Begin : piKernelSetExecInfo -// CHECK: PI Call Begin : piextKernelSetArgPointer -// CHECK-NEXT: PI Call Begin : piKernelGetGroupInfo -// CHECK-NEXT: PI Call Begin : piEnqueueKernelLaunch -// CHECK: PI Call Begin : piKernelCreate -// CHECK-NEXT: PI Call Begin : piPlatformGetInfo -// CHECK-NEXT: PI Call Begin : piPlatformGetInfo -// CHECK-NEXT: PI Call Begin : piKernelSetExecInfo +// CHECK: UR Call Begin : urPlatformGet +// CHECK: UR Call Begin : urContextCreate +// CHECK: UR Call Begin : urQueueCreate +// CHECK: UR Call Begin : urDeviceSelectBinary +// CHECK: UR Call Begin : urKernelCreate +// CHECK-NEXT: UR Call Begin : urPlatformGetInfo +// CHECK-NEXT: UR Call Begin : urPlatformGetInfo +// CHECK-NEXT: UR Call Begin : urKernelSetExecInfo +// CHECK: UR Call Begin : urKernelSetArgPointer +// CHECK-NEXT: UR Call Begin : urKernelGetGroupInfo +// CHECK-NEXT: UR Call Begin : urEnqueueKernelLaunch +// CHECK: UR Call Begin : urKernelCreate +// CHECK-NEXT: UR Call Begin : urPlatformGetInfo +// CHECK-NEXT: UR Call Begin : urPlatformGetInfo +// CHECK-NEXT: UR Call Begin : urKernelSetExecInfo // CHECK: Node create // CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} @@ -50,11 +50,11 @@ // CHECK-DAG: from_source : false // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} -// CHECK: PI Call Begin : piextKernelSetArgPointer -// CHECK-NEXT: PI Call Begin : piKernelGetGroupInfo -// CHECK-NEXT: PI Call Begin : piEnqueueKernelLaunch -// CHECK-NEXT: PI Call Begin : piKernelRelease -// CHECK-NEXT: PI Call Begin : piProgramRelease +// CHECK: UR Call Begin : urKernelSetArgPointer +// CHECK-NEXT: UR Call Begin : urKernelGetGroupInfo +// CHECK-NEXT: UR Call Begin : urEnqueueKernelLaunch +// CHECK-NEXT: UR Call Begin : urKernelRelease +// CHECK-NEXT: UR Call Begin : urProgramRelease // CHECK-NEXT: Signal // CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} @@ -73,7 +73,7 @@ // CHECK-DAG: sycl_device : {{.*}} // CHECK: Wait begin // CHECK-DAG: queue_id : {{.*}} -// CHECK-NEXT: PI Call Begin : piEventsWait +// CHECK-NEXT: UR Call Begin : urEventWait // CHECK-NEXT: Wait end // CHECK-DAG: queue_id : {{.*}} // CHECK-NEXT: Node create @@ -88,18 +88,18 @@ // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} -// CHECK: PI Call Begin : piextUSMEnqueueMemcpy +// CHECK: UR Call Begin : urEnqueueUSMMemcpy // CHECK-NEXT: Task end // CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} -// CHECK: PI Call Begin : piEventRelease +// CHECK: UR Call Begin : urEventRelease // CHECK-NEXT: Wait begin // CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sycl_device_type : {{.*}} -// CHECK: PI Call Begin : piQueueFinish +// CHECK: UR Call Begin : urQueueFinish // CHECK-NEXT: Wait end // CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sycl_device_type : {{.*}} diff --git a/sycl/test-e2e/syclcompat/memory/memory_management_test2.cpp b/sycl/test-e2e/syclcompat/memory/memory_management_test2.cpp index c6687ed9c9dc0..fe142a548bbb7 100644 --- a/sycl/test-e2e/syclcompat/memory/memory_management_test2.cpp +++ b/sycl/test-e2e/syclcompat/memory/memory_management_test2.cpp @@ -33,6 +33,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// TODO: Reenable, see https://github.com/intel/llvm/issues/14659 +// UNSUPPORTED: windows, linux + #include #include diff --git a/sycl/test/abi/layout_exception.cpp b/sycl/test/abi/layout_exception.cpp index 0af35d6738022..b8a5294aad995 100644 --- a/sycl/test/abi/layout_exception.cpp +++ b/sycl/test/abi/layout_exception.cpp @@ -20,7 +20,7 @@ void foo() { // CHECK-NEXT: 8 | element_type * _M_ptr // CHECK-NEXT: 16 | class std::__shared_count<> _M_refcount // CHECK-NEXT: 16 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 24 | pi_int32 MPIErr +// CHECK-NEXT: 24 | int32_t MErr // CHECK-NEXT: 32 | class std::shared_ptr MContext // CHECK-NEXT: 32 | class std::__shared_ptr (base) // CHECK-NEXT: 32 | class std::__shared_ptr_access (base) (empty) diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump deleted file mode 100644 index 8f375094f229a..0000000000000 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ /dev/null @@ -1,184 +0,0 @@ -################################################################################ -# This file is automatically generated by abi_check.py tool. -# DO NOT EDIT IT MANUALLY. Refer to sycl/doc/developer/ABIPolicyGuide.md for more info. -################################################################################ - -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_cuda.so -# REQUIRES: linux -# REQUIRES: cuda -# UNSUPPORTED: libcxx - -piContextCreate -piContextGetInfo -piContextRelease -piContextRetain -piDeviceGetInfo -piDevicePartition -piDeviceRelease -piDeviceRetain -piDevicesGet -piEnqueueEventsWait -piEnqueueEventsWaitWithBarrier -piEnqueueKernelLaunch -piEnqueueMemBufferCopy -piEnqueueMemBufferCopyRect -piEnqueueMemBufferFill -piEnqueueMemBufferMap -piEnqueueMemBufferRead -piEnqueueMemBufferReadRect -piEnqueueMemBufferWrite -piEnqueueMemBufferWriteRect -piEnqueueMemImageCopy -piEnqueueMemImageFill -piEnqueueMemImageRead -piEnqueueMemImageWrite -piEnqueueMemUnmap -piEnqueueTimestampRecordingExp -piEventCreate -piEventGetInfo -piEventGetProfilingInfo -piEventRelease -piEventRetain -piEventSetCallback -piEventSetStatus -piEventsWait -piGetDeviceAndHostTimer -piKernelCreate -piKernelGetGroupInfo -piKernelGetInfo -piKernelGetSubGroupInfo -piKernelRelease -piKernelRetain -piKernelSetArg -piKernelSetExecInfo -piMemBufferCreate -piMemBufferPartition -piMemGetInfo -piMemImageCreate -piMemImageGetInfo -piMemRelease -piMemRetain -piPlatformGetInfo -piPlatformsGet -piPluginGetBackendOption -piPluginGetLastError -piPluginInit -piProgramBuild -piProgramCompile -piProgramCreate -piProgramCreateWithBinary -piProgramGetBuildInfo -piProgramGetInfo -piProgramLink -piProgramRelease -piProgramRetain -piQueueCreate -piQueueFinish -piQueueFlush -piQueueGetInfo -piQueueRelease -piQueueRetain -piSamplerCreate -piSamplerGetInfo -piSamplerRelease -piSamplerRetain -piTearDown -piextBindlessImageSamplerCreate -piextCommandBufferAdviseUSM -piextCommandBufferCreate -piextCommandBufferFillUSM -piextCommandBufferFinalize -piextCommandBufferMemBufferCopy -piextCommandBufferMemBufferCopyRect -piextCommandBufferMemBufferFill -piextCommandBufferMemBufferRead -piextCommandBufferMemBufferReadRect -piextCommandBufferMemBufferWrite -piextCommandBufferMemBufferWriteRect -piextCommandBufferMemcpyUSM -piextCommandBufferNDRangeKernel -piextCommandBufferPrefetchUSM -piextCommandBufferRelease -piextCommandBufferReleaseCommand -piextCommandBufferRetain -piextCommandBufferRetainCommand -piextCommandBufferUpdateKernelLaunch -piextContextCreateWithNativeHandle -piextContextGetNativeHandle -piextContextSetExtendedDeleter -piextDeviceCreateWithNativeHandle -piextDeviceGetNativeHandle -piextDeviceSelectBinary -piextDisablePeerAccess -piextEnablePeerAccess -piextEnqueueCommandBuffer -piextEnqueueCooperativeKernelLaunch -piextEnqueueKernelLaunchCustom -piextEnqueueNativeCommand -piextEnqueueReadHostPipe -piextEnqueueWriteHostPipe -piextEventCreateWithNativeHandle -piextEventGetNativeHandle -piextGetDeviceFunctionPointer -piextGetGlobalVariablePointer -piextImportExternalMemory -piextImportExternalSemaphore -piextKernelCreateWithNativeHandle -piextKernelGetNativeHandle -piextKernelSetArgMemObj -piextKernelSetArgPointer -piextKernelSetArgSampler -piextKernelSuggestMaxCooperativeGroupCount -piextMemCreateWithNativeHandle -piextMemGetNativeHandle -piextMemImageAllocate -piextMemImageCopy -piextMemImageCreateWithNativeHandle -piextMemImageFree -piextMemImageGetInfo -piextMemMapExternalArray -piextMemMipmapFree -piextMemMipmapGetLevel -piextMemReleaseInterop -piextMemSampledImageCreate -piextMemSampledImageHandleDestroy -piextMemUnsampledImageCreate -piextMemUnsampledImageHandleDestroy -piextPeerAccessGetInfo -piextPhysicalMemCreate -piextPhysicalMemRelease -piextPhysicalMemRetain -piextPlatformCreateWithNativeHandle -piextPlatformGetNativeHandle -piextPluginGetOpaqueData -piextProgramCreateWithNativeHandle -piextProgramGetNativeHandle -piextProgramSetSpecializationConstant -piextQueueCreate -piextQueueCreateWithNativeHandle -piextQueueGetNativeHandle -piextReleaseExternalSemaphore -piextSignalExternalSemaphore -piextUSMDeviceAlloc -piextUSMEnqueueFill -piextUSMEnqueueFill2D -piextUSMEnqueueMemAdvise -piextUSMEnqueueMemcpy -piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset2D -piextUSMEnqueuePrefetch -piextUSMFree -piextUSMGetMemAllocInfo -piextUSMHostAlloc -piextUSMImport -piextUSMPitchedAlloc -piextUSMRelease -piextUSMSharedAlloc -piextVirtualMemFree -piextVirtualMemGetInfo -piextVirtualMemGranularityGetInfo -piextVirtualMemMap -piextVirtualMemReserve -piextVirtualMemSetAccess -piextVirtualMemUnmap -piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump deleted file mode 100644 index ea3bc4f74179f..0000000000000 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ /dev/null @@ -1,184 +0,0 @@ -################################################################################ -# This file is automatically generated by abi_check.py tool. -# DO NOT EDIT IT MANUALLY. Refer to sycl/doc/developer/ABIPolicyGuide.md for more info. -################################################################################ - -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_hip.so -# REQUIRES: linux -# REQUIRES: hip -# UNSUPPORTED: libcxx - -piContextCreate -piContextGetInfo -piContextRelease -piContextRetain -piDeviceGetInfo -piDevicePartition -piDeviceRelease -piDeviceRetain -piDevicesGet -piEnqueueEventsWait -piEnqueueEventsWaitWithBarrier -piEnqueueKernelLaunch -piEnqueueMemBufferCopy -piEnqueueMemBufferCopyRect -piEnqueueMemBufferFill -piEnqueueMemBufferMap -piEnqueueMemBufferRead -piEnqueueMemBufferReadRect -piEnqueueMemBufferWrite -piEnqueueMemBufferWriteRect -piEnqueueMemImageCopy -piEnqueueMemImageFill -piEnqueueMemImageRead -piEnqueueMemImageWrite -piEnqueueMemUnmap -piEnqueueTimestampRecordingExp -piEventCreate -piEventGetInfo -piEventGetProfilingInfo -piEventRelease -piEventRetain -piEventSetCallback -piEventSetStatus -piEventsWait -piGetDeviceAndHostTimer -piKernelCreate -piKernelGetGroupInfo -piKernelGetInfo -piKernelGetSubGroupInfo -piKernelRelease -piKernelRetain -piKernelSetArg -piKernelSetExecInfo -piMemBufferCreate -piMemBufferPartition -piMemGetInfo -piMemImageCreate -piMemImageGetInfo -piMemRelease -piMemRetain -piPlatformGetInfo -piPlatformsGet -piPluginGetBackendOption -piPluginGetLastError -piPluginInit -piProgramBuild -piProgramCompile -piProgramCreate -piProgramCreateWithBinary -piProgramGetBuildInfo -piProgramGetInfo -piProgramLink -piProgramRelease -piProgramRetain -piQueueCreate -piQueueFinish -piQueueFlush -piQueueGetInfo -piQueueRelease -piQueueRetain -piSamplerCreate -piSamplerGetInfo -piSamplerRelease -piSamplerRetain -piTearDown -piextBindlessImageSamplerCreate -piextCommandBufferAdviseUSM -piextCommandBufferCreate -piextCommandBufferFillUSM -piextCommandBufferFinalize -piextCommandBufferMemBufferCopy -piextCommandBufferMemBufferCopyRect -piextCommandBufferMemBufferFill -piextCommandBufferMemBufferRead -piextCommandBufferMemBufferReadRect -piextCommandBufferMemBufferWrite -piextCommandBufferMemBufferWriteRect -piextCommandBufferMemcpyUSM -piextCommandBufferNDRangeKernel -piextCommandBufferPrefetchUSM -piextCommandBufferRelease -piextCommandBufferReleaseCommand -piextCommandBufferRetain -piextCommandBufferRetainCommand -piextCommandBufferUpdateKernelLaunch -piextContextCreateWithNativeHandle -piextContextGetNativeHandle -piextContextSetExtendedDeleter -piextReleaseExternalSemaphore -piextDeviceCreateWithNativeHandle -piextDeviceGetNativeHandle -piextDeviceSelectBinary -piextDisablePeerAccess -piextEnablePeerAccess -piextEnqueueCommandBuffer -piextEnqueueCooperativeKernelLaunch -piextEnqueueNativeCommand -piextEnqueueReadHostPipe -piextEnqueueWriteHostPipe -piextEventCreateWithNativeHandle -piextEventGetNativeHandle -piextGetDeviceFunctionPointer -piextGetGlobalVariablePointer -piextImportExternalMemory -piextImportExternalSemaphore -piextKernelCreateWithNativeHandle -piextKernelGetNativeHandle -piextKernelSetArgMemObj -piextKernelSetArgPointer -piextKernelSetArgSampler -piextKernelSuggestMaxCooperativeGroupCount -piextMemCreateWithNativeHandle -piextMemGetNativeHandle -piextMemImageAllocate -piextMemImageCopy -piextMemImageCreateWithNativeHandle -piextMemImageFree -piextMemImageGetInfo -piextMemMapExternalArray -piextMemMipmapFree -piextMemMipmapGetLevel -piextMemReleaseInterop -piextMemSampledImageCreate -piextMemSampledImageHandleDestroy -piextMemUnsampledImageCreate -piextMemUnsampledImageHandleDestroy -piextPeerAccessGetInfo -piextPhysicalMemCreate -piextPhysicalMemRelease -piextPhysicalMemRetain -piextPlatformCreateWithNativeHandle -piextPlatformGetNativeHandle -piextPluginGetOpaqueData -piextProgramCreateWithNativeHandle -piextProgramGetNativeHandle -piextProgramSetSpecializationConstant -piextQueueCreate -piextQueueCreateWithNativeHandle -piextQueueGetNativeHandle -piextSignalExternalSemaphore -piextUSMDeviceAlloc -piextUSMEnqueueFill -piextUSMEnqueueFill2D -piextUSMEnqueueMemAdvise -piextUSMEnqueueMemcpy -piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset2D -piextUSMEnqueuePrefetch -piextUSMFree -piextUSMGetMemAllocInfo -piextUSMHostAlloc -piextUSMImport -piextUSMPitchedAlloc -piextUSMRelease -piextUSMSharedAlloc -piextVirtualMemFree -piextVirtualMemGetInfo -piextVirtualMemGranularityGetInfo -piextVirtualMemMap -piextVirtualMemReserve -piextVirtualMemSetAccess -piextVirtualMemUnmap -piextWaitExternalSemaphore -piextEnqueueKernelLaunchCustom diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump deleted file mode 100644 index f465b50f578ec..0000000000000 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ /dev/null @@ -1,183 +0,0 @@ -################################################################################ -# This file is automatically generated by abi_check.py tool. -# DO NOT EDIT IT MANUALLY. Refer to sycl/doc/developer/ABIPolicyGuide.md for more info. -################################################################################ - -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_level_zero.so -# REQUIRES: linux -# UNSUPPORTED: libcxx - -piContextCreate -piContextGetInfo -piContextRelease -piContextRetain -piDeviceGetInfo -piDevicePartition -piDeviceRelease -piDeviceRetain -piDevicesGet -piEnqueueEventsWait -piEnqueueEventsWaitWithBarrier -piEnqueueKernelLaunch -piEnqueueMemBufferCopy -piEnqueueMemBufferCopyRect -piEnqueueMemBufferFill -piEnqueueMemBufferMap -piEnqueueMemBufferRead -piEnqueueMemBufferReadRect -piEnqueueMemBufferWrite -piEnqueueMemBufferWriteRect -piEnqueueMemImageCopy -piEnqueueMemImageFill -piEnqueueMemImageRead -piEnqueueMemImageWrite -piEnqueueMemUnmap -piEnqueueTimestampRecordingExp -piEventCreate -piEventGetInfo -piEventGetProfilingInfo -piEventRelease -piEventRetain -piEventSetCallback -piEventSetStatus -piEventsWait -piGetDeviceAndHostTimer -piKernelCreate -piKernelGetGroupInfo -piKernelGetInfo -piKernelGetSubGroupInfo -piKernelRelease -piKernelRetain -piKernelSetArg -piKernelSetExecInfo -piMemBufferCreate -piMemBufferPartition -piMemGetInfo -piMemImageCreate -piMemImageGetInfo -piMemRelease -piMemRetain -piPlatformGetInfo -piPlatformsGet -piPluginGetBackendOption -piPluginGetLastError -piPluginInit -piProgramBuild -piProgramCompile -piProgramCreate -piProgramCreateWithBinary -piProgramGetBuildInfo -piProgramGetInfo -piProgramLink -piProgramRelease -piProgramRetain -piQueueCreate -piQueueFinish -piQueueFlush -piQueueGetInfo -piQueueRelease -piQueueRetain -piSamplerCreate -piSamplerGetInfo -piSamplerRelease -piSamplerRetain -piTearDown -piextBindlessImageSamplerCreate -piextCommandBufferAdviseUSM -piextCommandBufferCreate -piextCommandBufferFillUSM -piextCommandBufferFinalize -piextCommandBufferMemBufferCopy -piextCommandBufferMemBufferCopyRect -piextCommandBufferMemBufferFill -piextCommandBufferMemBufferRead -piextCommandBufferMemBufferReadRect -piextCommandBufferMemBufferWrite -piextCommandBufferMemBufferWriteRect -piextCommandBufferMemcpyUSM -piextCommandBufferNDRangeKernel -piextCommandBufferPrefetchUSM -piextCommandBufferRelease -piextCommandBufferReleaseCommand -piextCommandBufferRetain -piextCommandBufferRetainCommand -piextCommandBufferUpdateKernelLaunch -piextContextCreateWithNativeHandle -piextContextGetNativeHandle -piextContextSetExtendedDeleter -piextDeviceCreateWithNativeHandle -piextDeviceGetNativeHandle -piextDeviceSelectBinary -piextDisablePeerAccess -piextEnablePeerAccess -piextEnqueueCommandBuffer -piextEnqueueCooperativeKernelLaunch -piextEnqueueKernelLaunchCustom -piextEnqueueNativeCommand -piextEnqueueReadHostPipe -piextEnqueueWriteHostPipe -piextEventCreateWithNativeHandle -piextEventGetNativeHandle -piextGetDeviceFunctionPointer -piextGetGlobalVariablePointer -piextImportExternalMemory -piextImportExternalSemaphore -piextKernelCreateWithNativeHandle -piextKernelGetNativeHandle -piextKernelSetArgMemObj -piextKernelSetArgPointer -piextKernelSetArgSampler -piextKernelSuggestMaxCooperativeGroupCount -piextMemCreateWithNativeHandle -piextMemGetNativeHandle -piextMemImageAllocate -piextMemImageCopy -piextMemImageCreateWithNativeHandle -piextMemImageFree -piextMemImageGetInfo -piextMemMapExternalArray -piextMemMipmapFree -piextMemMipmapGetLevel -piextMemReleaseInterop -piextMemSampledImageCreate -piextMemSampledImageHandleDestroy -piextMemUnsampledImageCreate -piextMemUnsampledImageHandleDestroy -piextPeerAccessGetInfo -piextPhysicalMemCreate -piextPhysicalMemRelease -piextPhysicalMemRetain -piextPlatformCreateWithNativeHandle -piextPlatformGetNativeHandle -piextPluginGetOpaqueData -piextProgramCreateWithNativeHandle -piextProgramGetNativeHandle -piextProgramSetSpecializationConstant -piextQueueCreate -piextQueueCreateWithNativeHandle -piextQueueGetNativeHandle -piextReleaseExternalSemaphore -piextSignalExternalSemaphore -piextUSMDeviceAlloc -piextUSMEnqueueFill -piextUSMEnqueueFill2D -piextUSMEnqueueMemAdvise -piextUSMEnqueueMemcpy -piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset2D -piextUSMEnqueuePrefetch -piextUSMFree -piextUSMGetMemAllocInfo -piextUSMHostAlloc -piextUSMImport -piextUSMPitchedAlloc -piextUSMRelease -piextUSMSharedAlloc -piextVirtualMemFree -piextVirtualMemGetInfo -piextVirtualMemGranularityGetInfo -piextVirtualMemMap -piextVirtualMemReserve -piextVirtualMemSetAccess -piextVirtualMemUnmap -piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump deleted file mode 100644 index d4248fe8eedc2..0000000000000 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ /dev/null @@ -1,184 +0,0 @@ -################################################################################ -# This file is automatically generated by abi_check.py tool. -# DO NOT EDIT IT MANUALLY. Refer to sycl/doc/developer/ABIPolicyGuide.md for more info. -################################################################################ - -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_native_cpu.so -# REQUIRES: linux -# REQUIRES: native_cpu -# UNSUPPORTED: libcxx - -piContextCreate -piContextGetInfo -piContextRelease -piContextRetain -piDeviceGetInfo -piDevicePartition -piDeviceRelease -piDeviceRetain -piDevicesGet -piEnqueueEventsWait -piEnqueueEventsWaitWithBarrier -piEnqueueKernelLaunch -piEnqueueMemBufferCopy -piEnqueueMemBufferCopyRect -piEnqueueMemBufferFill -piEnqueueMemBufferMap -piEnqueueMemBufferRead -piEnqueueMemBufferReadRect -piEnqueueMemBufferWrite -piEnqueueMemBufferWriteRect -piEnqueueMemImageCopy -piEnqueueMemImageFill -piEnqueueMemImageRead -piEnqueueMemImageWrite -piEnqueueMemUnmap -piEnqueueTimestampRecordingExp -piEventCreate -piEventGetInfo -piEventGetProfilingInfo -piEventRelease -piEventRetain -piEventSetCallback -piEventSetStatus -piEventsWait -piGetDeviceAndHostTimer -piKernelCreate -piKernelGetGroupInfo -piKernelGetInfo -piKernelGetSubGroupInfo -piKernelRelease -piKernelRetain -piKernelSetArg -piKernelSetExecInfo -piMemBufferCreate -piMemBufferPartition -piMemGetInfo -piMemImageCreate -piMemImageGetInfo -piMemRelease -piMemRetain -piPlatformGetInfo -piPlatformsGet -piPluginGetBackendOption -piPluginGetLastError -piPluginInit -piProgramBuild -piProgramCompile -piProgramCreate -piProgramCreateWithBinary -piProgramGetBuildInfo -piProgramGetInfo -piProgramLink -piProgramRelease -piProgramRetain -piQueueCreate -piQueueFinish -piQueueFlush -piQueueGetInfo -piQueueRelease -piQueueRetain -piSamplerCreate -piSamplerGetInfo -piSamplerRelease -piSamplerRetain -piTearDown -piextBindlessImageSamplerCreate -piextCommandBufferAdviseUSM -piextCommandBufferCreate -piextCommandBufferFillUSM -piextCommandBufferFinalize -piextCommandBufferMemBufferCopy -piextCommandBufferMemBufferCopyRect -piextCommandBufferMemBufferFill -piextCommandBufferMemBufferRead -piextCommandBufferMemBufferReadRect -piextCommandBufferMemBufferWrite -piextCommandBufferMemBufferWriteRect -piextCommandBufferMemcpyUSM -piextCommandBufferNDRangeKernel -piextCommandBufferPrefetchUSM -piextCommandBufferRelease -piextCommandBufferReleaseCommand -piextCommandBufferRetain -piextCommandBufferRetainCommand -piextCommandBufferUpdateKernelLaunch -piextContextCreateWithNativeHandle -piextContextGetNativeHandle -piextContextSetExtendedDeleter -piextReleaseExternalSemaphore -piextDeviceCreateWithNativeHandle -piextDeviceGetNativeHandle -piextDeviceSelectBinary -piextDisablePeerAccess -piextEnablePeerAccess -piextEnqueueCommandBuffer -piextEnqueueCooperativeKernelLaunch -piextEnqueueNativeCommand -piextEnqueueReadHostPipe -piextEnqueueWriteHostPipe -piextEventCreateWithNativeHandle -piextEventGetNativeHandle -piextGetDeviceFunctionPointer -piextGetGlobalVariablePointer -piextImportExternalMemory -piextImportExternalSemaphore -piextKernelCreateWithNativeHandle -piextKernelGetNativeHandle -piextKernelSetArgMemObj -piextKernelSetArgPointer -piextKernelSetArgSampler -piextKernelSuggestMaxCooperativeGroupCount -piextMemCreateWithNativeHandle -piextMemGetNativeHandle -piextMemImageAllocate -piextMemImageCopy -piextMemImageCreateWithNativeHandle -piextMemImageFree -piextMemImageGetInfo -piextMemMapExternalArray -piextMemMipmapFree -piextMemMipmapGetLevel -piextMemReleaseInterop -piextMemSampledImageCreate -piextMemSampledImageHandleDestroy -piextMemUnsampledImageCreate -piextMemUnsampledImageHandleDestroy -piextPeerAccessGetInfo -piextPhysicalMemCreate -piextPhysicalMemRelease -piextPhysicalMemRetain -piextPlatformCreateWithNativeHandle -piextPlatformGetNativeHandle -piextPluginGetOpaqueData -piextProgramCreateWithNativeHandle -piextProgramGetNativeHandle -piextProgramSetSpecializationConstant -piextQueueCreate -piextQueueCreateWithNativeHandle -piextQueueGetNativeHandle -piextSignalExternalSemaphore -piextUSMDeviceAlloc -piextUSMEnqueueFill -piextUSMEnqueueFill2D -piextUSMEnqueueMemAdvise -piextUSMEnqueueMemcpy -piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset2D -piextUSMEnqueuePrefetch -piextUSMFree -piextUSMGetMemAllocInfo -piextUSMHostAlloc -piextUSMImport -piextUSMPitchedAlloc -piextUSMRelease -piextUSMSharedAlloc -piextVirtualMemFree -piextVirtualMemGetInfo -piextVirtualMemGranularityGetInfo -piextVirtualMemMap -piextVirtualMemReserve -piextVirtualMemSetAccess -piextVirtualMemUnmap -piextWaitExternalSemaphore -piextEnqueueKernelLaunchCustom diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump deleted file mode 100644 index 3d2f6269d9a7a..0000000000000 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ /dev/null @@ -1,183 +0,0 @@ -################################################################################ -# This file is automatically generated by abi_check.py tool. -# DO NOT EDIT IT MANUALLY. Refer to sycl/doc/developer/ABIPolicyGuide.md for more info. -################################################################################ - -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_opencl.so -# REQUIRES: linux -# UNSUPPORTED: libcxx - -piContextCreate -piContextGetInfo -piContextRelease -piContextRetain -piDeviceGetInfo -piDevicePartition -piDeviceRelease -piDeviceRetain -piDevicesGet -piEnqueueEventsWait -piEnqueueEventsWaitWithBarrier -piEnqueueKernelLaunch -piEnqueueMemBufferCopy -piEnqueueMemBufferCopyRect -piEnqueueMemBufferFill -piEnqueueMemBufferMap -piEnqueueMemBufferRead -piEnqueueMemBufferReadRect -piEnqueueMemBufferWrite -piEnqueueMemBufferWriteRect -piEnqueueMemImageCopy -piEnqueueMemImageFill -piEnqueueMemImageRead -piEnqueueMemImageWrite -piEnqueueMemUnmap -piEnqueueTimestampRecordingExp -piEventCreate -piEventGetInfo -piEventGetProfilingInfo -piEventRelease -piEventRetain -piEventSetCallback -piEventSetStatus -piEventsWait -piGetDeviceAndHostTimer -piKernelCreate -piKernelGetGroupInfo -piKernelGetInfo -piKernelGetSubGroupInfo -piKernelRelease -piKernelRetain -piKernelSetArg -piKernelSetExecInfo -piMemBufferCreate -piMemBufferPartition -piMemGetInfo -piMemImageCreate -piMemImageGetInfo -piMemRelease -piMemRetain -piPlatformGetInfo -piPlatformsGet -piPluginGetBackendOption -piPluginGetLastError -piPluginInit -piProgramBuild -piProgramCompile -piProgramCreate -piProgramCreateWithBinary -piProgramGetBuildInfo -piProgramGetInfo -piProgramLink -piProgramRelease -piProgramRetain -piQueueCreate -piQueueFinish -piQueueFlush -piQueueGetInfo -piQueueRelease -piQueueRetain -piSamplerCreate -piSamplerGetInfo -piSamplerRelease -piSamplerRetain -piTearDown -piextBindlessImageSamplerCreate -piextCommandBufferAdviseUSM -piextCommandBufferCreate -piextCommandBufferFillUSM -piextCommandBufferFinalize -piextCommandBufferMemBufferCopy -piextCommandBufferMemBufferCopyRect -piextCommandBufferMemBufferFill -piextCommandBufferMemBufferRead -piextCommandBufferMemBufferReadRect -piextCommandBufferMemBufferWrite -piextCommandBufferMemBufferWriteRect -piextCommandBufferMemcpyUSM -piextCommandBufferNDRangeKernel -piextCommandBufferPrefetchUSM -piextCommandBufferRelease -piextCommandBufferReleaseCommand -piextCommandBufferRetain -piextCommandBufferRetainCommand -piextCommandBufferUpdateKernelLaunch -piextContextCreateWithNativeHandle -piextContextGetNativeHandle -piextContextSetExtendedDeleter -piextDeviceCreateWithNativeHandle -piextDeviceGetNativeHandle -piextDeviceSelectBinary -piextDisablePeerAccess -piextEnablePeerAccess -piextEnqueueCommandBuffer -piextEnqueueCooperativeKernelLaunch -piextEnqueueKernelLaunchCustom -piextEnqueueNativeCommand -piextEnqueueReadHostPipe -piextEnqueueWriteHostPipe -piextEventCreateWithNativeHandle -piextEventGetNativeHandle -piextGetDeviceFunctionPointer -piextGetGlobalVariablePointer -piextImportExternalMemory -piextImportExternalSemaphore -piextKernelCreateWithNativeHandle -piextKernelGetNativeHandle -piextKernelSetArgMemObj -piextKernelSetArgPointer -piextKernelSetArgSampler -piextKernelSuggestMaxCooperativeGroupCount -piextMemCreateWithNativeHandle -piextMemGetNativeHandle -piextMemImageAllocate -piextMemImageCopy -piextMemImageCreateWithNativeHandle -piextMemImageFree -piextMemImageGetInfo -piextMemMapExternalArray -piextMemMipmapFree -piextMemMipmapGetLevel -piextMemReleaseInterop -piextMemSampledImageCreate -piextMemSampledImageHandleDestroy -piextMemUnsampledImageCreate -piextMemUnsampledImageHandleDestroy -piextPeerAccessGetInfo -piextPhysicalMemCreate -piextPhysicalMemRelease -piextPhysicalMemRetain -piextPlatformCreateWithNativeHandle -piextPlatformGetNativeHandle -piextPluginGetOpaqueData -piextProgramCreateWithNativeHandle -piextProgramGetNativeHandle -piextProgramSetSpecializationConstant -piextQueueCreate -piextQueueCreateWithNativeHandle -piextQueueGetNativeHandle -piextReleaseExternalSemaphore -piextSignalExternalSemaphore -piextUSMDeviceAlloc -piextUSMEnqueueFill -piextUSMEnqueueFill2D -piextUSMEnqueueMemAdvise -piextUSMEnqueueMemcpy -piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset2D -piextUSMEnqueuePrefetch -piextUSMFree -piextUSMGetMemAllocInfo -piextUSMHostAlloc -piextUSMImport -piextUSMPitchedAlloc -piextUSMRelease -piextUSMSharedAlloc -piextVirtualMemFree -piextVirtualMemGetInfo -piextVirtualMemGranularityGetInfo -piextVirtualMemMap -piextVirtualMemReserve -piextVirtualMemSetAccess -piextVirtualMemUnmap -piextWaitExternalSemaphore diff --git a/sycl/test/include_deps/sycl_accessor.hpp.cpp b/sycl/test/include_deps/sycl_accessor.hpp.cpp index 85b2543205279..d4996b308edff 100644 --- a/sycl/test/include_deps/sycl_accessor.hpp.cpp +++ b/sycl/test/include_deps/sycl_accessor.hpp.cpp @@ -21,13 +21,8 @@ // CHECK-NEXT: info/aspects_deprecated.def // CHECK-NEXT: atomic.hpp // CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: detail/pi.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: detail/pi.h -// CHECK-NEXT: detail/pi_error.def -// CHECK-NEXT: detail/pi.def // CHECK-NEXT: memory_enums.hpp +// CHECK-NEXT: ur_api.h // CHECK-NEXT: CL/__spirv/spirv_vars.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: detail/type_traits.hpp @@ -61,6 +56,7 @@ // CHECK-NEXT: detail/boost/mp11/detail/mp_with_index.hpp // CHECK-NEXT: detail/boost/mp11/integer_sequence.hpp // CHECK-NEXT: buffer.hpp +// CHECK-NEXT: backend_types.hpp // CHECK-NEXT: detail/array.hpp // CHECK-NEXT: exception.hpp // CHECK-NEXT: detail/cl.h @@ -78,6 +74,7 @@ // CHECK-NEXT: detail/stl_type_traits.hpp // CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp // CHECK-NEXT: detail/aligned_allocator.hpp +// CHECK-NEXT: detail/os_util.hpp // CHECK-NEXT: ext/oneapi/accessor_property_list.hpp // CHECK-NEXT: detail/property_list_base.hpp // CHECK-NEXT: property_list.hpp diff --git a/sycl/test/include_deps/sycl_buffer.hpp.cpp b/sycl/test/include_deps/sycl_buffer.hpp.cpp index ad5a5cafbf22c..79de82af4cce8 100644 --- a/sycl/test/include_deps/sycl_buffer.hpp.cpp +++ b/sycl/test/include_deps/sycl_buffer.hpp.cpp @@ -19,14 +19,10 @@ // CHECK-NEXT: CL/cl_version.h // CHECK-NEXT: CL/cl_platform.h // CHECK-NEXT: CL/cl_ext.h -// CHECK-NEXT: detail/pi.h -// CHECK-NEXT: detail/pi_error.def -// CHECK-NEXT: detail/pi.def // CHECK-NEXT: detail/string.hpp +// CHECK-NEXT: ur_api.h // CHECK-NEXT: detail/common.hpp // CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: detail/pi.hpp -// CHECK-NEXT: detail/os_util.hpp // CHECK-NEXT: memory_enums.hpp // CHECK-NEXT: CL/__spirv/spirv_vars.hpp // CHECK-NEXT: detail/iostream_proxy.hpp @@ -38,6 +34,7 @@ // CHECK-NEXT: detail/stl_type_traits.hpp // CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp // CHECK-NEXT: detail/aligned_allocator.hpp +// CHECK-NEXT: detail/os_util.hpp // CHECK-NEXT: ext/oneapi/accessor_property_list.hpp // CHECK-NEXT: detail/property_list_base.hpp // CHECK-NEXT: property_list.hpp diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 2ec31465dcc7c..2fe9c3a7b5d92 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -22,13 +22,8 @@ // CHECK-NEXT: info/aspects_deprecated.def // CHECK-NEXT: atomic.hpp // CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: detail/pi.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: detail/pi.h -// CHECK-NEXT: detail/pi_error.def -// CHECK-NEXT: detail/pi.def // CHECK-NEXT: memory_enums.hpp +// CHECK-NEXT: ur_api.h // CHECK-NEXT: CL/__spirv/spirv_vars.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: detail/type_traits.hpp @@ -62,6 +57,7 @@ // CHECK-NEXT: detail/boost/mp11/detail/mp_with_index.hpp // CHECK-NEXT: detail/boost/mp11/integer_sequence.hpp // CHECK-NEXT: buffer.hpp +// CHECK-NEXT: backend_types.hpp // CHECK-NEXT: detail/array.hpp // CHECK-NEXT: exception.hpp // CHECK-NEXT: detail/cl.h @@ -79,6 +75,7 @@ // CHECK-NEXT: detail/stl_type_traits.hpp // CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp // CHECK-NEXT: detail/aligned_allocator.hpp +// CHECK-NEXT: detail/os_util.hpp // CHECK-NEXT: ext/oneapi/accessor_property_list.hpp // CHECK-NEXT: detail/property_list_base.hpp // CHECK-NEXT: property_list.hpp @@ -146,6 +143,7 @@ // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: handler.hpp // CHECK-NEXT: detail/reduction_forward.hpp +// CHECK-NEXT: detail/ur.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp // CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp diff --git a/sycl/test/native_cpu/atomic-base.cpp b/sycl/test/native_cpu/atomic-base.cpp index 9ffc98201da68..ee84a90c8a89c 100644 --- a/sycl/test/native_cpu/atomic-base.cpp +++ b/sycl/test/native_cpu/atomic-base.cpp @@ -3,6 +3,7 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t + #include #include diff --git a/sycl/test/native_cpu/call_host_func.cpp b/sycl/test/native_cpu/call_host_func.cpp index a348c4694b53e..a84f2fc95f5d2 100644 --- a/sycl/test/native_cpu/call_host_func.cpp +++ b/sycl/test/native_cpu/call_host_func.cpp @@ -4,6 +4,7 @@ // This test is needed since we need to make sure that there no // "multiple definitions" linker errors when a function appears // both in the host and in the device module. + #include void increase(int *data, sycl::id<1> id, int val) { data[id] = data[id] + val; } diff --git a/sycl/test/native_cpu/check-pi-output.cpp b/sycl/test/native_cpu/check-pi-output.cpp index d69b6994d7ec5..51fee9e05998f 100644 --- a/sycl/test/native_cpu/check-pi-output.cpp +++ b/sycl/test/native_cpu/check-pi-output.cpp @@ -1,6 +1,6 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t -// RUN: env SYCL_PI_TRACE=1 ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t | FileCheck %s +// RUN: env SYCL_UR_TRACE=1 ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t | FileCheck %s #include @@ -46,5 +46,5 @@ int main() { return 0; } -//CHECK: SYCL_PI_TRACE[all]: platform: SYCL_NATIVE_CPU -//CHECK: SYCL_PI_TRACE[all]: device: SYCL Native CPU +//CHECK: SYCL_UR_TRACE: platform: SYCL_NATIVE_CPU +//CHECK: SYCL_UR_TRACE: device: SYCL Native CPU diff --git a/sycl/test/native_cpu/example-sycl-application.cpp b/sycl/test/native_cpu/example-sycl-application.cpp index 364f308dbad15..c4a26d427330f 100644 --- a/sycl/test/native_cpu/example-sycl-application.cpp +++ b/sycl/test/native_cpu/example-sycl-application.cpp @@ -2,6 +2,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t + /*************************************************************************** * * Copyright (C) 2016 Codeplay Software Limited diff --git a/sycl/test/native_cpu/local_basic.cpp b/sycl/test/native_cpu/local_basic.cpp index 8a34695d151c4..d702789bbc47c 100644 --- a/sycl/test/native_cpu/local_basic.cpp +++ b/sycl/test/native_cpu/local_basic.cpp @@ -1,6 +1,7 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t + #include using namespace sycl; diff --git a/sycl/test/native_cpu/multiple_tu.cpp b/sycl/test/native_cpu/multiple_tu.cpp index d10e1fd97fb0d..b4d5338dc0f4a 100644 --- a/sycl/test/native_cpu/multiple_tu.cpp +++ b/sycl/test/native_cpu/multiple_tu.cpp @@ -11,6 +11,7 @@ //RUN: %clangxx -fsycl -fsycl-targets=native_cpu -g %S/Inputs/plusone.cpp -c -o %t_plusone-debug.o //RUN: %clangxx -fsycl -fsycl-targets=native_cpu -g %t_plusone-debug.o %t_main-debug.o %t_init-debug.o -o %t-debug //RUN: env ONEAPI_DEVICE_SELECTOR=native_cpu:cpu %t-debug + #include "Inputs/common.h" #include diff --git a/sycl/test/native_cpu/scalar_args.cpp b/sycl/test/native_cpu/scalar_args.cpp index 0a326ff9c0ae7..93d3fe1fcb6fa 100644 --- a/sycl/test/native_cpu/scalar_args.cpp +++ b/sycl/test/native_cpu/scalar_args.cpp @@ -1,6 +1,7 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t + #include #include diff --git a/sycl/test/native_cpu/unnamed.cpp b/sycl/test/native_cpu/unnamed.cpp index 31a8aa845ecc7..895b5a0201d9d 100644 --- a/sycl/test/native_cpu/unnamed.cpp +++ b/sycl/test/native_cpu/unnamed.cpp @@ -1,6 +1,7 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR=native_cpu:cpu %t + #include #include diff --git a/sycl/test/native_cpu/unused-regression.cpp b/sycl/test/native_cpu/unused-regression.cpp index 5275aaa41384c..dcb22f3c804db 100644 --- a/sycl/test/native_cpu/unused-regression.cpp +++ b/sycl/test/native_cpu/unused-regression.cpp @@ -1,6 +1,7 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR=native_cpu:cpu %t + #include #include diff --git a/sycl/test/native_cpu/user-defined-private-type.cpp b/sycl/test/native_cpu/user-defined-private-type.cpp index deb9833728785..7db1c7e6e2165 100644 --- a/sycl/test/native_cpu/user-defined-private-type.cpp +++ b/sycl/test/native_cpu/user-defined-private-type.cpp @@ -1,6 +1,7 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR=native_cpu:cpu %t + #include #include diff --git a/sycl/test/native_cpu/user-defined-type.cpp b/sycl/test/native_cpu/user-defined-type.cpp index 40172927e83df..3a72073d1019b 100644 --- a/sycl/test/native_cpu/user-defined-type.cpp +++ b/sycl/test/native_cpu/user-defined-type.cpp @@ -1,6 +1,7 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR=native_cpu:cpu %t + #include #include diff --git a/sycl/test/native_cpu/usm_basic.cpp b/sycl/test/native_cpu/usm_basic.cpp index fc3b6b1540bdf..4d5d4585a205c 100644 --- a/sycl/test/native_cpu/usm_basic.cpp +++ b/sycl/test/native_cpu/usm_basic.cpp @@ -1,6 +1,7 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR=native_cpu:cpu %t + #include #include diff --git a/sycl/tools/abi_check.py b/sycl/tools/abi_check.py index 35613b407c557..30ab028fcde49 100644 --- a/sycl/tools/abi_check.py +++ b/sycl/tools/abi_check.py @@ -65,10 +65,10 @@ def parse_readobj_output(output): # Some of them happen in the SYCL RT library and we think clang-cl's behavior is more reasonable. # # Case 1: - # pi.hpp: + # ur.hpp: # template __SYCL_EXPORT const PluginPtr &getPlugin(); # - # pi.cpp: + # ur.cpp: # template const PluginPtr &getPlugin() { # static const plugin *Plugin = nullptr; # ... diff --git a/sycl/tools/sycl-ls/CMakeLists.txt b/sycl/tools/sycl-ls/CMakeLists.txt index 2cb5a8c01ec84..9d7db02efc1b7 100644 --- a/sycl/tools/sycl-ls/CMakeLists.txt +++ b/sycl/tools/sycl-ls/CMakeLists.txt @@ -15,6 +15,7 @@ target_link_libraries(sycl-ls PRIVATE ${sycl_lib} OpenCL-Headers + UnifiedRuntime-Headers ) if (WIN32) # 0x900: Search for the dependency DLLs only in the System32 directory and in the directory with sycl-ls.exe diff --git a/sycl/tools/sycl-prof/collector.cpp b/sycl/tools/sycl-prof/collector.cpp index 60f53ef39d3da..35d1111a86cb7 100644 --- a/sycl/tools/sycl-prof/collector.cpp +++ b/sycl/tools/sycl-prof/collector.cpp @@ -47,6 +47,11 @@ XPTI_CALLBACK_API void apiBeginEndCallback(uint16_t TraceType, xpti::trace_event_data_t *, uint64_t /*Instance*/, const void *UserData); +XPTI_CALLBACK_API void urBeginEndCallback(uint16_t TraceType, + xpti::trace_event_data_t *, + xpti::trace_event_data_t *, + uint64_t /*Instance*/, + const void *UserData); XPTI_CALLBACK_API void taskBeginEndCallback(uint16_t TraceType, xpti::trace_event_data_t *, xpti::trace_event_data_t *, @@ -72,12 +77,12 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/, } std::string_view NameView{StreamName}; - if (NameView == "sycl.pi") { + if (NameView == "ur") { uint8_t StreamID = xptiRegisterStream(StreamName); - xptiRegisterCallback(StreamID, xpti::trace_function_begin, - apiBeginEndCallback); - xptiRegisterCallback(StreamID, xpti::trace_function_end, - apiBeginEndCallback); + xptiRegisterCallback(StreamID, xpti::trace_function_with_args_begin, + urBeginEndCallback); + xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end, + urBeginEndCallback); } else if (NameView == "sycl") { uint8_t StreamID = xptiRegisterStream(StreamName); xptiRegisterCallback(StreamID, xpti::trace_task_begin, @@ -121,6 +126,21 @@ XPTI_CALLBACK_API void apiBeginEndCallback(uint16_t TraceType, } } +XPTI_CALLBACK_API void urBeginEndCallback(uint16_t TraceType, + xpti::trace_event_data_t *, + xpti::trace_event_data_t *, + uint64_t /*Instance*/, + const void *UserData) { + auto [TID, PID, TS] = measure(); + auto *Name = + static_cast(UserData)->function_name; + if (TraceType == xpti::trace_function_with_args_begin) { + GWriter->writeBegin(Name, "API", PID, TID, TS); + } else { + GWriter->writeEnd(Name, "API", PID, TID, TS); + } +} + XPTI_CALLBACK_API void taskBeginEndCallback(uint16_t TraceType, xpti::trace_event_data_t *, xpti::trace_event_data_t *Event, diff --git a/sycl/tools/sycl-prof/main.cpp b/sycl/tools/sycl-prof/main.cpp index e1474b2fc61a4..ac511676332f6 100644 --- a/sycl/tools/sycl-prof/main.cpp +++ b/sycl/tools/sycl-prof/main.cpp @@ -46,6 +46,7 @@ int main(int argc, char **argv, char *env[]) { NewEnv.push_back("XPTI_SUBSCRIBERS=libsycl_profiler_collector.so"); NewEnv.push_back("XPTI_TRACE_ENABLE=1"); NewEnv.push_back("ZE_ENABLE_TRACING_LAYER=1"); + NewEnv.push_back("UR_ENABLE_LAYERS=UR_LAYER_TRACING"); std::vector Args; diff --git a/sycl/tools/sycl-sanitize/CMakeLists.txt b/sycl/tools/sycl-sanitize/CMakeLists.txt index 16e7a1a49fe70..280b4760e302f 100644 --- a/sycl/tools/sycl-sanitize/CMakeLists.txt +++ b/sycl/tools/sycl-sanitize/CMakeLists.txt @@ -13,6 +13,7 @@ target_compile_options(sycl-sanitize PRIVATE -fno-exceptions -fno-rtti) add_library(sycl_sanitizer_collector SHARED collector.cpp) target_compile_definitions(sycl_sanitizer_collector PRIVATE XPTI_CALLBACK_API_EXPORTS) target_link_libraries(sycl_sanitizer_collector PRIVATE xptifw) +target_link_libraries(sycl_sanitizer_collector PRIVATE UnifiedRuntime-Headers) if (TARGET OpenCL-Headers) target_link_libraries(sycl_sanitizer_collector PRIVATE OpenCL-Headers) endif() diff --git a/sycl/tools/sycl-sanitize/collector.cpp b/sycl/tools/sycl-sanitize/collector.cpp index 30cf8e99e38cd..7266a3e24fa9a 100644 --- a/sycl/tools/sycl-sanitize/collector.cpp +++ b/sycl/tools/sycl-sanitize/collector.cpp @@ -12,11 +12,8 @@ #include "xpti/xpti_trace_framework.h" -#include "pi_arguments_handler.hpp" #include "usm_analyzer.hpp" -#include - #include #include #include @@ -35,7 +32,7 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/, unsigned int /*minor_version*/, const char * /*version_str*/, const char *StreamName) { - if (std::string_view(StreamName) == "sycl.pi.debug") { + if (std::string_view(StreamName) == "ur") { uint8_t StreamID = xptiRegisterStream(StreamName); xptiRegisterCallback(StreamID, xpti::trace_function_with_args_begin, tpCallback); @@ -44,12 +41,11 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/, auto &GS = USMAnalyzer::getInstance(); GS.changeTerminationOnErrorState(true); GS.printToErrorStream(); - GS.setupUSMHandlers(); } } XPTI_CALLBACK_API void xptiTraceFinish(const char *StreamName) { - if (std::string_view(StreamName) == "sycl.pi.debug") { + if (std::string_view(StreamName) == "ur") { bool hadLeak = false; auto &GS = USMAnalyzer::getInstance(); if (GS.ActivePointers.size() > 0) { @@ -80,13 +76,9 @@ XPTI_CALLBACK_API void tpCallback(uint16_t TraceType, std::lock_guard Lock(IOMutex); const auto *Data = static_cast(UserData); - const auto *Plugin = static_cast(Data->user_data); if (TraceType == xpti::trace_function_with_args_begin) { - GS.ArgHandlerPreCall.handle(Data->function_id, *Plugin, std::nullopt, - Data->args_data); + GS.handlePreCall(Data); } else if (TraceType == xpti::trace_function_with_args_end) { - const pi_result Result = *static_cast(Data->ret_data); - GS.ArgHandlerPostCall.handle(Data->function_id, *Plugin, Result, - Data->args_data); + GS.handlePostCall(Data); } } diff --git a/sycl/tools/sycl-sanitize/main.cpp b/sycl/tools/sycl-sanitize/main.cpp index 3a89d8132f232..19e53f4c869e9 100644 --- a/sycl/tools/sycl-sanitize/main.cpp +++ b/sycl/tools/sycl-sanitize/main.cpp @@ -33,6 +33,7 @@ int main(int argc, char **argv, char *env[]) { NewEnv.push_back("XPTI_FRAMEWORK_DISPATCHER=libxptifw.so"); NewEnv.push_back("XPTI_SUBSCRIBERS=libsycl_sanitizer_collector.so"); NewEnv.push_back("XPTI_TRACE_ENABLE=1"); + NewEnv.push_back("UR_ENABLE_LAYERS=UR_LAYER_TRACING"); std::vector Args; diff --git a/sycl/tools/sycl-trace/CMakeLists.txt b/sycl/tools/sycl-trace/CMakeLists.txt index c5302cc09049b..3e6155b19517a 100644 --- a/sycl/tools/sycl-trace/CMakeLists.txt +++ b/sycl/tools/sycl-trace/CMakeLists.txt @@ -31,35 +31,17 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) add_dependencies(sycl-trace cuda_trace_collector) endif() -add_library(sycl_pi_trace_collector SHARED +add_library(sycl_ur_trace_collector SHARED collector.cpp - pi_trace_collector.cpp sycl_trace_collector.cpp + ur_trace_collector.cpp verification_collector.cpp ) find_package(Python3 REQUIRED) -add_custom_target(pi-pretty-printers - DEPENDS - ${CMAKE_CURRENT_BINARY_DIR}/pi_printers.def - ${CMAKE_CURRENT_BINARY_DIR}/pi_structs.hpp - ) - -add_custom_command( - OUTPUT - ${CMAKE_CURRENT_BINARY_DIR}/pi_printers.def - ${CMAKE_CURRENT_BINARY_DIR}/pi_structs.hpp - COMMAND ${Python3_EXECUTABLE} - ${CMAKE_CURRENT_SOURCE_DIR}/generate_pi_pretty_printers.py - ${sycl_inc_dir}/sycl/detail/pi.h - DEPENDS ${sycl_inc_dir}/sycl/detail/pi.h - ) - # To get L0 loader if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) - add_dependencies(ze_trace_collector pi_level_zero) - target_link_libraries(ze_trace_collector PRIVATE LevelZeroLoader-Headers) target_compile_definitions(ze_trace_collector PRIVATE SYCL_HAS_LEVEL_ZERO) target_link_libraries(ze_trace_collector PRIVATE xptifw) @@ -74,37 +56,43 @@ if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) ${CMAKE_CURRENT_SOURCE_DIR}/generate_ze_pretty_printers.py ${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h DEPENDS - pi_level_zero ${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h ) - add_dependencies(ze_trace_collector ze-pretty-printers) - target_compile_definitions(sycl_pi_trace_collector PRIVATE SYCL_HAS_LEVEL_ZERO) + add_custom_target(ze-api DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ze_api.def) + add_custom_command( + OUTPUT + ${CMAKE_CURRENT_BINARY_DIR}/ze_api.def + COMMAND ${Python3_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/ze_api_generator.py + ${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h + DEPENDS + ${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h + ) + + add_dependencies(ze_trace_collector ze-api ze-pretty-printers) + target_compile_definitions(sycl_ur_trace_collector PRIVATE SYCL_HAS_LEVEL_ZERO) endif() -target_compile_definitions(sycl_pi_trace_collector PRIVATE XPTI_CALLBACK_API_EXPORTS) -target_link_libraries(sycl_pi_trace_collector PRIVATE xptifw) +target_compile_definitions(sycl_ur_trace_collector PRIVATE XPTI_CALLBACK_API_EXPORTS) +target_link_libraries(sycl_ur_trace_collector PRIVATE xptifw) if (TARGET OpenCL-Headers) - target_link_libraries(sycl_pi_trace_collector PRIVATE OpenCL-Headers) + target_link_libraries(sycl_ur_trace_collector PRIVATE OpenCL-Headers) endif() +target_link_libraries(sycl_ur_trace_collector PRIVATE UnifiedRuntime-Headers) -target_include_directories(sycl_pi_trace_collector PRIVATE +target_include_directories(sycl_ur_trace_collector PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../xpti_helpers/" "${sycl_inc_dir}" "${sycl_src_dir}" "${CMAKE_CURRENT_BINARY_DIR}" ) -add_dependencies(sycl_pi_trace_collector pi-pretty-printers) - if(SYCL_BUILD_BACKEND_CUDA) find_package(CUDA 10.1 REQUIRED) - target_compile_definitions(cuda_trace_collector - PRIVATE - $<$:USE_PI_CUDA> - ) + target_compile_definitions(cuda_trace_collector PRIVATE USE_UR_CUDA) # The following two if's can be removed when FindCUDA -> FindCUDAToolkit. # CUDA_CUPTI_INCLUDE_DIR -> CUDAToolkit_CUPTI_INCLUDE_DIR @@ -143,7 +131,6 @@ if(SYCL_BUILD_BACKEND_CUDA) ${CMAKE_CURRENT_SOURCE_DIR}/generate_cuda_pretty_printers.py ${GEN_CUDA_META_H_DIR}/generated_cuda_meta.h DEPENDS - pi_cuda ${GEN_CUDA_META_H_DIR}/generated_cuda_meta.h ) else() @@ -155,17 +142,14 @@ if(SYCL_BUILD_BACKEND_CUDA) add_dependencies(cuda_trace_collector cuda-pretty-printers) target_link_libraries(cuda_trace_collector PRIVATE xptifw) - target_compile_definitions(sycl_pi_trace_collector - PRIVATE - $<$:USE_PI_CUDA> - ) + target_compile_definitions(sycl_ur_trace_collector PRIVATE USE_UR_CUDA) endif() -add_dependencies(sycl-trace sycl_pi_trace_collector) +add_dependencies(sycl-trace sycl_ur_trace_collector) add_dependencies(sycl-toolchain sycl-trace) include(GNUInstallDirs) -install(TARGETS sycl-trace sycl_pi_trace_collector ${EXTRA_TARGETS_TO_INSTALL} +install(TARGETS sycl-trace sycl_ur_trace_collector ${EXTRA_TARGETS_TO_INSTALL} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT sycl-trace LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT sycl-trace ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT sycl-trace diff --git a/sycl/tools/sycl-trace/collector.cpp b/sycl/tools/sycl-trace/collector.cpp index 36e45a05d9363..edf6ce9ccff3f 100644 --- a/sycl/tools/sycl-trace/collector.cpp +++ b/sycl/tools/sycl-trace/collector.cpp @@ -140,7 +140,7 @@ XPTI_CALLBACK_API void zeCallback(uint16_t TraceType, return zeCollectorLibrary.callback(TraceType, Parent, Event, Instance, UserData); } -#ifdef USE_PI_CUDA +#ifdef USE_UR_CUDA XPTI_CALLBACK_API void cudaCallback(uint16_t TraceType, xpti::trace_event_data_t *Parent, xpti::trace_event_data_t *Event, @@ -155,10 +155,12 @@ void piPrintersInit(); void piPrintersFinish(); void syclPrintersInit(); void syclPrintersFinish(); +void urPrintersInit(); +void urPrintersFinish(); void vPrintersInit(); void vPrintersFinish(); -XPTI_CALLBACK_API void piCallback(uint16_t TraceType, +XPTI_CALLBACK_API void urCallback(uint16_t TraceType, xpti::trace_event_data_t *Parent, xpti::trace_event_data_t *Event, uint64_t Instance, const void *UserData); @@ -175,14 +177,14 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/, unsigned int /*minor_version*/, const char * /*version_str*/, const char *StreamName) { - if (std::string_view(StreamName) == "sycl.pi.debug" && - std::getenv("SYCL_TRACE_PI_ENABLE")) { - piPrintersInit(); + if (std::string_view(StreamName) == "ur" && + std::getenv("SYCL_TRACE_UR_ENABLE")) { + urPrintersInit(); uint16_t StreamID = xptiRegisterStream(StreamName); xptiRegisterCallback(StreamID, xpti::trace_function_with_args_begin, - piCallback); + urCallback); xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end, - piCallback); + urCallback); zeCollectorLibrary.setIndentationLevel(1); cudaCollectorLibrary.setIndentationLevel(1); #ifdef SYCL_HAS_LEVEL_ZERO @@ -198,7 +200,7 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/, zeCallback); } #endif -#ifdef USE_PI_CUDA +#ifdef USE_UR_CUDA } else if (std::string_view(StreamName) == "sycl.experimental.cuda.debug" && std::getenv("SYCL_TRACE_CU_ENABLE")) { if (cudaCollectorLibrary.initPrinters()) { @@ -231,9 +233,9 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/, } XPTI_CALLBACK_API void xptiTraceFinish(const char *StreamName) { - if (std::string_view(StreamName) == "sycl.pi.debug" && - std::getenv("SYCL_TRACE_PI_ENABLE")) - piPrintersFinish(); + if (std::string_view(StreamName) == "ur" && + std::getenv("SYCL_TRACE_UR_ENABLE")) + urPrintersFinish(); #ifdef SYCL_HAS_LEVEL_ZERO else if (std::string_view(StreamName) == "sycl.experimental.level_zero.debug" && @@ -242,7 +244,7 @@ XPTI_CALLBACK_API void xptiTraceFinish(const char *StreamName) { zeCollectorLibrary.clear(); } #endif -#ifdef USE_PI_CUDA +#ifdef USE_UR_CUDA else if (std::string_view(StreamName) == "sycl.experimental.cuda.debug" && std::getenv("SYCL_TRACE_CU_ENABLE")) { cudaCollectorLibrary.finishPrinters(); diff --git a/sycl/tools/sycl-trace/generate_pi_pretty_printers.py b/sycl/tools/sycl-trace/generate_pi_pretty_printers.py deleted file mode 100644 index 70d5fd31fa7ff..0000000000000 --- a/sycl/tools/sycl-trace/generate_pi_pretty_printers.py +++ /dev/null @@ -1,72 +0,0 @@ -import re -import sys - - -def generate_pi_pretty_printers(header): - hdr = open("pi_structs.hpp", "w") - hdr.write( - "//===-------------- pi_structs.hpp - PI Trace Structs ----------------------==//\n" - ) - hdr.write("//\n") - hdr.write( - "// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.\n" - ) - hdr.write("// See https://llvm.org/LICENSE.txt for license information.\n") - hdr.write("// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n") - hdr.write("//\n") - hdr.write( - "//===----------------------------------------------------------------------===//\n" - ) - hdr.write("// clang-format off\n") - hdr.write("// This file is auto-generated! Do not modify!\n") - hdr.write("#pragma once\n") - printers = open("pi_printers.def", "w") - - matches = re.finditer(r"(pi[a-zA-Z]+)\(\n?\r?([\sa-zA-Z_,\*,=0-9]+)\);", header) - - for match in matches: - api_name = str(match.group(1)) - - if api_name == "piPluginInit": - continue - - all_args = match.group(2).replace("\n", "").split(",") - - hdr.write("struct __attribute__((packed)) " + api_name + "_args {\n") - - for arg in all_args: - hdr.write(arg.strip() + ";\n") - - hdr.write("};\n") - - arg_names = [] - - for arg in all_args: - name = arg.split("=")[0].strip().split(" ")[-1].replace("*", "") - arg_names.append(name) - - printers.write( - "case static_cast(sycl::detail::PiApiKind::{}): {{\n".format( - api_name - ) - ) - printers.write( - "const auto *Args = reinterpret_cast<{}_args*>(Data->args_data);\n".format( - api_name - ) - ) - for name in arg_names: - printers.write( - 'std::cout << " {}: " << Args->{} << "\\n";\n'.format(name, name) - ) - printers.write("break;\n") - printers.write("}\n") - - -if __name__ == "__main__": - """ - Usage: python generate_pi_pretty_printers.py path/to/pi.h - """ - with open(sys.argv[1], "r") as f: - header = f.read() - generate_pi_pretty_printers(header) diff --git a/sycl/tools/sycl-trace/main.cpp b/sycl/tools/sycl-trace/main.cpp index 389a54a8413b2..cb51474c3be60 100644 --- a/sycl/tools/sycl-trace/main.cpp +++ b/sycl/tools/sycl-trace/main.cpp @@ -14,7 +14,7 @@ using namespace llvm; -enum ModeKind { PI, ZE, CU, SYCL, VERIFY }; +enum ModeKind { UR, ZE, CU, SYCL, VERIFY }; enum PrintFormatKind { PRETTY_COMPACT, PRETTY_VERBOSE, CLASSIC }; int main(int argc, char **argv, char *env[]) { @@ -22,7 +22,8 @@ int main(int argc, char **argv, char *env[]) { cl::desc("Available tracing modes:"), cl::values( // TODO graph dot - clEnumValN(PI, "plugin", "Trace Plugin Interface calls"), + // clEnumValN(PI, "plugin", "Trace Plugin Interface calls"), + clEnumValN(UR, "ur", "Trace Unified Runtime calls"), clEnumValN(ZE, "level_zero", "Trace Level Zero calls"), clEnumValN(CU, "cuda", "Trace CUDA Driver API calls"), clEnumValN(SYCL, "sycl", "Trace SYCL API calls"), @@ -53,15 +54,16 @@ int main(int argc, char **argv, char *env[]) { #ifdef __linux__ NewEnv.push_back("XPTI_FRAMEWORK_DISPATCHER=libxptifw.so"); - NewEnv.push_back("XPTI_SUBSCRIBERS=libsycl_pi_trace_collector.so"); + NewEnv.push_back("XPTI_SUBSCRIBERS=libsycl_ur_trace_collector.so"); #elif defined(__APPLE__) NewEnv.push_back("XPTI_FRAMEWORK_DISPATCHER=libxptifw.dylib"); - NewEnv.push_back("XPTI_SUBSCRIBERS=libsycl_pi_trace_collector.dylib"); + NewEnv.push_back("XPTI_SUBSCRIBERS=libsycl_ur_trace_collector.dylib"); #endif NewEnv.push_back("XPTI_TRACE_ENABLE=1"); - const auto EnablePITrace = [&]() { - NewEnv.push_back("SYCL_TRACE_PI_ENABLE=1"); + const auto EnableURTrace = [&]() { + NewEnv.push_back("SYCL_TRACE_UR_ENABLE=1"); + NewEnv.push_back("UR_ENABLE_LAYERS=UR_LAYER_TRACING"); }; const auto EnableZETrace = [&]() { NewEnv.push_back("SYCL_TRACE_ZE_ENABLE=1"); @@ -75,12 +77,13 @@ int main(int argc, char **argv, char *env[]) { }; const auto EnableVerificationTrace = [&]() { NewEnv.push_back("SYCL_TRACE_VERIFICATION_ENABLE=1"); + NewEnv.push_back("UR_ENABLE_LAYERS=UR_LAYER_TRACING"); }; for (auto Mode : Modes) { switch (Mode) { - case PI: - EnablePITrace(); + case UR: + EnableURTrace(); break; case ZE: EnableZETrace(); @@ -106,7 +109,7 @@ int main(int argc, char **argv, char *env[]) { } if (Modes.size() == 0) { - EnablePITrace(); + EnableURTrace(); EnableZETrace(); EnableCUTrace(); // Intentionally do not enable SYCL API traces -> to not break existing diff --git a/sycl/tools/sycl-trace/pi_trace_collector.cpp b/sycl/tools/sycl-trace/pi_trace_collector.cpp deleted file mode 100644 index 21478de31eab7..0000000000000 --- a/sycl/tools/sycl-trace/pi_trace_collector.cpp +++ /dev/null @@ -1,147 +0,0 @@ -//==---------------------- pi_trace_collector.cpp --------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \file pi_trace_collector.cpp -/// Routines to collect and print Plugin Interface calls. - -#include "xpti/xpti_trace_framework.h" - -#include "pi_arguments_handler.hpp" -#include "pi_structs.hpp" - -#include -#include - -#include -#include -#include -#include -#include - -extern sycl::detail::SpinLock GlobalLock; - -extern bool HasZEPrinter; - -using HeaderPrinterT = - std::function; - -static sycl::xpti_helpers::PiArgumentsHandler *ArgHandler = nullptr; -static HeaderPrinterT *HeaderPrinter = nullptr; -static std::function *ResultPrinter = nullptr; - -static std::string getResult(pi_result Res) { - switch (Res) { -#define _PI_ERRC(NAME, VAL) \ - case NAME: \ - return #NAME; -#define _PI_ERRC_WITH_MSG(NAME, VAL, MSG) _PI_ERRC(NAME, VAL) -#include -#undef _PI_ERRC -#undef _PI_ERRC_WITH_MSG - } - - return "UNKNOWN RESULT"; -} - -static void setupClassicPrinter() { - ArgHandler = new sycl::xpti_helpers::PiArgumentsHandler(); -#define _PI_API(api) \ - ArgHandler->set##_##api( \ - [](const pi_plugin &, std::optional, auto &&...Args) { \ - std::cout << "---> " << #api << "(" \ - << "\n"; \ - sycl::detail::pi::printArgs(Args...); \ - }); -#include -#undef _PI_API - - ResultPrinter = new std::function( - [](pi_result Res) { std::cout << ") ---> " << Res << std::endl; }); - HeaderPrinter = new std::function( - [](const pi_plugin &Plugin, const xpti::function_with_args_t *Data) { - ArgHandler->handle(Data->function_id, Plugin, std::nullopt, - Data->args_data); - }); -} - -static void setupPrettyPrinter(bool Verbose) { - HeaderPrinter = new std::function( - [Verbose](const pi_plugin &, const xpti::function_with_args_t *Data) { - if (Verbose) { - std::string Source = ""; - size_t Line = 0; - - auto *Payload = xptiQueryPayloadByUID(xptiGetUniversalId()); - - if (Payload) { - if (Payload->source_file != nullptr) { - Source = Payload->source_file; - Line = Payload->line_no; - } - } - - auto TID = std::this_thread::get_id(); - std::cout << "[PI:TID " << TID << ":"; - std::cout << Source << ":" << Line << "]\n"; - } else { - std::cout << "[PI] "; - } - std::cout << Data->function_name << "(\n"; - switch (Data->function_id) { -#include "pi_printers.def" - } - std::cout << ")"; - - if (HasZEPrinter) { - std::cout << " {" << std::endl; - } - }); - ResultPrinter = new std::function([](pi_result Res) { - if (HasZEPrinter) { - std::cout << "}"; - } - std::cout << " ---> " << getResult(Res) << "\n" << std::endl; - }); -} - -void piPrintersInit() { - std::string_view PrinterType(std::getenv("SYCL_TRACE_PRINT_FORMAT")); - - if (PrinterType == "classic") { - setupClassicPrinter(); - } else if (PrinterType == "verbose") { - setupPrettyPrinter(/*verbose*/ true); - } else if (PrinterType == "compact") { - setupPrettyPrinter(/*verbose*/ false); - } -} - -void piPrintersFinish() { - if (ArgHandler) - delete ArgHandler; - delete HeaderPrinter; - delete ResultPrinter; -} - -XPTI_CALLBACK_API void piCallback(uint16_t TraceType, - xpti::trace_event_data_t * /*Parent*/, - xpti::trace_event_data_t * /*Event*/, - uint64_t /*Instance*/, const void *UserData) { - if (!HeaderPrinter || !ResultPrinter) - return; - - // Lock while we print information - std::lock_guard _{GlobalLock}; - const auto *Data = static_cast(UserData); - if (TraceType == xpti::trace_function_with_args_begin) { - const auto *Plugin = static_cast(Data->user_data); - (*HeaderPrinter)(*Plugin, Data); - } else if (TraceType == xpti::trace_function_with_args_end) { - (*ResultPrinter)(*static_cast(Data->ret_data)); - } -} diff --git a/sycl/tools/sycl-trace/ur_trace_collector.cpp b/sycl/tools/sycl-trace/ur_trace_collector.cpp new file mode 100644 index 0000000000000..4f9e1959bedce --- /dev/null +++ b/sycl/tools/sycl-trace/ur_trace_collector.cpp @@ -0,0 +1,122 @@ +//==---------------------- ur_trace_collector.cpp --------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// \file ur_trace_collector.cpp +/// Routines to collect and print Unified Runtime calls. + +#include "xpti/xpti_trace_framework.h" + +#include +#include + +#include +#include +#include +#include +#include + +extern sycl::detail::SpinLock GlobalLock; + +extern bool HasZEPrinter; + +using PrinterT = std::function; + +static PrinterT *HeaderPrinter = nullptr; +static PrinterT *ResultPrinter = nullptr; + +static void setupClassicPrinter() { + ResultPrinter = new std::function([](const xpti::function_with_args_t *Data) { + ur::extras::printFunctionParams( + std::cout, static_cast(Data->function_id), + Data->args_data); + auto *result = static_cast(Data->ret_data); + + std::cout << ")\n---> " << *result << "\n\n"; + }); + + HeaderPrinter = new std::function([](const xpti::function_with_args_t *Data) { + std::cout << "---> " << Data->function_name << "(\n"; + }); +} + +static void setupPrettyPrinter(bool Verbose) { + HeaderPrinter = + new std::function([Verbose](const xpti::function_with_args_t *Data) { + if (Verbose) { + std::string Source = ""; + size_t Line = 0; + + auto *Payload = xptiQueryPayloadByUID(xptiGetUniversalId()); + + if (Payload) { + if (Payload->source_file != nullptr) { + Source = Payload->source_file; + Line = Payload->line_no; + } + } + + auto TID = std::this_thread::get_id(); + std::cout << "[UR:TID " << TID << ":"; + std::cout << Source << ":" << Line << "]\n"; + } else { + std::cout << "[UR] "; + } + std::cout << Data->function_name << "(\n"; + + if (HasZEPrinter) { + std::cout << " {" << std::endl; + } + }); + + ResultPrinter = new std::function([](const xpti::function_with_args_t *Data) { + if (HasZEPrinter) { + std::cout << "}"; + } + std::cout << " "; + ur::extras::printFunctionParams( + std::cout, static_cast(Data->function_id), + Data->args_data); + auto *result = static_cast(Data->ret_data); + + std::cout << ")\n---> " << *result << "\n\n"; + }); +} + +void urPrintersInit() { + std::string_view PrinterType(std::getenv("SYCL_TRACE_PRINT_FORMAT")); + + if (PrinterType == "classic") { + setupClassicPrinter(); + } else if (PrinterType == "verbose") { + setupPrettyPrinter(/*verbose*/ true); + } else if (PrinterType == "compact") { + setupPrettyPrinter(/*verbose*/ false); + } +} + +void urPrintersFinish() { + delete HeaderPrinter; + delete ResultPrinter; +} + +XPTI_CALLBACK_API void urCallback(uint16_t TraceType, + xpti::trace_event_data_t * /*Parent*/, + xpti::trace_event_data_t * /*Event*/, + uint64_t /*Instance*/, const void *UserData) { + if (!HeaderPrinter || !ResultPrinter) + return; + + // Lock while we print information + std::lock_guard _{GlobalLock}; + const auto *Data = static_cast(UserData); + if (TraceType == xpti::trace_function_with_args_begin) { + (*HeaderPrinter)(Data); + } else if (TraceType == xpti::trace_function_with_args_end) { + (*ResultPrinter)(Data); + } +} diff --git a/sycl/tools/sycl-trace/verification_collector.cpp b/sycl/tools/sycl-trace/verification_collector.cpp index 8e90edf1a5a3f..f2267b3d0ed34 100644 --- a/sycl/tools/sycl-trace/verification_collector.cpp +++ b/sycl/tools/sycl-trace/verification_collector.cpp @@ -11,11 +11,8 @@ #include "xpti/xpti_trace_framework.h" -#include "pi_arguments_handler.hpp" -#include "pi_structs.hpp" #include "usm_analyzer.hpp" -#include #include #include @@ -32,7 +29,6 @@ void vPrintersInit() { std::ignore = PrinterType; auto &GS = USMAnalyzer::getInstance(); - GS.setupUSMHandlers(); // this environment variable is for proper testing only GS.changeTerminationOnErrorState( std::getenv("SYCL_TRACE_TERMINATE_ON_WARNING")); @@ -50,13 +46,9 @@ XPTI_CALLBACK_API void vCallback(uint16_t TraceType, // Lock while we print information std::lock_guard _{GlobalLock}; const auto *Data = static_cast(UserData); - const auto *Plugin = static_cast(Data->user_data); if (TraceType == xpti::trace_function_with_args_begin) { - GS.ArgHandlerPreCall.handle(Data->function_id, *Plugin, std::nullopt, - Data->args_data); + GS.handlePreCall(Data); } else if (TraceType == xpti::trace_function_with_args_end) { - const pi_result Result = *static_cast(Data->ret_data); - GS.ArgHandlerPostCall.handle(Data->function_id, *Plugin, Result, - Data->args_data); + GS.handlePostCall(Data); } } diff --git a/sycl/plugins/level_zero/ze_api_generator.py b/sycl/tools/sycl-trace/ze_api_generator.py similarity index 100% rename from sycl/plugins/level_zero/ze_api_generator.py rename to sycl/tools/sycl-trace/ze_api_generator.py diff --git a/sycl/tools/sycl-trace/ze_trace_collector.cpp b/sycl/tools/sycl-trace/ze_trace_collector.cpp index d8f374778bf9d..2b52494ec6f2a 100644 --- a/sycl/tools/sycl-trace/ze_trace_collector.cpp +++ b/sycl/tools/sycl-trace/ze_trace_collector.cpp @@ -27,7 +27,7 @@ int IndentationLevel = 0; enum class ZEApiKind { #define _ZE_API(call, domain, cb, params_type) call, -#include "../../plugins/level_zero/ze_api.def" +#include "ze_api.def" #undef _ZE_API }; diff --git a/sycl/tools/xpti_helpers/pi_arguments_handler.hpp b/sycl/tools/xpti_helpers/pi_arguments_handler.hpp deleted file mode 100644 index 2f60177b95d99..0000000000000 --- a/sycl/tools/xpti_helpers/pi_arguments_handler.hpp +++ /dev/null @@ -1,109 +0,0 @@ -//==---------- pi_arguments_handler.hpp - PI call arguments handler --------==// -// i -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - -#include - -#include -#include -#include - -namespace sycl { -inline namespace _V1 { -namespace xpti_helpers { - -template -inline auto get(char *Data, const std::index_sequence &) { - // Our type should be last in Is sequence - using TargetType = - typename std::tuple_element::type; - - // Calculate sizeof all elements before target + target element then substract - // sizeof target element - const size_t Offset = - (sizeof(typename std::tuple_element::type) + ...) - - sizeof(TargetType); - return *(typename std::decay::type *)(Data + Offset); -} - -template -inline TupleT unpack(char *Data, - const std::index_sequence & /*1..TupleSize*/) { - return {get(Data, std::make_index_sequence{})...}; -} - -template struct to_function {}; - -template struct to_function> { - using type = - std::function, Args...)>; -}; - -/// PiArgumentsHandler is a helper class to process incoming XPTI function call -/// events and unpack contained arguments. -/// -/// Usage: -/// -/// PiArgumentsHandler provides set_ member functions, that accept a -/// lambda with the same arguments as target PI API. Use it to set up handling -/// for particular API. By default an empty lambda is used. -/// -/// When an event is signaled, use PiArgumentsHandler::handle() member function -/// to process the incoming event and call necessary handler. -/// -/// See sycl/tools/pi-trace/ for an example. -class PiArgumentsHandler { -public: - void handle(uint32_t ID, const pi_plugin &Plugin, - std::optional Result, void *ArgsData) { -#define _PI_API(api) \ - if (ID == static_cast(detail::PiApiKind::api)) { \ - MHandler##_##api(Plugin, Result, ArgsData); \ - return; \ - } -#include -#undef _PI_API - } - -#define _PI_API(api) \ - void set##_##api( \ - const typename to_function< \ - typename detail::function_traits::args_type>::type \ - &Handler) { \ - MHandler##_##api = [Handler](const pi_plugin &Plugin, \ - std::optional Res, void *Data) { \ - using TupleT = \ - typename detail::function_traits::args_type; \ - TupleT Tuple = unpack( \ - (char *)Data, \ - std::make_index_sequence::value>{}); \ - const auto Wrapper = [&Plugin, Res, Handler](auto &...Args) { \ - Handler(Plugin, Res, Args...); \ - }; \ - std::apply(Wrapper, Tuple); \ - }; \ - } -#include -#undef _PI_API - -private: -#define _PI_API(api) \ - std::function, void *)> \ - MHandler##_##api = \ - [](const pi_plugin &, std::optional, void *) {}; -#include -#undef _PI_API -}; -} // namespace xpti_helpers -} // namespace _V1 -} // namespace sycl diff --git a/sycl/tools/xpti_helpers/usm_analyzer.hpp b/sycl/tools/xpti_helpers/usm_analyzer.hpp index 1eee3474c51ed..3a5daea9dfdd7 100644 --- a/sycl/tools/xpti_helpers/usm_analyzer.hpp +++ b/sycl/tools/xpti_helpers/usm_analyzer.hpp @@ -8,9 +8,7 @@ #include "xpti/xpti_trace_framework.h" -#include "pi_arguments_handler.hpp" - -#include +#include #include #include @@ -188,8 +186,6 @@ class USMAnalyzer { // TO DO: allocations must be tracked with device std::map ActivePointers; TracepointInfo LastTracepoint; - sycl::xpti_helpers::PiArgumentsHandler ArgHandlerPostCall; - sycl::xpti_helpers::PiArgumentsHandler ArgHandlerPreCall; bool TerminateOnError = false; USMAnalyzer(const USMAnalyzer &obj) = delete; @@ -206,30 +202,61 @@ class USMAnalyzer { void printToErrorStream() { PrintToError = true; } - void setupUSMHandlers() { - ArgHandlerPostCall.set_piextUSMHostAlloc(USMAnalyzer::handleUSMHostAlloc); - ArgHandlerPostCall.set_piextUSMDeviceAlloc( - USMAnalyzer::handleUSMDeviceAlloc); - ArgHandlerPostCall.set_piextUSMSharedAlloc( - USMAnalyzer::handleUSMSharedAlloc); - ArgHandlerPreCall.set_piextUSMFree(USMAnalyzer::handleUSMFree); - ArgHandlerPreCall.set_piMemBufferCreate(USMAnalyzer::handleMemBufferCreate); - ArgHandlerPreCall.set_piextUSMEnqueueFill( - USMAnalyzer::handleUSMEnqueueFill); - ArgHandlerPreCall.set_piextUSMEnqueueMemcpy( - USMAnalyzer::handleUSMEnqueueMemcpy); - ArgHandlerPreCall.set_piextUSMEnqueuePrefetch( - USMAnalyzer::handleUSMEnqueuePrefetch); - ArgHandlerPreCall.set_piextUSMEnqueueMemAdvise( - USMAnalyzer::handleUSMEnqueueMemAdvise); - ArgHandlerPreCall.set_piextUSMEnqueueFill2D( - USMAnalyzer::handleUSMEnqueueFill2D); - ArgHandlerPreCall.set_piextUSMEnqueueMemset2D( - USMAnalyzer::handleUSMEnqueueMemset2D); - ArgHandlerPreCall.set_piextUSMEnqueueMemcpy2D( - USMAnalyzer::handleUSMEnqueueMemcpy2D); - ArgHandlerPreCall.set_piextKernelSetArgPointer( - USMAnalyzer::handleKernelSetArgPointer); + void handlePostCall(const xpti::function_with_args_t *Data) { + switch (static_cast(Data->function_id)) { + case UR_FUNCTION_USM_HOST_ALLOC: + handleUSMHostAlloc( + static_cast(Data->args_data)); + return; + case UR_FUNCTION_USM_DEVICE_ALLOC: + handleUSMDeviceAlloc( + static_cast(Data->args_data)); + return; + case UR_FUNCTION_USM_SHARED_ALLOC: + handleUSMSharedAlloc( + static_cast(Data->args_data)); + return; + default: + return; + } + } + + void handlePreCall(const xpti::function_with_args_t *Data) { + switch (static_cast(Data->function_id)) { + case UR_FUNCTION_USM_FREE: + handleUSMFree(static_cast(Data->args_data)); + return; + case UR_FUNCTION_MEM_BUFFER_CREATE: + handleMemBufferCreate( + static_cast(Data->args_data)); + return; + case UR_FUNCTION_ENQUEUE_USM_MEMCPY: + handleUSMEnqueueMemcpy( + static_cast(Data->args_data)); + return; + case UR_FUNCTION_ENQUEUE_USM_PREFETCH: + handleUSMEnqueuePrefetch( + static_cast(Data->args_data)); + return; + case UR_FUNCTION_ENQUEUE_USM_ADVISE: + handleUSMEnqueueMemAdvise( + static_cast(Data->args_data)); + return; + case UR_FUNCTION_ENQUEUE_USM_FILL_2D: + handleUSMEnqueueFill2D( + static_cast(Data->args_data)); + return; + case UR_FUNCTION_ENQUEUE_USM_MEMCPY_2D: + handleUSMEnqueueMemcpy2D( + static_cast(Data->args_data)); + return; + case UR_FUNCTION_KERNEL_SET_ARG_POINTER: + handleKernelSetArgPointer( + static_cast(Data->args_data)); + return; + default: + return; + } } void fillLastTracepointData(const xpti::trace_event_data_t *ObjectEvent) { @@ -252,50 +279,41 @@ class USMAnalyzer { } } - static void handleUSMHostAlloc(const pi_plugin &, std::optional, - void **ResultPtr, pi_context, - pi_usm_mem_properties *, size_t Size, - pi_uint32) { + static void handleUSMHostAlloc(const ur_usm_host_alloc_params_t *Params) { auto &GS = USMAnalyzer::getInstance(); AllocationInfo Info; Info.Location = GS.LastTracepoint; - Info.Length = Size; + Info.Length = *Params->psize; Info.Kind = AllocKind::host; - GS.ActivePointers[*ResultPtr] = Info; + GS.ActivePointers[**Params->pppMem] = Info; } - static void handleUSMDeviceAlloc(const pi_plugin &, std::optional, - void **ResultPtr, pi_context, pi_device, - pi_usm_mem_properties *, size_t Size, - pi_uint32) { + static void handleUSMDeviceAlloc(const ur_usm_device_alloc_params_t *Params) { auto &GS = USMAnalyzer::getInstance(); AllocationInfo Info; Info.Location = GS.LastTracepoint; - Info.Length = Size; + Info.Length = *Params->psize; Info.Kind = AllocKind::device; - GS.ActivePointers[*ResultPtr] = Info; + GS.ActivePointers[**Params->pppMem] = Info; } - static void handleUSMSharedAlloc(const pi_plugin &, std::optional, - void **ResultPtr, pi_context, pi_device, - pi_usm_mem_properties *, size_t Size, - pi_uint32) { + static void handleUSMSharedAlloc(const ur_usm_shared_alloc_params_t *Params) { auto &GS = USMAnalyzer::getInstance(); AllocationInfo Info; Info.Location = GS.LastTracepoint; - Info.Length = Size; + Info.Length = *Params->psize; Info.Kind = AllocKind::shared; - GS.ActivePointers[*ResultPtr] = Info; + GS.ActivePointers[**Params->pppMem] = Info; } - static void handleUSMFree(const pi_plugin &, std::optional, - pi_context, void *Ptr) { + static void handleUSMFree(const ur_usm_free_params_t *Params) { auto &GS = USMAnalyzer::getInstance(); auto &OutStream = GS.getOutStream(); - if (GS.ActivePointers.count(Ptr) == 0) { + if (GS.ActivePointers.count(*Params->ppMem) == 0) { OutStream << std::endl; - OutStream << PrintPrefix << "Attempt to free pointer " << std::hex << Ptr; + OutStream << PrintPrefix << "Attempt to free pointer " << std::hex + << *Params->ppMem; OutStream << " that was not allocated with SYCL USM APIs.\n"; OutStream << PrintIndentation << "Location: function " << GS.LastTracepoint.Function; @@ -304,15 +322,17 @@ class USMAnalyzer { if (GS.TerminateOnError) std::terminate(); } - GS.ActivePointers.erase(Ptr); + GS.ActivePointers.erase(*Params->ppMem); } - static void handleMemBufferCreate(const pi_plugin &, std::optional, - pi_context, pi_mem_flags, size_t Size, - void *HostPtr, pi_mem *, - const pi_mem_properties *) { + static void + handleMemBufferCreate(const ur_mem_buffer_create_params_t *Params) { auto &GS = USMAnalyzer::getInstance(); auto &OutStream = GS.getOutStream(); + void *HostPtr = nullptr; + if (*Params->ppProperties) { + HostPtr = (*Params->ppProperties)->pHost; + } for (const auto &Alloc : GS.ActivePointers) { const void *Begin = Alloc.first; const void *End = @@ -326,7 +346,7 @@ class USMAnalyzer { NeedsTerminate = true; } - const void *HostEnd = static_cast(HostPtr) + Size; + const void *HostEnd = static_cast(HostPtr) + *Params->psize; if (HostEnd > End) { OutStream << PrintPrefix << "Buffer size exceeds allocated host memory size.\n"; @@ -350,78 +370,55 @@ class USMAnalyzer { } } - static void handleUSMEnqueueFill(const pi_plugin &, std::optional, - pi_queue, void *ptr, const void *, size_t, - size_t numBytes, pi_uint32, const pi_event *, - pi_event *) { - CheckPointerValidness("input parameter", ptr, numBytes, "fill"); + static void + handleUSMEnqueueMemcpy(const ur_enqueue_usm_memcpy_params_t *Params) { + CheckPointerValidness("source memory block", *Params->ppSrc, *Params->psize, + "memcpy"); + CheckPointerValidness("destination memory block", *Params->ppDst, + *Params->psize, "memcpy"); } - static void handleUSMEnqueueMemcpy(const pi_plugin &, - std::optional, pi_queue, - pi_bool, void *dst_ptr, - const void *src_ptr, size_t size, - pi_uint32, const pi_event *, pi_event *) { - CheckPointerValidness("source memory block", src_ptr, size, "memcpy"); - CheckPointerValidness("destination memory block", dst_ptr, size, "memcpy"); + static void + handleUSMEnqueueFill(const ur_enqueue_usm_memcpy_params_t *Params) { + CheckPointerValidness("input parameter", *Params->ppDst, *Params->psize, + "fill"); } - static void handleUSMEnqueuePrefetch(const pi_plugin &, - std::optional, pi_queue, - const void *ptr, size_t size, - pi_usm_migration_flags, pi_uint32, - const pi_event *, pi_event *) { - CheckPointerValidness("input parameter", ptr, size, "prefetch"); + static void + handleUSMEnqueuePrefetch(const ur_enqueue_usm_prefetch_params_t *Params) { + CheckPointerValidness("input parameter", *Params->ppMem, *Params->psize, + "prefetch"); } - static void handleUSMEnqueueMemAdvise(const pi_plugin &, - std::optional, pi_queue, - const void *ptr, size_t length, - pi_mem_advice, pi_event *) { - CheckPointerValidness("input parameter", ptr, length, "mem_advise"); + static void + handleUSMEnqueueMemAdvise(const ur_enqueue_usm_advise_params_t *Params) { + CheckPointerValidness("input parameter", *Params->ppMem, *Params->psize, + "mem_advise"); } - static void handleUSMEnqueueFill2D(const pi_plugin &, - std::optional, pi_queue, - void *ptr, size_t pitch, size_t, - const void *, size_t width, size_t height, - pi_uint32, const pi_event *, pi_event *) { + static void + handleUSMEnqueueFill2D(const ur_enqueue_usm_fill_2d_params_t *Params) { // TO DO: add checks for pattern validity - CheckPointerValidness("input parameter", ptr, pitch, width, height, + CheckPointerValidness("input parameter", *Params->ppMem, *Params->ppitch, + *Params->pwidth, *Params->pheight, "ext_oneapi_fill2d"); } - static void handleUSMEnqueueMemset2D(const pi_plugin &, - std::optional, pi_queue, - void *ptr, size_t pitch, int, - size_t width, size_t height, pi_uint32, - const pi_event *, pi_event *) { - CheckPointerValidness("input parameter", ptr, pitch, width, height, - "ext_oneapi_memset2d"); - } - - static void handleUSMEnqueueMemcpy2D(const pi_plugin &, - std::optional, pi_queue, - pi_bool, void *dst_ptr, size_t dst_pitch, - const void *src_ptr, size_t src_pitch, - size_t width, size_t height, pi_uint32, - const pi_event *, pi_event *) { - CheckPointerValidness("source parameter", src_ptr, src_pitch, width, height, + static void + handleUSMEnqueueMemcpy2D(const ur_enqueue_usm_memcpy_2d_params_t *Params) { + CheckPointerValidness("source parameter", *Params->ppSrc, + *Params->psrcPitch, *Params->pwidth, *Params->pheight, + "ext_oneapi_copy2d/ext_oneapi_memcpy2d"); + CheckPointerValidness("destination parameter", *Params->ppDst, + *Params->pdstPitch, *Params->pwidth, *Params->pheight, "ext_oneapi_copy2d/ext_oneapi_memcpy2d"); - CheckPointerValidness("destination parameter", dst_ptr, dst_pitch, width, - height, "ext_oneapi_copy2d/ext_oneapi_memcpy2d"); } - static void handleKernelSetArgPointer(const pi_plugin &, - std::optional, pi_kernel, - pi_uint32 arg_index, size_t arg_size, - const void *arg_value) { - // no clarity how to handle complex types so check only simple pointers here - if (arg_size == sizeof(arg_value)) { - void *Ptr = *(void **)(const_cast(arg_value)); - CheckPointerValidness( - "kernel parameter with index = " + std::to_string(arg_index), Ptr, - 0 /*no data how it will be used in kernel*/, "kernel"); - } + static void + handleKernelSetArgPointer(const ur_kernel_set_arg_pointer_params_t *Params) { + void *Ptr = (const_cast(*Params->ppArgValue)); + CheckPointerValidness( + "kernel parameter with index = " + std::to_string(*Params->pargIndex), + Ptr, 0 /*no data how it will be used in kernel*/, "kernel"); } }; diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index 71d2413c29748..ec740f913ed4d 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -25,11 +25,11 @@ include(AddSYCLUnitTest) add_custom_target(check-sycl-unittests) -# TODO PI tests require real hardware and must be moved to sycl/test-e2e. -option(SYCL_PI_TESTS "Enable PI-specific unit tests" OFF) +# TODO UR tests require real hardware and must be moved to sycl/test-e2e. +option(SYCL_UR_TESTS "Enable UR-specific unit tests" OFF) -if (SYCL_PI_TESTS) - add_subdirectory(pi) +if (SYCL_UR_TESTS) + add_subdirectory(ur) endif() add_subdirectory(allowlist) @@ -55,5 +55,5 @@ add_subdirectory(builtins) add_subdirectory(buffer/l0_specific) # TODO Enable xpti tests for Windows if (NOT WIN32) - add_subdirectory(xpti_trace) + add_subdirectory(xpti_trace) endif() diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index b276d509fcc6a..7ea07400d745a 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -17,4 +17,6 @@ add_sycl_unittest(ExtensionsTests OBJECT ) add_subdirectory(CommandGraph) -add_subdirectory(VirtualFunctions) + +# Disabled pending UR fix adding setDataAs to mock dummy handle. +#add_subdirectory(VirtualFunctions) diff --git a/sycl/unittests/Extensions/CommandGraph/Common.hpp b/sycl/unittests/Extensions/CommandGraph/Common.hpp index a2e0965572cbf..f9c25088a6221 100644 --- a/sycl/unittests/Extensions/CommandGraph/Common.hpp +++ b/sycl/unittests/Extensions/CommandGraph/Common.hpp @@ -12,9 +12,9 @@ #include "detail/graph_impl.hpp" #include -#include #include #include +#include #include @@ -25,8 +25,7 @@ using namespace sycl::ext::oneapi; class CommandGraphTest : public ::testing::Test { public: CommandGraphTest() - : Mock{}, Plat{Mock.getPlatform()}, Dev{Plat.get_devices()[0]}, - Queue{Dev}, + : Mock{}, Plat{sycl::platform()}, Dev{Plat.get_devices()[0]}, Queue{Dev}, Graph{Queue.get_context(), Dev, {experimental::property::graph::assume_buffer_outlives_graph{}}} { @@ -36,7 +35,7 @@ class CommandGraphTest : public ::testing::Test { void SetUp() override {} protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; sycl::platform Plat; sycl::device Dev; sycl::queue Queue; diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 30b49fd2dabc8..054a663cebdd4 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -309,6 +309,10 @@ TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) { try { Graph.begin_recording(Q); } catch (exception &Exception) { + // Ensure fusion wrapper references are released now, otherwise we can end + // up trying to release backend objects after the mock backend has been + // unloaded. + fw.cancel_fusion(); ExceptionCode = Exception.code(); } ASSERT_EQ(ExceptionCode, sycl::errc::invalid); diff --git a/sycl/unittests/Extensions/CompositeDevice.cpp b/sycl/unittests/Extensions/CompositeDevice.cpp index 687e18df79597..8c875ebd9beda 100644 --- a/sycl/unittests/Extensions/CompositeDevice.cpp +++ b/sycl/unittests/Extensions/CompositeDevice.cpp @@ -1,179 +1,182 @@ +#include "sycl/platform.hpp" #include -#include +#include #include #include namespace { -const auto COMPOSITE_DEVICE_0 = reinterpret_cast(1u); -const auto COMPONENT_DEVICE_A = reinterpret_cast(2u); -const auto COMPONENT_DEVICE_B = reinterpret_cast(3u); +const auto COMPOSITE_DEVICE_0 = reinterpret_cast(1u); +const auto COMPONENT_DEVICE_A = reinterpret_cast(2u); +const auto COMPONENT_DEVICE_B = reinterpret_cast(3u); -// We do not report COMPONENT_DEVICE_D through mocked piDevicesGet to emulate +// We do not report COMPONENT_DEVICE_D through mocked urDeviceGet to emulate // that it is not available to ensure that COMPOSITE_DEVICE_1 is not returned // through platform::ext_oneapi_get_composite_devices and // sycl:ext::oneapi::experimental::get_composite_devices APIs -const auto COMPOSITE_DEVICE_1 = reinterpret_cast(4u); -const auto COMPONENT_DEVICE_C = reinterpret_cast(5u); -const auto COMPONENT_DEVICE_D = reinterpret_cast(6u); - -pi_result redefine_piDevicesGet(pi_platform platform, pi_device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 3; - if (devices) { - if (num_entries > 0) - devices[0] = COMPONENT_DEVICE_A; - if (num_entries > 1) - devices[1] = COMPONENT_DEVICE_B; - if (num_entries > 2) - devices[2] = COMPONENT_DEVICE_C; +const auto COMPOSITE_DEVICE_1 = reinterpret_cast(4u); +const auto COMPONENT_DEVICE_C = reinterpret_cast(5u); +const auto COMPONENT_DEVICE_D = reinterpret_cast(6u); + +ur_result_t redefine_urDeviceGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) + **params.ppNumDevices = 3; + if (*params.pphDevices) { + if (*params.pNumEntries > 0) + (*params.pphDevices)[0] = COMPONENT_DEVICE_A; + if (*params.pNumEntries > 1) + (*params.pphDevices)[1] = COMPONENT_DEVICE_B; + if (*params.pNumEntries > 2) + (*params.pphDevices)[2] = COMPONENT_DEVICE_C; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result after_piDeviceGetInfo(pi_device device, pi_device_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_EXT_ONEAPI_DEVICE_INFO_COMPOSITE_DEVICE: - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_device); - if (param_value) { - if (device == COMPONENT_DEVICE_A || device == COMPONENT_DEVICE_B) { - *static_cast(param_value) = COMPOSITE_DEVICE_0; - } else if (device == COMPONENT_DEVICE_C || device == COMPONENT_DEVICE_D) { - *static_cast(param_value) = COMPOSITE_DEVICE_1; +ur_result_t after_urDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_DEVICE_INFO_COMPOSITE_DEVICE: + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_device_handle_t); + if (*params.ppPropValue) { + if (*params.phDevice == COMPONENT_DEVICE_A || + *params.phDevice == COMPONENT_DEVICE_B) { + *static_cast(*params.ppPropValue) = + COMPOSITE_DEVICE_0; + } else if (*params.phDevice == COMPONENT_DEVICE_C || + *params.phDevice == COMPONENT_DEVICE_D) { + *static_cast(*params.ppPropValue) = + COMPOSITE_DEVICE_1; } else - *static_cast(param_value) = nullptr; + *static_cast(*params.ppPropValue) = nullptr; } - return PI_SUCCESS; - - case PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES: - if (device == COMPOSITE_DEVICE_0) { - if (param_value_size_ret) - *param_value_size_ret = 2 * sizeof(pi_device); - if (param_value) { - if (param_value_size >= sizeof(pi_device)) - static_cast(param_value)[0] = COMPONENT_DEVICE_A; - if (param_value_size >= 2 * sizeof(pi_device)) - static_cast(param_value)[1] = COMPONENT_DEVICE_B; + return UR_RESULT_SUCCESS; + + case UR_DEVICE_INFO_COMPONENT_DEVICES: + if (*params.phDevice == COMPOSITE_DEVICE_0) { + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 2 * sizeof(ur_device_handle_t); + if (*params.ppPropValue) { + if (*params.ppropSize >= sizeof(ur_device_handle_t)) + static_cast(*params.ppPropValue)[0] = + COMPONENT_DEVICE_A; + if (*params.ppropSize >= 2 * sizeof(ur_device_handle_t)) + static_cast(*params.ppPropValue)[1] = + COMPONENT_DEVICE_B; } - } else if (device == COMPOSITE_DEVICE_1) { - if (param_value_size_ret) - *param_value_size_ret = 2 * sizeof(pi_device); - if (param_value) { - if (param_value_size >= sizeof(pi_device)) - static_cast(param_value)[0] = COMPONENT_DEVICE_C; - if (param_value_size >= 2 * sizeof(pi_device)) - static_cast(param_value)[1] = COMPONENT_DEVICE_D; + } else if (*params.phDevice == COMPOSITE_DEVICE_1) { + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 2 * sizeof(ur_device_handle_t); + if (*params.ppPropValue) { + if (*params.ppropSize >= sizeof(ur_device_handle_t)) + static_cast(*params.ppPropValue)[0] = + COMPONENT_DEVICE_C; + if (*params.ppropSize >= 2 * sizeof(ur_device_handle_t)) + static_cast(*params.ppPropValue)[1] = + COMPONENT_DEVICE_D; } } else { - if (param_value_size_ret) - *param_value_size_ret = 0; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 0; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; default: - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } } -pi_result after_piDeviceGetInfo_unsupported(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_EXT_ONEAPI_DEVICE_INFO_COMPOSITE_DEVICE: - case PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES: - return PI_ERROR_INVALID_VALUE; +ur_result_t after_urDeviceGetInfo_unsupported(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_DEVICE_INFO_COMPOSITE_DEVICE: + case UR_DEVICE_INFO_COMPONENT_DEVICES: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; default: - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } } -pi_result after_piDeviceGetInfo_no_component_devices( - pi_device device, pi_device_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) { - switch (param_name) { - case PI_EXT_ONEAPI_DEVICE_INFO_COMPOSITE_DEVICE: - return PI_ERROR_INVALID_VALUE; - case PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES: - if (param_value_size_ret) - *param_value_size_ret = 0; - return PI_SUCCESS; +ur_result_t after_urDeviceGetInfo_no_component_devices(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_DEVICE_INFO_COMPOSITE_DEVICE: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMPONENT_DEVICES: + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 0; + return UR_RESULT_SUCCESS; default: - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } } -thread_local std::vector DevicesUsedInContextCreation; +thread_local std::vector DevicesUsedInContextCreation; -pi_result after_piContextCreate(const pi_context_properties *, - pi_uint32 num_devices, const pi_device *devices, - void (*)(const char *, const void *, size_t, - void *), - void *, pi_context *ret_context) { +ur_result_t after_urContextCreate(void *pParams) { + auto params = *static_cast(pParams); + DevicesUsedInContextCreation.assign( + *params.pphDevices, *params.pphDevices + *params.pDeviceCount); - DevicesUsedInContextCreation.assign(devices, devices + num_devices); - - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } } // namespace TEST(CompositeDeviceTest, PlatformExtOneAPIGetCompositeDevices) { - sycl::unittest::PiMock Mock; - Mock.redefine(redefine_piDevicesGet); - Mock.redefineAfter( - after_piDeviceGetInfo); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefine_urDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); std::vector Composites = Plt.ext_oneapi_get_composite_devices(); // We don't expect to see COMPOSITE_DEVICE_1 here, because one of its // components (COMPONENT_DEVICE_D) is not available. ASSERT_EQ(Composites.size(), 1u); - ASSERT_EQ(sycl::bit_cast( + ASSERT_EQ(sycl::bit_cast( sycl::get_native(Composites.front())), COMPOSITE_DEVICE_0); } TEST(CompositeDeviceTest, SYCLExtOneAPIExperimentalGetCompositeDevices) { - sycl::unittest::PiMock Mock; - Mock.redefine(redefine_piDevicesGet); - Mock.redefineAfter( - after_piDeviceGetInfo); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefine_urDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); std::vector Composites = sycl::ext::oneapi::experimental::get_composite_devices(); // We don't expect to see COMPOSITE_DEVICE_1 here, because one of its // components (COMPONENT_DEVICE_D) is not available. ASSERT_EQ(Composites.size(), 1u); - ASSERT_EQ(sycl::bit_cast( + ASSERT_EQ(sycl::bit_cast( sycl::get_native(Composites.front())), COMPOSITE_DEVICE_0); } TEST(CompositeDeviceTest, DescendentDeviceSupportInContext) { - sycl::unittest::PiMock Mock; - Mock.redefine(redefine_piDevicesGet); - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piContextCreate); - - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefine_urDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback("urContextCreate", + &after_urContextCreate); + + sycl::platform Plt = sycl::platform(); sycl::device RootDevice = Plt.get_devices()[0]; ASSERT_TRUE(RootDevice.has(sycl::aspect::ext_oneapi_is_component)); sycl::context Ctx(RootDevice); @@ -190,13 +193,13 @@ TEST(CompositeDeviceTest, DescendentDeviceSupportInContext) { ASSERT_EQ(DevicesUsedInContextCreation.size(), 3u); ASSERT_TRUE(std::any_of( DevicesUsedInContextCreation.begin(), DevicesUsedInContextCreation.end(), - [=](pi_device D) { return D == COMPOSITE_DEVICE_0; })); + [=](ur_device_handle_t D) { return D == COMPOSITE_DEVICE_0; })); ASSERT_TRUE(std::any_of( DevicesUsedInContextCreation.begin(), DevicesUsedInContextCreation.end(), - [=](pi_device D) { return D == COMPONENT_DEVICE_A; })); + [=](ur_device_handle_t D) { return D == COMPONENT_DEVICE_A; })); ASSERT_TRUE(std::any_of( DevicesUsedInContextCreation.begin(), DevicesUsedInContextCreation.end(), - [=](pi_device D) { return D == COMPONENT_DEVICE_B; })); + [=](ur_device_handle_t D) { return D == COMPONENT_DEVICE_B; })); // Even though under the hood we have created context for 3 devices, // user-visible interface should only report the exact list of devices passed // by user to the context constructor. @@ -205,14 +208,15 @@ TEST(CompositeDeviceTest, DescendentDeviceSupportInContext) { } TEST(CompositeDeviceTest, DescendentDeviceSupportInQueue) { - sycl::unittest::PiMock Mock; - Mock.redefine(redefine_piDevicesGet); - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piContextCreate); - - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefine_urDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback("urContextCreate", + &after_urContextCreate); + + sycl::platform Plt = sycl::platform(); sycl::device ComponentDevice = Plt.get_devices()[0]; ASSERT_TRUE(ComponentDevice.has(sycl::aspect::ext_oneapi_is_component)); @@ -226,12 +230,13 @@ TEST(CompositeDeviceTest, DescendentDeviceSupportInQueue) { TEST(CompositeDeviceTest, UnsupportedNegative) { // For the unsupported case, the backend does not need to be L0. - sycl::unittest::PiMock Mock; - Mock.redefine(redefine_piDevicesGet); - Mock.redefineAfter( - after_piDeviceGetInfo_unsupported); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefine_urDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo_unsupported); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::device ComponentDevice = Plt.get_devices()[0]; ASSERT_FALSE(ComponentDevice.has(sycl::aspect::ext_oneapi_is_component)); @@ -245,12 +250,13 @@ TEST(CompositeDeviceTest, UnsupportedNegative) { } TEST(CompositeDeviceTest, NoComponentDevices) { - sycl::unittest::PiMock Mock; - Mock.redefine(redefine_piDevicesGet); - Mock.redefineAfter( - after_piDeviceGetInfo_no_component_devices); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefine_urDeviceGet); + mock::getCallbacks().set_after_callback( + "urDeviceGetInfo", &after_urDeviceGetInfo_no_component_devices); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::device ComponentDevice = Plt.get_devices()[0]; ASSERT_FALSE(ComponentDevice.has(sycl::aspect::ext_oneapi_is_composite)); diff --git a/sycl/unittests/Extensions/DefaultContext.cpp b/sycl/unittests/Extensions/DefaultContext.cpp index 945cd627e711b..e0b85ce1a4e85 100644 --- a/sycl/unittests/Extensions/DefaultContext.cpp +++ b/sycl/unittests/Extensions/DefaultContext.cpp @@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// +#include "sycl/platform.hpp" #include #include -#include #include +#include #include @@ -19,11 +20,11 @@ inline constexpr auto EnableDefaultContextsName = "SYCL_ENABLE_DEFAULT_CONTEXTS"; void test_default_context_enabled() { - sycl::unittest::PiMock Mock1; - sycl::platform Plt1 = Mock1.getPlatform(); + sycl::unittest::UrMock<> Mock1; + sycl::platform Plt1 = sycl::platform(); - sycl::unittest::PiMock Mock2; - sycl::platform Plt2 = Mock2.getPlatform(); + sycl::unittest::UrMock<> Mock2; + sycl::platform Plt2 = sycl::platform(); const sycl::device Dev1 = Plt1.get_devices()[0]; const sycl::device Dev2 = Plt2.get_devices()[0]; @@ -38,8 +39,8 @@ void test_default_context_enabled() { } void test_default_context_disabled() { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); bool catchException = false; try { @@ -81,8 +82,8 @@ TEST(DefaultContextTest, DefaultContextCanBeDisabledEnabled) { TEST(DefaultContextTest, DefaultContextValueChangedAfterQueueCreated) { sycl::detail::enable_ext_oneapi_default_context(false); - sycl::unittest::PiMock Mock1; - sycl::platform Plt = Mock1.getPlatform(); + sycl::unittest::UrMock<> Mock1; + sycl::platform Plt = sycl::platform(); const sycl::device Dev1 = Plt.get_devices()[0]; const sycl::device Dev2 = Plt.get_devices()[0]; diff --git a/sycl/unittests/Extensions/DeviceGlobal.cpp b/sycl/unittests/Extensions/DeviceGlobal.cpp index 0af8ebe4924e4..ee59cb4dc2d84 100644 --- a/sycl/unittests/Extensions/DeviceGlobal.cpp +++ b/sycl/unittests/Extensions/DeviceGlobal.cpp @@ -6,23 +6,17 @@ // //===----------------------------------------------------------------------===// -#include #include #include "detail/context_impl.hpp" #include "detail/kernel_program_cache.hpp" #include -#include -#include +#include +#include #include -#include -#include - -using sycl::detail::PiApiKind; - class DeviceGlobalTestKernel; constexpr const char *DeviceGlobalTestKernelName = "DeviceGlobalTestKernel"; constexpr const char *DeviceGlobalName = "DeviceGlobalName"; @@ -59,7 +53,7 @@ struct KernelInfo } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage generateDeviceGlobalImage() { +static sycl::unittest::UrImage generateDeviceGlobalImage() { using namespace sycl::unittest; // Call device global map initializer explicitly to mimic the integration @@ -67,18 +61,18 @@ static sycl::unittest::PiImage generateDeviceGlobalImage() { sycl::detail::device_global_map::add(&DeviceGlobal, DeviceGlobalName); // Insert remaining device global info into the binary. - PiPropertySet PropSet; - PiProperty DevGlobInfo = + UrPropertySet PropSet; + UrProperty DevGlobInfo = makeDeviceGlobalInfo(DeviceGlobalName, sizeof(int) * 2, 0); PropSet.insert(__SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS, - PiArray{std::move(DevGlobInfo)}); + UrArray{std::move(DevGlobInfo)}); std::vector Bin{10, 11, 12, 13, 14, 15}; // Random data - PiArray Entries = + UrArray Entries = makeEmptyKernels({DeviceGlobalTestKernelName}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -89,7 +83,7 @@ static sycl::unittest::PiImage generateDeviceGlobalImage() { return Img; } -static sycl::unittest::PiImage generateDeviceGlobalImgScopeImage() { +static sycl::unittest::UrImage generateDeviceGlobalImgScopeImage() { using namespace sycl::unittest; // Call device global map initializer explicitly to mimic the integration @@ -98,18 +92,18 @@ static sycl::unittest::PiImage generateDeviceGlobalImgScopeImage() { DeviceGlobalImgScopeName); // Insert remaining device global info into the binary. - PiPropertySet PropSet; - PiProperty DevGlobInfo = + UrPropertySet PropSet; + UrProperty DevGlobInfo = makeDeviceGlobalInfo(DeviceGlobalImgScopeName, sizeof(int) * 2, 1); PropSet.insert(__SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS, - PiArray{std::move(DevGlobInfo)}); + UrArray{std::move(DevGlobInfo)}); std::vector Bin{10, 11, 12, 13, 14, 15}; // Random data - PiArray Entries = + UrArray Entries = makeEmptyKernels({DeviceGlobalImgScopeTestKernelName}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -121,15 +115,17 @@ static sycl::unittest::PiImage generateDeviceGlobalImgScopeImage() { } namespace { -sycl::unittest::PiImage Imgs[] = {generateDeviceGlobalImage(), +sycl::unittest::UrImage Imgs[] = {generateDeviceGlobalImage(), generateDeviceGlobalImgScopeImage()}; -sycl::unittest::PiImageArray<2> ImgArray{Imgs}; +sycl::unittest::UrImageArray<2> ImgArray{Imgs}; // Trackers. thread_local DeviceGlobalElemType MockDeviceGlobalMem; thread_local DeviceGlobalElemType MockDeviceGlobalImgScopeMem; -thread_local std::optional DeviceGlobalInitEvent = std::nullopt; -thread_local std::optional DeviceGlobalWriteEvent = std::nullopt; +thread_local std::optional DeviceGlobalInitEvent = + std::nullopt; +thread_local std::optional DeviceGlobalWriteEvent = + std::nullopt; thread_local unsigned KernelCallCounter = 0; thread_local unsigned DeviceGlobalWriteCounter = 0; thread_local unsigned DeviceGlobalReadCounter = 0; @@ -137,87 +133,82 @@ thread_local unsigned DeviceGlobalReadCounter = 0; // Markers. thread_local bool TreatDeviceGlobalInitEventAsCompleted = false; thread_local bool TreatDeviceGlobalWriteEventAsCompleted = false; -thread_local std::optional ExpectedReadWritePIProgram = +thread_local std::optional ExpectedReadWriteURProgram = std::nullopt; -static pi_result after_piextUSMDeviceAlloc(void **result_ptr, pi_context, - pi_device, pi_usm_mem_properties *, - size_t, pi_uint32) { +static ur_result_t after_urUSMDeviceAlloc(void *pParams) { + auto params = *static_cast(pParams); // Use the mock memory. - *result_ptr = MockDeviceGlobalMem; - return PI_SUCCESS; + **params.pppMem = MockDeviceGlobalMem; + return UR_RESULT_SUCCESS; } -static pi_result after_piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *dst_ptr, - const void *src_ptr, size_t size, - pi_uint32, const pi_event *, - pi_event *event) { +static ur_result_t after_urEnqueueUSMMemcpy(void *pParams) { + auto params = *static_cast(pParams); // If DeviceGlobalInitEvent.has_value() is true then this means that this is // the second call to MemCopy and we don't want to initialize anything. If // it's the first call then we want to set the DeviceGlobalInitEvent if (!DeviceGlobalInitEvent.has_value()) - DeviceGlobalInitEvent = *event; - std::memcpy(dst_ptr, src_ptr, size); - return PI_SUCCESS; + DeviceGlobalInitEvent = **params.pphEvent; + std::memcpy(*params.ppDst, *params.ppSrc, *params.psize); + return UR_RESULT_SUCCESS; } template -pi_result after_piextEnqueueDeviceGlobalVariableWrite( - pi_queue, pi_program program, const char *, pi_bool, size_t count, - size_t offset, const void *src_ptr, pi_uint32, const pi_event *, - pi_event *event) { +ur_result_t after_urEnqueueDeviceGlobalVariableWrite(void *pParams) { + auto params = + *static_cast(pParams); if constexpr (Exclusive) { EXPECT_FALSE(DeviceGlobalWriteEvent.has_value()) - << "piextEnqueueDeviceGlobalVariableWrite is called multiple times!"; + << "urEnqueueDeviceGlobalVariableWrite is called multiple times!"; } - if (ExpectedReadWritePIProgram.has_value()) { - EXPECT_EQ(*ExpectedReadWritePIProgram, program) - << "piextEnqueueDeviceGlobalVariableWrite did not receive the expected " + if (ExpectedReadWriteURProgram.has_value()) { + EXPECT_EQ(*ExpectedReadWriteURProgram, *params.phProgram) + << "urEnqueueDeviceGlobalVariableWrite did not receive the expected " "program!"; } - std::memcpy(MockDeviceGlobalImgScopeMem + offset, src_ptr, count); - DeviceGlobalWriteEvent = *event; + std::memcpy(MockDeviceGlobalImgScopeMem + *params.poffset, *params.ppSrc, + *params.pcount); + DeviceGlobalWriteEvent = **params.pphEvent; ++DeviceGlobalWriteCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result after_piextEnqueueDeviceGlobalVariableRead( - pi_queue, pi_program program, const char *, pi_bool, size_t count, - size_t offset, void *dst_ptr, pi_uint32, const pi_event *, - pi_event *event) { - if (ExpectedReadWritePIProgram.has_value()) { - EXPECT_EQ(*ExpectedReadWritePIProgram, program) - << "piextEnqueueDeviceGlobalVariableRead did not receive the expected " +ur_result_t after_urEnqueueDeviceGlobalVariableRead(void *pParams) { + auto params = + *static_cast(pParams); + if (ExpectedReadWriteURProgram.has_value()) { + EXPECT_EQ(*ExpectedReadWriteURProgram, *params.phProgram) + << "urEnqueueDeviceGlobalVariableRead did not receive the expected " "program!"; } - std::memcpy(dst_ptr, MockDeviceGlobalImgScopeMem + offset, count); + std::memcpy(*params.ppDst, MockDeviceGlobalImgScopeMem + *params.poffset, + *params.pcount); ++DeviceGlobalReadCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result after_piEventGetInfo(pi_event event, pi_event_info param_name, size_t, - void *param_value, size_t *) { - if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS && - param_value != nullptr) { +ur_result_t after_urEventGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS && + *params.ppPropValue != nullptr) { if ((TreatDeviceGlobalInitEventAsCompleted && DeviceGlobalInitEvent.has_value() && - event == *DeviceGlobalInitEvent) || + *params.phEvent == *DeviceGlobalInitEvent) || (TreatDeviceGlobalWriteEventAsCompleted && DeviceGlobalWriteEvent.has_value() && - event == *DeviceGlobalWriteEvent)) - *static_cast(param_value) = PI_EVENT_COMPLETE; + *params.phEvent == *DeviceGlobalWriteEvent)) + *static_cast(*params.ppPropValue) = + UR_EVENT_STATUS_COMPLETE; else - *static_cast(param_value) = PI_EVENT_SUBMITTED; + *static_cast(*params.ppPropValue) = + UR_EVENT_STATUS_SUBMITTED; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result after_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *) { +ur_result_t after_urEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); ++KernelCallCounter; EXPECT_TRUE(DeviceGlobalInitEvent.has_value()) << "DeviceGlobalInitEvent has not been set. Kernel call " @@ -226,11 +217,12 @@ pi_result after_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, << "DeviceGlobalWriteEvent has not been set. Kernel call " << KernelCallCounter; - const pi_event *EventListEnd = event_wait_list + num_events_in_wait_list; + const ur_event_handle_t *EventListEnd = + *params.pphEventWaitList + *params.pnumEventsInWaitList; bool DeviceGlobalInitEventFound = - std::find(event_wait_list, EventListEnd, *DeviceGlobalInitEvent) != - EventListEnd; + std::find(*params.pphEventWaitList, EventListEnd, + *DeviceGlobalInitEvent) != EventListEnd; if (TreatDeviceGlobalInitEventAsCompleted) { EXPECT_FALSE(DeviceGlobalInitEventFound) << "DeviceGlobalInitEvent was in event wait list but was not expected. " @@ -244,8 +236,8 @@ pi_result after_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, } bool DeviceGlobalWriteEventFound = - std::find(event_wait_list, EventListEnd, *DeviceGlobalWriteEvent) != - EventListEnd; + std::find(*params.pphEventWaitList, EventListEnd, + *DeviceGlobalWriteEvent) != EventListEnd; if (TreatDeviceGlobalWriteEventAsCompleted) { EXPECT_FALSE(DeviceGlobalWriteEventFound) << "DeviceGlobalWriteEvent was in event wait list but was not " @@ -257,55 +249,49 @@ pi_result after_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, "missing. Kernel call " << KernelCallCounter; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -void ResetTrackersAndMarkers() { - std::memset(MockDeviceGlobalMem, 1, sizeof(DeviceGlobalElemType)); - std::memset(MockDeviceGlobalImgScopeMem, 0, sizeof(DeviceGlobalElemType)); - DeviceGlobalWriteEvent = std::nullopt; - DeviceGlobalInitEvent = std::nullopt; - KernelCallCounter = 0; - DeviceGlobalWriteCounter = 0; - DeviceGlobalReadCounter = 0; - TreatDeviceGlobalInitEventAsCompleted = false; - TreatDeviceGlobalWriteEventAsCompleted = false; - ExpectedReadWritePIProgram = std::nullopt; -} - -std::pair -CommonSetup(std::function RedefinitionFunc) { - ResetTrackersAndMarkers(); - - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - - RedefinitionFunc(Mock); +} // namespace - // Create new context to isolate device_global initialization. - sycl::context C{Plt.get_devices()[0]}; - sycl::queue Q{C, Plt.get_devices()[0]}; +class DeviceGlobalTest : public ::testing::Test { + void SetUp() { + ResetTrackersAndMarkers(); + sycl::platform Plt = sycl::platform(); + sycl::context C{Plt.get_devices()[0]}; + Q = sycl::queue(C, Plt.get_devices()[0]); + } - return std::make_pair(std::move(Mock), std::move(Q)); -} + void ResetTrackersAndMarkers() { + std::memset(MockDeviceGlobalMem, 1, sizeof(DeviceGlobalElemType)); + std::memset(MockDeviceGlobalImgScopeMem, 0, sizeof(DeviceGlobalElemType)); + DeviceGlobalWriteEvent = std::nullopt; + DeviceGlobalInitEvent = std::nullopt; + KernelCallCounter = 0; + DeviceGlobalWriteCounter = 0; + DeviceGlobalReadCounter = 0; + TreatDeviceGlobalInitEventAsCompleted = false; + TreatDeviceGlobalWriteEventAsCompleted = false; + ExpectedReadWriteURProgram = std::nullopt; + } -} // namespace +public: + sycl::unittest::UrMock<> Mock; + sycl::queue Q; +}; // Macros for common redefinition calls. -#define REDEFINE_AFTER(API) redefineAfter(after_##API) +#define REDEFINE_AFTER(API) \ + mock::getCallbacks().set_after_callback(#API, &after_##API) #define REDEFINE_AFTER_TEMPLATED(API, ...) \ - redefineAfter(after_##API<__VA_ARGS__>) - -TEST(DeviceGlobalTest, DeviceGlobalInitBeforeUse) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - MockRef.REDEFINE_AFTER(piEnqueueKernelLaunch); - }); - std::ignore = Mock; + mock::getCallbacks().set_after_callback(#API, &after_##API<__VA_ARGS__>) + +TEST_F(DeviceGlobalTest, DeviceGlobalInitBeforeUse) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); + REDEFINE_AFTER(urEnqueueKernelLaunch); // Kernel call 1. // First launch should create both init events. @@ -327,13 +313,10 @@ TEST(DeviceGlobalTest, DeviceGlobalInitBeforeUse) { Q.single_task([]() {}); } -TEST(DeviceGlobalTest, DeviceGlobalInitialMemContents) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalInitialMemContents) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); int Results[2] = {3, 4}; // This should replace the contents of Results with {0, 0} @@ -352,15 +335,11 @@ TEST(DeviceGlobalTest, DeviceGlobalInitialMemContents) { EXPECT_EQ(MockDeviceGlobalMem[1], Results[1]); } -TEST(DeviceGlobalTest, DeviceGlobalCopyToBeforeUseFull) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalCopyToBeforeUseFull) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); int Vals[2] = {42, 1234}; Q.copy(Vals, DeviceGlobal).wait(); @@ -381,15 +360,11 @@ TEST(DeviceGlobalTest, DeviceGlobalCopyToBeforeUseFull) { EXPECT_TRUE(DeviceGlobalWriteEvent.has_value()); } -TEST(DeviceGlobalTest, DeviceGlobalMemcpyToBeforeUseFull) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalMemcpyToBeforeUseFull) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); int Vals[2] = {42, 1234}; Q.memcpy(DeviceGlobal, Vals).wait(); @@ -410,15 +385,11 @@ TEST(DeviceGlobalTest, DeviceGlobalMemcpyToBeforeUseFull) { EXPECT_TRUE(DeviceGlobalWriteEvent.has_value()); } -TEST(DeviceGlobalTest, DeviceGlobalCopyToBeforeUsePartialNoOffset) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalCopyToBeforeUsePartialNoOffset) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); int Val = 42; Q.copy(&Val, DeviceGlobal, 1).wait(); @@ -438,15 +409,11 @@ TEST(DeviceGlobalTest, DeviceGlobalCopyToBeforeUsePartialNoOffset) { EXPECT_TRUE(DeviceGlobalWriteEvent.has_value()); } -TEST(DeviceGlobalTest, DeviceGlobalMemcpyToBeforeUsePartialNoOffset) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalMemcpyToBeforeUsePartialNoOffset) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); int Val = 42; Q.memcpy(DeviceGlobal, &Val, sizeof(int)).wait(); @@ -466,15 +433,11 @@ TEST(DeviceGlobalTest, DeviceGlobalMemcpyToBeforeUsePartialNoOffset) { EXPECT_TRUE(DeviceGlobalWriteEvent.has_value()); } -TEST(DeviceGlobalTest, DeviceGlobalCopyToBeforeUsePartialWithOffset) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalCopyToBeforeUsePartialWithOffset) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); int Val = 42; Q.copy(&Val, DeviceGlobal, 1, 1).wait(); @@ -494,15 +457,11 @@ TEST(DeviceGlobalTest, DeviceGlobalCopyToBeforeUsePartialWithOffset) { EXPECT_TRUE(DeviceGlobalWriteEvent.has_value()); } -TEST(DeviceGlobalTest, DeviceGlobalInitBeforeMemcpyToPartialWithOffset) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalInitBeforeMemcpyToPartialWithOffset) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); int Val = 42; Q.memcpy(DeviceGlobal, &Val, sizeof(int), sizeof(int)).wait(); @@ -522,15 +481,11 @@ TEST(DeviceGlobalTest, DeviceGlobalInitBeforeMemcpyToPartialWithOffset) { EXPECT_TRUE(DeviceGlobalWriteEvent.has_value()); } -TEST(DeviceGlobalTest, DeviceGlobalCopyFromBeforeUse) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalCopyFromBeforeUse) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); int Vals[2] = {42, 1234}; Q.copy(DeviceGlobal, Vals).wait(); @@ -543,15 +498,11 @@ TEST(DeviceGlobalTest, DeviceGlobalCopyFromBeforeUse) { EXPECT_EQ(MockDeviceGlobalMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalMemcpyFromBeforeUse) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalMemcpyFromBeforeUse) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); int Vals[2] = {42, 1234}; Q.memcpy(Vals, DeviceGlobal).wait(); @@ -564,15 +515,11 @@ TEST(DeviceGlobalTest, DeviceGlobalMemcpyFromBeforeUse) { EXPECT_EQ(MockDeviceGlobalMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalUseBeforeCopyTo) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalUseBeforeCopyTo) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); Q.single_task([]() {}).wait(); @@ -590,15 +537,11 @@ TEST(DeviceGlobalTest, DeviceGlobalUseBeforeCopyTo) { Q.single_task([]() {}).wait(); } -TEST(DeviceGlobalTest, DeviceGlobalUseBeforeMemcpyTo) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER(piextUSMDeviceAlloc); - MockRef.REDEFINE_AFTER(piextUSMEnqueueMemcpy); - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - true); - MockRef.REDEFINE_AFTER(piEventGetInfo); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalUseBeforeMemcpyTo) { + REDEFINE_AFTER(urUSMDeviceAlloc); + REDEFINE_AFTER(urEnqueueUSMMemcpy); + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); + REDEFINE_AFTER(urEventGetInfo); Q.single_task([]() {}).wait(); @@ -618,13 +561,9 @@ TEST(DeviceGlobalTest, DeviceGlobalUseBeforeMemcpyTo) { Q.single_task([]() {}).wait(); } -TEST(DeviceGlobalTest, DeviceGlobalImgScopeCopyToBeforeUse) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - false); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalImgScopeCopyToBeforeUse) { + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, false); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); int Vals[2] = {42, 1234}; Q.copy(Vals, DeviceGlobalImgScope).wait(); @@ -639,13 +578,9 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeCopyToBeforeUse) { EXPECT_EQ(MockDeviceGlobalImgScopeMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalImgScopeMemcpyToBeforeUse) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - false); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalImgScopeMemcpyToBeforeUse) { + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, false); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); int Vals[2] = {42, 1234}; Q.memcpy(DeviceGlobalImgScope, Vals).wait(); @@ -660,13 +595,9 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeMemcpyToBeforeUse) { EXPECT_EQ(MockDeviceGlobalImgScopeMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalImgScopeCopyFromBeforeUse) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - false); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalImgScopeCopyFromBeforeUse) { + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, false); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); int Vals[2] = {42, 1234}; Q.copy(DeviceGlobalImgScope, Vals).wait(); @@ -681,13 +612,9 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeCopyFromBeforeUse) { EXPECT_EQ(MockDeviceGlobalImgScopeMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalImgScopeMemcpyFromBeforeUse) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - false); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalImgScopeMemcpyFromBeforeUse) { + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, false); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); int Vals[2] = {42, 1234}; Q.memcpy(Vals, DeviceGlobalImgScope).wait(); @@ -702,13 +629,9 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeMemcpyFromBeforeUse) { EXPECT_EQ(MockDeviceGlobalImgScopeMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeCopyTo) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - false); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeCopyTo) { + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, false); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); Q.single_task([]() {}).wait(); @@ -718,7 +641,7 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeCopyTo) { CtxImpl->getKernelProgramCache().acquireKernelsPerProgramCache().get(); ASSERT_EQ(KernelCache.size(), (size_t)1) << "Expect 1 program in kernel cache"; - ExpectedReadWritePIProgram = KernelCache.begin()->first; + ExpectedReadWriteURProgram = KernelCache.begin()->first; // Expect no write or read yet. EXPECT_EQ(DeviceGlobalWriteCounter, 0u); @@ -735,13 +658,9 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeCopyTo) { EXPECT_EQ(MockDeviceGlobalImgScopeMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeMemcpyTo) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - false); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeMemcpyTo) { + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, false); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); Q.single_task([]() {}).wait(); @@ -751,7 +670,7 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeMemcpyTo) { CtxImpl->getKernelProgramCache().acquireKernelsPerProgramCache().get(); ASSERT_EQ(KernelCache.size(), (size_t)1) << "Expect 1 program in kernel cache"; - ExpectedReadWritePIProgram = KernelCache.begin()->first; + ExpectedReadWriteURProgram = KernelCache.begin()->first; // Expect no write or read yet. EXPECT_EQ(DeviceGlobalWriteCounter, 0u); @@ -768,13 +687,9 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeMemcpyTo) { EXPECT_EQ(MockDeviceGlobalImgScopeMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeCopyFrom) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - false); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeCopyFrom) { + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, false); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); Q.single_task([]() {}).wait(); @@ -784,7 +699,7 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeCopyFrom) { CtxImpl->getKernelProgramCache().acquireKernelsPerProgramCache().get(); ASSERT_EQ(KernelCache.size(), (size_t)1) << "Expect 1 program in kernel cache"; - ExpectedReadWritePIProgram = KernelCache.begin()->first; + ExpectedReadWriteURProgram = KernelCache.begin()->first; // Expect no write or read yet. EXPECT_EQ(DeviceGlobalWriteCounter, 0u); @@ -801,13 +716,9 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeCopyFrom) { EXPECT_EQ(MockDeviceGlobalImgScopeMem[1], Vals[1]); } -TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeMemcpyFrom) { - auto [Mock, Q] = CommonSetup([](sycl::unittest::PiMock &MockRef) { - MockRef.REDEFINE_AFTER_TEMPLATED(piextEnqueueDeviceGlobalVariableWrite, - false); - MockRef.REDEFINE_AFTER(piextEnqueueDeviceGlobalVariableRead); - }); - std::ignore = Mock; +TEST_F(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeMemcpyFrom) { + REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, false); + REDEFINE_AFTER(urEnqueueDeviceGlobalVariableRead); Q.single_task([]() {}).wait(); @@ -817,7 +728,7 @@ TEST(DeviceGlobalTest, DeviceGlobalImgScopeUseBeforeMemcpyFrom) { CtxImpl->getKernelProgramCache().acquireKernelsPerProgramCache().get(); ASSERT_EQ(KernelCache.size(), (size_t)1) << "Expect 1 program in kernel cache"; - ExpectedReadWritePIProgram = KernelCache.begin()->first; + ExpectedReadWriteURProgram = KernelCache.begin()->first; // Expect no write or read yet. EXPECT_EQ(DeviceGlobalWriteCounter, 0u); diff --git a/sycl/unittests/Extensions/DiscardEvent.cpp b/sycl/unittests/Extensions/DiscardEvent.cpp index dc729c74084e0..73a2436cc0d49 100644 --- a/sycl/unittests/Extensions/DiscardEvent.cpp +++ b/sycl/unittests/Extensions/DiscardEvent.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include +#include "sycl/platform.hpp" #include +#include #include @@ -21,51 +22,46 @@ namespace oneapiext = ext::oneapi::experimental; namespace { -thread_local size_t counter_piEnqueueKernelLaunch = 0; -inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *, pi_uint32, - const pi_event *, - pi_event *event) { - ++counter_piEnqueueKernelLaunch; - EXPECT_EQ(event, nullptr); - return PI_SUCCESS; +thread_local size_t counter_urEnqueueKernelLaunch = 0; +inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) { + ++counter_urEnqueueKernelLaunch; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pphEvent, nullptr); + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; +thread_local size_t counter_urEnqueueEventsWaitWithBarrier = 0; thread_local std::chrono::time_point - timestamp_piEnqueueEventsWaitWithBarrier; -inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, - const pi_event *, - pi_event *) { - ++counter_piEnqueueEventsWaitWithBarrier; - timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); - return PI_SUCCESS; + timestamp_urEnqueueEventsWaitWithBarrier; +inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *) { + ++counter_urEnqueueEventsWaitWithBarrier; + timestamp_urEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); + return UR_RESULT_SUCCESS; } class DiscardEventTests : public ::testing::Test { public: DiscardEventTests() - : Mock{}, Q{context(Mock.getPlatform()), default_selector_v, + : Mock{}, Q{context(sycl::platform()), default_selector_v, property::queue::in_order{}} {} protected: void SetUp() override { - counter_piEnqueueKernelLaunch = 0; - counter_piEnqueueEventsWaitWithBarrier = 0; + counter_urEnqueueKernelLaunch = 0; + counter_urEnqueueEventsWaitWithBarrier = 0; } - unittest::PiMock Mock; + unittest::UrMock<> Mock; queue Q; }; TEST_F(DiscardEventTests, BarrierBeforeHostTask) { // Special test for case where host_task need an event after, so a barrier is // enqueued to create a usable event. - Mock.redefine( - redefined_piEnqueueKernelLaunch); - Mock.redefineAfter( - after_piEnqueueEventsWaitWithBarrier); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); oneapiext::single_task>(Q, []() {}); @@ -75,9 +71,9 @@ TEST_F(DiscardEventTests, BarrierBeforeHostTask) { [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); }).wait(); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); - ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, size_t{1}); - ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrier, size_t{1}); + ASSERT_TRUE(HostTaskTimestamp > timestamp_urEnqueueEventsWaitWithBarrier); } } // namespace diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index 10c86342b3a4c..9b92c850c1f86 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -7,8 +7,9 @@ //===----------------------------------------------------------------------===// // Tests the behavior of enqueue free functions when events can be discarded. -#include +#include "sycl/platform.hpp" #include +#include #include @@ -23,129 +24,114 @@ namespace oneapiext = ext::oneapi::experimental; namespace { -inline pi_result after_piKernelGetInfo(pi_kernel kernel, - pi_kernel_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { +inline ur_result_t after_urKernelGetInfo(void *pParams) { + auto params = *static_cast(pParams); constexpr char MockKernel[] = "TestKernel"; - if (param_name == PI_KERNEL_INFO_FUNCTION_NAME) { - if (param_value) { - assert(param_value_size == sizeof(MockKernel)); - std::memcpy(param_value, MockKernel, sizeof(MockKernel)); + if (*params.ppropName == UR_KERNEL_INFO_FUNCTION_NAME) { + if (*params.ppPropValue) { + assert(*params.ppropSize == sizeof(MockKernel)); + std::memcpy(*params.ppPropValue, MockKernel, sizeof(MockKernel)); } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockKernel); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(MockKernel); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piEnqueueKernelLaunch = 0; -inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *, pi_uint32, - const pi_event *, - pi_event *event) { - ++counter_piEnqueueKernelLaunch; - EXPECT_EQ(event, nullptr); - return PI_SUCCESS; +thread_local size_t counter_urEnqueueKernelLaunch = 0; +inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) { + ++counter_urEnqueueKernelLaunch; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pphEvent, nullptr); + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piextUSMEnqueueMemcpy = 0; -inline pi_result redefined_piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *, - const void *, size_t, - pi_uint32, const pi_event *, - pi_event *event) { - ++counter_piextUSMEnqueueMemcpy; - EXPECT_EQ(event, nullptr); - return PI_SUCCESS; +thread_local size_t counter_urUSMEnqueueMemcpy = 0; +inline ur_result_t redefined_urUSMEnqueueMemcpy(void *pParams) { + ++counter_urUSMEnqueueMemcpy; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pphEvent, nullptr); + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piextUSMEnqueueFill = 0; -inline pi_result redefined_piextUSMEnqueueFill(pi_queue, void *, const void *, - size_t, size_t, pi_uint32, - const pi_event *, - pi_event *event) { - ++counter_piextUSMEnqueueFill; - EXPECT_EQ(event, nullptr); - return PI_SUCCESS; +thread_local size_t counter_urUSMEnqueueFill = 0; +inline ur_result_t redefined_urUSMEnqueueFill(void *pParams) { + ++counter_urUSMEnqueueFill; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pphEvent, nullptr); + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piextUSMEnqueuePrefetch = 0; -inline pi_result redefined_piextUSMEnqueuePrefetch(pi_queue, const void *, - size_t, - pi_usm_migration_flags, - pi_uint32, const pi_event *, - pi_event *event) { - ++counter_piextUSMEnqueuePrefetch; - EXPECT_EQ(event, nullptr); - return PI_SUCCESS; +thread_local size_t counter_urUSMEnqueuePrefetch = 0; +inline ur_result_t redefined_urUSMEnqueuePrefetch(void *pParams) { + ++counter_urUSMEnqueuePrefetch; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pphEvent, nullptr); + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piextUSMEnqueueMemAdvise = 0; -inline pi_result redefined_piextUSMEnqueueMemAdvise(pi_queue, const void *, - size_t, pi_mem_advice, - pi_event *event) { - ++counter_piextUSMEnqueueMemAdvise; - EXPECT_EQ(event, nullptr); - return PI_SUCCESS; +thread_local size_t counter_urUSMEnqueueMemAdvise = 0; +inline ur_result_t redefined_urUSMEnqueueMemAdvise(void *pParams) { + ++counter_urUSMEnqueueMemAdvise; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pphEvent, nullptr); + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; +thread_local size_t counter_urEnqueueEventsWaitWithBarrier = 0; thread_local std::chrono::time_point - timestamp_piEnqueueEventsWaitWithBarrier; -inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, - const pi_event *, - pi_event *) { - ++counter_piEnqueueEventsWaitWithBarrier; - timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); - return PI_SUCCESS; + timestamp_urEnqueueEventsWaitWithBarrier; +inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *pParams) { + ++counter_urEnqueueEventsWaitWithBarrier; + timestamp_urEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); + return UR_RESULT_SUCCESS; } class EnqueueFunctionsEventsTests : public ::testing::Test { public: EnqueueFunctionsEventsTests() - : Mock{}, Q{context(Mock.getPlatform()), default_selector_v, + : Mock{}, Q{context(sycl::platform()), default_selector_v, property::queue::in_order{}} {} protected: void SetUp() override { - counter_piEnqueueKernelLaunch = 0; - counter_piextUSMEnqueueMemcpy = 0; - counter_piextUSMEnqueueFill = 0; - counter_piextUSMEnqueuePrefetch = 0; - counter_piextUSMEnqueueMemAdvise = 0; - counter_piEnqueueEventsWaitWithBarrier = 0; + counter_urEnqueueKernelLaunch = 0; + counter_urUSMEnqueueMemcpy = 0; + counter_urUSMEnqueueFill = 0; + counter_urUSMEnqueuePrefetch = 0; + counter_urUSMEnqueueMemAdvise = 0; + counter_urEnqueueEventsWaitWithBarrier = 0; } - unittest::PiMock Mock; + unittest::UrMock<> Mock; queue Q; }; TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { oneapiext::single_task>(CGH, []() {}); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::single_task>(Q, []() {}); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); - Mock.redefineAfter(after_piKernelGetInfo); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback("urKernelGetInfo", + &after_urKernelGetInfo); auto KID = get_kernel_id>(); auto KB = get_kernel_bundle( @@ -157,13 +143,14 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { oneapiext::submit(Q, [&](handler &CGH) { oneapiext::single_task(CGH, Kernel); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); - Mock.redefineAfter(after_piKernelGetInfo); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback("urKernelGetInfo", + &after_urKernelGetInfo); auto KID = get_kernel_id>(); auto KB = get_kernel_bundle( @@ -175,33 +162,34 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { oneapiext::single_task(Q, Kernel); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { oneapiext::parallel_for>(CGH, range<1>{32}, [](item<1>) {}); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::parallel_for>(Q, range<1>{32}, [](item<1>) {}); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); - Mock.redefineAfter(after_piKernelGetInfo); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback("urKernelGetInfo", + &after_urKernelGetInfo); auto KID = get_kernel_id>(); auto KB = get_kernel_bundle( @@ -214,13 +202,14 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { oneapiext::parallel_for(CGH, range<1>{32}, Kernel); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); - Mock.redefineAfter(after_piKernelGetInfo); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback("urKernelGetInfo", + &after_urKernelGetInfo); auto KID = get_kernel_id>(); auto KB = get_kernel_bundle( @@ -232,35 +221,36 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { oneapiext::parallel_for(Q, range<1>{32}, Kernel); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { oneapiext::nd_launch>( CGH, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::nd_launch>(Q, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); - Mock.redefineAfter(after_piKernelGetInfo); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback("urKernelGetInfo", + &after_urKernelGetInfo); auto KID = get_kernel_id>(); auto KB = get_kernel_bundle( @@ -273,13 +263,14 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { oneapiext::nd_launch(CGH, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); }); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { - Mock.redefine( - redefined_piEnqueueKernelLaunch); - Mock.redefineAfter(after_piKernelGetInfo); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback("urKernelGetInfo", + &after_urKernelGetInfo); auto KID = get_kernel_id>(); auto KB = get_kernel_bundle( @@ -291,12 +282,12 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemcpy); + mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", + &redefined_urUSMEnqueueMemcpy); constexpr size_t N = 1024; int *Src = malloc_shared(N, Q); @@ -306,15 +297,15 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { oneapiext::memcpy(CGH, Src, Dst, sizeof(int) * N); }); - ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); free(Src, Q); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemcpy); + mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", + &redefined_urUSMEnqueueMemcpy); constexpr size_t N = 1024; int *Src = malloc_shared(N, Q); @@ -322,15 +313,15 @@ TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) { oneapiext::memcpy(Q, Src, Dst, sizeof(int) * N); - ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); free(Src, Q); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemcpy); + mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", + &redefined_urUSMEnqueueMemcpy); constexpr size_t N = 1024; int *Src = malloc_shared(N, Q); @@ -339,15 +330,15 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) { oneapiext::submit(Q, [&](handler &CGH) { oneapiext::copy(CGH, Dst, Src, N); }); - ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); free(Src, Q); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemcpy); + mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", + &redefined_urUSMEnqueueMemcpy); constexpr size_t N = 1024; int *Src = malloc_shared(N, Q); @@ -355,15 +346,15 @@ TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { oneapiext::memcpy(Q, Dst, Src, N); - ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); free(Src, Q); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueFill); + mock::getCallbacks().set_replace_callback("urEnqueueUSMFill", + &redefined_urUSMEnqueueFill); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); @@ -372,28 +363,28 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { oneapiext::memset(CGH, Dst, int{1}, sizeof(int) * N); }); - ASSERT_EQ(counter_piextUSMEnqueueFill, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueFill); + mock::getCallbacks().set_replace_callback("urEnqueueUSMFill", + &redefined_urUSMEnqueueFill); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); oneapiext::memset(Q, Dst, 1, sizeof(int) * N); - ASSERT_EQ(counter_piextUSMEnqueueFill, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueuePrefetch); + mock::getCallbacks().set_replace_callback("urEnqueueUSMPrefetch", + redefined_urUSMEnqueuePrefetch); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); @@ -401,28 +392,28 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) { oneapiext::submit( Q, [&](handler &CGH) { oneapiext::prefetch(CGH, Dst, sizeof(int) * N); }); - ASSERT_EQ(counter_piextUSMEnqueuePrefetch, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueuePrefetch); + mock::getCallbacks().set_replace_callback("urEnqueueUSMPrefetch", + redefined_urUSMEnqueuePrefetch); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); oneapiext::prefetch(Q, Dst, sizeof(int) * N); - ASSERT_EQ(counter_piextUSMEnqueuePrefetch, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemAdvise); + mock::getCallbacks().set_replace_callback("urEnqueueUSMAdvise", + redefined_urUSMEnqueueMemAdvise); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); @@ -431,21 +422,21 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { oneapiext::mem_advise(CGH, Dst, sizeof(int) * N, 1); }); - ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemAdvise); + mock::getCallbacks().set_replace_callback("urEnqueueUSMAdvise", + &redefined_urUSMEnqueueMemAdvise); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); oneapiext::mem_advise(Q, Dst, sizeof(int) * N, 1); - ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, size_t{1}); + ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); free(Dst, Q); } @@ -453,10 +444,10 @@ TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { // Special test for case where host_task need an event after, so a barrier is // enqueued to create a usable event. - Mock.redefine( - redefined_piEnqueueKernelLaunch); - Mock.redefineAfter( - after_piEnqueueEventsWaitWithBarrier); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); oneapiext::single_task>(Q, []() {}); @@ -466,9 +457,9 @@ TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); }).wait(); - ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); - ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, size_t{1}); - ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrier, size_t{1}); + ASSERT_TRUE(HostTaskTimestamp > timestamp_urEnqueueEventsWaitWithBarrier); } } // namespace diff --git a/sycl/unittests/Extensions/FPGADeviceSelectors.cpp b/sycl/unittests/Extensions/FPGADeviceSelectors.cpp index 289c75595b5ac..01b1c035d18ea 100644 --- a/sycl/unittests/Extensions/FPGADeviceSelectors.cpp +++ b/sycl/unittests/Extensions/FPGADeviceSelectors.cpp @@ -10,8 +10,8 @@ #include #include -#include #include +#include #include @@ -21,77 +21,73 @@ static constexpr char HARDWARE_PLATFORM_NAME[] = "Intel(R) FPGA SDK for OpenCL(TM)"; template struct RedefTemplatedWrapper { - static pi_result redefinedPlatformGetInfo(pi_platform platform, - pi_platform_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_PLATFORM_INFO_NAME: { + static ur_result_t redefinedPlatformGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_PLATFORM_INFO_NAME: { size_t PlatformNameLen = strlen(PlatformName) + 1; - if (param_value) { - assert(param_value_size == PlatformNameLen); - std::memcpy(param_value, PlatformName, PlatformNameLen); + if (*params.ppPropValue) { + assert(*params.ppropSize == PlatformNameLen); + std::memcpy(*params.ppPropValue, PlatformName, PlatformNameLen); } - if (param_value_size_ret) - *param_value_size_ret = PlatformNameLen; - return PI_SUCCESS; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = PlatformNameLen; + return UR_RESULT_SUCCESS; } - case PI_EXT_PLATFORM_INFO_BACKEND: { - constexpr auto MockPlatformBackend = PI_EXT_PLATFORM_BACKEND_UNKNOWN; - if (param_value) { - std::memcpy(param_value, &MockPlatformBackend, + case UR_PLATFORM_INFO_BACKEND: { + constexpr auto MockPlatformBackend = UR_PLATFORM_BACKEND_UNKNOWN; + if (*params.ppPropValue) { + std::memcpy(*params.ppPropValue, &MockPlatformBackend, sizeof(MockPlatformBackend)); } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockPlatformBackend); - return PI_SUCCESS; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(MockPlatformBackend); + return UR_RESULT_SUCCESS; } default: - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } } }; -static pi_result redefinedDeviceGetInfo(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { +static ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); constexpr char MockDeviceName[] = "Mock FPGA device"; - switch (param_name) { - case PI_DEVICE_INFO_TYPE: { - if (param_value) - *static_cast<_pi_device_type *>(param_value) = PI_DEVICE_TYPE_ACC; - if (param_value_size_ret) - *param_value_size_ret = sizeof(PI_DEVICE_TYPE_ACC); - return PI_SUCCESS; + switch (*params.ppropName) { + case UR_DEVICE_INFO_TYPE: { + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = + UR_DEVICE_TYPE_FPGA; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(UR_DEVICE_TYPE_FPGA); + return UR_RESULT_SUCCESS; } - case PI_DEVICE_INFO_NAME: { - if (param_value) { - assert(param_value_size == sizeof(MockDeviceName)); - std::memcpy(param_value, MockDeviceName, sizeof(MockDeviceName)); + case UR_DEVICE_INFO_NAME: { + if (*params.ppPropValue) { + assert(*params.ppropSize == sizeof(MockDeviceName)); + std::memcpy(*params.ppPropValue, MockDeviceName, sizeof(MockDeviceName)); } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockDeviceName); - return PI_SUCCESS; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(MockDeviceName); + return UR_RESULT_SUCCESS; } // Mock FPGA has no sub-devices - case PI_DEVICE_INFO_PARTITION_PROPERTIES: { - if (param_value_size_ret) { - *param_value_size_ret = 0; + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 0; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } - case PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { - assert(param_value_size == sizeof(pi_device_affinity_domain)); - if (param_value) { - *static_cast(param_value) = 0; + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { + assert(*params.ppropSize == sizeof(ur_device_affinity_domain_flags_t)); + if (*params.ppPropValue) { + *static_cast(*params.ppPropValue) = + 0; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } default: - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } } @@ -99,11 +95,13 @@ TEST(FPGADeviceSelectorsTest, FPGASelectorTest) { using namespace sycl::detail; using namespace sycl::unittest; - sycl::unittest::PiMock Mock; - Mock.redefine(redefinedDeviceGetInfo); - Mock.redefine( - RedefTemplatedWrapper::redefinedPlatformGetInfo); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_replace_callback( + "urPlatformGetInfo", + &RedefTemplatedWrapper::redefinedPlatformGetInfo); + sycl::platform Plt = sycl::platform(); sycl::context Ctx{Plt.get_devices()}; sycl::queue FPGAQueue{Ctx, sycl::ext::intel::fpga_selector_v}; @@ -123,11 +121,14 @@ TEST(FPGADeviceSelectorsTest, FPGAEmulatorSelectorTest) { using namespace sycl::detail; using namespace sycl::unittest; - sycl::unittest::PiMock Mock; - Mock.redefine(redefinedDeviceGetInfo); - Mock.redefine( - RedefTemplatedWrapper::redefinedPlatformGetInfo); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_replace_callback( + "urPlatformGetInfo", + &RedefTemplatedWrapper< + EMULATION_PLATFORM_NAME>::redefinedPlatformGetInfo); + sycl::platform Plt = sycl::platform(); sycl::context Ctx{Plt.get_devices()}; sycl::queue EmuFPGAQueue{Ctx, sycl::ext::intel::fpga_emulator_selector_v}; @@ -150,11 +151,13 @@ TEST(FPGADeviceSelectorsTest, FPGASimulatorSelectorTest) { constexpr char INTELFPGA_ENV[] = "CL_CONTEXT_MPSIM_DEVICE_INTELFPGA"; ScopedEnvVar EnvVar(INTELFPGA_ENV, nullptr, []() {}); - sycl::unittest::PiMock Mock; - Mock.redefine(redefinedDeviceGetInfo); - Mock.redefine( - RedefTemplatedWrapper::redefinedPlatformGetInfo); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_replace_callback( + "urPlatformGetInfo", + &RedefTemplatedWrapper::redefinedPlatformGetInfo); + sycl::platform Plt = sycl::platform(); sycl::context Ctx{Plt.get_devices()}; sycl::queue SimuFPGAQueue{Ctx, sycl::ext::intel::fpga_simulator_selector_v}; @@ -182,8 +185,8 @@ TEST(FPGADeviceSelectorsTest, NegativeFPGASelectorTest) { ScopedEnvVar EnvVar(INTELFPGA_ENV, nullptr, []() {}); // Do not redefine any APIs. We want it to fail for all. - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::context Ctx{Plt.get_devices()}; try { diff --git a/sycl/unittests/Extensions/KernelProperties.cpp b/sycl/unittests/Extensions/KernelProperties.cpp index 31aa984726b0a..39117a6e391b8 100644 --- a/sycl/unittests/Extensions/KernelProperties.cpp +++ b/sycl/unittests/Extensions/KernelProperties.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -#include +#include "sycl/platform.hpp" +#include "ur_mock_helpers.hpp" +#include #include #include @@ -19,27 +21,24 @@ namespace oneapiext = sycl::ext::oneapi::experimental; namespace { -inline pi_result after_piKernelGetInfo(pi_kernel kernel, - pi_kernel_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { +inline ur_result_t after_urKernelGetInfo(void* pParams) { + auto params = *static_cast(pParams); constexpr char MockKernel[] = "TestKernel"; - if (param_name == PI_KERNEL_INFO_FUNCTION_NAME) { - if (param_value) { - assert(param_value_size == sizeof(MockKernel)); - std::memcpy(param_value, MockKernel, sizeof(MockKernel)); + if (*params.ppropName == UR_KERNEL_INFO_FUNCTION_NAME) { + if (*params.ppPropValue) { + assert(*params.ppropSize == sizeof(MockKernel)); + std::memcpy(*params.ppPropValue, MockKernel, sizeof(MockKernel)); } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockKernel); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(MockKernel); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class KernelPropertiesTests : public ::testing::Test { public: KernelPropertiesTests() - : Mock{}, Q{sycl::context(Mock.getPlatform()), sycl::default_selector_v} { + : Mock{}, Q{sycl::context(sycl::platform()), sycl::default_selector_v} { } inline sycl::kernel GetTestKernel() { @@ -66,11 +65,10 @@ class KernelPropertiesTests : public ::testing::Test { protected: void SetUp() override { - Mock.redefineAfter( - after_piKernelGetInfo); + mock::getCallbacks().set_after_callback("urKernelGetInfo", after_urKernelGetInfo); } - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; sycl::queue Q; }; diff --git a/sycl/unittests/Extensions/OneAPIProd.cpp b/sycl/unittests/Extensions/OneAPIProd.cpp index 732c71dc86f8b..6b50f760c4154 100644 --- a/sycl/unittests/Extensions/OneAPIProd.cpp +++ b/sycl/unittests/Extensions/OneAPIProd.cpp @@ -8,7 +8,7 @@ #include -#include +#include #include @@ -16,15 +16,16 @@ using namespace sycl; static bool QueueFlushed = false; -static pi_result redefinedQueueFlush(pi_queue Queue) { +static ur_result_t redefinedQueueFlush(void *) { QueueFlushed = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(OneAPIProdTest, PiQueueFlush) { - sycl::unittest::PiMock Mock(backend::ext_oneapi_level_zero); - sycl::platform Plt = Mock.getPlatform(); - Mock.redefine(redefinedQueueFlush); + sycl::unittest::UrMock Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_replace_callback("urQueueFlush", + &redefinedQueueFlush); context Ctx{Plt}; queue Queue{Ctx, default_selector_v}; Queue.ext_oneapi_prod(); diff --git a/sycl/unittests/Extensions/ProfilingTag.cpp b/sycl/unittests/Extensions/ProfilingTag.cpp index f88fad082aae7..7b18b9ba00e4e 100644 --- a/sycl/unittests/Extensions/ProfilingTag.cpp +++ b/sycl/unittests/Extensions/ProfilingTag.cpp @@ -8,99 +8,92 @@ #include -#include +#include #include -template -pi_result after_piDeviceGetInfo(pi_device, pi_device_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT) { - if (param_value) - *static_cast(param_value) = TimestampSupport; - if (param_value_size_ret) - *param_value_size_ret = sizeof(TimestampSupport); +template +ur_result_t after_urDeviceGetInfo(void *pParams) { + auto &Params = *reinterpret_cast(pParams); + if (*Params.ppropName == UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP) { + if (Params.ppPropValue) + *static_cast(*Params.ppPropValue) = TimestampSupport; + if (*Params.ppPropSizeRet) + **Params.ppPropSizeRet = sizeof(TimestampSupport); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piEnqueueTimestampRecordingExp = 0; -inline pi_result after_piEnqueueTimestampRecordingExp(pi_queue, pi_bool, - pi_uint32, - const pi_event *, - pi_event *) { - ++counter_piEnqueueTimestampRecordingExp; - return PI_SUCCESS; +thread_local size_t counter_urEnqueueTimestampRecordingExp = 0; +inline ur_result_t after_urEnqueueTimestampRecordingExp(void *) { + ++counter_urEnqueueTimestampRecordingExp; + return UR_RESULT_SUCCESS; } -thread_local std::optional LatestProfilingQuery; -inline pi_result after_piEventGetProfilingInfo(pi_event, - pi_profiling_info param_name, - size_t, void *, size_t *) { - LatestProfilingQuery = param_name; - return PI_SUCCESS; +thread_local std::optional LatestProfilingQuery; +inline ur_result_t after_urEventGetProfilingInfo(void *pParams) { + auto &Params = + *reinterpret_cast(pParams); + LatestProfilingQuery = *Params.ppropName; + return UR_RESULT_SUCCESS; } -thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; -inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, - const pi_event *, - pi_event *) { - ++counter_piEnqueueEventsWaitWithBarrier; - return PI_SUCCESS; +thread_local size_t counter_urEnqueueEventsWaitWithBarrier = 0; +inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *) { + ++counter_urEnqueueEventsWaitWithBarrier; + return UR_RESULT_SUCCESS; } class ProfilingTagTest : public ::testing::Test { public: - ProfilingTagTest() : Mock{}, Plt{Mock.getPlatform()} {} + ProfilingTagTest() : Mock{} {} protected: void SetUp() override { - counter_piEnqueueTimestampRecordingExp = 0; - counter_piEnqueueEventsWaitWithBarrier = 0; + counter_urEnqueueTimestampRecordingExp = 0; + counter_urEnqueueEventsWaitWithBarrier = 0; LatestProfilingQuery = std::nullopt; } protected: - sycl::unittest::PiMock Mock; - sycl::platform Plt; + sycl::unittest::UrMock<> Mock; }; TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) { - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piEnqueueTimestampRecordingExp); - Mock.redefineAfter( - after_piEventGetProfilingInfo); - - sycl::context Ctx{Plt}; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); + mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", + &after_urEventGetProfilingInfo); + + sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, sycl::default_selector_v}; sycl::device Dev = Queue.get_device(); ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag)); sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); - ASSERT_EQ(size_t{1}, counter_piEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value()); - ASSERT_EQ(*LatestProfilingQuery, PI_PROFILING_INFO_COMMAND_START); + ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_START); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value()); - ASSERT_EQ(*LatestProfilingQuery, PI_PROFILING_INFO_COMMAND_END); + ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END); } TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) { - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piEnqueueTimestampRecordingExp); - Mock.redefineAfter( - after_piEventGetProfilingInfo); - - sycl::context Ctx{Plt}; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); + mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", + &after_urEventGetProfilingInfo); + + sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, sycl::default_selector_v, {sycl::property::queue::enable_profiling()}}; @@ -109,26 +102,26 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) { ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag)); sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); - ASSERT_EQ(size_t{1}, counter_piEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value()); - ASSERT_EQ(*LatestProfilingQuery, PI_PROFILING_INFO_COMMAND_START); + ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_START); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value()); - ASSERT_EQ(*LatestProfilingQuery, PI_PROFILING_INFO_COMMAND_END); + ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END); } TEST_F(ProfilingTagTest, ProfilingTagFallbackDefaultQueue) { - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piEnqueueTimestampRecordingExp); - Mock.redefineAfter( - after_piEventGetProfilingInfo); - - sycl::context Ctx{Plt}; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); + mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", + &after_urEventGetProfilingInfo); + + sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, sycl::default_selector_v}; sycl::device Dev = Queue.get_device(); @@ -143,14 +136,14 @@ TEST_F(ProfilingTagTest, ProfilingTagFallbackDefaultQueue) { } TEST_F(ProfilingTagTest, ProfilingTagFallbackProfilingQueue) { - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piEnqueueTimestampRecordingExp); - Mock.redefineAfter( - after_piEnqueueEventsWaitWithBarrier); - - sycl::context Ctx{Plt}; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); + + sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, sycl::default_selector_v, {sycl::property::queue::enable_profiling()}}; @@ -159,6 +152,6 @@ TEST_F(ProfilingTagTest, ProfilingTagFallbackProfilingQueue) { ASSERT_FALSE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag)); sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); - ASSERT_EQ(size_t{0}, counter_piEnqueueTimestampRecordingExp); - ASSERT_EQ(size_t{1}, counter_piEnqueueEventsWaitWithBarrier); + ASSERT_EQ(size_t{0}, counter_urEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{1}, counter_urEnqueueEventsWaitWithBarrier); } diff --git a/sycl/unittests/Extensions/USMMemcpy2D.cpp b/sycl/unittests/Extensions/USMMemcpy2D.cpp index 9644224310f8d..18d45736e54cb 100644 --- a/sycl/unittests/Extensions/USMMemcpy2D.cpp +++ b/sycl/unittests/Extensions/USMMemcpy2D.cpp @@ -11,8 +11,8 @@ #include #include -#include -#include +#include +#include #include @@ -125,18 +125,18 @@ struct KernelInfo> } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage generateMemopsImage() { +static sycl::unittest::UrImage generateMemopsImage() { using namespace sycl::unittest; - PiPropertySet PropSet; + UrPropertySet PropSet; std::vector Bin{10, 11, 12, 13, 14, 15}; // Random data - PiArray Entries = makeEmptyKernels( + UrArray Entries = makeEmptyKernels( {USMFillHelperKernelNameLong, USMFillHelperKernelNameChar, USMMemcpyHelperKernelNameLong, USMMemcpyHelperKernelNameChar}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -148,262 +148,201 @@ static sycl::unittest::PiImage generateMemopsImage() { } namespace { -sycl::unittest::PiImage Imgs[] = {generateMemopsImage()}; -sycl::unittest::PiImageArray<1> ImgArray{Imgs}; +sycl::unittest::UrImage Imgs[] = {generateMemopsImage()}; +sycl::unittest::UrImageArray<1> ImgArray{Imgs}; -size_t LastMemopsQuery = 0; +ur_context_info_t LastMemopsQuery = UR_CONTEXT_INFO_NUM_DEVICES; -struct Fill2DStruct { - pi_queue queue; - void *ptr; +struct Fill2dParams { + ur_queue_handle_t hQueue; + void *pMem; size_t pitch; - size_t pattern_size; - const void *pattern; + size_t patternSize; + std::vector pattern; size_t width; size_t height; - pi_uint32 num_events_in_waitlist; - const pi_event *events_waitlist; - pi_event *event; } LastFill2D; -struct Memset2DStruct { - pi_queue queue; - void *ptr; - size_t pitch; - int value; - size_t width; - size_t height; - pi_uint32 num_events_in_waitlist; - const pi_event *events_waitlist; - pi_event *event; -} LastMemset2D; - -struct Memcpy2DStruct { - pi_queue queue; - pi_bool blocking; - void *dst_ptr; - size_t dst_pitch; - const void *src_ptr; - size_t src_pitch; +struct Memcpy2dParams { + ur_queue_handle_t hQueue; + void *pDst; + size_t dstPitch; + const void *pSrc; + size_t srcPitch; size_t width; size_t height; - pi_uint32 num_events_in_waitlist; - const pi_event *events_waitlist; - pi_event *event; } LastMemcpy2D; -std::map KernelToNameMap; +std::map KernelToNameMap; template -pi_result after_piContextGetInfo(pi_context context, pi_context_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT: - LastMemopsQuery = param_name; - if (param_value) - *static_cast(param_value) = MemfillSupported; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_bool); - return PI_SUCCESS; - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: - LastMemopsQuery = param_name; - if (param_value) - *static_cast(param_value) = MemsetSupported; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_bool); - return PI_SUCCESS; - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: - LastMemopsQuery = param_name; - if (param_value) - *static_cast(param_value) = MemcpySupported; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_bool); - return PI_SUCCESS; +ur_result_t after_urContextGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: + LastMemopsQuery = *params.ppropName; + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = MemfillSupported; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_bool_t); + return UR_RESULT_SUCCESS; + case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: + LastMemopsQuery = *params.ppropName; + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = MemcpySupported; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_bool_t); + return UR_RESULT_SUCCESS; default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result after_piDeviceGetInfo(pi_device device, pi_device_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES: - if (param_value) { - assert(param_value_size == 3 * sizeof(size_t)); - size_t *Ptr = static_cast(param_value); +ur_result_t after_urDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: + if (*params.ppPropValue) { + assert(*params.ppropSize == 3 * sizeof(size_t)); + size_t *Ptr = static_cast(*params.ppPropValue); Ptr[0] = 32; Ptr[1] = 32; Ptr[2] = 32; } - if (param_value_size_ret) - *param_value_size_ret = 3 * sizeof(size_t); - return PI_SUCCESS; - case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: - if (param_value) { - assert(param_value_size == sizeof(pi_uint32)); - *static_cast(param_value) = 256; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 3 * sizeof(size_t); + return UR_RESULT_SUCCESS; + case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: + if (*params.ppPropValue) { + assert(*params.ppropSize == sizeof(uint32_t)); + *static_cast(*params.ppPropValue) = 256; } - if (param_value_size_ret) - *param_value_size_ret = 3 * sizeof(size_t); - return PI_SUCCESS; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 3 * sizeof(size_t); + return UR_RESULT_SUCCESS; default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -template -pi_result after_piextUSMGetMemAllocInfo(pi_context, const void *, - pi_mem_alloc_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_MEM_ALLOC_TYPE: { - if (param_value) { - assert(param_value_size == sizeof(pi_usm_type)); - *static_cast(param_value) = USMType; +template +ur_result_t after_urUSMGetMemAllocInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_USM_ALLOC_INFO_TYPE: { + if (*params.ppPropValue) { + assert(*params.ppropSize == sizeof(ur_usm_type_t)); + *static_cast(*params.ppPropValue) = USMType; } - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_usm_type); - return PI_SUCCESS; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_usm_type_t); + return UR_RESULT_SUCCESS; } default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefine_piextUSMEnqueueFill2D(pi_queue queue, void *ptr, - size_t pitch, size_t pattern_size, - const void *pattern, size_t width, - size_t height, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - LastFill2D = - Fill2DStruct{queue, ptr, pitch, pattern_size, - pattern, width, height, num_events_in_waitlist, - events_waitlist, event}; - return PI_SUCCESS; +ur_result_t redefine_urEnqueueUSMFill2D(void *pParams) { + auto params = *static_cast(pParams); + LastFill2D = Fill2dParams{*params.phQueue, + *params.ppMem, + *params.ppitch, + *params.ppatternSize, + std::vector(*params.ppatternSize), + *params.pwidth, + *params.pheight}; + std::memcpy(LastFill2D.pattern.data(), *params.ppPattern, + *params.ppatternSize); + return UR_RESULT_SUCCESS; } -pi_result redefine_piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, - size_t pitch, int value, - size_t width, size_t height, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - LastMemset2D = Memset2DStruct{queue, - ptr, - pitch, - value, - width, - height, - num_events_in_waitlist, - events_waitlist, - event}; - return PI_SUCCESS; +ur_result_t redefine_urEnqueueUSMMemcpy2D(void *pParams) { + auto params = *static_cast(pParams); + LastMemcpy2D = Memcpy2dParams{ + *params.phQueue, *params.ppDst, *params.pdstPitch, *params.ppSrc, + *params.psrcPitch, *params.pwidth, *params.pheight}; + return UR_RESULT_SUCCESS; } -pi_result redefine_piextUSMEnqueueMemcpy2D( - pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, - const void *src_ptr, size_t src_pitch, size_t width, size_t height, - pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, - pi_event *event) { - LastMemcpy2D = - Memcpy2DStruct{queue, blocking, dst_ptr, - dst_pitch, src_ptr, src_pitch, - width, height, num_events_in_waitlist, - events_waitlist, event}; - return PI_SUCCESS; -} - -pi_result after_piKernelCreate(pi_program, const char *kernel_name, - pi_kernel *ret_kernel) { - KernelToNameMap[*ret_kernel] = kernel_name; - return PI_SUCCESS; +ur_result_t after_urKernelCreate(void *pParams) { + auto params = *static_cast(pParams); + KernelToNameMap[**params.pphKernel] = *params.ppKernelName; + return UR_RESULT_SUCCESS; } std::string LastEnqueuedKernel; -pi_result after_piEnqueueKernelLaunch(pi_queue, pi_kernel kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *, pi_uint32, - const pi_event *, pi_event *) { - auto KernelIt = KernelToNameMap.find(kernel); +ur_result_t after_urEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); + auto KernelIt = KernelToNameMap.find(*params.phKernel); EXPECT_TRUE(KernelIt != KernelToNameMap.end()); LastEnqueuedKernel = KernelIt->second; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } } // namespace // Tests that the right APIs are called when they are reported as supported // natively. TEST(USMMemcpy2DTest, USMMemops2DSupported) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q{Plt.get_devices()[0]}; std::shared_ptr QueueImpl = sycl::detail::getSyclObjImpl(Q); - Mock.redefineAfter( - after_piContextGetInfo); - Mock.redefine( - redefine_piextUSMEnqueueFill2D); - Mock.redefine( - redefine_piextUSMEnqueueMemset2D); - Mock.redefine( - redefine_piextUSMEnqueueMemcpy2D); - Mock.redefineAfter( - after_piextUSMGetMemAllocInfo); + mock::getCallbacks().set_after_callback( + "urContextGetInfo", &after_urContextGetInfo); + mock::getCallbacks().set_replace_callback("urEnqueueUSMFill2D", + &redefine_urEnqueueUSMFill2D); + mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy2D", + &redefine_urEnqueueUSMMemcpy2D); + mock::getCallbacks().set_after_callback( + "urUSMGetMemAllocInfo", &after_urUSMGetMemAllocInfo); long *Ptr1 = sycl::malloc_device(10, Q); long *Ptr2 = sycl::malloc_device(16, Q); Q.ext_oneapi_fill2d(Ptr1, 5, 42l, 4, 2); - EXPECT_TRUE(LastMemopsQuery == PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT); - EXPECT_EQ(LastFill2D.queue, (pi_queue)QueueImpl->getHandleRef()); - EXPECT_EQ(LastFill2D.ptr, (void *)Ptr1); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_FILL2D_SUPPORT); + EXPECT_EQ(LastFill2D.hQueue, (ur_queue_handle_t)QueueImpl->getHandleRef()); + EXPECT_EQ(LastFill2D.pMem, (void *)Ptr1); EXPECT_EQ(LastFill2D.pitch, (size_t)5); - EXPECT_EQ(LastFill2D.pattern_size, sizeof(long)); + EXPECT_EQ(LastFill2D.patternSize, sizeof(long)); EXPECT_EQ(LastFill2D.width, (size_t)4); EXPECT_EQ(LastFill2D.height, (size_t)2); Q.ext_oneapi_memset2d(Ptr1, 5 * sizeof(long), 123, 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT); - EXPECT_EQ(LastMemset2D.queue, (pi_queue)QueueImpl->getHandleRef()); - EXPECT_EQ(LastMemset2D.ptr, (void *)Ptr1); - EXPECT_EQ(LastMemset2D.pitch, (size_t)5 * sizeof(long)); - EXPECT_EQ(LastMemset2D.value, 123); - EXPECT_EQ(LastMemset2D.width, (size_t)4 * sizeof(long)); - EXPECT_EQ(LastMemset2D.height, (size_t)2); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_FILL2D_SUPPORT); + EXPECT_EQ(LastFill2D.hQueue, (ur_queue_handle_t)QueueImpl->getHandleRef()); + EXPECT_EQ(LastFill2D.pMem, (void *)Ptr1); + EXPECT_EQ(LastFill2D.pitch, (size_t)5 * sizeof(long)); + EXPECT_EQ(LastFill2D.pattern[0], 123); + EXPECT_EQ(LastFill2D.width, (size_t)4 * sizeof(long)); + EXPECT_EQ(LastFill2D.height, (size_t)2); Q.ext_oneapi_memcpy2d(Ptr1, 5 * sizeof(long), Ptr2, 8 * sizeof(long), 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); - EXPECT_EQ(LastMemcpy2D.queue, (pi_queue)QueueImpl->getHandleRef()); - EXPECT_EQ(LastMemcpy2D.dst_ptr, (void *)Ptr1); - EXPECT_EQ(LastMemcpy2D.dst_pitch, (size_t)5 * sizeof(long)); - EXPECT_EQ(LastMemcpy2D.src_ptr, (void *)Ptr2); - EXPECT_EQ(LastMemcpy2D.src_pitch, (size_t)8 * sizeof(long)); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_EQ(LastMemcpy2D.hQueue, (ur_queue_handle_t)QueueImpl->getHandleRef()); + EXPECT_EQ(LastMemcpy2D.pDst, (void *)Ptr1); + EXPECT_EQ(LastMemcpy2D.dstPitch, (size_t)5 * sizeof(long)); + EXPECT_EQ(LastMemcpy2D.pSrc, (void *)Ptr2); + EXPECT_EQ(LastMemcpy2D.srcPitch, (size_t)8 * sizeof(long)); EXPECT_EQ(LastMemcpy2D.width, (size_t)4 * sizeof(long)); EXPECT_EQ(LastMemcpy2D.height, (size_t)2); Q.ext_oneapi_copy2d(Ptr1, 5, Ptr2, 8, 4, 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); - EXPECT_EQ(LastMemcpy2D.queue, (pi_queue)QueueImpl->getHandleRef()); - EXPECT_EQ(LastMemcpy2D.dst_ptr, (void *)Ptr2); - EXPECT_EQ(LastMemcpy2D.dst_pitch, (size_t)8 * sizeof(long)); - EXPECT_EQ(LastMemcpy2D.src_ptr, (void *)Ptr1); - EXPECT_EQ(LastMemcpy2D.src_pitch, (size_t)5 * sizeof(long)); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_EQ(LastMemcpy2D.hQueue, (ur_queue_handle_t)QueueImpl->getHandleRef()); + EXPECT_EQ(LastMemcpy2D.pDst, (void *)Ptr2); + EXPECT_EQ(LastMemcpy2D.dstPitch, (size_t)8 * sizeof(long)); + EXPECT_EQ(LastMemcpy2D.pSrc, (void *)Ptr1); + EXPECT_EQ(LastMemcpy2D.srcPitch, (size_t)5 * sizeof(long)); EXPECT_EQ(LastMemcpy2D.width, (size_t)4 * sizeof(long)); EXPECT_EQ(LastMemcpy2D.height, (size_t)2); } @@ -411,207 +350,189 @@ TEST(USMMemcpy2DTest, USMMemops2DSupported) { // Tests that the right fallback kernels are called when a backend does not // support the APIs natively. TEST(USMMemcpy2DTest, USMMemops2DUnsupported) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q{Plt.get_devices()[0]}; - Mock.redefineAfter( - after_piContextGetInfo); - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piKernelCreate); - Mock.redefineAfter( - after_piEnqueueKernelLaunch); - Mock.redefineAfter( - after_piextUSMGetMemAllocInfo); + mock::getCallbacks().set_after_callback( + "urContextGetInfo", &after_urContextGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback("urKernelCreate", + &after_urKernelCreate); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &after_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback( + "urUSMGetMemAllocInfo", &after_urUSMGetMemAllocInfo); long *Ptr1 = sycl::malloc_device(10, Q); long *Ptr2 = sycl::malloc_device(16, Q); Q.ext_oneapi_fill2d(Ptr1, 5, 42l, 4, 2); - EXPECT_TRUE(LastMemopsQuery == PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_FILL2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMFillHelperKernelNameLong); Q.ext_oneapi_memset2d(Ptr1, 5 * sizeof(long), 123, 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_FILL2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMFillHelperKernelNameChar); Q.ext_oneapi_memcpy2d(Ptr1, 5 * sizeof(long), Ptr2, 8 * sizeof(long), 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMMemcpyHelperKernelNameChar); Q.ext_oneapi_copy2d(Ptr1, 5, Ptr2, 8, 4, 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMMemcpyHelperKernelNameLong); } // Tests that the right paths are taken when the backend only supports native // USM fill. TEST(USMMemcpy2DTest, USMFillSupportedOnly) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q{Plt.get_devices()[0]}; std::shared_ptr QueueImpl = sycl::detail::getSyclObjImpl(Q); - Mock.redefineAfter( - after_piContextGetInfo); - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piKernelCreate); - Mock.redefineAfter( - after_piEnqueueKernelLaunch); - Mock.redefine( - redefine_piextUSMEnqueueFill2D); - Mock.redefineAfter( - after_piextUSMGetMemAllocInfo); + mock::getCallbacks().set_after_callback( + "urContextGetInfo", &after_urContextGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback("urKernelCreate", + &after_urKernelCreate); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &after_urEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urEnqueueUSMFill2D", + &redefine_urEnqueueUSMFill2D); + mock::getCallbacks().set_after_callback( + "urUSMGetMemAllocInfo", &after_urUSMGetMemAllocInfo); long *Ptr1 = sycl::malloc_device(10, Q); long *Ptr2 = sycl::malloc_device(16, Q); Q.ext_oneapi_fill2d(Ptr1, 5, 42l, 4, 2); - EXPECT_TRUE(LastMemopsQuery == PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT); - EXPECT_EQ(LastFill2D.queue, (pi_queue)QueueImpl->getHandleRef()); - EXPECT_EQ(LastFill2D.ptr, (void *)Ptr1); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_FILL2D_SUPPORT); + EXPECT_EQ(LastFill2D.hQueue, QueueImpl->getHandleRef()); + EXPECT_EQ(LastFill2D.pMem, (void *)Ptr1); EXPECT_EQ(LastFill2D.pitch, (size_t)5); - EXPECT_EQ(LastFill2D.pattern_size, sizeof(long)); + EXPECT_EQ(LastFill2D.patternSize, sizeof(long)); EXPECT_EQ(LastFill2D.width, (size_t)4); EXPECT_EQ(LastFill2D.height, (size_t)2); EXPECT_NE(LastEnqueuedKernel, USMFillHelperKernelNameLong); - Q.ext_oneapi_memset2d(Ptr1, 5 * sizeof(long), 123, 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT); - EXPECT_EQ(LastEnqueuedKernel, USMFillHelperKernelNameChar); - Q.ext_oneapi_memcpy2d(Ptr1, 5 * sizeof(long), Ptr2, 8 * sizeof(long), 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMMemcpyHelperKernelNameChar); Q.ext_oneapi_copy2d(Ptr1, 5, Ptr2, 8, 4, 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMMemcpyHelperKernelNameLong); } // Tests that the right paths are taken when the backend only supports native // USM memset. TEST(USMMemcpy2DTest, USMMemsetSupportedOnly) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q{Plt.get_devices()[0]}; std::shared_ptr QueueImpl = sycl::detail::getSyclObjImpl(Q); - Mock.redefineAfter( - after_piContextGetInfo); - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piKernelCreate); - Mock.redefineAfter( - after_piEnqueueKernelLaunch); - Mock.redefine( - redefine_piextUSMEnqueueMemset2D); - Mock.redefineAfter( - after_piextUSMGetMemAllocInfo); + // Enable fill + set, they are implemented with the same entry point in the + // backend so supporting one means supporting both. + mock::getCallbacks().set_after_callback( + "urContextGetInfo", &after_urContextGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback("urKernelCreate", + &after_urKernelCreate); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &after_urEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback( + "urUSMGetMemAllocInfo", &after_urUSMGetMemAllocInfo); + mock::getCallbacks().set_replace_callback("urEnqueueUSMFill2D", + &redefine_urEnqueueUSMFill2D); long *Ptr1 = sycl::malloc_device(10, Q); long *Ptr2 = sycl::malloc_device(16, Q); - Q.ext_oneapi_fill2d(Ptr1, 5, 42l, 4, 2); - EXPECT_TRUE(LastMemopsQuery == PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT); - EXPECT_EQ(LastEnqueuedKernel, USMFillHelperKernelNameLong); - Q.ext_oneapi_memset2d(Ptr1, 5 * sizeof(long), 123, 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT); - EXPECT_EQ(LastMemset2D.queue, (pi_queue)QueueImpl->getHandleRef()); - EXPECT_EQ(LastMemset2D.ptr, (void *)Ptr1); - EXPECT_EQ(LastMemset2D.pitch, (size_t)5 * sizeof(long)); - EXPECT_EQ(LastMemset2D.value, 123); - EXPECT_EQ(LastMemset2D.width, (size_t)4 * sizeof(long)); - EXPECT_EQ(LastMemset2D.height, (size_t)2); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_FILL2D_SUPPORT); + EXPECT_EQ(LastFill2D.hQueue, QueueImpl->getHandleRef()); + EXPECT_EQ(LastFill2D.pMem, (void *)Ptr1); + EXPECT_EQ(LastFill2D.pitch, (size_t)5 * sizeof(long)); + EXPECT_EQ(LastFill2D.pattern[0], 123); + EXPECT_EQ(LastFill2D.width, (size_t)4 * sizeof(long)); + EXPECT_EQ(LastFill2D.height, (size_t)2); EXPECT_NE(LastEnqueuedKernel, USMFillHelperKernelNameChar); Q.ext_oneapi_memcpy2d(Ptr1, 5 * sizeof(long), Ptr2, 8 * sizeof(long), 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMMemcpyHelperKernelNameChar); Q.ext_oneapi_copy2d(Ptr1, 5, Ptr2, 8, 4, 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMMemcpyHelperKernelNameLong); } // Tests that the right paths are taken when the backend only supports native // USM memcpy. TEST(USMMemcpy2DTest, USMMemcpySupportedOnly) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q{Plt.get_devices()[0]}; std::shared_ptr QueueImpl = sycl::detail::getSyclObjImpl(Q); - Mock.redefineAfter( - after_piContextGetInfo); - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefineAfter( - after_piKernelCreate); - Mock.redefineAfter( - after_piEnqueueKernelLaunch); - Mock.redefine( - redefine_piextUSMEnqueueMemcpy2D); - Mock.redefineAfter( - after_piextUSMGetMemAllocInfo); + mock::getCallbacks().set_after_callback( + "urContextGetInfo", &after_urContextGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback("urKernelCreate", + &after_urKernelCreate); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &after_urEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy2D", + &redefine_urEnqueueUSMMemcpy2D); + mock::getCallbacks().set_after_callback( + "urUSMGetMemAllocInfo", &after_urUSMGetMemAllocInfo); long *Ptr1 = sycl::malloc_device(10, Q); long *Ptr2 = sycl::malloc_device(16, Q); Q.ext_oneapi_fill2d(Ptr1, 5, 42l, 4, 2); - EXPECT_TRUE(LastMemopsQuery == PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_FILL2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMFillHelperKernelNameLong); Q.ext_oneapi_memset2d(Ptr1, 5 * sizeof(long), 123, 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_FILL2D_SUPPORT); EXPECT_EQ(LastEnqueuedKernel, USMFillHelperKernelNameChar); Q.ext_oneapi_memcpy2d(Ptr1, 5 * sizeof(long), Ptr2, 8 * sizeof(long), 4 * sizeof(long), 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); - EXPECT_EQ(LastMemcpy2D.queue, (pi_queue)QueueImpl->getHandleRef()); - EXPECT_EQ(LastMemcpy2D.dst_ptr, (void *)Ptr1); - EXPECT_EQ(LastMemcpy2D.dst_pitch, (size_t)5 * sizeof(long)); - EXPECT_EQ(LastMemcpy2D.src_ptr, (void *)Ptr2); - EXPECT_EQ(LastMemcpy2D.src_pitch, (size_t)8 * sizeof(long)); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_EQ(LastMemcpy2D.hQueue, QueueImpl->getHandleRef()); + EXPECT_EQ(LastMemcpy2D.pDst, (void *)Ptr1); + EXPECT_EQ(LastMemcpy2D.dstPitch, (size_t)5 * sizeof(long)); + EXPECT_EQ(LastMemcpy2D.pSrc, (void *)Ptr2); + EXPECT_EQ(LastMemcpy2D.srcPitch, (size_t)8 * sizeof(long)); EXPECT_EQ(LastMemcpy2D.width, (size_t)4 * sizeof(long)); EXPECT_EQ(LastMemcpy2D.height, (size_t)2); EXPECT_NE(LastEnqueuedKernel, USMMemcpyHelperKernelNameChar); Q.ext_oneapi_copy2d(Ptr1, 5, Ptr2, 8, 4, 2); - EXPECT_TRUE(LastMemopsQuery == - PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); - EXPECT_EQ(LastMemcpy2D.queue, (pi_queue)QueueImpl->getHandleRef()); - EXPECT_EQ(LastMemcpy2D.dst_ptr, (void *)Ptr2); - EXPECT_EQ(LastMemcpy2D.dst_pitch, (size_t)8 * sizeof(long)); - EXPECT_EQ(LastMemcpy2D.src_ptr, (void *)Ptr1); - EXPECT_EQ(LastMemcpy2D.src_pitch, (size_t)5 * sizeof(long)); + EXPECT_TRUE(LastMemopsQuery == UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT); + EXPECT_EQ(LastMemcpy2D.hQueue, QueueImpl->getHandleRef()); + EXPECT_EQ(LastMemcpy2D.pDst, (void *)Ptr2); + EXPECT_EQ(LastMemcpy2D.dstPitch, (size_t)8 * sizeof(long)); + EXPECT_EQ(LastMemcpy2D.pSrc, (void *)Ptr1); + EXPECT_EQ(LastMemcpy2D.srcPitch, (size_t)5 * sizeof(long)); EXPECT_EQ(LastMemcpy2D.width, (size_t)4 * sizeof(long)); EXPECT_EQ(LastMemcpy2D.height, (size_t)2); EXPECT_NE(LastEnqueuedKernel, USMMemcpyHelperKernelNameLong); @@ -620,8 +541,8 @@ TEST(USMMemcpy2DTest, USMMemcpySupportedOnly) { // Negative tests for cases where USM 2D memory operations are expected to throw // exceptions. TEST(USMMemcpy2DTest, NegativeUSM2DOps) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q{Plt.get_devices()[0]}; long *Ptr1 = sycl::malloc_device(10, Q); diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp index ac44bb6ddd54e..bf02c69d755c0 100644 --- a/sycl/unittests/Extensions/USMP2P.cpp +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -7,66 +7,62 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include int check = 0; -pi_result redefinedDevicesGet(pi_platform platform, pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 2; - if (devices && num_entries > 0) { - devices[0] = reinterpret_cast(1); - devices[1] = reinterpret_cast(2); +ur_result_t redefinedDeviceGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) + **params.ppNumDevices = 2; + if (*params.pphDevices && *params.pNumEntries > 0) { + (*params.pphDevices)[0] = reinterpret_cast(1); + (*params.pphDevices)[1] = reinterpret_cast(2); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEnablePeerAccess(pi_device command_device, - pi_device peer_device) { +ur_result_t redefinedEnablePeerAccess(void *) { check = 3; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedDisablePeerAccess(pi_device command_device, - pi_device peer_device) { +ur_result_t redefinedDisablePeerAccess(void *) { check = 4; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +ur_result_t redefinedPeerAccessGetInfo(void *pParams) { + auto params = + *static_cast(pParams); - if (param_value) - *static_cast(param_value) = 1; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_int32); + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = 1; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(int32_t); - if (attr == PI_PEER_ACCESS_SUPPORTED) { + if (*params.ppropName == UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED) { check = 1; - } else if (attr == PI_PEER_ATOMICS_SUPPORTED) { + } else if (*params.ppropName == UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED) { check = 2; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(USMP2PTest, USMP2PTest) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - Mock.redefine(redefinedDevicesGet); - Mock.redefine( - redefinedEnablePeerAccess); - Mock.redefine( - redefinedDisablePeerAccess); - Mock.redefine( - redefinedPeerAccessGetInfo); + mock::getCallbacks().set_replace_callback("urDeviceGet", &redefinedDeviceGet); + mock::getCallbacks().set_replace_callback("urUsmP2PEnablePeerAccessExp", + &redefinedEnablePeerAccess); + mock::getCallbacks().set_replace_callback("urUsmP2PDisablePeerAccessExp", + &redefinedDisablePeerAccess); + mock::getCallbacks().set_replace_callback("urUsmP2PPeerAccessGetInfoExp", + &redefinedPeerAccessGetInfo); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); auto Dev1 = Plt.get_devices()[0]; auto Dev2 = Plt.get_devices()[1]; diff --git a/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp b/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp index cb3b92ea49ff1..8ec091e099f8a 100644 --- a/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp +++ b/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp @@ -1,9 +1,11 @@ +#include "sycl/platform.hpp" +#include "ur_mock_helpers.hpp" #include #include -#include -#include #include +#include +#include #include @@ -44,11 +46,11 @@ KERNEL_INFO(KernelG) } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage +static sycl::unittest::UrImage generateImage(std::initializer_list KernelNames, const std::string &VFSets, bool UsesVFSets, unsigned char Magic) { - sycl::unittest::PiPropertySet PropSet; - sycl::unittest::PiArray Props; + sycl::unittest::UrPropertySet PropSet; + sycl::unittest::UrArray Props; uint64_t PropSize = VFSets.size(); std::vector Storage(/* bytes for size */ 8 + PropSize + /* null terminator */ 1); @@ -59,7 +61,7 @@ generateImage(std::initializer_list KernelNames, Storage.back() = '\0'; const std::string PropName = UsesVFSets ? "uses-virtual-functions-set" : "virtual-functions-set"; - sycl::unittest::PiProperty Prop(PropName, Storage, + sycl::unittest::UrProperty Prop(PropName, Storage, SYCL_PROPERTY_TYPE_BYTE_ARRAY); Props.push_back(Prop); @@ -67,10 +69,10 @@ generateImage(std::initializer_list KernelNames, std::vector Bin{Magic}; - sycl::unittest::PiArray Entries = + sycl::unittest::UrArray Entries = sycl::unittest::makeEmptyKernels(KernelNames); - sycl::unittest::PiImage Img{ + sycl::unittest::UrImage Img{ SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options @@ -101,7 +103,7 @@ static constexpr unsigned PROGRAM_F1 = 53; // Device images with no entires are ignored by SYCL RT during registration. // Therefore, we have to provide some kernel names to make the test work, even // if we don't really have them/use them. -static sycl::unittest::PiImage Imgs[] = { +static sycl::unittest::UrImage Imgs[] = { generateImage({"KernelA"}, "set-a", /* uses vf set */ true, PROGRAM_A), generateImage({"DummyKernel0"}, "set-a", /* provides vf set */ false, PROGRAM_A0), @@ -129,12 +131,13 @@ static sycl::unittest::PiImage Imgs[] = { generateImage({"KernelG"}, "set-f", /* uses vf set */ true, PROGRAM_F1)}; // Registers mock devices images in the SYCL RT -static sycl::unittest::PiImageArray<15> ImgArray{Imgs}; +static sycl::unittest::UrImageArray<15> ImgArray{Imgs}; TEST(VirtualFunctions, SingleKernelUsesSingleVFSet) { - auto Mock = setupRuntimeLinkingMock(); + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); CapturedLinkingData.clear(); @@ -143,9 +146,9 @@ TEST(VirtualFunctions, SingleKernelUsesSingleVFSet) { Q.single_task([=]() {}); // When we submit this kernel, we expect that two programs were created (one // for a kernel and another providing virtual functions set for it). - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 2u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 2u); // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramLinkCalls, 1u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); ASSERT_TRUE( CapturedLinkingData.LinkedProgramsContains({PROGRAM_A, PROGRAM_A0})); // And the linked program should be used to create a kernel. @@ -154,9 +157,10 @@ TEST(VirtualFunctions, SingleKernelUsesSingleVFSet) { } TEST(VirtualFunctions, SingleKernelUsesSingleVFSetProvidedTwice) { - auto Mock = setupRuntimeLinkingMock(); + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); CapturedLinkingData.clear(); @@ -166,9 +170,9 @@ TEST(VirtualFunctions, SingleKernelUsesSingleVFSetProvidedTwice) { Q.single_task([=]() {}); // When we submit this kernel, we expect that three programs were created (one // for a kernel and another two providing virtual functions set for it). - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 3u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 3u); // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramLinkCalls, 1u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( {PROGRAM_B, PROGRAM_B0, PROGRAM_B1})); // And the linked program should be used to create a kernel. @@ -177,9 +181,10 @@ TEST(VirtualFunctions, SingleKernelUsesSingleVFSetProvidedTwice) { } TEST(VirtualFunctions, SingleKernelUsesDifferentVFSets) { - auto Mock = setupRuntimeLinkingMock(); + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); CapturedLinkingData.clear(); @@ -189,9 +194,9 @@ TEST(VirtualFunctions, SingleKernelUsesDifferentVFSets) { Q.single_task([=]() {}); // When we submit this kernel, we expect that three programs were created (one // for a kernel and another two providing virtual functions set for it). - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 3u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 3u); // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramLinkCalls, 1u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( {PROGRAM_C, PROGRAM_C0, PROGRAM_C1})); // And the linked program should be used to create a kernel. @@ -200,9 +205,10 @@ TEST(VirtualFunctions, SingleKernelUsesDifferentVFSets) { } TEST(VirtualFunctions, RecursiveSearchOfDependentDeviceImages) { - auto Mock = setupRuntimeLinkingMock(); + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); CapturedLinkingData.clear(); @@ -213,9 +219,9 @@ TEST(VirtualFunctions, RecursiveSearchOfDependentDeviceImages) { // When we submit this kernel, we expect that four programs were created (one // for KernelD and another providing "set-d", as well as one for KernelE and // another providing "set-e"). - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 4u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 4u); // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramLinkCalls, 1u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( {PROGRAM_D, PROGRAM_D0, PROGRAM_E, PROGRAM_E0})); // And the linked program should be used to create a kernel. @@ -224,9 +230,10 @@ TEST(VirtualFunctions, RecursiveSearchOfDependentDeviceImages) { } TEST(VirtualFunctions, TwoKernelsShareTheSameSet) { - auto Mock = setupRuntimeLinkingMock(); + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); CapturedLinkingData.clear(); @@ -235,9 +242,9 @@ TEST(VirtualFunctions, TwoKernelsShareTheSameSet) { Q.single_task([=]() {}); // When we submit this kernel, we expect that three programs were created (one // for KernelF, another providing "set-f" and one more for KernelG) - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 3u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 3u); // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramLinkCalls, 1u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( {PROGRAM_F, PROGRAM_F0, PROGRAM_F1})); // And the linked program should be used to create a kernel. @@ -249,8 +256,8 @@ TEST(VirtualFunctions, TwoKernelsShareTheSameSet) { // When we submit a second kernel, we expect that no new programs will be // created and we will simply use previously linked program for that kernel. Q.single_task([=]() {}); - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 0u); - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramLinkCalls, 0u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 0u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 0u); ASSERT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, PROGRAM_F * PROGRAM_F0 * PROGRAM_F1); } diff --git a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp index d0a4d3812a7bd..31168ebc51b6b 100644 --- a/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicFenceCapabilities.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include #include -#include +#include +#include using namespace sycl; @@ -16,68 +16,65 @@ namespace { thread_local bool deviceGetInfoCalled; -pi_result redefinedDevicesGet(pi_platform platform, pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 2; - if (devices && num_entries > 0) { - devices[0] = reinterpret_cast(1); - devices[1] = reinterpret_cast(2); +ur_result_t redefinedDevicesGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) + **params.ppNumDevices = 2; + if (*params.pphDevices && *params.pNumEntries > 0) { + (*params.pphDevices)[0] = reinterpret_cast(1); + (*params.pphDevices)[1] = reinterpret_cast(2); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedDeviceGetInfoAfter(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { +ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { deviceGetInfoCalled = true; - if (param_value) { - auto *Result = - reinterpret_cast(param_value); - if (device == reinterpret_cast(1)) { + if (*params.ppPropValue) { + auto *Result = reinterpret_cast( + *params.ppPropValue); + if (*params.phDevice == reinterpret_cast(1)) { std::cout << "Order Device 1" << std::endl; - *Result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST; + *Result = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; } - if (device == reinterpret_cast(2)) { + if (*params.phDevice == reinterpret_cast(2)) { std::cout << "Order Device 2" << std::endl; - *Result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_SEQ_CST; + *Result = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; } } - } else if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { + } else if (*params.ppropName == + UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { deviceGetInfoCalled = true; - if (param_value) { - auto *Result = - reinterpret_cast(param_value); - if (device == reinterpret_cast(1)) { + if (*params.ppPropValue) { + auto *Result = reinterpret_cast( + *params.ppPropValue); + if (*params.phDevice == reinterpret_cast(1)) { std::cout << "Scope Device 1" << std::endl; - *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | - PI_MEMORY_SCOPE_SYSTEM; + *Result = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; } - if (device == reinterpret_cast(2)) { + if (*params.phDevice == reinterpret_cast(2)) { std::cout << "Scope Device 2" << std::endl; - *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SYSTEM; + *Result = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; } } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(AtomicFenceCapabilitiesCheck, CheckDeviceAtomicFenceOrderCapabilities) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); device Dev = Plt.get_devices()[0]; deviceGetInfoCalled = false; - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); auto order_capabilities = Dev.get_info(); EXPECT_TRUE(deviceGetInfoCalled); @@ -102,14 +99,14 @@ TEST(AtomicFenceCapabilitiesCheck, CheckDeviceAtomicFenceOrderCapabilities) { } TEST(AtomicFenceCapabilitiesCheck, CheckDeviceAtomicFenceScopeCapabilities) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); device Dev = Plt.get_devices()[0]; deviceGetInfoCalled = false; - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); auto scope_capabilities = Dev.get_info(); EXPECT_TRUE(deviceGetInfoCalled); @@ -134,11 +131,11 @@ TEST(AtomicFenceCapabilitiesCheck, CheckDeviceAtomicFenceScopeCapabilities) { } TEST(AtomicFenceCapabilitiesCheck, CheckContextAtomicFenceOrderCapabilities) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); - Mock.redefineAfter(redefinedDevicesGet); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDeviceGet", &redefinedDevicesGet); auto devices = Plt.get_devices(); context Ctx{devices}; @@ -158,11 +155,11 @@ TEST(AtomicFenceCapabilitiesCheck, CheckContextAtomicFenceOrderCapabilities) { } TEST(AtomicFenceCapabilitiesCheck, CheckContextAtomicFenceScopeCapabilities) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); - Mock.redefineAfter(redefinedDevicesGet); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDeviceGet", &redefinedDevicesGet); auto devices = Plt.get_devices(); context Ctx{devices}; diff --git a/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp index ad0b36a34a465..4d6878e72bc11 100644 --- a/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include using namespace sycl; @@ -23,45 +23,44 @@ static bool has_capability(const std::vector &deviceCapabilities, capabilityToFind) != deviceCapabilities.end(); } -pi_result redefinedDevicesGet(pi_platform platform, pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 2; - if (devices && num_entries > 0) { - devices[0] = reinterpret_cast(1); - devices[1] = reinterpret_cast(2); +ur_result_t redefinedDevicesGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) + **params.ppNumDevices = 2; + if (*params.pphDevices && *params.pNumEntries > 0) { + (*params.pphDevices)[0] = reinterpret_cast(1); + (*params.pphDevices)[1] = reinterpret_cast(2); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) { +ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) { deviceGetInfoCalled = true; - if (param_value) { - pi_memory_order_capabilities *Capabilities = - reinterpret_cast(param_value); - if (device == reinterpret_cast(1)) { - *Capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST; + if (*params.ppPropValue) { + ur_memory_order_capability_flags_t *Capabilities = + reinterpret_cast( + *params.ppPropValue); + if (*params.phDevice == reinterpret_cast(1)) { + *Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; } - if (device == reinterpret_cast(2)) { - *Capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_SEQ_CST; + if (*params.phDevice == reinterpret_cast(2)) { + *Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; } } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(AtomicMemoryOrderCapabilities, DeviceQueryReturnsCorrectCapabilities) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); - Mock.redefineAfter( - redefinedDeviceGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); const device Dev = Plt.get_devices()[0]; context Ctx{Dev}; @@ -82,12 +81,12 @@ TEST(AtomicMemoryOrderCapabilities, DeviceQueryReturnsCorrectCapabilities) { } TEST(AtomicMemoryOrderCapabilities, ContextQueryReturnsCorrectCapabilities) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); - Mock.redefineAfter( - redefinedDeviceGetInfo); - Mock.redefineAfter(redefinedDevicesGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGet", &redefinedDevicesGet); auto devices = Plt.get_devices(); context Ctx{devices}; diff --git a/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp index eb48d9ad17bde..886b8732a6f26 100644 --- a/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp +++ b/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include #include -#include +#include +#include using namespace sycl; @@ -16,51 +16,47 @@ namespace { thread_local bool deviceGetInfoCalled; -pi_result redefinedDevicesGet(pi_platform platform, pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 2; - if (devices && num_entries > 0) { - devices[0] = reinterpret_cast(1); - devices[1] = reinterpret_cast(2); +ur_result_t redefinedDevicesGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) + **params.ppNumDevices = 2; + if (*params.pphDevices && *params.pNumEntries > 0) { + (*params.pphDevices)[0] = reinterpret_cast(1); + (*params.pphDevices)[1] = reinterpret_cast(2); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedDeviceGetInfoAfter(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) { +ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) { deviceGetInfoCalled = true; - if (param_value) { - auto *Result = - reinterpret_cast(param_value); - if (device == reinterpret_cast(1)) { - *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | - PI_MEMORY_SCOPE_SYSTEM; + if (*params.ppPropValue) { + auto *Result = reinterpret_cast( + *params.ppPropValue); + if (*params.phDevice == reinterpret_cast(1)) { + *Result = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; } - if (device == reinterpret_cast(2)) { - *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SYSTEM; + if (*params.phDevice == reinterpret_cast(2)) { + *Result = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; } } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(AtomicMemoryScopeCapabilitiesCheck, CheckDeviceAtomicMemoryScopeCapabilities) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); device Dev = Plt.get_devices()[0]; deviceGetInfoCalled = false; - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); auto scope_capabilities = Dev.get_info(); EXPECT_TRUE(deviceGetInfoCalled); @@ -86,12 +82,12 @@ TEST(AtomicMemoryScopeCapabilitiesCheck, TEST(AtomicMemoryScopeCapabilitiesCheck, CheckContextAtomicMemoryScopeCapabilities) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); - Mock.redefineAfter(redefinedDevicesGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDeviceGet", &redefinedDevicesGet); auto devices = Plt.get_devices(); context Ctx{devices}; diff --git a/sycl/unittests/SYCL2020/DeviceAspectTraits.cpp b/sycl/unittests/SYCL2020/DeviceAspectTraits.cpp index f3b4e393f4a03..d5f45f164b6cb 100644 --- a/sycl/unittests/SYCL2020/DeviceAspectTraits.cpp +++ b/sycl/unittests/SYCL2020/DeviceAspectTraits.cpp @@ -8,7 +8,7 @@ // #include -#include +#include #include diff --git a/sycl/unittests/SYCL2020/DeviceGetInfoAspects.cpp b/sycl/unittests/SYCL2020/DeviceGetInfoAspects.cpp index 8e1d5bf7f7cb8..42ff1c6eceabf 100644 --- a/sycl/unittests/SYCL2020/DeviceGetInfoAspects.cpp +++ b/sycl/unittests/SYCL2020/DeviceGetInfoAspects.cpp @@ -8,8 +8,8 @@ #include -#include -#include +#include +#include #include @@ -24,16 +24,16 @@ static bool containsAspect(const std::vector &DeviceAspects, } TEST(DeviceGetInfo, SupportedDeviceAspects) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::device Dev = Plt.get_devices()[0]; std::vector DeviceAspects = Dev.get_info(); // Tests to examine aspects of default mock device, as defined in - // helpers/PiMockPlugin.hpp so these tests all need to be kept in sync with + // helpers/UrMockPlugin.hpp so these tests all need to be kept in sync with // changes to that file. EXPECT_TRUE(containsAspect(DeviceAspects, aspect::gpu)); EXPECT_TRUE(containsAspect(DeviceAspects, aspect::fp16)); diff --git a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp index 3860833256048..ba2ae917808d3 100644 --- a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp +++ b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp @@ -13,8 +13,8 @@ #include #include -#include #include +#include #include @@ -26,79 +26,78 @@ using namespace sycl; int TestCounter = 0; int DeviceRetainCounter = 0; -static pi_result redefinedContextRetain(pi_context c) { +static ur_result_t redefinedContextRetain(void *) { ++TestCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedQueueRetain(pi_queue c) { +static ur_result_t redefinedQueueRetain(void *) { ++TestCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedDeviceRetain(pi_device c) { +static ur_result_t redefinedDeviceRetain(void *) { ++TestCounter; ++DeviceRetainCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedProgramRetain(pi_program c) { +static ur_result_t redefinedProgramRetain(void *) { ++TestCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedEventRetain(pi_event c) { +static ur_result_t redefinedEventRetain(void *) { ++TestCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedMemRetain(pi_mem c) { +static ur_result_t redefinedMemRetain(void *) { ++TestCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedMemBufferCreate(pi_context, pi_mem_flags, size_t size, - void *, pi_mem *, - const pi_mem_properties *) { - return PI_SUCCESS; -} +ur_result_t redefinedMemBufferCreate(void *) { return UR_RESULT_SUCCESS; } -pi_result redefinedEventGetInfo(pi_event event, pi_event_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - EXPECT_EQ(param_name, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) +ur_result_t redefinedEventGetInfo(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppropName, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) << "Unexpected event info requested"; // Report half of events as complete static int Counter = 0; - auto *Result = reinterpret_cast(param_value); - *Result = (++Counter % 2 == 0) ? PI_EVENT_COMPLETE : PI_EVENT_RUNNING; - return PI_SUCCESS; + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = + (++Counter % 2 == 0) ? UR_EVENT_STATUS_COMPLETE : UR_EVENT_STATUS_RUNNING; + return UR_RESULT_SUCCESS; } -static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, const void *, - size_t, size_t, pi_uint32, - const pi_event *, pi_event *event) { - *event = reinterpret_cast(new int{}); - return PI_SUCCESS; +static ur_result_t redefinedEnqueueUSMFill(void *pParams) { + auto params = *static_cast(pParams); + **params.pphEvent = reinterpret_cast(new int{}); + return UR_RESULT_SUCCESS; } TEST(GetNative, GetNativeHandle) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - - Mock.redefineBefore(redefinedEventGetInfo); - Mock.redefineBefore( - redefinedContextRetain); - Mock.redefineBefore(redefinedQueueRetain); - Mock.redefineBefore(redefinedDeviceRetain); - Mock.redefineBefore( - redefinedProgramRetain); - Mock.redefineBefore(redefinedEventRetain); - Mock.redefineBefore(redefinedMemRetain); - Mock.redefineBefore( - redefinedMemBufferCreate); - Mock.redefineBefore( - redefinedUSMEnqueueMemset); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + + mock::getCallbacks().set_before_callback("urEventGetInfo", + &redefinedEventGetInfo); + mock::getCallbacks().set_before_callback("urContextRetain", + &redefinedContextRetain); + mock::getCallbacks().set_before_callback("urQueueRetain", + &redefinedQueueRetain); + mock::getCallbacks().set_before_callback("urDeviceRetain", + &redefinedDeviceRetain); + mock::getCallbacks().set_before_callback("urProgramRetain", + &redefinedProgramRetain); + mock::getCallbacks().set_before_callback("urEventRetain", + &redefinedEventRetain); + mock::getCallbacks().set_before_callback("urMemRetain", &redefinedMemRetain); + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &redefinedMemBufferCreate); + mock::getCallbacks().set_before_callback("urEnqueueUSMFill", + &redefinedEnqueueUSMFill); context Context(Plt); queue Queue(Context, default_selector_v); @@ -122,7 +121,7 @@ TEST(GetNative, GetNativeHandle) { get_native(Event); get_native(Buffer); - // Depending on global caches state, piDeviceRetain is called either once or + // Depending on global caches state, urDeviceRetain is called either once or // twice, so there'll be 6 or 7 calls. ASSERT_EQ(TestCounter, 6 + DeviceRetainCounter - 1) << "Not all the retain methods were called"; diff --git a/sycl/unittests/SYCL2020/HasExtension.cpp b/sycl/unittests/SYCL2020/HasExtension.cpp index 71d01ba0f2c29..636c363f28616 100644 --- a/sycl/unittests/SYCL2020/HasExtension.cpp +++ b/sycl/unittests/SYCL2020/HasExtension.cpp @@ -9,16 +9,16 @@ #include #include -#include +#include #include using namespace sycl; TEST(HasExtensionID, HasExtensionCallsCorrectPluginMethods) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::device Dev = Plt.get_devices()[0]; bool PlatformHasSubgroups = opencl::has_extension(Plt, "cl_khr_subgroups"); diff --git a/sycl/unittests/SYCL2020/IsCompatible.cpp b/sycl/unittests/SYCL2020/IsCompatible.cpp index 72197b8cc2ca8..aaa295bf77b2f 100644 --- a/sycl/unittests/SYCL2020/IsCompatible.cpp +++ b/sycl/unittests/SYCL2020/IsCompatible.cpp @@ -1,8 +1,8 @@ #include #include -#include -#include +#include +#include #include @@ -22,19 +22,19 @@ MOCK_INTEGRATION_HEADER(TestKernelCPUValidReqdWGSize3D) MOCK_INTEGRATION_HEADER(TestKernelGPU) MOCK_INTEGRATION_HEADER(TestKernelACC) -static sycl::unittest::PiImage +static sycl::unittest::UrImage generateDefaultImage(std::initializer_list KernelNames, const std::vector &Aspects, const std::vector &ReqdWGSize = {}) { using namespace sycl::unittest; - PiPropertySet PropSet; + UrPropertySet PropSet; addDeviceRequirementsProps(PropSet, Aspects, ReqdWGSize); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = makeEmptyKernels(KernelNames); + UrArray Entries = makeEmptyKernels(KernelNames); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -45,7 +45,7 @@ generateDefaultImage(std::initializer_list KernelNames, return Img; } -static sycl::unittest::PiImage Imgs[7] = { +static sycl::unittest::UrImage Imgs[7] = { // Images for validating checks based on max_work_group_size + aspects generateDefaultImage({"TestKernelCPU"}, {sycl::aspect::cpu}, {32}), // 32 <= 256 (OK) @@ -64,81 +64,69 @@ static sycl::unittest::PiImage Imgs[7] = { generateDefaultImage({"TestKernelGPU"}, {sycl::aspect::gpu}), generateDefaultImage({"TestKernelACC"}, {sycl::aspect::accelerator})}; -static sycl::unittest::PiImageArray<7> ImgArray{Imgs}; +static sycl::unittest::UrImageArray<7> ImgArray{Imgs}; -static pi_result redefinedDeviceGetInfoCPU(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(param_value); - *Result = PI_DEVICE_TYPE_CPU; +static ur_result_t redefinedDeviceGetInfoCPU(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_CPU; } - if (param_name == PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE) { - auto *Result = static_cast(param_value); + if (*params.ppropName == UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE) { + auto *Result = static_cast(*params.ppPropValue); *Result = 256; } - if (param_name == PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES) { - auto *Result = static_cast(param_value); + if (*params.ppropName == UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES) { + auto *Result = static_cast(*params.ppPropValue); *Result = 256; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedDeviceGetInfoCPU3D(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(param_value); - *Result = PI_DEVICE_TYPE_CPU; +static ur_result_t redefinedDeviceGetInfoCPU3D(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_CPU; } - if (param_name == PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE) { - auto *Result = static_cast(param_value); + if (*params.ppropName == UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE) { + auto *Result = static_cast(*params.ppPropValue); *Result = 256; } - if (param_name == PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES) { - auto *Result = static_cast(param_value); + if (*params.ppropName == UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES) { + auto *Result = static_cast(*params.ppPropValue); Result[0] = 256; Result[1] = 255; Result[2] = 254; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } // Mock device is "GPU" by default, but we need to redefine it just in case // if there are some changes in the future -static pi_result redefinedDeviceGetInfoGPU(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(param_value); - *Result = PI_DEVICE_TYPE_GPU; +static ur_result_t redefinedDeviceGetInfoGPU(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_GPU; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedDeviceGetInfoACC(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(param_value); - *Result = PI_DEVICE_TYPE_ACC; +static ur_result_t redefinedDeviceGetInfoACC(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_FPGA; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(IsCompatible, CPU) { - sycl::unittest::PiMock Mock; - Mock.redefineAfter( - redefinedDeviceGetInfoCPU); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoCPU); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.is_cpu()); @@ -148,50 +136,50 @@ TEST(IsCompatible, CPU) { } TEST(IsCompatible, CPUInvalidReqdWGSize1D) { - sycl::unittest::PiMock Mock; - Mock.redefineAfter( - redefinedDeviceGetInfoCPU); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoCPU); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_FALSE(sycl::is_compatible(Dev)); } TEST(IsCompatible, CPUInvalidReqdWGSize2D) { - sycl::unittest::PiMock Mock; - Mock.redefineAfter( - redefinedDeviceGetInfoCPU); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoCPU); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_FALSE(sycl::is_compatible(Dev)); } TEST(IsCompatible, CPUInvalidReqdWGSize3D) { - sycl::unittest::PiMock Mock; - Mock.redefineAfter( - redefinedDeviceGetInfoCPU3D); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoCPU3D); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_FALSE(sycl::is_compatible(Dev)); } TEST(IsCompatible, CPUValidReqdWGSize3D) { - sycl::unittest::PiMock Mock; - Mock.redefineAfter( - redefinedDeviceGetInfoCPU3D); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoCPU3D); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(sycl::is_compatible(Dev)); } TEST(IsCompatible, GPU) { - sycl::unittest::PiMock Mock; - Mock.redefineAfter( - redefinedDeviceGetInfoGPU); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoGPU); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.is_gpu()); @@ -201,10 +189,10 @@ TEST(IsCompatible, GPU) { } TEST(IsCompatible, ACC) { - sycl::unittest::PiMock Mock; - Mock.redefineAfter( - redefinedDeviceGetInfoACC); - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoACC); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.is_accelerator()); diff --git a/sycl/unittests/SYCL2020/KernelBundle.cpp b/sycl/unittests/SYCL2020/KernelBundle.cpp index 98e92d13328c8..9829f746ac819 100644 --- a/sycl/unittests/SYCL2020/KernelBundle.cpp +++ b/sycl/unittests/SYCL2020/KernelBundle.cpp @@ -11,8 +11,8 @@ #include #include -#include -#include +#include +#include #include @@ -24,22 +24,22 @@ MOCK_INTEGRATION_HEADER(TestKernel) MOCK_INTEGRATION_HEADER(TestKernelExeOnly) MOCK_INTEGRATION_HEADER(TestKernelWithAspects) -static sycl::unittest::PiImage +static sycl::unittest::UrImage generateDefaultImage(std::initializer_list KernelNames, sycl_device_binary_type BinaryType, const char *DeviceTargetSpec, const std::vector &Aspects = {}) { using namespace sycl::unittest; - PiPropertySet PropSet; + UrPropertySet PropSet; if (!Aspects.empty()) addDeviceRequirementsProps(PropSet, Aspects); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = makeEmptyKernels(KernelNames); + UrArray Entries = makeEmptyKernels(KernelNames); - PiImage Img{BinaryType, // Format + UrImage Img{BinaryType, // Format DeviceTargetSpec, "", // Compile options "", // Link options @@ -50,7 +50,7 @@ generateDefaultImage(std::initializer_list KernelNames, return Img; } -static sycl::unittest::PiImage Imgs[] = { +static sycl::unittest::UrImage Imgs[] = { generateDefaultImage({"TestKernel"}, SYCL_DEVICE_BINARY_TYPE_SPIRV, __SYCL_DEVICE_BINARY_TARGET_SPIRV64), generateDefaultImage({"TestKernelExeOnly"}, SYCL_DEVICE_BINARY_TYPE_NATIVE, @@ -61,24 +61,21 @@ static sycl::unittest::PiImage Imgs[] = { generateDefaultImage( {"TestKernelWithAspects"}, SYCL_DEVICE_BINARY_TYPE_NATIVE, __SYCL_DEVICE_BINARY_TARGET_SPIRV64, {sycl::aspect::gpu})}; -static sycl::unittest::PiImageArray ImgArray{Imgs}; - -static pi_result redefinedDeviceGetInfoCPU(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(param_value); - *Result = PI_DEVICE_TYPE_CPU; +static sycl::unittest::UrImageArray ImgArray{Imgs}; + +static ur_result_t redefinedDeviceGetInfoCPU(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_CPU; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(KernelBundle, GetKernelBundleFromKernel) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::context Ctx{Dev}; sycl::queue Queue{Ctx, Dev}; @@ -95,9 +92,9 @@ TEST(KernelBundle, GetKernelBundleFromKernel) { } TEST(KernelBundle, KernelBundleAndItsDevImageStateConsistency) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::queue Queue{Dev}; @@ -123,9 +120,9 @@ TEST(KernelBundle, KernelBundleAndItsDevImageStateConsistency) { } TEST(KernelBundle, EmptyKernelBundle) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::queue Queue{Dev}; const sycl::context Ctx = Queue.get_context(); @@ -138,9 +135,9 @@ TEST(KernelBundle, EmptyKernelBundle) { } TEST(KernelBundle, EmptyKernelBundleKernelLaunchException) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::queue Queue{Dev}; const sycl::context Ctx = Queue.get_context(); @@ -179,9 +176,9 @@ TEST(KernelBundle, EmptyKernelBundleKernelLaunchException) { } TEST(KernelBundle, HasKernelBundle) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::queue Queue{Dev}; @@ -225,9 +222,9 @@ TEST(KernelBundle, HasKernelBundle) { } TEST(KernelBundle, UseKernelBundleWrongContextPrimaryQueueOnly) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::queue Queue{Dev}; const sycl::context QueueCtx = Queue.get_context(); const sycl::context OtherCtx{Dev}; @@ -266,9 +263,9 @@ TEST(KernelBundle, UseKernelBundleWrongContextPrimaryQueueOnly) { } TEST(KernelBundle, UseKernelBundleWrongContextPrimaryQueueValidSecondaryQueue) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; const sycl::context PrimaryCtx{Dev}; const sycl::context SecondaryCtx{Dev}; @@ -312,9 +309,9 @@ TEST(KernelBundle, UseKernelBundleWrongContextPrimaryQueueValidSecondaryQueue) { } TEST(KernelBundle, UseKernelBundleValidPrimaryQueueWrongContextSecondaryQueue) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; const sycl::context PrimaryCtx{Dev}; const sycl::context SecondaryCtx{Dev}; @@ -358,9 +355,9 @@ TEST(KernelBundle, UseKernelBundleValidPrimaryQueueWrongContextSecondaryQueue) { } TEST(KernelBundle, UseKernelBundleWrongContextPrimaryQueueAndSecondaryQueue) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; const sycl::context PrimaryCtx{Dev}; const sycl::context SecondaryCtx{Dev}; const sycl::context OtherCtx{Dev}; @@ -407,9 +404,9 @@ TEST(KernelBundle, UseKernelBundleWrongContextPrimaryQueueAndSecondaryQueue) { } TEST(KernelBundle, EmptyDevicesKernelBundleLinkException) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::queue Queue{Dev}; const sycl::context Ctx = Queue.get_context(); @@ -451,63 +448,60 @@ TEST(KernelBundle, EmptyDevicesKernelBundleLinkException) { } } -pi_device ParentDevice = nullptr; -pi_platform PiPlatform = nullptr; +ur_device_handle_t ParentDevice = nullptr; +ur_platform_handle_t UrPlatform = nullptr; -pi_result redefinedDeviceGetInfoAfter(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_PARTITION_PROPERTIES) { - if (param_value) { +ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + if (*params.ppPropValue) { auto *Result = - reinterpret_cast(param_value); - *Result = PI_DEVICE_PARTITION_EQUALLY; + reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_PARTITION_EQUALLY; } - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_device_partition_property); - } else if (param_name == PI_DEVICE_INFO_MAX_COMPUTE_UNITS) { - auto *Result = reinterpret_cast(param_value); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_device_partition_t); + } else if (*params.ppropName == UR_DEVICE_INFO_MAX_COMPUTE_UNITS) { + auto *Result = reinterpret_cast(*params.ppPropValue); *Result = 2; - } else if (param_name == PI_DEVICE_INFO_PARENT_DEVICE) { - auto *Result = reinterpret_cast(param_value); - *Result = (device == ParentDevice) ? nullptr : ParentDevice; - } else if (param_name == PI_DEVICE_INFO_PLATFORM) { - auto *Result = reinterpret_cast(param_value); - *Result = PiPlatform; + } else if (*params.ppropName == UR_DEVICE_INFO_PARENT_DEVICE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = (*params.phDevice == ParentDevice) ? nullptr : ParentDevice; + } else if (*params.ppropName == UR_DEVICE_INFO_PLATFORM) { + auto *Result = + reinterpret_cast(*params.ppPropValue); + *Result = UrPlatform; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedDevicePartitionAfter( - pi_device device, const pi_device_partition_property *properties, - pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices) { - if (out_devices) { - for (size_t I = 0; I < num_devices; ++I) { - out_devices[I] = reinterpret_cast(1000 + I); +ur_result_t redefinedDevicePartitionAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.pphSubDevices) { + for (size_t I = 0; I < *params.pNumDevices; ++I) { + *params.pphSubDevices[I] = reinterpret_cast(1000 + I); } } - if (out_num_devices) - *out_num_devices = num_devices; - return PI_SUCCESS; + if (*params.ppNumDevicesRet) + **params.ppNumDevicesRet = *params.pNumDevices; + return UR_RESULT_SUCCESS; } TEST(KernelBundle, DescendentDevice) { // Mock a non-OpenCL plugin since use of descendent devices of context members // is not supported there yet. - sycl::unittest::PiMock Mock(sycl::backend::ext_oneapi_level_zero); + sycl::unittest::UrMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); - PiPlatform = sycl::detail::getSyclObjImpl(Plt)->getHandleRef(); + UrPlatform = sycl::detail::getSyclObjImpl(Plt)->getHandleRef(); - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); - Mock.redefineAfter( - redefinedDevicePartitionAfter); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDevicePartition", + &redefinedDevicePartitionAfter); - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; ParentDevice = sycl::detail::getSyclObjImpl(Dev)->getHandleRef(); sycl::context Ctx{Dev}; sycl::device Subdev = @@ -529,12 +523,12 @@ TEST(KernelBundle, DescendentDevice) { } TEST(KernelBundle, CheckIfBundleHasIncompatibleKernel) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; // TestKernelWithAspects has GPU aspect, so it shouldn't be compatible with // the CPU device and hence shouldn't be in the kernel bundle. - Mock.redefineAfter( - redefinedDeviceGetInfoCPU); - sycl::platform Plt = Mock.getPlatform(); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoCPU); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.is_cpu()); @@ -548,8 +542,8 @@ TEST(KernelBundle, CheckIfBundleHasIncompatibleKernel) { } TEST(KernelBundle, CheckIfBundleHasCompatibleKernel) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); // GPU by default. const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.is_gpu()); @@ -564,12 +558,12 @@ TEST(KernelBundle, CheckIfBundleHasCompatibleKernel) { } TEST(KernelBundle, CheckIfIncompatibleBundleExists) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; // TestKernelWithAspects has GPU aspect, so it shouldn't be compatible with // the CPU device and hence shouldn't be in the kernel bundle. - Mock.redefineAfter( - redefinedDeviceGetInfoCPU); - sycl::platform Plt = Mock.getPlatform(); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoCPU); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.is_cpu()); @@ -585,8 +579,8 @@ TEST(KernelBundle, CheckIfIncompatibleBundleExists) { } TEST(KernelBundle, CheckIfCompatibleBundleExists2) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); // GPU by default. const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.is_gpu()); @@ -599,12 +593,12 @@ TEST(KernelBundle, CheckIfCompatibleBundleExists2) { } TEST(KernelBundle, CheckExceptionIfKernelIncompatible) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; // TestKernelWithAspects has GPU aspect, so it shouldn't be compatible with // the CPU device and hence shouldn't be in the kernel bundle. - Mock.redefineAfter( - redefinedDeviceGetInfoCPU); - sycl::platform Plt = Mock.getPlatform(); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoCPU); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.is_cpu()); @@ -620,17 +614,17 @@ TEST(KernelBundle, CheckExceptionIfKernelIncompatible) { } TEST(KernelBundle, HasKernelForSubDevice) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); - Mock.redefineAfter( - redefinedDevicePartitionAfter); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDevicePartition", + &redefinedDevicePartitionAfter); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; - PiPlatform = sycl::detail::getSyclObjImpl(Plt)->getHandleRef(); + UrPlatform = sycl::detail::getSyclObjImpl(Plt)->getHandleRef(); ParentDevice = sycl::detail::getSyclObjImpl(Dev)->getHandleRef(); sycl::kernel_bundle Bundle = diff --git a/sycl/unittests/SYCL2020/KernelBundleStateFiltering.cpp b/sycl/unittests/SYCL2020/KernelBundleStateFiltering.cpp index d8b40b4b73149..897ff6aba4f4d 100644 --- a/sycl/unittests/SYCL2020/KernelBundleStateFiltering.cpp +++ b/sycl/unittests/SYCL2020/KernelBundleStateFiltering.cpp @@ -11,8 +11,8 @@ #include #include -#include -#include +#include +#include #include @@ -35,20 +35,20 @@ MOCK_INTEGRATION_HEADER(KernelE) namespace { std::set TrackedImages; -sycl::unittest::PiImage +sycl::unittest::UrImage generateDefaultImage(std::initializer_list KernelNames, sycl_device_binary_type BinaryType, const char *DeviceTargetSpec) { using namespace sycl::unittest; - PiPropertySet PropSet; + UrPropertySet PropSet; static unsigned char NImage = 0; std::vector Bin{NImage++}; - PiArray Entries = makeEmptyKernels(KernelNames); + UrArray Entries = makeEmptyKernels(KernelNames); - PiImage Img{BinaryType, // Format + UrImage Img{BinaryType, // Format DeviceTargetSpec, "", // Compile options "", // Link options @@ -69,7 +69,7 @@ generateDefaultImage(std::initializer_list KernelNames, // Image 5: input, KernelE // Image 6: exe, KernelE // Image 7: exe. KernelE -sycl::unittest::PiImage Imgs[] = { +sycl::unittest::UrImage Imgs[] = { generateDefaultImage({"KernelA", "KernelB"}, SYCL_DEVICE_BINARY_TYPE_SPIRV, __SYCL_DEVICE_BINARY_TARGET_SPIRV64), generateDefaultImage({"KernelA"}, SYCL_DEVICE_BINARY_TYPE_NATIVE, @@ -77,7 +77,7 @@ sycl::unittest::PiImage Imgs[] = { generateDefaultImage({"KernelC"}, SYCL_DEVICE_BINARY_TYPE_SPIRV, __SYCL_DEVICE_BINARY_TARGET_SPIRV64), generateDefaultImage({"KernelC"}, SYCL_DEVICE_BINARY_TYPE_NATIVE, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64), + __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA), generateDefaultImage({"KernelD"}, SYCL_DEVICE_BINARY_TYPE_SPIRV, __SYCL_DEVICE_BINARY_TARGET_SPIRV64), generateDefaultImage({"KernelE"}, SYCL_DEVICE_BINARY_TYPE_SPIRV, @@ -87,78 +87,78 @@ sycl::unittest::PiImage Imgs[] = { generateDefaultImage({"KernelE"}, SYCL_DEVICE_BINARY_TYPE_NATIVE, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64)}; -sycl::unittest::PiImageArray ImgArray{Imgs}; +sycl::unittest::UrImageArray ImgArray{Imgs}; std::vector UsedImageIndices; -void redefinedPiProgramCreateCommon(const void *bin) { +void redefinedUrProgramCreateCommon(const void *bin) { if (TrackedImages.count(bin) != 0) { unsigned char ImgIdx = *reinterpret_cast(bin); UsedImageIndices.push_back(ImgIdx); } } -pi_result redefinedPiProgramCreate(pi_context context, const void *il, - size_t length, pi_program *res_program) { - redefinedPiProgramCreateCommon(il); - return PI_SUCCESS; +ur_result_t redefinedUrProgramCreate(void *pParams) { + auto params = *static_cast(pParams); + redefinedUrProgramCreateCommon(*params.ppIL); + return UR_RESULT_SUCCESS; } -pi_result redefinedPiProgramCreateWithBinary( - pi_context context, pi_uint32 num_devices, const pi_device *device_list, - const size_t *lengths, const unsigned char **binaries, - size_t num_metadata_entries, const pi_device_binary_property *metadata, - pi_int32 *binary_status, pi_program *ret_program) { - redefinedPiProgramCreateCommon(binaries[0]); - return PI_SUCCESS; +ur_result_t redefinedUrProgramCreateWithBinary(void *pParams) { + auto params = *static_cast(pParams); + redefinedUrProgramCreateCommon(*params.ppBinary); + return UR_RESULT_SUCCESS; } -pi_result redefinedDevicesGet(pi_platform platform, pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 2; +ur_result_t redefinedDevicesGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) + **params.ppNumDevices = 2; - if (devices) { - devices[0] = reinterpret_cast(1); - devices[1] = reinterpret_cast(2); + if (*params.pphDevices) { + (*params.pphDevices)[0] = reinterpret_cast(1); + (*params.pphDevices)[1] = reinterpret_cast(2); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedExtDeviceSelectBinary(pi_device device, - pi_device_binary *pi_binaries, - pi_uint32 num_binaries, - pi_uint32 *selected_binary_ind) { - EXPECT_EQ(num_binaries, 1U); - auto *binaries = reinterpret_cast(pi_binaries); +ur_result_t redefinedDeviceSelectBinary(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pNumBinaries, 1U); // Treat image 3 as incompatible with one of the devices. - if (TrackedImages.count(binaries[0]->BinaryStart) != 0 && - *binaries[0]->BinaryStart == 3 && - device == reinterpret_cast(2)) { - return PI_ERROR_INVALID_BINARY; + // + // FIXME: this is expecting ur_device_binary so it can do stuff with the + // actual binary, not just the metadata.. not sure how we're going to support + // this + std::string BinarySpec = (*params.ppBinaries)[0].pDeviceTargetSpec; + if (BinarySpec.find("spir64_fpga") != std::string::npos && + *params.phDevice == reinterpret_cast(2)) { + return UR_RESULT_ERROR_INVALID_BINARY; } - *selected_binary_ind = 0; - return PI_SUCCESS; + **params.ppSelectedBinary = 0; + return UR_RESULT_SUCCESS; } void verifyImageUse(const std::vector &ExpectedImages) { std::sort(UsedImageIndices.begin(), UsedImageIndices.end()); EXPECT_TRUE(std::is_sorted(ExpectedImages.begin(), ExpectedImages.end())); EXPECT_EQ(UsedImageIndices, ExpectedImages); + if (UsedImageIndices != ExpectedImages) { + printf("break here\n"); + } UsedImageIndices.clear(); } TEST(KernelBundle, DeviceImageStateFiltering) { - sycl::unittest::PiMock Mock; - Mock.redefineAfter( - redefinedPiProgramCreate); - Mock.redefineAfter( - redefinedPiProgramCreateWithBinary); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_after_callback("urProgramCreateWithIL", + &redefinedUrProgramCreate); + mock::getCallbacks().set_after_callback("urProgramCreateWithBinary", + &redefinedUrProgramCreateWithBinary); // No kernel ids specified. { - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::context Ctx{Dev}; sycl::kernel_bundle KernelBundle = @@ -172,7 +172,7 @@ TEST(KernelBundle, DeviceImageStateFiltering) { // Request specific kernel ids. { - const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + const sycl::device Dev = sycl::platform().get_devices()[0]; sycl::context Ctx{Dev}; sycl::kernel_bundle KernelBundle = @@ -184,10 +184,11 @@ TEST(KernelBundle, DeviceImageStateFiltering) { // Check the case where some executable images are unsupported by one of // the devices. { - Mock.redefine(redefinedDevicesGet); - Mock.redefine( - redefinedExtDeviceSelectBinary); - const std::vector Devs = Mock.getPlatform().get_devices(); + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefinedDevicesGet); + mock::getCallbacks().set_replace_callback("urDeviceSelectBinary", + &redefinedDeviceSelectBinary); + const std::vector Devs = sycl::platform().get_devices(); sycl::context Ctx{Devs}; sycl::kernel_bundle KernelBundle = diff --git a/sycl/unittests/SYCL2020/KernelID.cpp b/sycl/unittests/SYCL2020/KernelID.cpp index 13075850cc6ba..21e458864549a 100644 --- a/sycl/unittests/SYCL2020/KernelID.cpp +++ b/sycl/unittests/SYCL2020/KernelID.cpp @@ -9,8 +9,8 @@ #include #include -#include -#include +#include +#include #include @@ -47,17 +47,17 @@ struct KernelInfo : public unittest::MockKernelInfoBase { } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage +static sycl::unittest::UrImage generateDefaultImage(std::initializer_list Kernels) { using namespace sycl::unittest; - PiPropertySet PropSet; + UrPropertySet PropSet; std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = makeEmptyKernels(Kernels); + UrArray Entries = makeEmptyKernels(Kernels); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -68,12 +68,12 @@ generateDefaultImage(std::initializer_list Kernels) { return Img; } -static sycl::unittest::PiImage Imgs[2] = { +static sycl::unittest::UrImage Imgs[2] = { generateDefaultImage({"KernelID_TestKernel1", "KernelID_TestKernel3"}), generateDefaultImage( {"KernelID_TestKernel2", "_ZTSN2cl4sycl6detail23__sycl_service_kernel__14ServiceKernel1"})}; -static sycl::unittest::PiImageArray<2> ImgArray{Imgs}; +static sycl::unittest::UrImageArray<2> ImgArray{Imgs}; TEST(KernelID, AllProgramKernelIds) { std::vector AllKernelIDs = sycl::get_kernel_ids(); @@ -109,8 +109,8 @@ TEST(KernelID, NoServiceKernelIds) { } TEST(KernelID, FreeKernelIDEqualsKernelBundleId) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; @@ -134,8 +134,8 @@ TEST(KernelID, FreeKernelIDEqualsKernelBundleId) { } TEST(KernelID, KernelBundleKernelIDsIntersectAll) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; @@ -155,8 +155,8 @@ TEST(KernelID, KernelBundleKernelIDsIntersectAll) { } TEST(KernelID, KernelIDHasKernel) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; @@ -234,8 +234,8 @@ TEST(KernelID, KernelIDHasKernel) { } TEST(KernelID, HasKernelTemplated) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; @@ -253,8 +253,8 @@ TEST(KernelID, HasKernelTemplated) { } TEST(KernelID, GetKernelIDInvalidKernelName) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); try { sycl::get_kernel_id(); diff --git a/sycl/unittests/SYCL2020/SpecializationConstant.cpp b/sycl/unittests/SYCL2020/SpecializationConstant.cpp index ffb4c4cdfc233..9875084a3ed92 100644 --- a/sycl/unittests/SYCL2020/SpecializationConstant.cpp +++ b/sycl/unittests/SYCL2020/SpecializationConstant.cpp @@ -12,8 +12,8 @@ #include #include -#include -#include +#include +#include #include @@ -37,22 +37,21 @@ template <> const char *get_spec_constant_symbolic_ID() { } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage generateImageWithSpecConsts() { +static sycl::unittest::UrImage generateImageWithSpecConsts() { using namespace sycl::unittest; std::vector SpecConstData; - PiProperty SC1 = makeSpecConstant(SpecConstData, "SC1", {0}, {0}, {42}); - PiProperty SC2 = makeSpecConstant(SpecConstData, "SC2", {1}, {0}, {8}); + UrProperty SC1 = makeSpecConstant(SpecConstData, "SC1", {0}, {0}, {42}); + UrProperty SC2 = makeSpecConstant(SpecConstData, "SC2", {1}, {0}, {8}); - PiPropertySet PropSet; + UrPropertySet PropSet; addSpecConstants({SC1, SC2}, std::move(SpecConstData), PropSet); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = + UrArray Entries = makeEmptyKernels({"SpecializationConstant_TestKernel"}); - - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -63,12 +62,12 @@ static sycl::unittest::PiImage generateImageWithSpecConsts() { return Img; } -static sycl::unittest::PiImage Img = generateImageWithSpecConsts(); -static sycl::unittest::PiImageArray<1> ImgArray{&Img}; +static sycl::unittest::UrImage Img = generateImageWithSpecConsts(); +static sycl::unittest::UrImageArray<1> ImgArray{&Img}; TEST(SpecializationConstant, DefaultValuesAreSet) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; @@ -96,8 +95,8 @@ TEST(SpecializationConstant, DefaultValuesAreSet) { } TEST(SpecializationConstant, DefaultValuesAreOverriden) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; @@ -132,8 +131,8 @@ TEST(SpecializationConstant, DefaultValuesAreOverriden) { } TEST(SpecializationConstant, SetSpecConstAfterUseKernelBundle) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; @@ -175,8 +174,8 @@ TEST(SpecializationConstant, SetSpecConstAfterUseKernelBundle) { } TEST(SpecializationConstant, GetSpecConstAfterUseKernelBundle) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; @@ -219,8 +218,8 @@ TEST(SpecializationConstant, GetSpecConstAfterUseKernelBundle) { } TEST(SpecializationConstant, UseKernelBundleAfterSetSpecConst) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; @@ -262,8 +261,8 @@ TEST(SpecializationConstant, UseKernelBundleAfterSetSpecConst) { } TEST(SpecializationConstant, NoKernel) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; diff --git a/sycl/unittests/accessor/AccessorPlaceholder.cpp b/sycl/unittests/accessor/AccessorPlaceholder.cpp index 611e3fbff8757..045ae6402d22e 100644 --- a/sycl/unittests/accessor/AccessorPlaceholder.cpp +++ b/sycl/unittests/accessor/AccessorPlaceholder.cpp @@ -1,7 +1,7 @@ #include -#include -#include +#include +#include #include #include @@ -49,8 +49,8 @@ TEST(AccessorPlaceholderTest, PlaceholderNoneTargetDevice) { int data(14); sycl::range<1> r(1); sycl::buffer data_buf(&data, r); - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue q{Plt.get_devices()[0]}; q.submit([&](sycl::handler &cgh) { AccT acc(data_buf, cgh); @@ -67,8 +67,8 @@ TEST(AccessorPlaceholderTest, PlaceholderTrueTargetDevice) { int data(14); sycl::range<1> r(1); sycl::buffer data_buf(&data, r); - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue q{Plt.get_devices()[0]}; q.submit([&](sycl::handler &cgh) { AccT acc(data_buf, cgh); @@ -85,8 +85,8 @@ TEST(AccessorPlaceholderTest, PlaceholderFalseTargetDevice) { int data(14); sycl::range<1> r(1); sycl::buffer data_buf(&data, r); - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue q{Plt.get_devices()[0]}; q.submit([&](sycl::handler &cgh) { AccT acc(data_buf, cgh); diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 144ab1a546c0e..2216674f244ea 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -49,12 +49,12 @@ TEST(ParseAllowListTests, CheckUnsupportedKeyNameIsHandledInSingleDeviceDesc) { "BackendName:level_zero,SomeUnsupportedKey:gpu"); FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { - EXPECT_EQ( - std::string("Unrecognized key in SYCL_DEVICE_ALLOWLIST. For " - "details, please refer to " - "https://github.com/intel/llvm/blob/sycl/sycl/doc/" - "EnvironmentVariables.md -30 (PI_ERROR_INVALID_VALUE)"), - e.what()); + EXPECT_EQ(std::string( + "Unrecognized key in SYCL_DEVICE_ALLOWLIST. For " + "details, please refer to " + "https://github.com/intel/llvm/blob/sycl/sycl/doc/" + "EnvironmentVariables.md 4 (UR_RESULT_ERROR_INVALID_VALUE)"), + e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; } @@ -68,12 +68,12 @@ TEST( "DriverVersion:{{value}}|SomeUnsupportedKey:gpu"); FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { - EXPECT_EQ( - std::string("Unrecognized key in SYCL_DEVICE_ALLOWLIST. For " - "details, please refer to " - "https://github.com/intel/llvm/blob/sycl/sycl/doc/" - "EnvironmentVariables.md -30 (PI_ERROR_INVALID_VALUE)"), - e.what()); + EXPECT_EQ(std::string( + "Unrecognized key in SYCL_DEVICE_ALLOWLIST. For " + "details, please refer to " + "https://github.com/intel/llvm/blob/sycl/sycl/doc/" + "EnvironmentVariables.md 4 (UR_RESULT_ERROR_INVALID_VALUE)"), + e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; } @@ -87,12 +87,12 @@ TEST( "BackendName:level_zero|SomeUnsupportedKey:gpu"); FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { - EXPECT_EQ( - std::string("Unrecognized key in SYCL_DEVICE_ALLOWLIST. For " - "details, please refer to " - "https://github.com/intel/llvm/blob/sycl/sycl/doc/" - "EnvironmentVariables.md -30 (PI_ERROR_INVALID_VALUE)"), - e.what()); + EXPECT_EQ(std::string( + "Unrecognized key in SYCL_DEVICE_ALLOWLIST. For " + "details, please refer to " + "https://github.com/intel/llvm/blob/sycl/sycl/doc/" + "EnvironmentVariables.md 4 (UR_RESULT_ERROR_INVALID_VALUE)"), + e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; } @@ -105,12 +105,12 @@ TEST(ParseAllowListTests, "DriverVersion:{{value1}}|SomeUnsupportedKey:{{value2}}"); FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { - EXPECT_EQ( - std::string("Unrecognized key in SYCL_DEVICE_ALLOWLIST. For " - "details, please refer to " - "https://github.com/intel/llvm/blob/sycl/sycl/doc/" - "EnvironmentVariables.md -30 (PI_ERROR_INVALID_VALUE)"), - e.what()); + EXPECT_EQ(std::string( + "Unrecognized key in SYCL_DEVICE_ALLOWLIST. For " + "details, please refer to " + "https://github.com/intel/llvm/blob/sycl/sycl/doc/" + "EnvironmentVariables.md 4 (UR_RESULT_ERROR_INVALID_VALUE)"), + e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; } @@ -134,8 +134,8 @@ TEST(ParseAllowListTests, CheckMissingOpenDoubleCurlyBracesAreHandled) { FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { EXPECT_EQ(std::string("Key DeviceName of SYCL_DEVICE_ALLOWLIST " - "should have value which starts with {{ -30 " - "(PI_ERROR_INVALID_VALUE)"), + "should have value which starts with {{ 4 " + "(UR_RESULT_ERROR_INVALID_VALUE)"), e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; @@ -149,8 +149,8 @@ TEST(ParseAllowListTests, CheckMissingClosedDoubleCurlyBracesAreHandled) { FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { EXPECT_EQ(std::string("Key DriverVersion of SYCL_DEVICE_ALLOWLIST " - "should have value which ends with }} -30 " - "(PI_ERROR_INVALID_VALUE)"), + "should have value which ends with }} 4 " + "(UR_RESULT_ERROR_INVALID_VALUE)"), e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; @@ -197,12 +197,12 @@ TEST(ParseAllowListTests, CheckIncorrectBackendNameValueIsHandled) { sycl::detail::parseAllowList("BackendName:blablabla"); FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { - EXPECT_EQ( - std::string("Value blablabla for key BackendName is not valid in " - "SYCL_DEVICE_ALLOWLIST. For details, please refer to " - "https://github.com/intel/llvm/blob/sycl/sycl/doc/" - "EnvironmentVariables.md -30 (PI_ERROR_INVALID_VALUE)"), - e.what()); + EXPECT_EQ(std::string( + "Value blablabla for key BackendName is not valid in " + "SYCL_DEVICE_ALLOWLIST. For details, please refer to " + "https://github.com/intel/llvm/blob/sycl/sycl/doc/" + "EnvironmentVariables.md 4 (UR_RESULT_ERROR_INVALID_VALUE)"), + e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; } @@ -214,12 +214,12 @@ TEST(ParseAllowListTests, CheckIncorrectDeviceTypeValueIsHandled) { sycl::detail::parseAllowList("DeviceType:blablabla"); FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { - EXPECT_EQ( - std::string("Value blablabla for key DeviceType is not valid in " - "SYCL_DEVICE_ALLOWLIST. For details, please refer to " - "https://github.com/intel/llvm/blob/sycl/sycl/doc/" - "EnvironmentVariables.md -30 (PI_ERROR_INVALID_VALUE)"), - e.what()); + EXPECT_EQ(std::string( + "Value blablabla for key DeviceType is not valid in " + "SYCL_DEVICE_ALLOWLIST. For details, please refer to " + "https://github.com/intel/llvm/blob/sycl/sycl/doc/" + "EnvironmentVariables.md 4 (UR_RESULT_ERROR_INVALID_VALUE)"), + e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; } @@ -231,13 +231,13 @@ TEST(ParseAllowListTests, CheckIncorrectDeviceVendorIdValueIsHandled) { sycl::detail::parseAllowList("DeviceVendorId:blablabla"); FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { - EXPECT_EQ( - std::string("Value blablabla for key DeviceVendorId is not valid in " - "SYCL_DEVICE_ALLOWLIST. It should have the hex format. For " - "details, please refer to " - "https://github.com/intel/llvm/blob/sycl/sycl/doc/" - "EnvironmentVariables.md -30 (PI_ERROR_INVALID_VALUE)"), - e.what()); + EXPECT_EQ(std::string( + "Value blablabla for key DeviceVendorId is not valid in " + "SYCL_DEVICE_ALLOWLIST. It should have the hex format. For " + "details, please refer to " + "https://github.com/intel/llvm/blob/sycl/sycl/doc/" + "EnvironmentVariables.md 4 (UR_RESULT_ERROR_INVALID_VALUE)"), + e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; } @@ -264,10 +264,11 @@ TEST(ParseAllowListTests, CheckExceptionIsThrownForValueWOColonDelim) { FAIL() << "Expected an exception"; } catch (sycl::exception const &e) { EXPECT_EQ( - std::string("SYCL_DEVICE_ALLOWLIST has incorrect format. For " - "details, please refer to " - "https://github.com/intel/llvm/blob/sycl/sycl/" - "doc/EnvironmentVariables.md -30 (PI_ERROR_INVALID_VALUE)"), + std::string( + "SYCL_DEVICE_ALLOWLIST has incorrect format. For " + "details, please refer to " + "https://github.com/intel/llvm/blob/sycl/sycl/" + "doc/EnvironmentVariables.md 4 (UR_RESULT_ERROR_INVALID_VALUE)"), e.what()); } catch (...) { FAIL() << "Expected sycl::exception"; diff --git a/sycl/unittests/assert/assert.cpp b/sycl/unittests/assert/assert.cpp index b2650ff8877c4..33048f3433932 100644 --- a/sycl/unittests/assert/assert.cpp +++ b/sycl/unittests/assert/assert.cpp @@ -18,6 +18,7 @@ * pipe. */ +#include "ur_mock_helpers.hpp" #define SYCL_FALLBACK_ASSERT 1 // Enable use of interop kernel c-tor #define __SYCL_INTERNAL_API @@ -28,8 +29,8 @@ #include #include -#include -#include +#include +#include #include @@ -72,22 +73,22 @@ struct KernelInfo<::sycl::detail::__sycl_service_kernel__::AssertInfoCopier> } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage generateDefaultImage() { +static sycl::unittest::UrImage generateDefaultImage() { using namespace sycl::unittest; static const std::string KernelName = "TestKernel"; static const std::string CopierKernelName = "_ZTSN2cl4sycl6detail23__sycl_service_kernel__16AssertInfoCopierE"; - PiPropertySet PropSet; + UrPropertySet PropSet; setKernelUsesAssert({KernelName}, PropSet); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = makeEmptyKernels({KernelName}); + UrArray Entries = makeEmptyKernels({KernelName}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -98,19 +99,19 @@ static sycl::unittest::PiImage generateDefaultImage() { return Img; } -static sycl::unittest::PiImage generateCopierKernelImage() { +static sycl::unittest::UrImage generateCopierKernelImage() { using namespace sycl::unittest; static const std::string CopierKernelName = "_ZTSN2cl4sycl6detail23__sycl_service_kernel__16AssertInfoCopierE"; - PiPropertySet PropSet; + UrPropertySet PropSet; std::vector Bin{10, 11, 12, 13, 14, 15}; // Random data - PiArray Entries = makeEmptyKernels({CopierKernelName}); + UrArray Entries = makeEmptyKernels({CopierKernelName}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -121,9 +122,9 @@ static sycl::unittest::PiImage generateCopierKernelImage() { return Img; } -sycl::unittest::PiImage Imgs[] = {generateDefaultImage(), +sycl::unittest::UrImage Imgs[] = {generateDefaultImage(), generateCopierKernelImage()}; -sycl::unittest::PiImageArray<2> ImgArray{Imgs}; +sycl::unittest::UrImageArray<2> ImgArray{Imgs}; struct AssertHappened { int Flag = 0; @@ -166,29 +167,25 @@ static int MemoryMapCounter = MemoryMapCounterBase; static constexpr int PauseWaitOnIdx = KernelLaunchCounterBase + 1; // Mock redifinitions -static pi_result redefinedKernelGetGroupInfoAfter( - pi_kernel kernel, pi_device device, pi_kernel_group_info param_name, - size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - if (param_name == PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE) { - if (param_value_size_ret) { - *param_value_size_ret = 3 * sizeof(size_t); - } else if (param_value) { - auto size = static_cast(param_value); +static ur_result_t redefinedKernelGetGroupInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE) { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 3 * sizeof(size_t); + } else if (*params.ppPropValue) { + auto size = static_cast(*params.ppPropValue); size[0] = 1; size[1] = 1; size[2] = 1; } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result -redefinedEnqueueKernelLaunchAfter(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *LocalSize, pi_uint32 NDeps, - const pi_event *Deps, pi_event *RetEvent) { - static pi_event UserKernelEvent = *RetEvent; +static ur_result_t redefinedEnqueueKernelLaunchAfter(void *pParams) { + auto params = *static_cast(pParams); + static ur_event_handle_t UserKernelEvent = **params.pphEvent; int Val = KernelLaunchCounter++; // This output here is to reduce amount of time requried to debug/reproduce a // failing test upon feature break @@ -196,64 +193,62 @@ redefinedEnqueueKernelLaunchAfter(pi_queue, pi_kernel, pi_uint32, if (PauseWaitOnIdx == Val) { // It should be copier kernel. Check if it depends on user's one. - EXPECT_EQ(NDeps, 1U); - EXPECT_EQ(Deps[0], UserKernelEvent); + EXPECT_EQ(*params.pnumEventsInWaitList, 1U); + EXPECT_EQ(*params.pphEventWaitList[0], UserKernelEvent); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedEventsWaitPositive(pi_uint32 num_events, - const pi_event *event_list) { +static ur_result_t redefinedEventWaitPositive(void *pParams) { + auto params = *static_cast(pParams); // there should be two events: one is for memory map and the other is for // copier kernel - assert(num_events == 2); + assert(*params.pnumEvents == 2); - int EventIdx1 = reinterpret_cast(event_list[0])[0]; - int EventIdx2 = reinterpret_cast(event_list[1])[0]; + int EventIdx1 = reinterpret_cast((*params.pphEventWaitList)[0])[0]; + int EventIdx2 = reinterpret_cast((*params.pphEventWaitList)[1])[0]; // This output here is to reduce amount of time requried to debug/reproduce // a failing test upon feature break printf("Waiting for events %i, %i\n", EventIdx1, EventIdx2); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedEventsWaitNegative(pi_uint32 num_events, - const pi_event *event_list) { +static ur_result_t redefinedEventWaitNegative(void *pParams) { + auto params = *static_cast(pParams); // For negative tests we do not expect the copier kernel to be used, so // instead we accept whatever amount we get. // This output here is to reduce amount of time requried to debug/reproduce // a failing test upon feature break - printf("Waiting for %i events ", num_events); - for (size_t I = 0; I < num_events; ++I) - printf("%i, ", reinterpret_cast(event_list[I])[0]); + printf("Waiting for %i events ", *params.pnumEventsInWaitList); + for (size_t I = 0; I < *params.pnumEventsInWaitList; ++I) + printf("%i, ", reinterpret_cast(*params.pphEvent[I])[0]); printf("\n"); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedEnqueueMemBufferMapAfter( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, - pi_map_flags map_flags, size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *RetEvent, void **RetMap) { +static ur_result_t redefinedEnqueueMemBufferMapAfter(void *pParams) { + auto params = *static_cast(pParams); MemoryMapCounter++; // This output here is to reduce amount of time requried to debug/reproduce a // failing test upon feature break printf("Memory map %i\n", MemoryMapCounter); - *RetMap = (void *)&ExpectedToOutput; + **params.pppRetMap = (void *)&ExpectedToOutput; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static void setupMock(sycl::unittest::PiMock &Mock) { +static void setupMock(sycl::unittest::UrMock<> &Mock) { using namespace sycl::detail; - Mock.redefineAfter( - redefinedKernelGetGroupInfoAfter); - Mock.redefineAfter( - redefinedEnqueueKernelLaunchAfter); - Mock.redefineAfter( - redefinedEnqueueMemBufferMapAfter); - Mock.redefineBefore(redefinedEventsWaitPositive); + mock::getCallbacks().set_after_callback("urKernelGetGroupInfo", + &redefinedKernelGetGroupInfoAfter); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunchAfter); + mock::getCallbacks().set_after_callback("urEnqueueMemBufferMap", + &redefinedEnqueueMemBufferMapAfter); + mock::getCallbacks().set_before_callback("urEventWait", + &redefinedEventWaitPositive); } namespace TestInteropKernel { @@ -261,125 +256,111 @@ const sycl::context *Context = nullptr; const sycl::device *Device = nullptr; int KernelLaunchCounter = ::KernelLaunchCounterBase; -static pi_result redefinedKernelGetInfo(pi_kernel Kernel, - pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - if (PI_KERNEL_INFO_CONTEXT == ParamName) { - pi_context PiContext = +static ur_result_t redefinedKernelGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (UR_KERNEL_INFO_CONTEXT == *params.ppropName) { + ur_context_handle_t UrContext = sycl::detail::getSyclObjImpl(*Context)->getHandleRef(); - if (ParamValue) - memcpy(ParamValue, &PiContext, sizeof(PiContext)); - if (ParamValueSizeRet) - *ParamValueSizeRet = sizeof(PiContext); + if (*params.ppPropValue) + memcpy(*params.ppPropValue, &UrContext, sizeof(UrContext)); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(UrContext); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } - if (PI_KERNEL_INFO_PROGRAM == ParamName) { - pi_program PIProgram = nullptr; - pi_result Res = mock_piProgramCreate(/*pi_context=*/0x0, /**il*/ nullptr, - /*length=*/0, &PIProgram); - EXPECT_TRUE(PI_SUCCESS == Res); + if (UR_KERNEL_INFO_PROGRAM == *params.ppropName) { + ur_program_handle_t URProgram = + mock::createDummyHandle(); - if (ParamValue) - memcpy(ParamValue, &PIProgram, sizeof(PIProgram)); - if (ParamValueSizeRet) - *ParamValueSizeRet = sizeof(PIProgram); + if (*params.ppPropValue) + memcpy(*params.ppPropValue, &URProgram, sizeof(URProgram)); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(URProgram); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } - if (PI_KERNEL_INFO_FUNCTION_NAME == ParamName) { + if (UR_KERNEL_INFO_FUNCTION_NAME == *params.ppropName) { static const char FName[] = "TestFnName"; - if (ParamValue) { + if (*params.ppPropValue) { size_t L = strlen(FName) + 1; - if (L < ParamValueSize) - L = ParamValueSize; + if (L < *params.ppropSize) + L = *params.ppropSize; - memcpy(ParamValue, FName, L); + memcpy(*params.ppPropValue, FName, L); } - if (ParamValueSizeRet) - *ParamValueSizeRet = strlen(FName) + 1; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = strlen(FName) + 1; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } - return PI_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } -static pi_result redefinedEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *LocalSize, - pi_uint32 N, const pi_event *Deps, - pi_event *RetEvent) { +static ur_result_t redefinedEnqueueKernelLaunch(void *pParms) { int Val = KernelLaunchCounter++; // This output here is to reduce amount of time requried to debug/reproduce a // failing test upon feature break printf("Enqueued %i\n", Val); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedProgramGetInfo(pi_program P, - pi_program_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - if (PI_PROGRAM_INFO_NUM_DEVICES == ParamName) { +static ur_result_t redefinedProgramGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (UR_PROGRAM_INFO_NUM_DEVICES == *params.ppropName) { static const int V = 1; - if (ParamValue) - memcpy(ParamValue, &V, sizeof(V)); - if (ParamValueSizeRet) - *ParamValueSizeRet = sizeof(V); + if (*params.ppPropValue) + memcpy(*params.ppPropValue, &V, sizeof(V)); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(V); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } - if (PI_PROGRAM_INFO_DEVICES == ParamName) { - EXPECT_EQ(ParamValueSize, 1 * sizeof(pi_device)); + if (UR_PROGRAM_INFO_DEVICES == *params.ppropName) { + EXPECT_EQ(*params.ppropSize, 1 * sizeof(ur_device_handle_t)); - pi_device Dev = sycl::detail::getSyclObjImpl(*Device)->getHandleRef(); + ur_device_handle_t Dev = sycl::detail::getSyclObjImpl(*Device)->getHandleRef(); - if (ParamValue) - memcpy(ParamValue, &Dev, sizeof(Dev)); - if (ParamValueSizeRet) - *ParamValueSizeRet = sizeof(Dev); + if (*params.ppPropValue) + memcpy(*params.ppPropValue, &Dev, sizeof(Dev)); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(Dev); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } - return PI_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } -static pi_result redefinedProgramGetBuildInfo(pi_program P, pi_device D, - pi_program_build_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - if (PI_PROGRAM_BUILD_INFO_BINARY_TYPE == ParamName) { - static const pi_program_binary_type T = PI_PROGRAM_BINARY_TYPE_EXECUTABLE; - if (ParamValue) - memcpy(ParamValue, &T, sizeof(T)); - if (ParamValueSizeRet) - *ParamValueSizeRet = sizeof(T); - return PI_SUCCESS; +static ur_result_t redefinedProgramGetBuildInfo(void *pParams) { + auto params = *static_cast(pParams); + if (UR_PROGRAM_BUILD_INFO_BINARY_TYPE == *params.ppropName) { + static const ur_program_binary_type_t T = UR_PROGRAM_BINARY_TYPE_EXECUTABLE; + if (*params.ppPropValue) + memcpy(*params.ppPropValue, &T, sizeof(T)); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(T); + return UR_RESULT_SUCCESS; } - if (PI_PROGRAM_BUILD_INFO_OPTIONS == ParamName) { - if (ParamValueSizeRet) - *ParamValueSizeRet = 0; - return PI_SUCCESS; + if (UR_PROGRAM_BUILD_INFO_OPTIONS == *params.ppropName) { + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 0; + return UR_RESULT_SUCCESS; } - return PI_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } } // namespace TestInteropKernel -static void setupMockForInterop(sycl::unittest::PiMock &Mock, +static void setupMockForInterop(sycl::unittest::UrMock<> &Mock, const sycl::context &Ctx, const sycl::device &Dev) { using namespace sycl::detail; @@ -388,19 +369,22 @@ static void setupMockForInterop(sycl::unittest::PiMock &Mock, TestInteropKernel::Device = &Dev; TestInteropKernel::Context = &Ctx; - Mock.redefineAfter( - redefinedKernelGetGroupInfoAfter); - Mock.redefineBefore( - TestInteropKernel::redefinedEnqueueKernelLaunch); - Mock.redefineAfter( - redefinedEnqueueMemBufferMapAfter); - Mock.redefineBefore(redefinedEventsWaitNegative); - Mock.redefineBefore( - TestInteropKernel::redefinedKernelGetInfo); - Mock.redefineBefore( - TestInteropKernel::redefinedProgramGetInfo); - Mock.redefineBefore( - TestInteropKernel::redefinedProgramGetBuildInfo); + mock::getCallbacks().set_after_callback("urKernelGetGroupInfo", + &redefinedKernelGetGroupInfoAfter); + mock::getCallbacks().set_before_callback( + "urEnqueueKernelLaunch", + &TestInteropKernel::redefinedEnqueueKernelLaunch); + mock::getCallbacks().set_after_callback("urEnqueueMemBufferMap", + &redefinedEnqueueMemBufferMapAfter); + mock::getCallbacks().set_before_callback("urEventWait", + &redefinedEventWaitNegative); + mock::getCallbacks().set_before_callback( + "urKernelGetInfo", &TestInteropKernel::redefinedKernelGetInfo); + mock::getCallbacks().set_before_callback( + "urProgramGetInfo", &TestInteropKernel::redefinedProgramGetInfo); + mock::getCallbacks().set_before_callback( + "urProgramGetBuildInfo", + &TestInteropKernel::redefinedProgramGetBuildInfo); } #ifndef _WIN32 @@ -411,8 +395,8 @@ void ChildProcess(int StdErrFD) { exit(1); } - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); setupMock(Mock); @@ -487,9 +471,9 @@ void ParentProcess(int ChildPID, int ChildStdErrFD) { TEST(Assert, TestPositive) { // Ensure that the mock plugin is initialized before spawning work. Since the - // test needs no redefinitions we do not need to create a PiMock instance, but - // the mock plugin is still needed to have a valid platform available. - sycl::unittest::PiMock::EnsureMockPluginInitialized(); + // test needs no redefinitions we do not need to create a UrMock<> instance, + // but the mock plugin is still needed to have a valid platform available. + // sycl::unittest::UrMock::InitUr(); #ifndef _WIN32 static constexpr int ReadFDIdx = 0; @@ -530,8 +514,8 @@ TEST(Assert, TestAssertServiceKernelHidden) { } TEST(Assert, TestInteropKernelNegative) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; @@ -540,15 +524,11 @@ TEST(Assert, TestInteropKernelNegative) { sycl::queue Queue{Ctx, Dev}; - pi_kernel PIKernel = nullptr; - - pi_result Res = mock_piKernelCreate( - /*pi_program=*/0x0, /*kernel_name=*/"dummy_kernel", &PIKernel); - EXPECT_TRUE(PI_SUCCESS == Res); + auto URKernel = mock::createDummyHandle(); // TODO use make_kernel. This requires a fix in backend.cpp to get plugin // from context instead of free getPlugin to alllow for mocking of its methods - sycl::kernel KInterop((cl_kernel)PIKernel, Ctx); + sycl::kernel KInterop((cl_kernel)URKernel, Ctx); Queue.submit([&](sycl::handler &H) { H.single_task(KInterop); }); @@ -557,8 +537,8 @@ TEST(Assert, TestInteropKernelNegative) { } TEST(Assert, TestInteropKernelFromProgramNegative) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; diff --git a/sycl/unittests/buffer/BufferLocation.cpp b/sycl/unittests/buffer/BufferLocation.cpp index 77d674f602641..71c6d1fa545cd 100644 --- a/sycl/unittests/buffer/BufferLocation.cpp +++ b/sycl/unittests/buffer/BufferLocation.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #define SYCL2020_DISABLE_DEPRECATION_WARNINGS -#include #include +#include #include #include @@ -20,87 +20,96 @@ const uint64_t DEFAULT_VALUE = 7777; static uint64_t PassedLocation = DEFAULT_VALUE; -pi_result redefinedMemBufferCreate(pi_context, pi_mem_flags, size_t size, - void *, pi_mem *, - const pi_mem_properties *properties) { +ur_result_t redefinedMemBufferCreateBefore(void *pParams) { + auto params = reinterpret_cast(pParams); PassedLocation = DEFAULT_VALUE; - if (!properties) - return PI_SUCCESS; + if (!*params->ppProperties) + return UR_RESULT_SUCCESS; + + auto nextProps = + static_cast((*params->ppProperties)->pNext); // properties must ended by 0 - size_t I = 0; - while (true) { - if (properties[I] != 0) { - if (properties[I] != PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION) { - I += 2; - } else { - PassedLocation = properties[I + 1]; - break; - } + while (nextProps) { + if (nextProps->stype != + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES) { + nextProps = static_cast(nextProps->pNext); + break; } + PassedLocation = + reinterpret_cast(nextProps) + ->location; + nextProps = reinterpret_cast(nextProps->pNext); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedDeviceGetInfoAfter(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(param_value); - *Result = PI_DEVICE_TYPE_ACC; +static ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { + auto params = reinterpret_cast(pParams); + switch (*params->ppropName) { + case UR_DEVICE_INFO_TYPE: { + auto *Result = reinterpret_cast(*params->ppPropValue); + *Result = UR_DEVICE_TYPE_FPGA; + break; } - if (param_name == PI_DEVICE_INFO_COMPILER_AVAILABLE) { - auto *Result = reinterpret_cast(param_value); + case UR_DEVICE_INFO_COMPILER_AVAILABLE: { + auto *Result = reinterpret_cast(*params->ppPropValue); *Result = true; + break; } - if (param_name == PI_DEVICE_INFO_EXTENSIONS) { + case UR_DEVICE_INFO_EXTENSIONS: { const std::string name = "cl_intel_mem_alloc_buffer_location"; // Increase size by one for the null terminator const size_t nameSize = name.size() + 1; - if (!param_value) { + if (!*params->ppPropValue) { // Choose bigger size so that both original and redefined function // has enough memory for storing the extension string - *param_value_size_ret = - nameSize > *param_value_size_ret ? nameSize : *param_value_size_ret; + **params->ppPropSizeRet = nameSize > **params->ppPropSizeRet + ? nameSize + : **params->ppPropSizeRet; } else { - char *dst = static_cast(param_value); + char *dst = static_cast(*params->ppPropValue); strcpy(dst, name.data()); } + break; } // This mock device has no sub-devices - if (param_name == PI_DEVICE_INFO_PARTITION_PROPERTIES) { - if (param_value_size_ret) { - *param_value_size_ret = 0; + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { + if (*params->ppPropSizeRet) { + **params->ppPropSizeRet = 0; } + break; } - if (param_name == PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - assert(param_value_size == sizeof(pi_device_affinity_domain)); - if (param_value) { - *static_cast(param_value) = 0; + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { + assert(*params->ppropSize == sizeof(ur_device_affinity_domain_flags_t)); + if (*params->ppPropValue) { + *static_cast(*params->ppPropValue) = + 0; } + break; + } + default: + break; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class BufferTest : public ::testing::Test { public: - BufferTest() : Mock{}, Plt{Mock.getPlatform()} {} + BufferTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - Mock.redefineBefore( - redefinedMemBufferCreate); - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &redefinedMemBufferCreateBefore); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); } -protected: - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; sycl::platform Plt; }; diff --git a/sycl/unittests/buffer/BufferReleaseBase.cpp b/sycl/unittests/buffer/BufferReleaseBase.cpp index 27c45edc983d7..172b005bf60af 100644 --- a/sycl/unittests/buffer/BufferReleaseBase.cpp +++ b/sycl/unittests/buffer/BufferReleaseBase.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "BufferReleaseBase.hpp" +#include "gmock/gmock.h" class BufferDestructionCheck : public BufferDestructionCheckCommon {}; @@ -88,46 +89,6 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultAllocator) { RawBufferImplPtr); } -pi_device GlobalDeviceHandle(createDummyHandle()); - -inline pi_result customMockDevicesGet(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 1; - - if (devices && num_entries > 0) - devices[0] = GlobalDeviceHandle; - - return PI_SUCCESS; -} - -inline pi_result customMockContextGetInfo(pi_context context, - pi_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_CONTEXT_INFO_NUM_DEVICES: { - if (param_value) - *static_cast(param_value) = 1; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_uint32); - return PI_SUCCESS; - } - case PI_CONTEXT_INFO_DEVICES: { - if (param_value) - *static_cast(param_value) = GlobalDeviceHandle; - if (param_value_size_ret) - *param_value_size_ret = sizeof(GlobalDeviceHandle); - break; - } - default:; - } - return PI_SUCCESS; -} - TEST_F(BufferDestructionCheck, BufferWithRawHostPtr) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; @@ -235,19 +196,19 @@ TEST_F(BufferDestructionCheck, BufferWithIterators) { RawBufferImplPtr); } -std::map ExpectedEventStatus; -pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize, - void *PV, size_t *PVSizeRet) { - EXPECT_EQ(PName, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) +std::map ExpectedEventStatus; +ur_result_t replaceEventGetInfo(void *pParams) { + auto params = *reinterpret_cast(pParams); + EXPECT_EQ(*params.ppropName, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) << "Unknown param name"; // could not use assert here - EXPECT_EQ(PVSize, 4u); - auto it = ExpectedEventStatus.find(Event); + EXPECT_EQ(*params.ppropSize, 4u); + auto it = ExpectedEventStatus.find(*params.phEvent); if (it != ExpectedEventStatus.end()) { - *(static_cast(PV)) = it->second; - return PI_SUCCESS; + *(static_cast(*params.ppPropValue)) = it->second; + return UR_RESULT_SUCCESS; } else - return PI_ERROR_INVALID_OPERATION; + return UR_RESULT_ERROR_INVALID_OPERATION; } TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { @@ -265,12 +226,13 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { MockCmdWithReleaseTracking *WriteCmd = nullptr; ReadCmd = new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq); - ReadCmd->getEvent()->getHandleRef() = - createDummyHandle(); // just assign to be able to use mock + // These dummy handles are automatically cleaned up by the runtime + ReadCmd->getEvent()->getHandleRef() = reinterpret_cast( + mock::createDummyHandle()); WriteCmd = new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq); - WriteCmd->getEvent()->getHandleRef() = - createDummyHandle(); // just assign to be able to use mock + WriteCmd->getEvent()->getHandleRef() = reinterpret_cast( + mock::createDummyHandle()); ReadCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess; WriteCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess; @@ -281,23 +243,28 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { MockSchedulerPtr->addNodeToLeaves(Rec, WriteCmd, sycl::access::mode::write, ToEnqueue); - Mock.redefine(getEventInfoFunc); + mock::getCallbacks().set_replace_callback("urEventGetInfo", + &replaceEventGetInfo); testing::InSequence S; - ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_SUBMITTED; + ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = + UR_EVENT_STATUS_SUBMITTED; ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = - PI_EVENT_SUBMITTED; + UR_EVENT_STATUS_SUBMITTED; EXPECT_FALSE(MockSchedulerPtr->checkLeavesCompletion(Rec)); - ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE; + ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = + UR_EVENT_STATUS_COMPLETE; ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = - PI_EVENT_SUBMITTED; + UR_EVENT_STATUS_SUBMITTED; EXPECT_FALSE(MockSchedulerPtr->checkLeavesCompletion(Rec)); - ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE; - ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE; + ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = + UR_EVENT_STATUS_COMPLETE; + ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = + UR_EVENT_STATUS_COMPLETE; EXPECT_TRUE(MockSchedulerPtr->checkLeavesCompletion(Rec)); // previous expect_call is still valid and will generate failure if we recieve // call here, no need for extra limitation diff --git a/sycl/unittests/buffer/BufferReleaseBase.hpp b/sycl/unittests/buffer/BufferReleaseBase.hpp index bfcc4fb8369ed..a4982af3b581f 100644 --- a/sycl/unittests/buffer/BufferReleaseBase.hpp +++ b/sycl/unittests/buffer/BufferReleaseBase.hpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include #include @@ -39,7 +39,7 @@ class MockCmdWithReleaseTracking : public MockCommand { template class BufferDestructionCheckCommon : public ::testing::Test { public: - BufferDestructionCheckCommon() : Mock(Backend), Plt(Mock.getPlatform()) {} + BufferDestructionCheckCommon() : Mock(), Plt(sycl::platform()) {} protected: void SetUp() override { @@ -67,7 +67,7 @@ class BufferDestructionCheckCommon : public ::testing::Test { } protected: - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock Mock; sycl::platform Plt; MockScheduler *MockSchedulerPtr; }; diff --git a/sycl/unittests/buffer/CMakeLists.txt b/sycl/unittests/buffer/CMakeLists.txt index 330c8317296b9..4f520dfe60879 100644 --- a/sycl/unittests/buffer/CMakeLists.txt +++ b/sycl/unittests/buffer/CMakeLists.txt @@ -1,4 +1,5 @@ add_sycl_unittest(BufferTests OBJECT + PARTIAL_SOURCES_INTENDED BufferLocation.cpp Image.cpp BufferReleaseBase.cpp diff --git a/sycl/unittests/buffer/KernelArgMemObj.cpp b/sycl/unittests/buffer/KernelArgMemObj.cpp index 7f59cf52d09e2..ba9b9c9b1001e 100644 --- a/sycl/unittests/buffer/KernelArgMemObj.cpp +++ b/sycl/unittests/buffer/KernelArgMemObj.cpp @@ -9,8 +9,8 @@ #include #include -#include -#include +#include +#include class TestKernelWithMemObj; @@ -33,39 +33,37 @@ struct KernelInfo : public unittest::MockKernelInfoBase { } // namespace _V1 } // namespace sycl -static auto Img = +static sycl::unittest::UrImage Img = sycl::unittest::generateDefaultImage({"TestKernelWithMemObj"}); -static sycl::unittest::PiImageArray<1> ImgArray{&Img}; +static sycl::unittest::UrImageArray<1> ImgArray{&Img}; using namespace sycl; bool PropertyPresent = false; -pi_mem_obj_property PropsCopy{}; +ur_kernel_arg_mem_obj_properties_t PropsCopy{}; -pi_result redefinedKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, - const pi_mem_obj_property *arg_properties, - const pi_mem *arg_value) { - PropertyPresent = arg_properties != nullptr; +ur_result_t redefinedKernelSetArgMemObj(void *pParams) { + auto params = *static_cast(pParams); + PropertyPresent = *params.ppProperties != nullptr; if (PropertyPresent) - PropsCopy = *arg_properties; - return PI_SUCCESS; + PropsCopy = **params.ppProperties; + return UR_RESULT_SUCCESS; } -class BufferTestPiArgs : public ::testing::Test { +class BuferTestUrArgs : public ::testing::Test { public: - BufferTestPiArgs() - : Mock(sycl::backend::ext_oneapi_level_zero), Plt{Mock.getPlatform()} {} + BuferTestUrArgs() : Mock(), Plt{sycl::platform()} {} protected: void SetUp() override { PropertyPresent = false; PropsCopy = {}; - Mock.redefineBefore( - redefinedKernelSetArgMemObj); + mock::getCallbacks().set_before_callback("urKernelSetArgMemObj", + &redefinedKernelSetArgMemObj); } template - void TestFunc(pi_mem_obj_access ExpectedAccessMode) { + void TestFunc(ur_mem_flags_t ExpectedAccessMode) { queue Queue(context(Plt), default_selector_v); sycl::buffer Buf(3); Queue @@ -80,31 +78,30 @@ class BufferTestPiArgs : public ::testing::Test { }) .wait(); ASSERT_TRUE(PropertyPresent); - EXPECT_EQ(PropsCopy.type, PI_KERNEL_ARG_MEM_OBJ_ACCESS); - EXPECT_EQ(PropsCopy.mem_access, ExpectedAccessMode); + EXPECT_EQ(PropsCopy.memoryAccess, ExpectedAccessMode); } protected: - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock Mock; sycl::platform Plt; }; -TEST_F(BufferTestPiArgs, KernelSetArgMemObjReadWrite) { - TestFunc(PI_ACCESS_READ_WRITE); +TEST_F(BuferTestUrArgs, KernelSetArgMemObjReadWrite) { + TestFunc(UR_MEM_FLAG_READ_WRITE); } -TEST_F(BufferTestPiArgs, KernelSetArgMemObjDiscardReadWrite) { - TestFunc(PI_ACCESS_READ_WRITE); +TEST_F(BuferTestUrArgs, KernelSetArgMemObjDiscardReadWrite) { + TestFunc(UR_MEM_FLAG_READ_WRITE); } -TEST_F(BufferTestPiArgs, KernelSetArgMemObjRead) { - TestFunc(PI_ACCESS_READ_ONLY); +TEST_F(BuferTestUrArgs, KernelSetArgMemObjRead) { + TestFunc(UR_MEM_FLAG_READ_ONLY); } -TEST_F(BufferTestPiArgs, KernelSetArgMemObjWrite) { - TestFunc(PI_ACCESS_WRITE_ONLY); +TEST_F(BuferTestUrArgs, KernelSetArgMemObjWrite) { + TestFunc(UR_MEM_FLAG_WRITE_ONLY); } -TEST_F(BufferTestPiArgs, KernelSetArgMemObjDiscardWrite) { - TestFunc(PI_ACCESS_WRITE_ONLY); +TEST_F(BuferTestUrArgs, KernelSetArgMemObjDiscardWrite) { + TestFunc(UR_MEM_FLAG_WRITE_ONLY); } diff --git a/sycl/unittests/buffer/MemChannel.cpp b/sycl/unittests/buffer/MemChannel.cpp index cb14d33567088..9dc04491b3dff 100644 --- a/sycl/unittests/buffer/MemChannel.cpp +++ b/sycl/unittests/buffer/MemChannel.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include #include @@ -19,57 +19,52 @@ constexpr uint32_t DEFAULT_VALUE = 7777; static uint32_t PassedChannel = DEFAULT_VALUE; -static pi_result -redefinedMemBufferCreateBefore(pi_context, pi_mem_flags, size_t size, void *, - pi_mem *, const pi_mem_properties *properties) { +static ur_result_t redefinedMemBufferCreateBefore(void *pParams) { + auto &Params = *reinterpret_cast(pParams); PassedChannel = DEFAULT_VALUE; - if (!properties) - return PI_SUCCESS; - - // properties must ended by 0 - size_t I = 0; - while (properties[I] != 0) { - if (properties[I] == PI_MEM_PROPERTIES_CHANNEL) { - PassedChannel = properties[I + 1]; - break; + if (!*Params.ppProperties) + return UR_RESULT_SUCCESS; + + auto Next = + reinterpret_cast((*Params.ppProperties)->pNext); + while (Next) { + if (Next->stype == UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES) { + auto ChannelProperties = + reinterpret_cast(Next); + PassedChannel = ChannelProperties->channel; } - I += 2; + Next = reinterpret_cast(Next->pNext); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } template -static pi_result -redefinedDeviceGetInfoAfter(pi_device device, pi_device_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT) { - if (param_value) - *reinterpret_cast(param_value) = RetVal; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_bool); +static ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { + auto &Params = *reinterpret_cast(pParams); + if (*Params.ppropName == UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT) { + if (*Params.ppPropValue) + *reinterpret_cast(*Params.ppPropValue) = RetVal; + if (*Params.ppPropSizeRet) + **Params.ppPropSizeRet = sizeof(ur_bool_t); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class BufferMemChannelTest : public ::testing::Test { public: - BufferMemChannelTest() : Mock{}, Plt{Mock.getPlatform()} {} + BufferMemChannelTest() : Mock{}, Plt{sycl::platform()} {} protected: - void SetUp() override {} - -protected: - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; sycl::platform Plt; }; // Test that the mem channel aspect and info query correctly reports true when // device supports it. TEST_F(BufferMemChannelTest, MemChannelAspectTrue) { - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); sycl::device Dev = Plt.get_devices()[0]; EXPECT_TRUE(Dev.get_info()); @@ -79,8 +74,8 @@ TEST_F(BufferMemChannelTest, MemChannelAspectTrue) { // Test that the mem channel aspect and info query correctly reports false when // device supports it. TEST_F(BufferMemChannelTest, MemChannelAspectFalse) { - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); sycl::device Dev = Plt.get_devices()[0]; EXPECT_FALSE(Dev.get_info()); @@ -90,10 +85,10 @@ TEST_F(BufferMemChannelTest, MemChannelAspectFalse) { // Tests that the right buffer property identifier and values are passed to // buffer creation. TEST_F(BufferMemChannelTest, MemChannelProp) { - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); - Mock.redefineBefore( - redefinedMemBufferCreateBefore); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &redefinedMemBufferCreateBefore); sycl::queue Q{Plt.get_devices()[0]}; sycl::buffer Buf(3, sycl::property::buffer::mem_channel{42}); diff --git a/sycl/unittests/buffer/SubbufferLargeSize.cpp b/sycl/unittests/buffer/SubbufferLargeSize.cpp index 5e776906cc59b..71d7c67dc9354 100644 --- a/sycl/unittests/buffer/SubbufferLargeSize.cpp +++ b/sycl/unittests/buffer/SubbufferLargeSize.cpp @@ -11,33 +11,30 @@ #include #include -#include #include +#include -std::vector PiMethodData; +std::vector UrMethodData; -inline pi_result redefinedMemBufferPartition(pi_mem, pi_mem_flags, - pi_buffer_create_type, - void *buffer_create_info, - pi_mem *) { - PiMethodData.push_back( - *reinterpret_cast(buffer_create_info)); +inline ur_result_t redefinedMemBufferPartition(void *pParams) { + auto params = *static_cast(pParams); + UrMethodData.push_back(**params.ppRegion); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class LargeBufferSizeTest : public ::testing::Test { public: - LargeBufferSizeTest() : Mock{}, Plt{Mock.getPlatform()} {} + LargeBufferSizeTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - Mock.redefineAfter( - redefinedMemBufferPartition); + mock::getCallbacks().set_after_callback("urMemBufferPartition", + &redefinedMemBufferPartition); } protected: - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; sycl::platform Plt; }; @@ -77,9 +74,9 @@ TEST_F(LargeBufferSizeTest, MoreThan32bit) { }) .wait(); - ASSERT_EQ(PiMethodData.size(), 2ul); - EXPECT_EQ(PiMethodData[0].origin, 0ul); - EXPECT_EQ(PiMethodData[0].size, SubbufferSize1); - EXPECT_EQ(PiMethodData[1].origin, OffsetInBytes); - EXPECT_EQ(PiMethodData[1].size, SubbufferSize2); + ASSERT_EQ(UrMethodData.size(), 2ul); + EXPECT_EQ(UrMethodData[0].origin, 0ul); + EXPECT_EQ(UrMethodData[0].size, SubbufferSize1); + EXPECT_EQ(UrMethodData[1].origin, OffsetInBytes); + EXPECT_EQ(UrMethodData[1].size, SubbufferSize2); } diff --git a/sycl/unittests/buffer/l0_specific/BufferReleaseL0.cpp b/sycl/unittests/buffer/l0_specific/BufferReleaseL0.cpp index c40632fc488f2..27fa05939c24f 100644 --- a/sycl/unittests/buffer/l0_specific/BufferReleaseL0.cpp +++ b/sycl/unittests/buffer/l0_specific/BufferReleaseL0.cpp @@ -11,51 +11,47 @@ class BufferDestructionCheckL0 : public BufferDestructionCheckCommon< sycl::backend::ext_oneapi_level_zero> {}; -pi_device GlobalDeviceHandle(createDummyHandle()); +ur_device_handle_t + GlobalDeviceHandle(mock::createDummyHandle()); -inline pi_result customMockDevicesGet(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 1; +inline ur_result_t customMockDevicesGet(void *pParams) { + auto params = *reinterpret_cast(pParams); + if (*params.ppNumDevices) + **params.ppNumDevices = 1; - if (devices && num_entries > 0) - devices[0] = GlobalDeviceHandle; + if (*params.pphDevices && *params.pNumEntries > 0) + *params.pphDevices[0] = GlobalDeviceHandle; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -inline pi_result customMockContextGetInfo(pi_context context, - pi_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_CONTEXT_INFO_NUM_DEVICES: { - if (param_value) - *static_cast(param_value) = 1; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_uint32); - return PI_SUCCESS; +inline ur_result_t customMockContextGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_CONTEXT_INFO_NUM_DEVICES: { + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = 1; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(uint32_t); + return UR_RESULT_SUCCESS; } - case PI_CONTEXT_INFO_DEVICES: { - if (param_value) - *static_cast(param_value) = GlobalDeviceHandle; - if (param_value_size_ret) - *param_value_size_ret = sizeof(GlobalDeviceHandle); + case UR_CONTEXT_INFO_DEVICES: { + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = + GlobalDeviceHandle; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(GlobalDeviceHandle); break; } default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST_F(BufferDestructionCheckL0, BufferWithSizeOnlyInterop) { - Mock.redefineAfter( - customMockContextGetInfo); - Mock.redefineAfter( - customMockDevicesGet); + mock::getCallbacks().set_after_callback("urContextGetInfo", + &customMockContextGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGet", &customMockDevicesGet); auto Test = [&](sycl::ext::oneapi::level_zero::ownership Ownership) { sycl::context ContextForInterop{Plt}; @@ -103,4 +99,4 @@ TEST_F(BufferDestructionCheckL0, BufferWithSizeOnlyInterop) { Test(sycl::ext::oneapi::level_zero::ownership::keep); Test(sycl::ext::oneapi::level_zero::ownership::transfer); -} \ No newline at end of file +} diff --git a/sycl/unittests/context_device/Context.cpp b/sycl/unittests/context_device/Context.cpp index 291cc872ab665..3680a9de7bc97 100644 --- a/sycl/unittests/context_device/Context.cpp +++ b/sycl/unittests/context_device/Context.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include @@ -20,11 +20,11 @@ class ContextTest : public ::testing::Test { // default initialization, in case no devices are available before mock // has been initialized. ContextTest() - : mock{}, deviceA{mock.getPlatform().get_devices().front()}, - deviceB{mock.getPlatform().get_devices().back()} {} + : mock{}, deviceA{sycl::platform().get_devices().front()}, + deviceB{sycl::platform().get_devices().back()} {} protected: - unittest::PiMock mock; + unittest::UrMock<> mock; device deviceA, deviceB; }; diff --git a/sycl/unittests/context_device/DeviceRefCounter.cpp b/sycl/unittests/context_device/DeviceRefCounter.cpp index 46cb9b5c293ad..4803372a95aa6 100644 --- a/sycl/unittests/context_device/DeviceRefCounter.cpp +++ b/sycl/unittests/context_device/DeviceRefCounter.cpp @@ -9,42 +9,39 @@ #define SYCL2020_DISABLE_DEPRECATION_WARNINGS #include -#include +#include #include int DevRefCounter = 0; -static pi_result redefinedDevicesGetAfter(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, - pi_device *devices, - pi_uint32 *num_devices) { - if (devices) - DevRefCounter += num_entries; - return PI_SUCCESS; +static ur_result_t redefinedDevicesGetAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.pphDevices) + DevRefCounter += *params.pNumEntries; + return UR_RESULT_SUCCESS; } -static pi_result redefinedDeviceRetainAfter(pi_device device) { +static ur_result_t redefinedDeviceRetainAfter(void *) { DevRefCounter++; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedDeviceReleaseAfter(pi_device device) { +static ur_result_t redefinedDeviceReleaseAfter(void *) { DevRefCounter--; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(DevRefCounter, DevRefCounter) { { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - - Mock.redefineAfter( - redefinedDevicesGetAfter); - Mock.redefineAfter( - redefinedDeviceRetainAfter); - Mock.redefineAfter( - redefinedDeviceReleaseAfter); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + + mock::getCallbacks().set_after_callback("urDeviceGet", + &redefinedDevicesGetAfter); + mock::getCallbacks().set_after_callback("urDeviceRetain", + &redefinedDeviceRetainAfter); + mock::getCallbacks().set_after_callback("urDeviceRelease", + &redefinedDeviceReleaseAfter); Plt.get_devices(); } diff --git a/sycl/unittests/event/EventDestruction.cpp b/sycl/unittests/event/EventDestruction.cpp index 20427b7623cf8..d0f2b1336c28d 100644 --- a/sycl/unittests/event/EventDestruction.cpp +++ b/sycl/unittests/event/EventDestruction.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include @@ -16,31 +16,27 @@ using namespace sycl; static int ReleaseCounter = 0; -static pi_result redefinedEventRelease(pi_event event) { +static ur_result_t redefinedEventRelease(void *) { ++ReleaseCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedMemBufferCreate(pi_context, pi_mem_flags, size_t size, - void *, pi_mem *, - const pi_mem_properties *) { - return PI_SUCCESS; -} +ur_result_t redefinedMemBufferCreate(void *) { return UR_RESULT_SUCCESS; } class EventDestructionTest : public ::testing::Test { public: - EventDestructionTest() : Mock{}, Plt{Mock.getPlatform()} {} + EventDestructionTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - Mock.redefineBefore( - redefinedEventRelease); - Mock.redefineBefore( - redefinedMemBufferCreate); + mock::getCallbacks().set_before_callback("urEventRelease", + &redefinedEventRelease); + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &redefinedMemBufferCreate); } protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; sycl::platform Plt; }; diff --git a/sycl/unittests/handler/SetArgForLocalAccessor.cpp b/sycl/unittests/handler/SetArgForLocalAccessor.cpp index ab7d8f387b761..7a9079872ce36 100644 --- a/sycl/unittests/handler/SetArgForLocalAccessor.cpp +++ b/sycl/unittests/handler/SetArgForLocalAccessor.cpp @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// +#include "ur_mock_helpers.hpp" #include #include -#include +#include #include @@ -20,23 +21,23 @@ namespace { size_t LocalBufferArgSize = 0; -pi_result redefined_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, - size_t arg_size, const void *arg_value) { - LocalBufferArgSize = arg_size; +ur_result_t redefined_urKernelSetArgLocal(void *pParams) { + auto params = *static_cast(pParams); + LocalBufferArgSize = *params.pargSize; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(HandlerSetArg, LocalAccessor) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; redefineMockForKernelInterop(Mock); - Mock.redefine( - redefined_piKernelSetArg); + mock::getCallbacks().set_replace_callback("urKernelSetArgLocal", + &redefined_urKernelSetArgLocal); constexpr size_t Size = 128; sycl::queue Q; - DummyHandleT handle; + ur_native_handle_t handle = mock::createDummyHandle(); auto KernelCL = reinterpret_cast::template input_type>(&handle); auto Kernel = diff --git a/sycl/unittests/handler/require.cpp b/sycl/unittests/handler/require.cpp index 79e780b08044f..45d020ff0542c 100644 --- a/sycl/unittests/handler/require.cpp +++ b/sycl/unittests/handler/require.cpp @@ -1,10 +1,10 @@ #include -#include +#include #include TEST(Require, RequireWithNonPlaceholderAccessor) { - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; sycl::queue Q; int data = 5; { @@ -21,7 +21,7 @@ TEST(Require, RequireWithNonPlaceholderAccessor) { TEST(Require, checkIfAccBoundedToHandler) { std::string msg("placeholder accessor must be bound by calling " "handler::require() before it can be used."); - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; sycl::queue Q; int data = 0; diff --git a/sycl/unittests/helpers/KernelInteropCommon.hpp b/sycl/unittests/helpers/KernelInteropCommon.hpp index c7af46acd88fb..146df5cd301f1 100644 --- a/sycl/unittests/helpers/KernelInteropCommon.hpp +++ b/sycl/unittests/helpers/KernelInteropCommon.hpp @@ -6,121 +6,117 @@ // //===----------------------------------------------------------------------===// -#include +#include struct TestContext { // SYCL RT has number of checks that all devices and contexts are consistent // between kernel, kernel_bundle and other objects. // - // To ensure that those checks pass, we intercept some PI calls to extract - // the exact PI handles of device and context used in queue creation to later + // To ensure that those checks pass, we intercept some UR calls to extract + // the exact UR handles of device and context used in queue creation to later // return them when program/context/kernel info is requested. - pi_device deviceHandle; - pi_context contextHandle; + ur_device_handle_t deviceHandle; + ur_context_handle_t contextHandle; - pi_program programHandle = createDummyHandle(); + ur_program_handle_t programHandle = + mock::createDummyHandle(); - ~TestContext() { releaseDummyHandle(programHandle); } + ~TestContext() { + mock::releaseDummyHandle(programHandle); + } }; TestContext GlobalContext; -pi_result after_piContextGetInfo(pi_context context, pi_context_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_CONTEXT_INFO_DEVICES: - if (param_value) - *static_cast(param_value) = GlobalContext.deviceHandle; - if (param_value_size_ret) - *param_value_size_ret = sizeof(GlobalContext.deviceHandle); +ur_result_t after_urContextGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_CONTEXT_INFO_DEVICES: + if (*params.ppropName) + *static_cast(*params.ppPropValue) = + GlobalContext.deviceHandle; + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(GlobalContext.deviceHandle); break; default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result after_piProgramGetInfo(pi_program program, pi_program_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +ur_result_t after_urProgramGetInfo(void *pParams) { + auto params = *static_cast(pParams); - switch (param_name) { - case PI_PROGRAM_INFO_DEVICES: - if (param_value_size_ret) - *param_value_size_ret = sizeof(GlobalContext.deviceHandle); - if (param_value) - *static_cast(param_value) = GlobalContext.deviceHandle; + switch (*params.ppropName) { + case UR_PROGRAM_INFO_DEVICES: + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(GlobalContext.deviceHandle); + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = + GlobalContext.deviceHandle; break; default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefined_piProgramGetBuildInfo(pi_program program, pi_device device, - _pi_program_build_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_PROGRAM_BUILD_INFO_BINARY_TYPE: - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_program_binary_type); - if (param_value) - *static_cast(param_value) = - PI_PROGRAM_BINARY_TYPE_EXECUTABLE; +ur_result_t redefined_urProgramGetBuildInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_program_binary_type_t); + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = + UR_PROGRAM_BINARY_TYPE_EXECUTABLE; break; default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result after_piContextCreate(const pi_context_properties *properties, - pi_uint32 num_devices, const pi_device *devices, - void (*pfn_notify)(const char *errinfo, - const void *private_info, - size_t cb, void *user_data), - void *user_data, pi_context *ret_context) { - if (ret_context) - GlobalContext.contextHandle = *ret_context; - GlobalContext.deviceHandle = *devices; - return PI_SUCCESS; +ur_result_t after_urContextCreate(void *pParams) { + auto params = *static_cast(pParams); + if (*params.pphContext) + GlobalContext.contextHandle = **params.pphContext; + GlobalContext.deviceHandle = **params.pphDevices; + return UR_RESULT_SUCCESS; } -pi_result after_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_KERNEL_INFO_CONTEXT: - if (param_value_size_ret) - *param_value_size_ret = sizeof(GlobalContext.contextHandle); - if (param_value) - *static_cast(param_value) = GlobalContext.contextHandle; +ur_result_t after_urKernelGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_KERNEL_INFO_CONTEXT: + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(GlobalContext.contextHandle); + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = + GlobalContext.contextHandle; break; - case PI_KERNEL_INFO_PROGRAM: - if (param_value_size_ret) - *param_value_size_ret = sizeof(GlobalContext.programHandle); - if (param_value) - *(pi_program *)param_value = GlobalContext.programHandle; + case UR_KERNEL_INFO_PROGRAM: + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(GlobalContext.programHandle); + if (*params.ppPropValue) + *(ur_program_handle_t *)*params.ppPropValue = GlobalContext.programHandle; break; default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -void redefineMockForKernelInterop(sycl::unittest::PiMock &Mock) { - Mock.redefineAfter( - after_piContextCreate); - Mock.redefineAfter( - after_piProgramGetInfo); - Mock.redefineAfter( - after_piContextGetInfo); - Mock.redefineAfter( - after_piKernelGetInfo); - Mock.redefine( - redefined_piProgramGetBuildInfo); +void redefineMockForKernelInterop(sycl::unittest::UrMock<> &Mock) { + mock::getCallbacks().set_after_callback("urContextCreate", + &after_urContextCreate); + mock::getCallbacks().set_after_callback("urProgramGetInfo", + &after_urProgramGetInfo); + mock::getCallbacks().set_after_callback("urContextGetInfo", + &after_urContextGetInfo); + mock::getCallbacks().set_after_callback("urKernelGetInfo", + &after_urKernelGetInfo); + mock::getCallbacks().set_replace_callback("urProgramGetBuildInfo", + &redefined_urProgramGetBuildInfo); } diff --git a/sycl/unittests/helpers/PiMock.hpp b/sycl/unittests/helpers/PiMock.hpp deleted file mode 100644 index 9399915752263..0000000000000 --- a/sycl/unittests/helpers/PiMock.hpp +++ /dev/null @@ -1,407 +0,0 @@ -//==------------- PiMock.hpp --- Mock unit testing library -----------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This mini-library provides facilities to test the DPC++ Runtime behavior upon -// specific results of the underlying low-level API calls. By exploiting the -// Plugin Interface API, the stored addresses of the actual plugin-specific -// implementations can be overwritten to point at user-defined mock functions. -// -// To make testing independent of existing plugins and devices, all plugins are -// forcefully unloaded and the mock plugin is registered as the only plugin. -// -// While this could be done manually for each unit-testing scenario, the library -// aims to rule out the boilerplate, providing helper APIs which can be re-used -// by all such unit tests. The test code stemming from this can be more consise, -// with little difference from non-mock classes' usage. -// -// The following unit testing scenarios are thereby simplified: -// 1) testing the DPC++ RT management of specific PI return codes; -// 2) coverage of corner-cases related to specific data outputs -// from underlying runtimes; -// 3) testing the order of PI API calls; -// ..., etc. -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "PiMockPlugin.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace sycl { -inline namespace _V1 { -namespace unittest { - -namespace detail = sycl::detail; - -/// The macro below defines a proxy functions for each PI API call. -/// This proxy function calls all the functions registered in CallBefore* -/// function pointer array, then calls Original function, then calls functions -/// registered in CallAfter* array. -/// -/// If a function from CallBefore* returns a non-PI_SUCCESS return code the -/// proxy function bails out. - -/// Number of functions that can be registered as CallBefore and CallAfter -inline constexpr size_t CallStackSize = 16; -#define _PI_API(api) \ - \ - inline decltype(&::api) CallBefore_##api[CallStackSize] = {nullptr}; \ - inline decltype(&::api) CallOriginal_##api = mock_##api; \ - inline decltype(&::api) CallAfter_##api[CallStackSize] = {nullptr}; \ - \ - template RetT proxy_mock_##api(ArgsT... Args) { \ - for (size_t I = 0; I < CallStackSize && CallBefore_##api[I]; ++I) { \ - /* If before function returns an error bail out */ \ - const RetT Res = CallBefore_##api[I](Args...); \ - if (Res != PI_SUCCESS) \ - return Res; \ - } \ - \ - RetT Ret = CallOriginal_##api(Args...); \ - \ - for (size_t I = 0; I < CallStackSize && CallAfter_##api[I]; ++I) \ - CallAfter_##api[I](Args...); \ - \ - return Ret; \ - } \ - \ - /* A helper function for instantiating proxy functions for a given */ \ - /* PI API signature */ \ - template \ - int ConverterT_##api(RetT_ (*FuncArg)(ArgsT_...)) { \ - [[maybe_unused]] constexpr static RetT_ (*Func)(ArgsT_...) = \ - proxy_mock_##api; \ - return 42; \ - } \ - inline int Anchor_##api = ConverterT_##api(decltype (&::api)(0x0)); \ - \ - /*Overrides a plugin PI function with a given one */ \ - template \ - inline void setFuncPtr(sycl::detail::pi::PiPlugin *MPlugin, \ - decltype(&::api) FuncPtr); \ - template <> \ - inline void setFuncPtr( \ - sycl::detail::pi::PiPlugin * MPlugin, decltype(&::api) FuncPtr) { \ - CallOriginal_##api = FuncPtr; \ - } \ - \ - /*Adds a function to be called before the PI function*/ \ - template \ - inline void setFuncPtrBefore(sycl::detail::pi::PiPlugin *MPlugin, \ - decltype(&::api) FuncPtr); \ - template <> \ - inline void setFuncPtrBefore( \ - sycl::detail::pi::PiPlugin * MPlugin, decltype(&::api) FuncPtr) { \ - /* Find free slot */ \ - size_t I = 0; \ - for (; I < CallStackSize && CallBefore_##api[I]; ++I) \ - ; \ - assert(I < CallStackSize && "Too many calls before"); \ - CallBefore_##api[I] = FuncPtr; \ - } \ - \ - /*Adds a function to be called after the PI function*/ \ - template \ - inline void setFuncPtrAfter(sycl::detail::pi::PiPlugin *MPlugin, \ - decltype(&::api) FuncPtr); \ - template <> \ - inline void setFuncPtrAfter( \ - sycl::detail::pi::PiPlugin * MPlugin, decltype(&::api) FuncPtr) { \ - /* Find free slot */ \ - size_t I = 0; \ - for (; I < CallStackSize && CallAfter_##api[I]; ++I) \ - ; \ - assert(I < CallStackSize && "Too many calls after"); \ - CallAfter_##api[I] = FuncPtr; \ - } -#include -#undef _PI_API - -// Unregister functions set for calling before and after PI API -inline void clearRedefinedCalls() { - for (size_t I = 0; I < CallStackSize; ++I) { -#define _PI_API(api) \ - CallBefore_##api[I] = nullptr; \ - CallOriginal_##api = mock_##api; \ - CallAfter_##api[I] = nullptr; -#include -#undef _PI_API - } -} - -#define _PI_MOCK_PLUGIN_CONCAT(A, B) A##B -#define PI_MOCK_PLUGIN_CONCAT(A, B) _PI_MOCK_PLUGIN_CONCAT(A, B) - -inline pi_plugin::FunctionPointers getProxyMockedFunctionPointers() { - return { -#define _PI_API(api) PI_MOCK_PLUGIN_CONCAT(proxy_mock_, api), -#include -#undef _PI_API - }; -} - -#undef PI_MOCK_PLUGIN_CONCAT -#undef _PI_MOCK_PLUGIN_CONCAT - -/// The PiMock class manages the mock PI plugin and wraps an instance of a SYCL -/// platform class created from this plugin. Additionally it allows for the -/// redefinitions of functions in the PI API allowing tests to customize the -/// behavior of the underlying plugin to fit the need of the tests. -/// -/// Mock platform instances must not share the plugin resources with -/// any other SYCL platform within the given context. Otherwise, mock -/// redefinitions would also affect other platforms' behavior. -/// Therefore, any plugin-related information is fully copied whenever -/// a user-passed SYCL object instance is being mocked. -/// The underlying SYCL platform must be a non-host platform to facilitate -/// plugin usage. -/// -/// Simple usage examples would look like this: -/// ``` -/// pi_result redefinePiProgramRetain(pi_program program) { /*code*/ } -/// /*...*/ -/// unittest::PiMock Mock; -/// Mock.redefineBefore(redefinePiProgramRetain); -/// platform &MockP = Mock.getPlatform(); -/// /*...*/ -/// ``` -// TODO: Consider reworking the class into a `detail::plugin` derivative. -class PiMock { -public: - /// Constructs PiMock using the mock PI plugin. - /// - /// A new plugin will be stored into the platform instance, which - /// will no longer share the plugin with other platform instances - /// within the given context. A separate platform instance will be - /// held by the PiMock instance. - /// - /// \param Backend is the backend type to mock, intended for testing backend - /// specific runtime logic. - PiMock(backend Backend = backend::opencl) { - // Create new mock plugin platform and plugin handles - // Note: Mock plugin will be generated if it has not been yet. - MPlatformImpl = GetMockPlatformImpl(Backend); - detail::PluginPtr NewPluginPtr; - { - const detail::PluginPtr &OriginalPlugin = MPlatformImpl->getPlugin(); - // Copy the PiPlugin, thus untying our to-be mock platform from other - // platforms within the context. Reset our platform to use the new plugin. - NewPluginPtr = std::make_shared( - OriginalPlugin->getPiPluginPtr(), Backend, - OriginalPlugin->getLibraryHandle()); - // Save a copy of the platform resource - OrigFuncTable = OriginalPlugin->getPiPlugin().PiFunctionTable; - } - MPlatformImpl->setPlugin(NewPluginPtr, Backend); - // Extract the new PiPlugin instance by a non-const pointer, - // explicitly allowing modification - MPiPluginMockPtr = &NewPluginPtr->getPiPlugin(); - } - - PiMock(PiMock &&Other) { - MPlatformImpl = std::move(Other.MPlatformImpl); - OrigFuncTable = std::move(Other.OrigFuncTable); - Other.OrigFuncTable = {}; // Move above doesn't reset the optional. - MPiPluginMockPtr = std::move(Other.MPiPluginMockPtr); - Other.MIsMoved = true; - } - PiMock(const PiMock &) = delete; - PiMock &operator=(const PiMock &) = delete; - ~PiMock() { - // Do nothing if mock was moved. - if (MIsMoved) - return; - - // Since the plugin relies on the global vars to store function pointers we - // need to reset them for the new PiMock plugin instance - // TODO: Make function pointers array for each PiMock instance? - clearRedefinedCalls(); - if (!OrigFuncTable) - return; - - MPiPluginMockPtr->PiFunctionTable = *OrigFuncTable; - // calling drainThreadPool and releaseResources explicitly due to win - // related WA in shutdown process - detail::GlobalHandler::instance().drainThreadPool(); - detail::GlobalHandler::instance().getScheduler().releaseResources( - detail::BlockingT::BLOCKING); - detail::GlobalHandler::instance().releaseDefaultContexts(); - } - - /// Returns a handle to the SYCL platform instance. - /// - /// \return A reference to the SYCL platform. - sycl::platform getPlatform() { - return sycl::detail::createSyclObjFromImpl(MPlatformImpl); - } - - template - using FuncPtrT = typename sycl::detail::pi::PiFuncInfo::FuncPtrT; - template - using SignatureT = typename std::remove_pointer>::type; - - /// Adds a function to be called before a given PI API - /// - /// \param Replacement is a mock std::function instance to be - /// called instead of the given PI API. This function must - /// not have been constructed from a lambda. - template - void - redefineBefore(const std::function> &Replacement) { - FuncPtrT FuncPtr = - *Replacement.template target>(); - assert(FuncPtr && - "Function target is empty, try passing a lambda directly"); - setFuncPtrBefore(MPiPluginMockPtr, *FuncPtr); - } - - /// redefineBefore overload for function pointer/captureless lambda arguments. - /// - /// \param Replacement is a mock callable assignable to a function - /// pointer (function pointer/captureless lambda). - - template - void redefineBefore(const FunctorT &Replacement) { - // TODO: Check for matching signatures/assignability - setFuncPtrBefore(MPiPluginMockPtr, Replacement); - } - /// Redefines the implementation of a given PI API to the input - /// function object. - /// - /// \param Replacement is a mock std::function instance to be - /// called instead of the given PI API. This function must - /// not have been constructed from a lambda. - template - void redefine(const std::function> &Replacement) { - // TODO: Find a way to store FPointer first so that real PI functions can - // be called alongside the mock ones. Something like: - // `enum class MockPIPolicy { InsteadOf, Before, After};` - // may need to be introduced. - FuncPtrT FuncPtr = - *Replacement.template target>(); - assert(FuncPtr && - "Function target is empty, try passing a lambda directly"); - setFuncPtr(MPiPluginMockPtr, *FuncPtr); - } - - /// A `redefine` overload for function pointer/captureless lambda - /// arguments. - /// - /// \param Replacement is a mock callable assignable to a function - /// pointer (function pointer/captureless lambda). - template - void redefine(const FunctorT &Replacement) { - // TODO: Check for matching signatures/assignability - setFuncPtr(MPiPluginMockPtr, Replacement); - } - - /// Adds a function to be called after a given PI API - /// - /// \param Replacement is a mock std::function instance to be - /// called instead of the given PI API. This function must - /// not have been constructed from a lambda. - template - void - redefineAfter(const std::function> &Replacement) { - FuncPtrT FuncPtr = - *Replacement.template target>(); - assert(FuncPtr && - "Function target is empty, try passing a lambda directly"); - setFuncPtrAfter(MPiPluginMockPtr, *FuncPtr); - } - - /// redefineAfter overload for function pointer/captureless lambda arguments. - /// - /// \param Replacement is a mock callable assignable to a function - /// pointer (function pointer/captureless lambda). - template - void redefineAfter(const FunctorT &Replacement) { - // TODO: Check for matching signatures/assignability - setFuncPtrAfter(MPiPluginMockPtr, Replacement); - } - - /// Ensures that the mock plugin has been initialized and has been registered - /// in the global handler. Additionally, all existing plugins will be removed - /// and unloaded to avoid them being accidentally picked up by tests using - /// selectors. - /// \param Backend is the backend type to mock, intended for testing backend - /// specific runtime logic. - static void EnsureMockPluginInitialized(backend Backend = backend::opencl) { - // Only initialize the plugin once. - if (MMockPluginPtr) - return; - - // Ensure that the other plugins are initialized so we can unload them. - // This makes sure that the mock plugin is the only available plugin. - detail::pi::initialize(); - detail::GlobalHandler::instance().unloadPlugins(); - std::vector &Plugins = - detail::GlobalHandler::instance().getPlugins(); - - assert(Plugins.empty() && "Clear failed to remove all plugins."); - - auto RTPlugin = - std::make_shared(sycl::detail::pi::PiPlugin{ - "pi.ver.mock", "plugin.ver.mock", /*Targets=*/nullptr, - getProxyMockedFunctionPointers(), _PI_SANITIZE_TYPE_NONE}); - - MMockPluginPtr = std::make_shared(RTPlugin, Backend, - /*Library=*/nullptr); - Plugins.push_back(MMockPluginPtr); - } - -private: - /// Ensures that the mock PI plugin has been registered and creates a - /// platform_impl from it. - /// - /// \return a shared_ptr to a platform_impl created from the mock PI plugin. - static std::shared_ptr - GetMockPlatformImpl(backend Backend) { - EnsureMockPluginInitialized(Backend); - - pi_uint32 NumPlatforms = 0; - MMockPluginPtr->call_nocheck( - 0, nullptr, &NumPlatforms); - assert(NumPlatforms > 0 && "No platforms returned by mock plugin."); - pi_platform PiPlatform; - MMockPluginPtr->call_nocheck( - 1, &PiPlatform, nullptr); - return detail::platform_impl::getOrMakePlatformImpl(PiPlatform, - MMockPluginPtr); - } - - std::shared_ptr MPlatformImpl; - std::optional OrigFuncTable; - // Extracted at initialization for convenience purposes. The resource - // itself is owned by the platform instance. - sycl::detail::pi::PiPlugin *MPiPluginMockPtr; - - // Marker to indicate if the mock was moved. - bool MIsMoved = false; - - // Pointer to the mock plugin pointer. This is static to avoid - // reinitialization and re-registration of the same plugin. - static inline detail::PluginPtr MMockPluginPtr = nullptr; -}; - -} // namespace unittest -} // namespace _V1 -} // namespace sycl diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp deleted file mode 100644 index dc2839f74d42a..0000000000000 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ /dev/null @@ -1,1665 +0,0 @@ -//==---------- PiMockPlugin.hpp --- Mock unit testing PI plugin ------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A simple implementation of a PI plugin to be used for device-independent -// mock unit-testing. -// -//===----------------------------------------------------------------------===// - -#include -#include - -#include -#include -#include -#include - -// Helpers for dummy handles - -struct DummyHandleT { - DummyHandleT(size_t DataSize = 0) - : MStorage(DataSize), MData(MStorage.data()) {} - DummyHandleT(unsigned char *Data) : MData(Data) {} - std::atomic MRefCounter = 1; - std::vector MStorage; - unsigned char *MData = nullptr; - - template T getDataAs() { - assert(MStorage.size() >= sizeof(T)); - return *reinterpret_cast(MStorage.data()); - } - - template T setDataAs(T Val) { - assert(MStorage.size() >= sizeof(T)); - return *reinterpret_cast(MStorage.data()) = Val; - } -}; - -using DummyHandlePtrT = DummyHandleT *; - -// Allocates a dummy handle of type T with support of reference counting. -// Takes optional 'Size' parameter which can be used to allocate additional -// memory. The handle has to be deallocated using 'releaseDummyHandle'. -template inline T createDummyHandle(size_t Size = 0) { - DummyHandlePtrT DummyHandlePtr = new DummyHandleT(Size); - return reinterpret_cast(DummyHandlePtr); -} - -// Allocates a dummy handle of type T with support of reference counting -// and associates it with the provided Data. -template inline T createDummyHandleWithData(unsigned char *Data) { - DummyHandlePtrT DummyHandlePtr = new DummyHandleT(Data); - return reinterpret_cast(DummyHandlePtr); -} - -// Decrement reference counter for the handle and deallocates it if the -// reference counter becomes zero -template inline void releaseDummyHandle(T Handle) { - auto DummyHandlePtr = reinterpret_cast(Handle); - const size_t NewValue = --DummyHandlePtr->MRefCounter; - if (NewValue == 0) - delete DummyHandlePtr; -} - -// Increment reference counter for the handle -template inline void retainDummyHandle(T Handle) { - auto DummyHandlePtr = reinterpret_cast(Handle); - ++DummyHandlePtr->MRefCounter; -} - -// -// Platform -// -inline pi_result mock_piPlatformsGet(pi_uint32 num_entries, - pi_platform *platforms, - pi_uint32 *num_platforms) { - if (num_platforms) - *num_platforms = 1; - - if (platforms && num_entries > 0) - platforms[0] = reinterpret_cast(1); - - return PI_SUCCESS; -} - -inline pi_result mock_piPlatformGetInfo(pi_platform platform, - pi_platform_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - constexpr char MockPlatformName[] = "Mock platform"; - constexpr char MockSupportedExtensions[] = - "cl_khr_il_program cl_khr_subgroups cl_intel_subgroups " - "cl_intel_subgroups_short cl_intel_required_subgroup_size "; - switch (param_name) { - case PI_PLATFORM_INFO_NAME: { - if (param_value) { - assert(param_value_size == sizeof(MockPlatformName)); - std::memcpy(param_value, MockPlatformName, sizeof(MockPlatformName)); - } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockPlatformName); - return PI_SUCCESS; - } - case PI_PLATFORM_INFO_EXTENSIONS: { - if (param_value) { - assert(param_value_size == sizeof(MockSupportedExtensions)); - std::memcpy(param_value, MockSupportedExtensions, - sizeof(MockSupportedExtensions)); - } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockSupportedExtensions); - return PI_SUCCESS; - } - case PI_EXT_PLATFORM_INFO_BACKEND: { - constexpr auto MockPlatformBackend = PI_EXT_PLATFORM_BACKEND_OPENCL; - if (param_value) { - std::memcpy(param_value, &MockPlatformBackend, - sizeof(MockPlatformBackend)); - } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockPlatformBackend); - return PI_SUCCESS; - } - default: { - constexpr const char FallbackValue[] = "str"; - constexpr size_t FallbackValueSize = sizeof(FallbackValue); - if (param_value_size_ret) - *param_value_size_ret = FallbackValueSize; - - if (param_value && param_value_size >= FallbackValueSize) - std::memcpy(param_value, FallbackValue, FallbackValueSize); - - return PI_SUCCESS; - } - } -} - -inline pi_result -mock_piextPlatformGetNativeHandle(pi_platform platform, - pi_native_handle *nativeHandle) { - *nativeHandle = reinterpret_cast(platform); - return PI_SUCCESS; -} - -inline pi_result -mock_piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_platform *platform) { - *platform = reinterpret_cast(nativeHandle); - retainDummyHandle(*platform); - return PI_SUCCESS; -} - -inline pi_result mock_piDevicesGet(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) - *num_devices = 1; - - if (devices && num_entries > 0) - devices[0] = reinterpret_cast(1); - - return PI_SUCCESS; -} - -inline pi_result mock_piDeviceGetInfo(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - constexpr char MockDeviceName[] = "Mock device"; - constexpr char MockSupportedExtensions[] = - "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program ur_exp_command_buffer"; - switch (param_name) { - case PI_DEVICE_INFO_TYPE: { - // Act like any device is a GPU. - // TODO: Should we mock more device types? - if (param_value) - *static_cast<_pi_device_type *>(param_value) = PI_DEVICE_TYPE_GPU; - if (param_value_size_ret) - *param_value_size_ret = sizeof(PI_DEVICE_TYPE_GPU); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_NAME: { - if (param_value) { - assert(param_value_size == sizeof(MockDeviceName)); - std::memcpy(param_value, MockDeviceName, sizeof(MockDeviceName)); - } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockDeviceName); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_PARENT_DEVICE: { - if (param_value) - *static_cast(param_value) = nullptr; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_device *); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_EXTENSIONS: { - if (param_value) { - assert(param_value_size >= sizeof(MockSupportedExtensions)); - std::memcpy(param_value, MockSupportedExtensions, - sizeof(MockSupportedExtensions)); - } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockSupportedExtensions); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_USM_HOST_SUPPORT: - case PI_DEVICE_INFO_USM_DEVICE_SUPPORT: - case PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: - case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: - case PI_DEVICE_INFO_AVAILABLE: - case PI_DEVICE_INFO_LINKER_AVAILABLE: - case PI_DEVICE_INFO_COMPILER_AVAILABLE: - case PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_SUPPORT: - case PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT: { - if (param_value) - *static_cast(param_value) = PI_TRUE; - if (param_value_size_ret) - *param_value_size_ret = sizeof(PI_TRUE); - return PI_SUCCESS; - } - // This mock GPU device has no sub-devices - case PI_DEVICE_INFO_PARTITION_PROPERTIES: { - if (param_value_size_ret) { - *param_value_size_ret = 0; - } - return PI_SUCCESS; - } - case PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { - assert(param_value_size == sizeof(pi_device_affinity_domain)); - if (param_value) { - *static_cast(param_value) = 0; - } - return PI_SUCCESS; - } - case PI_DEVICE_INFO_QUEUE_PROPERTIES: { - assert(param_value_size == sizeof(pi_queue_properties)); - if (param_value) { - *static_cast(param_value) = - PI_QUEUE_FLAG_PROFILING_ENABLE; - } - return PI_SUCCESS; - } - default: { - // In the default case we fill the return value with 0's. This may not be - // valid for all device queries, but it will mean a consistent return value - // for the query. - // Any tests that need special return values should either add behavior - // the this function or use redefineAfter with a function that adds the - // intended behavior. - if (param_value && param_value_size != 0) - std::memset(param_value, 0, param_value_size); - // Likewise, if the device info query asks for the size of the return value - // we tell it there is a single byte to avoid cases where the runtime tries - // to allocate some random amount of memory for the return value. - if (param_value_size_ret) - *param_value_size_ret = 1; - return PI_SUCCESS; - } - } -} - -inline pi_result mock_piDeviceRetain(pi_device device) { return PI_SUCCESS; } - -inline pi_result mock_piDeviceRelease(pi_device device) { return PI_SUCCESS; } - -inline pi_result mock_piDevicePartition( - pi_device device, const pi_device_partition_property *properties, - pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextDeviceGetNativeHandle(pi_device device, - pi_native_handle *nativeHandle) { - *nativeHandle = reinterpret_cast(device); - return PI_SUCCESS; -} - -inline pi_result mock_piextDeviceCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_platform platform, pi_device *device) { - *device = reinterpret_cast(nativeHandle); - retainDummyHandle(*device); - return PI_SUCCESS; -} - -inline pi_result mock_piextDeviceSelectBinary(pi_device device, - pi_device_binary *binaries, - pi_uint32 num_binaries, - pi_uint32 *selected_binary_ind) { - *selected_binary_ind = 0; - return PI_SUCCESS; -} - -inline pi_result -mock_piextGetDeviceFunctionPointer(pi_device device, pi_program program, - const char *function_name, - pi_uint64 *function_pointer_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piextGetGlobalVariablePointer( - pi_device device, pi_program program, const char *global_variable_name, - size_t *global_variable_size, void **global_variable_size_ret) { - return PI_SUCCESS; -} - -// -// Context -// -inline pi_result mock_piContextCreate( - const pi_context_properties *properties, pi_uint32 num_devices, - const pi_device *devices, - void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, - void *user_data), - void *user_data, pi_context *ret_context) { - *ret_context = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piContextGetInfo(pi_context context, - pi_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_CONTEXT_INFO_NUM_DEVICES: { - if (param_value) - *static_cast(param_value) = 1; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_uint32); - return PI_SUCCESS; - } - default: - return PI_SUCCESS; - } -} - -inline pi_result mock_piContextRetain(pi_context context) { - retainDummyHandle(context); - return PI_SUCCESS; -} - -inline pi_result mock_piContextRelease(pi_context context) { - releaseDummyHandle(context); - return PI_SUCCESS; -} - -inline pi_result mock_piextContextSetExtendedDeleter( - pi_context context, pi_context_extended_deleter func, void *user_data) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextContextGetNativeHandle(pi_context context, - pi_native_handle *nativeHandle) { - *nativeHandle = reinterpret_cast(context); - return PI_SUCCESS; -} - -inline pi_result mock_piextContextCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_uint32 numDevices, - const pi_device *devices, bool pluginOwnsNativeHandle, - pi_context *context) { - *context = reinterpret_cast(nativeHandle); - retainDummyHandle(*context); - return PI_SUCCESS; -} - -// -// Queue -// -inline pi_result mock_piQueueCreate(pi_context context, pi_device device, - pi_queue_properties properties, - pi_queue *queue) { - *queue = createDummyHandle(); - return PI_SUCCESS; -} -inline pi_result mock_piextQueueCreate(pi_context context, pi_device device, - pi_queue_properties *properties, - pi_queue *queue) { - *queue = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piQueueGetInfo(pi_queue command_queue, - pi_queue_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_QUEUE_INFO_DEVICE: { - if (param_value) - *static_cast(param_value) = reinterpret_cast(1); - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_device); - return PI_SUCCESS; - } - default: - return PI_SUCCESS; - } -} - -inline pi_result mock_piQueueRetain(pi_queue command_queue) { - retainDummyHandle(command_queue); - return PI_SUCCESS; -} - -inline pi_result mock_piQueueRelease(pi_queue command_queue) { - releaseDummyHandle(command_queue); - return PI_SUCCESS; -} - -inline pi_result mock_piQueueFinish(pi_queue command_queue) { - return PI_SUCCESS; -} - -inline pi_result mock_piQueueFlush(pi_queue command_queue) { - return PI_SUCCESS; -} - -inline pi_result mock_piextQueueGetNativeHandle(pi_queue queue, - pi_native_handle *nativeHandle, - int32_t *nativeHandleDesc) { - *nativeHandle = reinterpret_cast(queue); - return PI_SUCCESS; -} - -inline pi_result mock_piextQueueCreateWithNativeHandle( - pi_native_handle nativeHandle, int32_t nativeHandleDesc, pi_context context, - pi_device device, bool pluginOwnsNativeHandle, - pi_queue_properties *Properties, pi_queue *queue) { - *queue = reinterpret_cast(nativeHandle); - retainDummyHandle(*queue); - return PI_SUCCESS; -} - -// -// Memory -// -inline pi_result -mock_piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, - void *host_ptr, pi_mem *ret_mem, - const pi_mem_properties *properties = nullptr) { - if (host_ptr && flags & PI_MEM_FLAGS_HOST_PTR_USE) - *ret_mem = createDummyHandleWithData( - reinterpret_cast(host_ptr)); - else - *ret_mem = createDummyHandle(size); - return PI_SUCCESS; -} - -inline pi_result mock_piMemImageCreate(pi_context context, pi_mem_flags flags, - const pi_image_format *image_format, - const pi_image_desc *image_desc, - void *host_ptr, pi_mem *ret_mem) { - assert(false && - "TODO: mock_piMemImageCreate handle allocation size correctly"); - *ret_mem = createDummyHandle(/*size=*/1024 * 16); - return PI_SUCCESS; -} - -inline pi_result -mock_piextMemUnsampledImageHandleDestroy(pi_context context, pi_device device, - pi_image_handle handle) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextMemSampledImageHandleDestroy(pi_context context, pi_device device, - pi_image_handle handle) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemImageAllocate(pi_context context, - pi_device device, - pi_image_format *image_format, - pi_image_desc *image_desc, - pi_image_mem_handle *ret_mem) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemMipmapGetLevel(pi_context context, - pi_device device, - pi_image_mem_handle mip_mem, - unsigned int level, - pi_image_mem_handle *ret_mem) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemImageFree(pi_context context, pi_device device, - pi_image_mem_handle memory_handle) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemMipmapFree(pi_context context, pi_device device, - pi_image_mem_handle memory_handle) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemUnsampledImageCreate( - pi_context context, pi_device device, pi_image_mem_handle img_mem, - pi_image_format *image_format, pi_image_desc *desc, - pi_image_handle *ret_handle) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemMapExternalArray(pi_context context, - pi_device device, - pi_image_format *image_format, - pi_image_desc *image_desc, - pi_interop_mem_handle mem_handle, - pi_image_mem_handle *ret_mem) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemReleaseInterop(pi_context context, - pi_device device, - pi_interop_mem_handle ext_mem) { - return PI_SUCCESS; -} - -inline pi_result mock_piextImportExternalSemaphore( - pi_context context, pi_device device, - pi_external_semaphore_descriptor *sem_descriptor, - pi_interop_semaphore_handle *ret_handle) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextImportExternalMemory(pi_context context, pi_device device, - pi_external_mem_descriptor *mem_descriptor, - pi_interop_mem_handle *ret_handle) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextReleaseExternalSemaphore(pi_context context, pi_device device, - pi_interop_semaphore_handle sem_handle) { - return PI_SUCCESS; -} - -inline pi_result mock_piextWaitExternalSemaphore( - pi_queue command_queue, pi_interop_semaphore_handle sem_handle, - bool has_wait_value, uint64_t wait_value, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - return PI_SUCCESS; -} - -inline pi_result mock_piextSignalExternalSemaphore( - pi_queue command_queue, pi_interop_semaphore_handle sem_handle, - bool has_signal_value, uint64_t signal_value, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemUnsampledImageCreateInterop( - pi_context context, pi_device device, pi_image_format *image_format, - pi_image_desc *desc, pi_interop_mem_handle ext_mem_handle, - pi_image_handle *ret_img_handle) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemSampledImageCreateInterop( - pi_context context, pi_device device, pi_image_format *image_format, - pi_image_desc *desc, pi_sampler sampler, - pi_interop_mem_handle ext_mem_handle, pi_image_handle *ret_img_handle) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemSampledImageCreate( - pi_context context, pi_device device, pi_image_mem_handle img_mem, - pi_image_format *image_format, pi_image_desc *desc, pi_sampler sampler, - pi_image_handle *ret_handle) { - return PI_SUCCESS; -} - -inline pi_result mock_piextBindlessImageSamplerCreate( - pi_context context, const pi_sampler_properties *sampler_properties, - const float minMipmapLevelClamp, const float maxMipmapLevelClamp, - const float maxAnisotropy, pi_sampler *result_sampler) { - *result_sampler = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextMemImageCopy( - pi_queue command_queue, void *dst_ptr, const void *src_ptr, - const pi_image_desc *src_image_desc, const pi_image_desc *dst_image_desc, - const pi_image_format *src_image_format, - const pi_image_format *dst_image_format, const pi_image_copy_flags flags, - pi_image_offset src_offset, pi_image_offset dst_offset, - pi_image_region copy_extent, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - return PI_SUCCESS; -} - -inline pi_result mock_piextMemImageGetInfo(pi_context context, - pi_image_mem_handle mem_handle, - pi_image_info param_name, - void *param_value, - size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piMemGetInfo(pi_mem mem, pi_mem_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piMemImageGetInfo(pi_mem image, pi_image_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piMemRetain(pi_mem mem) { - retainDummyHandle(mem); - return PI_SUCCESS; -} - -inline pi_result mock_piMemRelease(pi_mem mem) { - releaseDummyHandle(mem); - return PI_SUCCESS; -} - -inline pi_result -mock_piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, - pi_buffer_create_type buffer_create_type, - void *buffer_create_info, pi_mem *ret_mem) { - // Create a sub buf without memory as we will reuse parent's one - *ret_mem = createDummyHandle(/*size=*/0); - - auto parentDummyHandle = reinterpret_cast(buffer); - auto childDummyHandle = reinterpret_cast(*ret_mem); - - auto region = reinterpret_cast(buffer_create_info); - - // Point the sub buf to the original buf memory - childDummyHandle->MData = parentDummyHandle->MData + region->origin; - - return PI_SUCCESS; -} - -inline pi_result mock_piextMemGetNativeHandle(pi_mem mem, pi_device dev, - pi_native_handle *nativeHandle) { - *nativeHandle = reinterpret_cast(mem); - return PI_SUCCESS; -} - -inline pi_result -mock_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, bool ownNativeHandle, - pi_mem *mem) { - *mem = reinterpret_cast(nativeHandle); - retainDummyHandle(*mem); - return PI_SUCCESS; -} - -inline pi_result mock_piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *RetImage) { - *RetImage = reinterpret_cast(NativeHandle); - retainDummyHandle(*RetImage); - return PI_SUCCESS; -} - -// -// Program -// - -inline pi_result mock_piProgramCreate(pi_context context, const void *il, - size_t length, pi_program *res_program) { - *res_program = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piProgramCreateWithBinary( - pi_context context, pi_uint32 num_devices, const pi_device *device_list, - const size_t *lengths, const unsigned char **binaries, - size_t num_metadata_entries, const pi_device_binary_property *metadata, - pi_int32 *binary_status, pi_program *ret_program) { - *ret_program = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piProgramGetInfo(pi_program program, - pi_program_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - - switch (param_name) { - case PI_PROGRAM_INFO_NUM_DEVICES: { - if (param_value) - *static_cast(param_value) = 1; - if (param_value_size_ret) - *param_value_size_ret = sizeof(size_t); - return PI_SUCCESS; - } - case PI_PROGRAM_INFO_BINARY_SIZES: { - if (param_value) - *static_cast(param_value) = 1; - if (param_value_size_ret) - *param_value_size_ret = sizeof(size_t); - return PI_SUCCESS; - } - case PI_PROGRAM_INFO_BINARIES: { - if (param_value) - **static_cast(param_value) = 1; - if (param_value_size_ret) - *param_value_size_ret = sizeof(unsigned char); - return PI_SUCCESS; - } - default: { - // TODO: Buildlog requires this but not any actual data afterwards. - // This should be investigated. Should this be moved to that test? - if (param_value_size_ret) - *param_value_size_ret = sizeof(size_t); - return PI_SUCCESS; - } - } -} - -inline pi_result -mock_piProgramLink(pi_context context, pi_uint32 num_devices, - const pi_device *device_list, const char *options, - pi_uint32 num_input_programs, - const pi_program *input_programs, - void (*pfn_notify)(pi_program program, void *user_data), - void *user_data, pi_program *ret_program) { - *ret_program = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piProgramCompile( - pi_program program, pi_uint32 num_devices, const pi_device *device_list, - const char *options, pi_uint32 num_input_headers, - const pi_program *input_headers, const char **header_include_names, - void (*pfn_notify)(pi_program program, void *user_data), void *user_data) { - return PI_SUCCESS; -} - -inline pi_result -mock_piProgramBuild(pi_program program, pi_uint32 num_devices, - const pi_device *device_list, const char *options, - void (*pfn_notify)(pi_program program, void *user_data), - void *user_data) { - return PI_SUCCESS; -} - -inline pi_result mock_piProgramGetBuildInfo( - pi_program program, pi_device device, _pi_program_build_info param_name, - size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piProgramRetain(pi_program program) { - retainDummyHandle(program); - return PI_SUCCESS; -} - -inline pi_result mock_piProgramRelease(pi_program program) { - releaseDummyHandle(program); - return PI_SUCCESS; -} - -inline pi_result -mock_piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, - size_t spec_size, - const void *spec_value) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextProgramGetNativeHandle(pi_program program, - pi_native_handle *nativeHandle) { - *nativeHandle = reinterpret_cast(program); - return PI_SUCCESS; -} - -inline pi_result mock_piextProgramCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, - bool pluginOwnsNativeHandle, pi_program *program) { - *program = reinterpret_cast(nativeHandle); - retainDummyHandle(*program); - return PI_SUCCESS; -} - -// -// Kernel -// - -inline pi_result mock_piKernelCreate(pi_program program, - const char *kernel_name, - pi_kernel *ret_kernel) { - *ret_kernel = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, - size_t arg_size, const void *arg_value) { - return PI_SUCCESS; -} - -inline pi_result mock_piKernelGetInfo(pi_kernel kernel, - pi_kernel_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { - if (param_value) { - auto RealVal = reinterpret_cast(param_value); - RealVal[0] = 0; - RealVal[1] = 0; - RealVal[2] = 0; - } - if (param_value_size_ret) - *param_value_size_ret = 3 * sizeof(size_t); - return PI_SUCCESS; - } - default: { - return PI_SUCCESS; - } - } -} - -inline pi_result mock_piKernelGetSubGroupInfo( - pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, - size_t input_value_size, const void *input_value, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piKernelRetain(pi_kernel kernel) { - retainDummyHandle(kernel); - return PI_SUCCESS; -} - -inline pi_result mock_piKernelRelease(pi_kernel kernel) { - releaseDummyHandle(kernel); - return PI_SUCCESS; -} - -inline pi_result mock_piextKernelSetArgPointer(pi_kernel kernel, - pi_uint32 arg_index, - size_t arg_size, - const void *arg_value) { - return PI_SUCCESS; -} - -inline pi_result mock_piKernelSetExecInfo(pi_kernel kernel, - pi_kernel_exec_info value_name, - size_t param_value_size, - const void *param_value) { - return PI_SUCCESS; -} - -inline pi_result mock_piextKernelCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, pi_program program, - bool pluginOwnsNativeHandle, pi_kernel *kernel) { - - *kernel = reinterpret_cast(nativeHandle); - retainDummyHandle(*kernel); - return PI_SUCCESS; -} - -inline pi_result -mock_piextKernelGetNativeHandle(pi_kernel kernel, - pi_native_handle *nativeHandle) { - *nativeHandle = reinterpret_cast(kernel); - return PI_SUCCESS; -} - -inline pi_result mock_piextKernelSuggestMaxCooperativeGroupCount( - pi_kernel kernel, size_t local_work_size, size_t dynamic_shared_memory_size, - pi_uint32 *group_count_ret) { - *group_count_ret = 1; - return PI_SUCCESS; -} - -// -// Events -// -inline pi_result mock_piEventCreate(pi_context context, pi_event *ret_event) { - *ret_event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEventGetInfo(pi_event event, pi_event_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_EVENT_INFO_COMMAND_EXECUTION_STATUS: { - if (param_value) - *static_cast(param_value) = PI_EVENT_SUBMITTED; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_event_status); - return PI_SUCCESS; - } - default: { - return PI_SUCCESS; - } - } -} - -inline pi_result mock_piEventGetProfilingInfo(pi_event event, - pi_profiling_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piEventsWait(pi_uint32 num_events, - const pi_event *event_list) { - return PI_SUCCESS; -} - -inline pi_result mock_piEventSetCallback( - pi_event event, pi_int32 command_exec_callback_type, - void (*pfn_notify)(pi_event event, pi_int32 event_command_status, - void *user_data), - void *user_data) { - return PI_SUCCESS; -} - -inline pi_result mock_piEventSetStatus(pi_event event, - pi_int32 execution_status) { - return PI_SUCCESS; -} - -inline pi_result mock_piEventRetain(pi_event event) { - retainDummyHandle(event); - return PI_SUCCESS; -} - -inline pi_result mock_piEventRelease(pi_event event) { - releaseDummyHandle(event); - return PI_SUCCESS; -} - -inline pi_result -mock_piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle) { - *nativeHandle = reinterpret_cast(event); - return PI_SUCCESS; -} - -inline pi_result -mock_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, bool ownNativeHandle, - pi_event *event) { - *event = reinterpret_cast(nativeHandle); - retainDummyHandle(*event); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueTimestampRecordingExp( - pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); - return PI_SUCCESS; -} - -// -// Sampler -// -inline pi_result -mock_piSamplerCreate(pi_context context, - const pi_sampler_properties *sampler_properties, - pi_sampler *result_sampler) { - *result_sampler = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piSamplerGetInfo(pi_sampler sampler, - pi_sampler_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piSamplerRetain(pi_sampler sampler) { - retainDummyHandle(sampler); - return PI_SUCCESS; -} - -inline pi_result mock_piSamplerRelease(pi_sampler sampler) { - releaseDummyHandle(sampler); - return PI_SUCCESS; -} - -// -// Queue Commands -// -inline pi_result mock_piEnqueueKernelLaunch( - pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextEnqueueCooperativeKernelLaunch( - pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueEventsWait(pi_queue command_queue, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueEventsWaitWithBarrier( - pi_queue command_queue, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, - pi_bool blocking_read, size_t offset, size_t size, - void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueMemBufferReadRect( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_write, size_t offset, size_t size, - const void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueMemBufferWriteRect( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, - pi_mem dst_buffer, size_t src_offset, - size_t dst_offset, size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueMemBufferCopyRect( - pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, - pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, - pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, - size_t dst_row_pitch, size_t dst_slice_pitch, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueMemBufferFill(pi_queue command_queue, - pi_mem buffer, const void *pattern, - size_t pattern_size, size_t offset, - size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueMemImageRead( - pi_queue command_queue, pi_mem image, pi_bool blocking_read, - pi_image_offset origin, pi_image_region region, size_t row_pitch, - size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, - pi_bool blocking_write, pi_image_offset origin, - pi_image_region region, size_t input_row_pitch, - size_t input_slice_pitch, const void *ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, - pi_mem dst_image, pi_image_offset src_origin, - pi_image_offset dst_origin, pi_image_region region, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, - const void *fill_color, const size_t *origin, - const size_t *region, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueMemBufferMap(pi_queue command_queue, - pi_mem buffer, pi_bool blocking_map, - pi_map_flags map_flags, - size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event, void **ret_map) { - if (event) - *event = createDummyHandle(); - - auto parentDummyHandle = reinterpret_cast(buffer); - *ret_map = (void *)(parentDummyHandle->MData); - return PI_SUCCESS; -} - -inline pi_result mock_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, - void *mapped_ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, - const pi_mem_obj_property *arg_properties, - const pi_mem *arg_value) { - return PI_SUCCESS; -} - -inline pi_result mock_piextKernelSetArgSampler(pi_kernel kernel, - pi_uint32 arg_index, - const pi_sampler *arg_value) { - return PI_SUCCESS; -} - -/// -// USM -/// -inline pi_result mock_piextUSMHostAlloc(void **result_ptr, pi_context context, - pi_usm_mem_properties *properties, - size_t size, pi_uint32 alignment) { - assert(alignment < 16 && "TODO: mock_piextUSMHostAlloc handle alignment"); - *result_ptr = createDummyHandle(size); - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMDeviceAlloc(void **result_ptr, pi_context context, - pi_device device, - pi_usm_mem_properties *properties, - size_t size, pi_uint32 alignment) { - assert(alignment < 16 && "TODO: mock_piextUSMHostAlloc handle alignment"); - *result_ptr = createDummyHandle(size); - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMSharedAlloc(void **result_ptr, pi_context context, - pi_device device, - pi_usm_mem_properties *properties, - size_t size, pi_uint32 alignment) { - assert(alignment < 16 && "TODO: mock_piextUSMHostAlloc handle alignment"); - *result_ptr = createDummyHandle(size); - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMPitchedAlloc(void **result_ptr, - size_t *result_pitch, - pi_context context, pi_device device, - pi_usm_mem_properties *properties, - size_t width_in_bytes, size_t height, - unsigned int element_size_bytes) { - *result_ptr = createDummyHandle(width_in_bytes * height); - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMFree(pi_context context, void *ptr) { - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMEnqueueFill(pi_queue queue, void *ptr, - const void *pattern, - size_t patternSize, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - if (event) - *event = createDummyHandle(); - - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, - void *dst_ptr, const void *src_ptr, - size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, - size_t size, - pi_usm_migration_flags flags, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, - size_t length, - pi_mem_advice advice, - pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMGetMemAllocInfo( - pi_context context, const void *ptr, pi_mem_alloc_info param_name, - size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMEnqueueFill2D(pi_queue queue, void *ptr, - size_t pitch, size_t pattern_size, - const void *pattern, size_t width, - size_t height, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMEnqueueMemset2D(pi_queue queue, void *ptr, - size_t pitch, int value, - size_t width, size_t height, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking, void *dst_ptr, - size_t dst_pitch, const void *src_ptr, - size_t src_pitch, size_t width, size_t height, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - return PI_SUCCESS; -} - -inline pi_result mock_piextEnqueueDeviceGlobalVariableWrite( - pi_queue queue, pi_program program, const char *name, - pi_bool blocking_write, size_t count, size_t offset, const void *src, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextEnqueueDeviceGlobalVariableRead( - pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, - size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piextVirtualMemGranularityGetInfo(pi_context, pi_device, - pi_virtual_mem_granularity_info, size_t, - void *, size_t *) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextPhysicalMemCreate(pi_context, pi_device, size_t, - pi_physical_mem *ret_physical_mem) { - *ret_physical_mem = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextPhysicalMemRetain(pi_physical_mem) { - return PI_SUCCESS; -} - -inline pi_result mock_piextPhysicalMemRelease(pi_physical_mem) { - return PI_SUCCESS; -} - -inline pi_result mock_piextVirtualMemReserve(pi_context, const void *start, - size_t range_size, - void **ret_ptr) { - *ret_ptr = - start ? const_cast(start) : createDummyHandle(range_size); - return PI_SUCCESS; -} - -inline pi_result mock_piextVirtualMemFree(pi_context, const void *, size_t) { - return PI_SUCCESS; -} - -inline pi_result mock_piextVirtualMemMap(pi_context, const void *, size_t, - pi_physical_mem, size_t, - pi_virtual_access_flags) { - return PI_SUCCESS; -} - -inline pi_result mock_piextVirtualMemUnmap(pi_context, const void *, size_t) { - return PI_SUCCESS; -} - -inline pi_result mock_piextVirtualMemSetAccess(pi_context, const void *, size_t, - pi_virtual_access_flags) { - return PI_SUCCESS; -} - -inline pi_result mock_piextVirtualMemGetInfo(pi_context, const void *, size_t, - pi_virtual_mem_info, size_t, - void *, size_t *) { - return PI_SUCCESS; -} - -inline pi_result mock_piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextCommandBufferCreate(pi_context context, pi_device device, - const pi_ext_command_buffer_desc *desc, - pi_ext_command_buffer *ret_command_buffer) { - *ret_command_buffer = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result -mock_piextCommandBufferRetain(pi_ext_command_buffer command_buffer) { - retainDummyHandle(command_buffer); - return PI_SUCCESS; -} - -inline pi_result -mock_piextCommandBufferRelease(pi_ext_command_buffer command_buffer) { - releaseDummyHandle(command_buffer); - return PI_SUCCESS; -} - -inline pi_result -mock_piextCommandBufferFinalize(pi_ext_command_buffer command_buffer) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferNDRangeKernel( - pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point, pi_ext_command_buffer_command *command) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferMemcpyUSM( - pi_ext_command_buffer command_buffer, void *dst_ptr, const void *src_ptr, - size_t size, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferMemBufferRead( - pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer command_buffer, pi_mem buffer, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferMemBufferWrite( - pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer command_buffer, pi_mem buffer, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t BufferRowPitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextEnqueueCommandBuffer( - pi_ext_command_buffer command_buffer, pi_queue queue, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferUpdateKernelLaunch( - pi_ext_command_buffer_command Command, - pi_ext_command_buffer_update_kernel_launch_desc *Desc) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextCommandBufferRetainCommand(pi_ext_command_buffer_command Command) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferMemBufferCopy( - pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, - size_t src_offset, size_t dst_offset, size_t size, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, - pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, - pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, - size_t dst_row_pitch, size_t dst_slice_pitch, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferMemBufferFill( - pi_ext_command_buffer command_buffer, pi_mem buffer, const void *pattern, - size_t pattern_size, size_t offset, size_t size, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferFillUSM( - pi_ext_command_buffer command_buffer, void *ptr, const void *pattern, - size_t pattern_size, size_t size, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferPrefetchUSM( - pi_ext_command_buffer command_buffer, const void *ptr, size_t size, - pi_usm_migration_flags flags, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextCommandBufferAdviseUSM( - pi_ext_command_buffer command_buffer, const void *ptr, size_t length, - pi_mem_advice advice, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - return PI_SUCCESS; -} - -inline pi_result mock_piextEnqueueNativeCommand(pi_queue, - void (*)(pi_queue, void *), - void *, uint32_t, - const pi_mem *, pi_uint32, - const pi_event *, pi_event *) { - return PI_SUCCESS; -} - -inline pi_result mock_piTearDown(void *PluginParameter) { return PI_SUCCESS; } - -inline pi_result mock_piPluginGetLastError(char **message) { - return PI_SUCCESS; -} - -inline pi_result mock_piPluginGetBackendOption(pi_platform platform, - const char *frontend_option, - const char **backend_option) { - *backend_option = ""; - return PI_SUCCESS; -} - -// Returns the wall-clock timestamp of host for deviceTime and hostTime -inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, - uint64_t *deviceTime, - uint64_t *hostTime) { - - using namespace std::chrono; - auto timeNanoseconds = - duration_cast(steady_clock::now().time_since_epoch()) - .count(); - if (deviceTime) { - *deviceTime = timeNanoseconds; - } - if (hostTime) { - *hostTime = timeNanoseconds; - } - return PI_SUCCESS; -} - -inline pi_result mock_piextEnqueueReadHostPipe( - pi_queue queue, pi_program program, const char *pipe_symbol, - pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextEnqueueWriteHostPipe( - pi_queue queue, pi_program program, const char *pipe_symbol, - pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - if (event) - *event = createDummyHandle(); - return PI_SUCCESS; -} - -inline pi_result mock_piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - return PI_SUCCESS; -} - -inline pi_result mock_piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - return PI_SUCCESS; -} - -inline pi_result -mock_piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, - pi_peer_attr attr, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) { - if (param_value) - *static_cast(param_value) = 1; - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_int32); - - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMImport(const void *HostPtr, size_t Size, - pi_context Context) { - return PI_SUCCESS; -} - -inline pi_result mock_piextUSMRelease(const void *HostPtr, pi_context Context) { - return PI_SUCCESS; -} - -inline pi_result mock_piextEnqueueKernelLaunchCustom( - pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumPropsInLaunchPropList, - const pi_launch_property *LaunchPropList, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - return PI_SUCCESS; -} diff --git a/sycl/unittests/helpers/RuntimeLinkingCommon.hpp b/sycl/unittests/helpers/RuntimeLinkingCommon.hpp index 175667da86ba3..6e786ed4cfdf1 100644 --- a/sycl/unittests/helpers/RuntimeLinkingCommon.hpp +++ b/sycl/unittests/helpers/RuntimeLinkingCommon.hpp @@ -1,11 +1,11 @@ -#include +#include #include // Helper holder for all the data we want to capture from mocked APIs struct LinkingCapturesHolder { - unsigned NumOfPiProgramCreateCalls = 0; - unsigned NumOfPiProgramLinkCalls = 0; + unsigned NumOfUrProgramCreateCalls = 0; + unsigned NumOfUrProgramLinkCalls = 0; unsigned ProgramUsedToCreateKernel = 0; std::vector LinkedPrograms; @@ -18,8 +18,8 @@ struct LinkingCapturesHolder { } void clear() { - NumOfPiProgramCreateCalls = 0; - NumOfPiProgramLinkCalls = 0; + NumOfUrProgramCreateCalls = 0; + NumOfUrProgramLinkCalls = 0; ProgramUsedToCreateKernel = 0; LinkedPrograms.clear(); } @@ -27,56 +27,50 @@ struct LinkingCapturesHolder { static LinkingCapturesHolder CapturedLinkingData; -static pi_result redefined_piProgramCreate(pi_context, const void *il, - size_t length, pi_program *res) { - auto *Magic = reinterpret_cast(il); - *res = createDummyHandle(sizeof(unsigned)); - reinterpret_cast(*res)->setDataAs(*Magic); - ++CapturedLinkingData.NumOfPiProgramCreateCalls; - return PI_SUCCESS; +static ur_result_t redefined_urProgramCreateWithIL(void *pParams) { + auto Params = *static_cast(pParams); + auto *Magic = reinterpret_cast(*Params.ppIL); + ur_program_handle_t *res = *Params.pphProgram; + *res = mock::createDummyHandle(sizeof(unsigned)); + reinterpret_cast(*res)->setDataAs(*Magic); + ++CapturedLinkingData.NumOfUrProgramCreateCalls; + return UR_RESULT_SUCCESS; } -static pi_result -redefined_piProgramLink(pi_context context, pi_uint32 num_devices, - const pi_device *device_list, const char *options, - pi_uint32 num_input_programs, - const pi_program *input_programs, - void (*pfn_notify)(pi_program program, void *user_data), - void *user_data, pi_program *ret_program) { +static ur_result_t redefined_urProgramLinkExp(void *pParams) { + auto Params = *static_cast(pParams); unsigned ResProgram = 1; - for (pi_uint32 I = 0; I < num_input_programs; ++I) { - auto Val = reinterpret_cast(input_programs[I]) + auto Programs = *Params.pphPrograms; + for (uint32_t I = 0; I < *Params.pcount; ++I) { + auto Val = reinterpret_cast(Programs[I]) ->getDataAs(); ResProgram *= Val; CapturedLinkingData.LinkedPrograms.push_back(Val); } - ++CapturedLinkingData.NumOfPiProgramLinkCalls; + ++CapturedLinkingData.NumOfUrProgramLinkCalls; - *ret_program = createDummyHandle(sizeof(unsigned)); - reinterpret_cast(*ret_program) + ur_program_handle_t *ret_program = *Params.pphProgram; + *ret_program = mock::createDummyHandle(sizeof(unsigned)); + reinterpret_cast(*ret_program) ->setDataAs(ResProgram); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefined_piKernelCreate(pi_program program, - const char *kernel_name, - pi_kernel *ret_kernel) { +static ur_result_t redefined_urKernelCreate(void *pParams) { + auto Params = *static_cast(pParams); CapturedLinkingData.ProgramUsedToCreateKernel = - reinterpret_cast(program)->getDataAs(); - *ret_kernel = createDummyHandle(); - return PI_SUCCESS; + reinterpret_cast(*Params.phProgram) + ->getDataAs(); + **Params.pphKernel = mock::createDummyHandle(); + return UR_RESULT_SUCCESS; } -static sycl::unittest::PiMock setupRuntimeLinkingMock() { - sycl::unittest::PiMock Mock; - - Mock.redefine( - redefined_piProgramCreate); - Mock.redefine( - redefined_piProgramLink); - Mock.redefine( - redefined_piKernelCreate); - - return Mock; +static void setupRuntimeLinkingMock() { + mock::getCallbacks().set_replace_callback("urProgramCreateWithIL", + redefined_urProgramCreateWithIL); + mock::getCallbacks().set_replace_callback("urProgramLinkExp", + redefined_urProgramLinkExp); + mock::getCallbacks().set_replace_callback("urKernelCreate", + redefined_urKernelCreate); } diff --git a/sycl/unittests/helpers/TestKernel.hpp b/sycl/unittests/helpers/TestKernel.hpp index 0edc129e03413..cea2acba20db5 100644 --- a/sycl/unittests/helpers/TestKernel.hpp +++ b/sycl/unittests/helpers/TestKernel.hpp @@ -9,7 +9,7 @@ #pragma once #include "MockKernelInfo.hpp" -#include "PiImage.hpp" +#include "UrImage.hpp" template class TestKernel; @@ -33,6 +33,6 @@ struct KernelInfo> } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage Img = +static sycl::unittest::UrImage Img = sycl::unittest::generateDefaultImage({"TestKernel"}); -static sycl::unittest::PiImageArray<1> ImgArray{&Img}; +static sycl::unittest::UrImageArray<1> ImgArray{&Img}; diff --git a/sycl/unittests/helpers/PiImage.hpp b/sycl/unittests/helpers/UrImage.hpp similarity index 82% rename from sycl/unittests/helpers/PiImage.hpp rename to sycl/unittests/helpers/UrImage.hpp index f2668bcb3eef2..566817d7ef375 100644 --- a/sycl/unittests/helpers/PiImage.hpp +++ b/sycl/unittests/helpers/UrImage.hpp @@ -1,4 +1,4 @@ -//==------------- PiImage.hpp --- PI mock image unit testing library -------==// +//==------------- UrImage.hpp --- UR mock image unit testing library -------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,38 +12,40 @@ #include #include #include -#include +#include #include namespace sycl { inline namespace _V1 { namespace unittest { -/// Convinience wrapper around _sycl_device_binary_property_struct. -class PiProperty { +using namespace sycl::detail; + +/// Convinience wrapper around _ur_device_binary_property_struct. +class UrProperty { public: using NativeType = _sycl_device_binary_property_struct; - /// Constructs a PI property. + /// Constructs a UR property. /// /// \param Name is a property name. /// \param Data is a vector of raw property value bytes. - /// \param Type is one of pi_property_type values. - PiProperty(const std::string &Name, std::vector Data, uint32_t Type) + /// \param Type is one of ur_property_type values. + UrProperty(const std::string &Name, std::vector Data, uint32_t Type) : MName(Name), MData(std::move(Data)), MType(Type) { updateNativeType(); } NativeType convertToNativeType() const { return MNative; } - PiProperty(const PiProperty &Src) { + UrProperty(const UrProperty &Src) { MName = Src.MName; MData = Src.MData; MType = Src.MType; updateNativeType(); } - PiProperty &operator=(const PiProperty &Src) { + UrProperty &operator=(const UrProperty &Src) { MName = Src.MName; MData = Src.MData; MType = Src.MType; @@ -68,23 +70,23 @@ class PiProperty { NativeType MNative; }; -/// Convinience wrapper for _pi_offload_entry_struct. -class PiOffloadEntry { +/// Convinience wrapper for _ur_offload_entry_struct. +class UrOffloadEntry { public: using NativeType = _sycl_offload_entry_struct; - PiOffloadEntry(const std::string &Name, std::vector Data, int32_t Flags) + UrOffloadEntry(const std::string &Name, std::vector Data, int32_t Flags) : MName(Name), MData(std::move(Data)), MFlags(Flags) { updateNativeType(); } - PiOffloadEntry(const PiOffloadEntry &Src) { + UrOffloadEntry(const UrOffloadEntry &Src) { MName = Src.MName; MData = Src.MData; MFlags = Src.MFlags; updateNativeType(); } - PiOffloadEntry &operator=(const PiOffloadEntry &Src) { + UrOffloadEntry &operator=(const UrOffloadEntry &Src) { MName = Src.MName; MData = Src.MData; MFlags = Src.MFlags; @@ -107,18 +109,18 @@ class PiOffloadEntry { NativeType MNative; }; -/// Generic array of PI entries. -template class PiArray { +/// Generic array of UR entries. +template class UrArray { public: - explicit PiArray(std::vector Entries) : MMockEntries(std::move(Entries)) { + explicit UrArray(std::vector Entries) : MMockEntries(std::move(Entries)) { updateEntries(); } - PiArray(std::initializer_list Entries) : MMockEntries(std::move(Entries)) { + UrArray(std::initializer_list Entries) : MMockEntries(std::move(Entries)) { updateEntries(); } - PiArray() = default; + UrArray() = default; void push_back(const T &Entry) { MMockEntries.push_back(Entry); @@ -159,15 +161,15 @@ template class PiArray { }; #ifdef __cpp_deduction_guides -template PiArray(std::vector) -> PiArray; +template UrArray(std::vector) -> UrArray; -template PiArray(std::initializer_list) -> PiArray; +template UrArray(std::initializer_list) -> UrArray; #endif // __cpp_deduction_guides /// Convenience wrapper for sycl_device_binary_property_set. -class PiPropertySet { +class UrPropertySet { public: - PiPropertySet() { + UrPropertySet() { // Most of unit-tests are statically linked with SYCL RT. On Linux and Mac // systems that causes incorrect RT installation directory detection, which // prevents proper loading of fallback libraries. See intel/llvm#6945 @@ -184,15 +186,15 @@ class PiPropertySet { // Name doesn't matter here, it is not used by RT // Value must be an all-zero 32-bit mask, which would mean that no fallback // libraries are needed to be loaded. - PiProperty DeviceLibReqMask("", Data, SYCL_PROPERTY_TYPE_UINT32); - insert(__SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK, PiArray{DeviceLibReqMask}); + UrProperty DeviceLibReqMask("", Data, SYCL_PROPERTY_TYPE_UINT32); + insert(__SYCL_PROPERTY_SET_DEVICELIB_REQ_MASK, UrArray{DeviceLibReqMask}); } /// Adds a new array of properties to the set. /// - /// \param Name is a property array name. See pi.h for list of known names. + /// \param Name is a property array name. See ur.hpp for list of known names. /// \param Props is an array of property values. - void insert(const std::string &Name, PiArray Props) { + void insert(const std::string &Name, UrArray Props) { MNames.push_back(Name); MMockProperties.push_back(std::move(Props)); MProperties.push_back(_sycl_device_binary_property_set_struct{ @@ -214,20 +216,20 @@ class PiPropertySet { private: std::vector MNames; - std::vector> MMockProperties; + std::vector> MMockProperties; std::vector<_sycl_device_binary_property_set_struct> MProperties; }; -/// Convenience wrapper around PI internal structures, that manages PI binary +/// Convenience wrapper around UR internal structures, that manages UR binary /// image data lifecycle. -class PiImage { +class UrImage { public: /// Constructs an arbitrary device image. - PiImage(uint16_t Version, uint8_t Kind, uint8_t Format, + UrImage(uint16_t Version, uint8_t Kind, uint8_t Format, const std::string &DeviceTargetSpec, const std::string &CompileOptions, const std::string &LinkOptions, std::vector Manifest, std::vector Binary, - PiArray OffloadEntries, PiPropertySet PropertySet) + UrArray OffloadEntries, UrPropertySet PropertySet) : MVersion(Version), MKind(Kind), MFormat(Format), MDeviceTargetSpec(DeviceTargetSpec), MCompileOptions(CompileOptions), MLinkOptions(LinkOptions), MManifest(std::move(Manifest)), @@ -235,11 +237,11 @@ class PiImage { MPropertySet(std::move(PropertySet)) {} /// Constructs a SYCL device image of the latest version. - PiImage(uint8_t Format, const std::string &DeviceTargetSpec, + UrImage(uint8_t Format, const std::string &DeviceTargetSpec, const std::string &CompileOptions, const std::string &LinkOptions, std::vector Binary, - PiArray OffloadEntries, PiPropertySet PropertySet) - : PiImage(SYCL_DEVICE_BINARY_VERSION, + UrArray OffloadEntries, UrPropertySet PropertySet) + : UrImage(SYCL_DEVICE_BINARY_VERSION, SYCL_DEVICE_BINARY_OFFLOAD_KIND_SYCL, Format, DeviceTargetSpec, CompileOptions, LinkOptions, {}, std::move(Binary), std::move(OffloadEntries), std::move(PropertySet)) {} @@ -273,17 +275,17 @@ class PiImage { std::string MLinkOptions; std::vector MManifest; std::vector MBinary; - PiArray MOffloadEntries; - PiPropertySet MPropertySet; + UrArray MOffloadEntries; + UrPropertySet MPropertySet; }; /// Convenience wrapper around sycl_device_binaries_struct, that manages mock /// device images' lifecycle. -template class PiImageArray { +template class UrImageArray { public: static constexpr size_t NumberOfImages = __NumberOfImages; - PiImageArray(PiImage *Imgs) { + UrImageArray(UrImage *Imgs) { for (size_t Idx = 0; Idx < NumberOfImages; ++Idx) MNativeImages[Idx] = Imgs[Idx].convertToNativeType(); @@ -298,7 +300,7 @@ template class PiImageArray { __sycl_register_lib(&MAllBinaries); } - ~PiImageArray() { __sycl_unregister_lib(&MAllBinaries); } + ~UrImageArray() { __sycl_unregister_lib(&MAllBinaries); } private: sycl_device_binary_struct MNativeImages[NumberOfImages]; @@ -330,7 +332,7 @@ template /// \param Offsets is a list of offsets inside composite spec constant. /// \param DefaultValues is a tuple of default values for composite spec const. template -inline PiProperty makeSpecConstant(std::vector &ValData, +inline UrProperty makeSpecConstant(std::vector &ValData, const std::string &Name, std::initializer_list IDs, std::initializer_list Offsets, @@ -382,15 +384,15 @@ inline PiProperty makeSpecConstant(std::vector &ValData, iterate_tuple(FillData, DefaultValues); - PiProperty Prop{Name, DescData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; + UrProperty Prop{Name, DescData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; return Prop; } /// Utility function to mark kernel as the one using assert inline void setKernelUsesAssert(const std::vector &Names, - PiPropertySet &Set) { - PiArray Value; + UrPropertySet &Set) { + UrArray Value; for (const std::string &N : Names) Value.push_back({N, {0, 0, 0, 0}, SYCL_PROPERTY_TYPE_UINT32}); Set.insert(__SYCL_PROPERTY_SET_SYCL_ASSERT_USED, std::move(Value)); @@ -399,36 +401,36 @@ inline void setKernelUsesAssert(const std::vector &Names, /// Utility function to add specialization constants to property set. /// /// This function overrides the default spec constant values. -inline void addSpecConstants(PiArray SpecConstants, - std::vector ValData, PiPropertySet &Props) { +inline void addSpecConstants(UrArray SpecConstants, + std::vector ValData, UrPropertySet &Props) { Props.insert(__SYCL_PROPERTY_SET_SPEC_CONST_MAP, std::move(SpecConstants)); - PiProperty Prop{"all", std::move(ValData), SYCL_PROPERTY_TYPE_BYTE_ARRAY}; + UrProperty Prop{"all", std::move(ValData), SYCL_PROPERTY_TYPE_BYTE_ARRAY}; - PiArray DefaultValues{std::move(Prop)}; + UrArray DefaultValues{std::move(Prop)}; Props.insert(__SYCL_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP, std::move(DefaultValues)); } /// Utility function to add ESIMD kernel flag to property set. -inline void addESIMDFlag(PiPropertySet &Props) { +inline void addESIMDFlag(UrPropertySet &Props) { std::vector ValData(sizeof(uint32_t)); ValData[0] = 1; - PiProperty Prop{"isEsimdImage", ValData, SYCL_PROPERTY_TYPE_UINT32}; + UrProperty Prop{"isEsimdImage", ValData, SYCL_PROPERTY_TYPE_UINT32}; - PiArray Value{std::move(Prop)}; + UrArray Value{std::move(Prop)}; Props.insert(__SYCL_PROPERTY_SET_SYCL_MISC_PROP, std::move(Value)); } /// Utility function to generate offload entries for kernels without arguments. -inline PiArray +inline UrArray makeEmptyKernels(std::initializer_list KernelNames) { - PiArray Entries; + UrArray Entries; for (const auto &Name : KernelNames) { - PiOffloadEntry E{Name, {}, 0}; + UrOffloadEntry E{Name, {}, 0}; Entries.push_back(std::move(E)); } return Entries; @@ -439,7 +441,7 @@ makeEmptyKernels(std::initializer_list KernelNames) { /// \param Name is a property name. /// \param NumArgs is a total number of arguments of a kernel. /// \param ElimArgMask is a bit mask of eliminated kernel arguments IDs. -inline PiProperty +inline UrProperty makeKernelParamOptInfo(const std::string &Name, const size_t NumArgs, const std::vector &ElimArgMask) { const size_t BYTES_FOR_SIZE = 8; @@ -451,7 +453,7 @@ makeKernelParamOptInfo(const std::string &Name, const size_t NumArgs, std::uninitialized_copy(ElimArgMask.begin(), ElimArgMask.end(), DescData.data() + BYTES_FOR_SIZE); - PiProperty Prop{Name, DescData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; + UrProperty Prop{Name, DescData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; return Prop; } @@ -462,7 +464,7 @@ makeKernelParamOptInfo(const std::string &Name, const size_t NumArgs, /// \param TypeSize is the size of the underlying type in the device global. /// \param DeviceImageScoped is whether the device global was device image scope /// decorated. -inline PiProperty makeDeviceGlobalInfo(const std::string &Name, +inline UrProperty makeDeviceGlobalInfo(const std::string &Name, const uint32_t TypeSize, const std::uint32_t DeviceImageScoped) { constexpr size_t BYTES_FOR_SIZE = 8; @@ -474,7 +476,7 @@ inline PiProperty makeDeviceGlobalInfo(const std::string &Name, std::memcpy(DescData.data() + BYTES_FOR_SIZE + sizeof(TypeSize), &DeviceImageScoped, sizeof(DeviceImageScoped)); - PiProperty Prop{Name, DescData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; + UrProperty Prop{Name, DescData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; return Prop; } @@ -484,7 +486,7 @@ inline PiProperty makeDeviceGlobalInfo(const std::string &Name, /// \param Name is the name of the hostpipe name. /// \param TypeSize is the size of the underlying type in the hostpipe. /// decorated. -inline PiProperty makeHostPipeInfo(const std::string &Name, +inline UrProperty makeHostPipeInfo(const std::string &Name, const uint32_t TypeSize) { constexpr size_t BYTES_FOR_SIZE = 8; const std::uint64_t BytesForArgs = sizeof(std::uint32_t); @@ -493,13 +495,13 @@ inline PiProperty makeHostPipeInfo(const std::string &Name, std::memcpy(DescData.data(), &BytesForArgs, sizeof(BytesForArgs)); std::memcpy(DescData.data() + BYTES_FOR_SIZE, &TypeSize, sizeof(TypeSize)); - PiProperty Prop{Name, DescData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; + UrProperty Prop{Name, DescData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; return Prop; } /// Utility function to add aspects to property set. -inline PiProperty makeAspectsProp(const std::vector &Aspects) { +inline UrProperty makeAspectsProp(const std::vector &Aspects) { const size_t BYTES_FOR_SIZE = 8; std::vector ValData(BYTES_FOR_SIZE + Aspects.size() * sizeof(sycl::aspect)); @@ -512,7 +514,7 @@ inline PiProperty makeAspectsProp(const std::vector &Aspects) { return {"aspects", ValData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; } -inline PiProperty makeReqdWGSizeProp(const std::vector &ReqdWGSize) { +inline UrProperty makeReqdWGSizeProp(const std::vector &ReqdWGSize) { const size_t BYTES_FOR_SIZE = 8; std::vector ValData(BYTES_FOR_SIZE + ReqdWGSize.size() * sizeof(int)); uint64_t ValDataSize = ValData.size(); @@ -526,24 +528,24 @@ inline PiProperty makeReqdWGSizeProp(const std::vector &ReqdWGSize) { } inline void -addDeviceRequirementsProps(PiPropertySet &Props, +addDeviceRequirementsProps(UrPropertySet &Props, const std::vector &Aspects, const std::vector &ReqdWGSize = {}) { - PiArray Value{makeAspectsProp(Aspects)}; + UrArray Value{makeAspectsProp(Aspects)}; if (!ReqdWGSize.empty()) Value.push_back(makeReqdWGSizeProp(ReqdWGSize)); Props.insert(__SYCL_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS, std::move(Value)); } -inline PiImage +inline UrImage generateDefaultImage(std::initializer_list KernelNames) { - PiPropertySet PropSet; + UrPropertySet PropSet; std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = makeEmptyKernels(KernelNames); + UrArray Entries = makeEmptyKernels(KernelNames); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp new file mode 100644 index 0000000000000..e4e525a274dbc --- /dev/null +++ b/sycl/unittests/helpers/UrMock.hpp @@ -0,0 +1,540 @@ +//==------------- UrMock.hpp --- Mock unit testing library -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This mini-library provides facilities to test the DPC++ Runtime behavior upon +// specific results of the underlying low-level API calls. By exploiting the +// Plugin Interface API, the stored addresses of the actual plugin-specific +// implementations can be overwritten to point at user-defined mock functions. +// +// To make testing independent of existing plugins and devices, all plugins are +// forcefully unloaded and the mock plugin is registered as the only plugin. +// +// While this could be done manually for each unit-testing scenario, the library +// aims to rule out the boilerplate, providing helper APIs which can be re-used +// by all such unit tests. The test code stemming from this can be more consise, +// with little difference from non-mock classes' usage. +// +// The following unit testing scenarios are thereby simplified: +// 1) testing the DPC++ RT management of specific UR return codes; +// 2) coverage of corner-cases related to specific data outputs +// from underlying runtimes; +// 3) testing the order of UR API calls; +// ..., etc. +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace sycl { + +inline namespace _V1 { +namespace unittest { + +namespace detail = sycl::detail; + +namespace MockAdapter { + +inline ur_result_t mock_urPlatformGet(void *pParams) { + auto params = reinterpret_cast(pParams); + if (*params->ppNumPlatforms) + **params->ppNumPlatforms = 1; + + if (*params->pphPlatforms && *params->pNumEntries > 0) + *params->pphPlatforms[0] = reinterpret_cast(1); + + return UR_RESULT_SUCCESS; +} + +inline ur_result_t mock_urDeviceGet(void *pParams) { + auto params = reinterpret_cast(pParams); + if (*params->ppNumDevices) + **params->ppNumDevices = 1; + + if (*params->pphDevices && *params->pNumEntries > 0) + *params->pphDevices[0] = reinterpret_cast(1); + + return UR_RESULT_SUCCESS; +} + +// since we're overriding DeviceGet to return a specific fake handle we'll also +// need to override the Retain/Release functions + +inline ur_result_t mock_urDeviceRetain(void *) { return UR_RESULT_SUCCESS; } +inline ur_result_t mock_urDeviceRelease(void *) { return UR_RESULT_SUCCESS; } + +template +inline ur_result_t mock_urAdapterGetInfo(void *pParams) { + auto params = reinterpret_cast(pParams); + + if (*params->ppropName == UR_ADAPTER_INFO_BACKEND) { + constexpr auto MockPlatformBackend = Backend; + if (*params->ppPropValue) { + std::memcpy(*params->ppPropValue, &MockPlatformBackend, + sizeof(MockPlatformBackend)); + } + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(MockPlatformBackend); + return UR_RESULT_SUCCESS; + } + return UR_RESULT_SUCCESS; +} + +template +inline ur_result_t mock_urPlatformGetInfo(void *pParams) { + auto params = reinterpret_cast(pParams); + constexpr char MockPlatformName[] = "Mock platform"; + constexpr char MockSupportedExtensions[] = + "cl_khr_il_program cl_khr_subgroups cl_intel_subgroups " + "cl_intel_subgroups_short cl_intel_required_subgroup_size "; + switch (*params->ppropName) { + case UR_PLATFORM_INFO_NAME: { + if (*params->ppPropValue) { + assert(*params->ppropSize == sizeof(MockPlatformName)); + std::memcpy(*params->ppPropValue, MockPlatformName, + sizeof(MockPlatformName)); + } + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(MockPlatformName); + return UR_RESULT_SUCCESS; + } + case UR_PLATFORM_INFO_EXTENSIONS: { + if (*params->ppPropValue) { + assert(*params->ppropSize == sizeof(MockSupportedExtensions)); + std::memcpy(*params->ppPropValue, MockSupportedExtensions, + sizeof(MockSupportedExtensions)); + } + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(MockSupportedExtensions); + return UR_RESULT_SUCCESS; + } + case UR_PLATFORM_INFO_BACKEND: { + constexpr auto MockPlatformBackend = Backend; + if (*params->ppPropValue) { + std::memcpy(*params->ppPropValue, &MockPlatformBackend, + sizeof(MockPlatformBackend)); + } + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(MockPlatformBackend); + return UR_RESULT_SUCCESS; + } + default: { + constexpr const char FallbackValue[] = "str"; + constexpr size_t FallbackValueSize = sizeof(FallbackValue); + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = FallbackValueSize; + + if (*params->ppPropValue && *params->ppropSize >= FallbackValueSize) + std::memcpy(*params->ppPropValue, FallbackValue, FallbackValueSize); + + return UR_RESULT_SUCCESS; + } + } +} + +inline ur_result_t mock_urDeviceGetInfo(void *pParams) { + auto params = reinterpret_cast(pParams); + constexpr char MockDeviceName[] = "Mock device"; + constexpr char MockSupportedExtensions[] = + "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program ur_exp_command_buffer"; + switch (*params->ppropName) { + case UR_DEVICE_INFO_TYPE: { + // Act like any device is a GPU. + // TODO: Should we mock more device types? + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = + UR_DEVICE_TYPE_GPU; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(UR_DEVICE_TYPE_GPU); + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_NAME: { + if (*params->ppPropValue) { + assert(*params->ppropSize == sizeof(MockDeviceName)); + std::memcpy(*params->ppPropValue, MockDeviceName, sizeof(MockDeviceName)); + } + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(MockDeviceName); + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_PARENT_DEVICE: { + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = nullptr; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(ur_device_handle_t *); + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_EXTENSIONS: { + if (*params->ppPropValue) { + assert(*params->ppropSize >= sizeof(MockSupportedExtensions)); + std::memcpy(*params->ppPropValue, MockSupportedExtensions, + sizeof(MockSupportedExtensions)); + } + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(MockSupportedExtensions); + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: + case UR_DEVICE_INFO_AVAILABLE: + case UR_DEVICE_INFO_LINKER_AVAILABLE: + case UR_DEVICE_INFO_COMPILER_AVAILABLE: + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = true; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(true); + return UR_RESULT_SUCCESS; + } + // This mock GPU device has no sub-devices + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { + if (*params->ppPropSizeRet) { + **params->ppPropSizeRet = 0; + } + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { + assert(*params->ppropSize == sizeof(ur_device_affinity_domain_flags_t)); + if (*params->ppPropValue) { + *static_cast(*params->ppPropValue) = + 0; + } + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_QUEUE_PROPERTIES: { + assert(*params->ppropSize == sizeof(ur_queue_flags_t)); + if (*params->ppPropValue) { + *static_cast(*params->ppPropValue) = + UR_QUEUE_FLAG_PROFILING_ENABLE; + } + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_COMPONENT_DEVICES: + if (*params->ppPropValue) { + *static_cast(*params->ppPropValue) = nullptr; + } + if (*params->ppPropSizeRet) { + **params->ppPropSizeRet = 0; + } + return UR_RESULT_SUCCESS; + default: { + // In the default case we fill the return value with 0's. This may not be + // valid for all device queries, but it will mean a consistent return value + // for the query. + // Any tests that need special return values should either add behavior + // the this function or use redefineAfter with a function that adds the + // intended behavior. + if (*params->ppPropValue && *params->ppropSize != 0) + std::memset(*params->ppPropValue, 0, *params->ppropSize); + // Likewise, if the device info query asks for the size of the return value + // we tell it there is a single byte to avoid cases where the runtime tries + // to allocate some random amount of memory for the return value. + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = 1; + return UR_RESULT_SUCCESS; + } + } +} + +inline ur_result_t mock_urProgramGetInfo(void *pParams) { + auto params = reinterpret_cast(pParams); + switch (*params->ppropName) { + case UR_PROGRAM_INFO_NUM_DEVICES: { + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = 1; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(size_t); + return UR_RESULT_SUCCESS; + } + case UR_PROGRAM_INFO_DEVICES: { + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = + reinterpret_cast(0x1); + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(ur_device_handle_t); + return UR_RESULT_SUCCESS; + } + case UR_PROGRAM_INFO_BINARY_SIZES: { + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = 1; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(size_t); + return UR_RESULT_SUCCESS; + } + case UR_PROGRAM_INFO_BINARIES: { + if (*params->ppPropValue) + **static_cast(*params->ppPropValue) = 1; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(unsigned char); + return UR_RESULT_SUCCESS; + } + default: { + // TODO: Buildlog requires this but not any actual data afterwards. + // This should be investigated. Should this be moved to that test? + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(size_t); + return UR_RESULT_SUCCESS; + } + } +} + +inline ur_result_t mock_urContextGetInfo(void *pParams) { + auto params = reinterpret_cast(pParams); + switch (*params->ppropName) { + case UR_CONTEXT_INFO_NUM_DEVICES: { + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = 1; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(uint32_t); + return UR_RESULT_SUCCESS; + } + default: + return UR_RESULT_SUCCESS; + } +} + +inline ur_result_t mock_urQueueGetInfo(void *pParams) { + auto params = reinterpret_cast(pParams); + switch (*params->ppropName) { + case UR_QUEUE_INFO_DEVICE: { + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = + reinterpret_cast(1); + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(ur_device_handle_t); + return UR_RESULT_SUCCESS; + } + default: + return UR_RESULT_SUCCESS; + } +} + +inline ur_result_t mock_urKernelGetGroupInfo(void *pParams) { + auto params = reinterpret_cast(pParams); + switch (*params->ppropName) { + case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { + if (*params->ppPropValue) { + auto RealVal = reinterpret_cast(*params->ppPropValue); + RealVal[0] = 0; + RealVal[1] = 0; + RealVal[2] = 0; + } + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = 3 * sizeof(size_t); + return UR_RESULT_SUCCESS; + } + default: { + return UR_RESULT_SUCCESS; + } + } +} + +inline ur_result_t mock_urEventGetInfo(void *pParams) { + auto params = reinterpret_cast(pParams); + switch (*params->ppropName) { + case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: { + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = + UR_EVENT_STATUS_SUBMITTED; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(ur_event_status_t); + return UR_RESULT_SUCCESS; + } + default: { + return UR_RESULT_SUCCESS; + } + } +} + +inline ur_result_t +mock_urKernelSuggestMaxCooperativeGroupCountExp(void *pParams) { + auto params = reinterpret_cast< + ur_kernel_suggest_max_cooperative_group_count_exp_params_t *>(pParams); + **params->ppGroupCountRet = 1; + return UR_RESULT_SUCCESS; +} + +inline ur_result_t mock_urDeviceSelectBinary(void *pParams) { + auto params = reinterpret_cast(pParams); + **params->ppSelectedBinary = 0; + return UR_RESULT_SUCCESS; +} + +inline ur_result_t mock_urPlatformGetBackendOption(void *pParams) { + auto params = + reinterpret_cast(pParams); + **params->pppPlatformOption = ""; + return UR_RESULT_SUCCESS; +} + +// Returns the wall-clock timestamp of host for deviceTime and hostTime +inline ur_result_t mock_urDeviceGetGlobalTimestamps(void *pParams) { + auto params = + reinterpret_cast(pParams); + using namespace std::chrono; + auto timeNanoseconds = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); + if (*params->ppDeviceTimestamp) { + **params->ppDeviceTimestamp = timeNanoseconds; + } + if (*params->ppHostTimestamp) { + **params->ppHostTimestamp = timeNanoseconds; + } + return UR_RESULT_SUCCESS; +} + +inline ur_result_t mock_urUsmP2PPeerAccessGetInfoExp(void *pParams) { + auto params = + reinterpret_cast(pParams); + if (*params->ppPropValue) + *static_cast(*params->ppPropValue) = 1; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = sizeof(int32_t); + + return UR_RESULT_SUCCESS; +} + +inline ur_result_t mock_urVirtualMemReserve(void *pParams) { + auto params = reinterpret_cast(pParams); + **params->pppStart = *params->ppStart + ? const_cast(*params->ppStart) + : mock::createDummyHandle(*params->psize); + return UR_RESULT_SUCCESS; +} + +} // namespace MockAdapter + +/// The UrMock<> class sets up UR for adapter mocking with the set of default +/// overrides above, and ensures the appropriate parts of the sycl runtime and +/// UR mocking code are reset/torn down in between tests. +/// +/// The template parameter allows tests to select an arbitrary backend to have +/// the mock adapter report itself as. +template class UrMock { +public: + /// Constructs UrMock<> + /// + /// This ensures UR is setup for adapter mocking and also injects our default + /// entry-point overrides into the mock adapter. + UrMock() { +#define ADD_DEFAULT_OVERRIDE(func_name, func_override) \ + mock::getCallbacks().set_replace_callback(#func_name, \ + &MockAdapter::func_override); + ADD_DEFAULT_OVERRIDE( + urAdapterGetInfo, + mock_urAdapterGetInfo) + ADD_DEFAULT_OVERRIDE(urPlatformGet, mock_urPlatformGet) + ADD_DEFAULT_OVERRIDE(urDeviceGet, mock_urDeviceGet) + ADD_DEFAULT_OVERRIDE(urDeviceRetain, mock_urDeviceRetain) + ADD_DEFAULT_OVERRIDE(urDeviceRelease, mock_urDeviceRelease) + ADD_DEFAULT_OVERRIDE( + urPlatformGetInfo, + mock_urPlatformGetInfo) + ADD_DEFAULT_OVERRIDE(urDeviceGetInfo, mock_urDeviceGetInfo) + ADD_DEFAULT_OVERRIDE(urProgramGetInfo, mock_urProgramGetInfo) + ADD_DEFAULT_OVERRIDE(urContextGetInfo, mock_urContextGetInfo) + ADD_DEFAULT_OVERRIDE(urQueueGetInfo, mock_urQueueGetInfo) + ADD_DEFAULT_OVERRIDE(urProgramGetInfo, mock_urProgramGetInfo) + ADD_DEFAULT_OVERRIDE(urKernelGetGroupInfo, mock_urKernelGetGroupInfo) + ADD_DEFAULT_OVERRIDE(urEventGetInfo, mock_urEventGetInfo) + ADD_DEFAULT_OVERRIDE(urKernelSuggestMaxCooperativeGroupCountExp, + mock_urKernelSuggestMaxCooperativeGroupCountExp) + ADD_DEFAULT_OVERRIDE(urDeviceSelectBinary, mock_urDeviceSelectBinary) + ADD_DEFAULT_OVERRIDE(urPlatformGetBackendOption, + mock_urPlatformGetBackendOption) + ADD_DEFAULT_OVERRIDE(urDeviceGetGlobalTimestamps, + mock_urDeviceGetGlobalTimestamps) + ADD_DEFAULT_OVERRIDE(urUsmP2PPeerAccessGetInfoExp, + mock_urUsmP2PPeerAccessGetInfoExp) + ADD_DEFAULT_OVERRIDE(urVirtualMemReserve, mock_urVirtualMemReserve) +#undef ADD_DEFAULT_OVERRIDE + + ur_loader_config_handle_t UrLoaderConfig = nullptr; + + urLoaderConfigCreate(&UrLoaderConfig); + urLoaderConfigSetMockingEnabled(UrLoaderConfig, true); + + sycl::detail::ur::initializeUr(UrLoaderConfig); + urLoaderConfigRelease(UrLoaderConfig); + } + + UrMock(UrMock &&Other) = delete; + UrMock(const UrMock &) = delete; + UrMock &operator=(const UrMock &) = delete; + ~UrMock() { + // mock::getCallbacks() is an application lifetime object, we need to reset + // these between tests + detail::GlobalHandler::instance().prepareSchedulerToRelease(true); + detail::GlobalHandler::instance().releaseDefaultContexts(); + // clear platform cache in case subsequent tests want a different backend, + // this forces platforms to be reconstructed (and thus queries about UR + // backend info to be called again) + detail::GlobalHandler::instance().getPlatformCache().clear(); + mock::getCallbacks().resetCallbacks(); + } + +private: + // These two helpers are needed to enable arbitrary backend selection + // at compile time. + static constexpr ur_platform_backend_t + convertToUrPlatformBackend(const sycl::backend SyclBackend) { + switch (SyclBackend) { + case sycl::backend::opencl: + return UR_PLATFORM_BACKEND_OPENCL; + case sycl::backend::ext_oneapi_level_zero: + return UR_PLATFORM_BACKEND_LEVEL_ZERO; + case sycl::backend::ext_oneapi_cuda: + return UR_PLATFORM_BACKEND_CUDA; + case sycl::backend::ext_oneapi_hip: + return UR_PLATFORM_BACKEND_HIP; + case sycl::backend::ext_oneapi_native_cpu: + return UR_PLATFORM_BACKEND_NATIVE_CPU; + default: + return UR_PLATFORM_BACKEND_UNKNOWN; + } + } + + static constexpr ur_adapter_backend_t + convertToUrAdapterBackend(sycl::backend SyclBackend) { + switch (SyclBackend) { + case sycl::backend::opencl: + return UR_ADAPTER_BACKEND_OPENCL; + case sycl::backend::ext_oneapi_level_zero: + return UR_ADAPTER_BACKEND_LEVEL_ZERO; + case sycl::backend::ext_oneapi_cuda: + return UR_ADAPTER_BACKEND_CUDA; + case sycl::backend::ext_oneapi_hip: + return UR_ADAPTER_BACKEND_HIP; + case sycl::backend::ext_oneapi_native_cpu: + return UR_ADAPTER_BACKEND_NATIVE_CPU; + default: + return UR_ADAPTER_BACKEND_UNKNOWN; + } + } +}; + +} // namespace unittest +} // namespace _V1 +} // namespace sycl diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index 4872bbbc5ad5a..c2ddb46c3a17f 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -13,10 +13,10 @@ #include "detail/context_impl.hpp" #include "detail/kernel_program_cache.hpp" -#include "sycl/detail/pi.h" +#include "sycl/detail/ur.hpp" #include -#include -#include +#include +#include #include #include @@ -50,21 +50,21 @@ template <> const char *get_spec_constant_symbolic_ID() { } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage generateDefaultImage() { +static sycl::unittest::UrImage generateDefaultImage() { using namespace sycl::unittest; std::vector SpecConstData; - PiProperty SC1 = makeSpecConstant(SpecConstData, "SC1", {0}, {0}, {42}); + UrProperty SC1 = makeSpecConstant(SpecConstData, "SC1", {0}, {0}, {42}); - PiPropertySet PropSet; + UrPropertySet PropSet; addSpecConstants({SC1}, std::move(SpecConstData), PropSet); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = + UrArray Entries = makeEmptyKernels({"CacheTestKernel", "CacheTestKernel2"}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -75,40 +75,37 @@ static sycl::unittest::PiImage generateDefaultImage() { return Img; } -static sycl::unittest::PiImage Img = generateDefaultImage(); -static sycl::unittest::PiImageArray<1> ImgArray{&Img}; +static sycl::unittest::UrImage Img = generateDefaultImage(); +static sycl::unittest::UrImageArray<1> ImgArray{&Img}; struct TestCtx { - detail::pi::PiContext context; + ur_context_handle_t context; }; std::unique_ptr globalCtx; -static pi_result redefinedKernelGetInfo(pi_kernel kernel, - pi_kernel_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_KERNEL_INFO_CONTEXT) { - auto ctx = reinterpret_cast(param_value); +static ur_result_t redefinedKernelGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_KERNEL_INFO_CONTEXT) { + auto ctx = reinterpret_cast(*params.ppPropValue); *ctx = globalCtx->context; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class KernelAndProgramCacheTest : public ::testing::Test { public: - KernelAndProgramCacheTest() : Mock{}, Plt{Mock.getPlatform()} {} + KernelAndProgramCacheTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - Mock.redefineBefore( - redefinedKernelGetInfo); + mock::getCallbacks().set_before_callback("urKernelGetInfo", + &redefinedKernelGetInfo); } protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; sycl::platform Plt; }; diff --git a/sycl/unittests/kernel-and-program/DeviceInfo.cpp b/sycl/unittests/kernel-and-program/DeviceInfo.cpp index f4f5349cd8f30..532d656ac49c1 100644 --- a/sycl/unittests/kernel-and-program/DeviceInfo.cpp +++ b/sycl/unittests/kernel-and-program/DeviceInfo.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include using namespace sycl; @@ -27,101 +27,96 @@ struct TestCtx { static std::unique_ptr TestContext; -static pi_result redefinedDeviceGetInfo(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_UUID) { +static ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_UUID) { TestContext->UUIDInfoCalled = true; - } else if (param_name == PI_DEVICE_INFO_BUILT_IN_KERNELS) { - if (param_value_size_ret) { - *param_value_size_ret = TestContext->BuiltInKernels.size() + 1; - } else if (param_value) { - char *dst = static_cast(param_value); - dst[TestContext->BuiltInKernels.copy(dst, param_value_size)] = '\0'; + } else if (*params.ppropName == UR_DEVICE_INFO_BUILT_IN_KERNELS) { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = TestContext->BuiltInKernels.size() + 1; + } else if (*params.ppPropValue) { + char *dst = static_cast(*params.ppPropValue); + dst[TestContext->BuiltInKernels.copy(dst, *params.ppropSize)] = '\0'; } - } else if (param_name == PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY) { + } else if (*params.ppropName == UR_DEVICE_INFO_GLOBAL_MEM_FREE) { TestContext->FreeMemoryInfoCalled = true; - } else if (param_name == PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE) { - if (param_value_size_ret) - *param_value_size_ret = 4; + } else if (*params.ppropName == UR_DEVICE_INFO_MEMORY_CLOCK_RATE) { + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 4; - if (param_value) { - assert(param_value_size == sizeof(uint32_t)); - *static_cast(param_value) = 800; + if (*params.ppPropValue) { + assert(*params.ppropSize == sizeof(uint32_t)); + *static_cast(*params.ppPropValue) = 800; } - } else if (param_name == PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH) { - if (param_value_size_ret) - *param_value_size_ret = 4; + } else if (*params.ppropName == UR_DEVICE_INFO_MEMORY_BUS_WIDTH) { + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = 4; - if (param_value) { - assert(param_value_size == sizeof(uint32_t)); - *static_cast(param_value) = 64; + if (*params.ppPropValue) { + assert(*params.ppropSize == sizeof(uint32_t)); + *static_cast(*params.ppPropValue) = 64; } } // This mock device has no sub-devices - if (param_name == PI_DEVICE_INFO_PARTITION_PROPERTIES) { - if (param_value_size_ret) { - *param_value_size_ret = 0; + if (*params.ppropName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 0; } } - if (param_name == PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - assert(param_value_size == sizeof(pi_device_affinity_domain)); - if (param_value) { - *static_cast(param_value) = 0; + if (*params.ppropName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { + assert(*params.ppropSize == sizeof(ur_device_affinity_domain_flags_t)); + if (*params.ppPropValue) { + *static_cast(*params.ppPropValue) = + 0; } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class DeviceInfoTest : public ::testing::Test { public: - DeviceInfoTest() : Mock{}, Plt{Mock.getPlatform()} {} + DeviceInfoTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - Mock.redefineAfter( - redefinedDeviceGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); } protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; sycl::platform Plt; }; -static pi_result redefinedNegativeDeviceGetInfo(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - switch (param_name) { - case PI_DEVICE_INFO_UUID: - case PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY: - case PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE: - case PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH: - return PI_ERROR_INVALID_VALUE; +static ur_result_t redefinedNegativeDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_DEVICE_INFO_UUID: + case UR_DEVICE_INFO_GLOBAL_MEM_FREE: + case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: + case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: + return UR_RESULT_ERROR_INVALID_VALUE; default: - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class DeviceInfoNegativeTest : public ::testing::Test { public: - DeviceInfoNegativeTest() : Mock{}, Plt{Mock.getPlatform()} {} + DeviceInfoNegativeTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - Mock.redefineBefore( - redefinedNegativeDeviceGetInfo); + mock::getCallbacks().set_before_callback("urDeviceGetInfo", + &redefinedNegativeDeviceGetInfo); } protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; sycl::platform Plt; }; @@ -136,8 +131,8 @@ TEST_F(DeviceInfoTest, GetDeviceUUID) { auto UUID = Dev.get_info(); EXPECT_EQ(TestContext->UUIDInfoCalled, true) - << "Expect piDeviceGetInfo to be " - << "called with PI_DEVICE_INFO_UUID"; + << "Expect urDeviceGetInfo to be " + << "called with UR_DEVICE_INFO_UUID"; EXPECT_EQ(sizeof(UUID), 16 * sizeof(unsigned char)) << "Expect device UUID to be " @@ -155,8 +150,8 @@ TEST_F(DeviceInfoTest, GetDeviceFreeMemory) { auto FreeMemory = Dev.get_info(); EXPECT_EQ(TestContext->FreeMemoryInfoCalled, true) - << "Expect piDeviceGetInfo to be " - << "called with PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY"; + << "Expect urDeviceGetInfo to be " + << "called with UR_DEVICE_INFO_GLOBAL_MEM_FREE"; EXPECT_EQ(sizeof(FreeMemory), sizeof(uint64_t)) << "Expect free_memory to be of uint64_t size"; diff --git a/sycl/unittests/kernel-and-program/KernelBuildOptions.cpp b/sycl/unittests/kernel-and-program/KernelBuildOptions.cpp index 65945a9bd511a..2fb0d69da97b9 100644 --- a/sycl/unittests/kernel-and-program/KernelBuildOptions.cpp +++ b/sycl/unittests/kernel-and-program/KernelBuildOptions.cpp @@ -12,8 +12,8 @@ #endif #include -#include -#include +#include +#include #include #include @@ -34,60 +34,53 @@ struct KernelInfo : public unittest::MockKernelInfoBase { } // namespace _V1 } // namespace sycl -static pi_result redefinedProgramBuild( - pi_program prog, pi_uint32, const pi_device *, const char *options, - void (*pfn_notify)(pi_program program, void *user_data), void *user_data) { - if (options) - BuildOpts = options; +static ur_result_t redefinedProgramBuild(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppOptions) + BuildOpts = *params.ppOptions; else BuildOpts = ""; - if (pfn_notify) { - pfn_notify(prog, user_data); - } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedProgramCompile(pi_program, pi_uint32, - const pi_device *, const char *options, - pi_uint32, const pi_program *, - const char **, - void (*)(pi_program, void *), void *) { - if (options) - BuildOpts = options; +static ur_result_t redefinedProgramCompile(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppOptions) + BuildOpts = *params.ppOptions; else BuildOpts = ""; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedProgramLink(pi_context, pi_uint32, const pi_device *, - const char *options, pi_uint32, - const pi_program *, - void (*)(pi_program, void *), void *, - pi_program *) { - if (options) - BuildOpts = options; +static ur_result_t redefinedProgramLink(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppOptions) + BuildOpts = *params.ppOptions; else BuildOpts = ""; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static void setupCommonMockAPIs(sycl::unittest::PiMock &Mock) { +static void setupCommonMockAPIs(sycl::unittest::UrMock<> &Mock) { using namespace sycl::detail; - Mock.redefineBefore(redefinedProgramCompile); - Mock.redefineBefore(redefinedProgramLink); - Mock.redefineBefore(redefinedProgramBuild); + mock::getCallbacks().set_before_callback("urProgramCompileExp", + &redefinedProgramCompile); + mock::getCallbacks().set_before_callback("urProgramLinkExp", + &redefinedProgramLink); + mock::getCallbacks().set_before_callback("urProgramBuildExp", + &redefinedProgramBuild); } -static sycl::unittest::PiImage generateDefaultImage() { +static sycl::unittest::UrImage generateDefaultImage() { using namespace sycl::unittest; - PiPropertySet PropSet; + UrPropertySet PropSet; addESIMDFlag(PropSet); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = makeEmptyKernels({"BuildOptsTestKernel"}); + UrArray Entries = makeEmptyKernels({"BuildOptsTestKernel"}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "-compile-img", // Compile options "-link-img", // Link options @@ -98,12 +91,12 @@ static sycl::unittest::PiImage generateDefaultImage() { return Img; } -sycl::unittest::PiImage Img = generateDefaultImage(); -sycl::unittest::PiImageArray<1> ImgArray{&Img}; +sycl::unittest::UrImage Img = generateDefaultImage(); +sycl::unittest::UrImageArray<1> ImgArray{&Img}; TEST(KernelBuildOptions, KernelBundleBasic) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); setupCommonMockAPIs(Mock); const sycl::device Dev = Plt.get_devices()[0]; diff --git a/sycl/unittests/kernel-and-program/KernelInfo.cpp b/sycl/unittests/kernel-and-program/KernelInfo.cpp index a5b406ba469c5..f7297a4145485 100644 --- a/sycl/unittests/kernel-and-program/KernelInfo.cpp +++ b/sycl/unittests/kernel-and-program/KernelInfo.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include using namespace sycl; @@ -26,46 +26,40 @@ struct TestCtx { static std::unique_ptr TestContext; -static pi_result redefinedKernelGetGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE) { +static ur_result_t redefinedKernelGetGroupInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE) { TestContext->PrivateMemSizeCalled = true; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedKernelGetInfo(pi_kernel kernel, - pi_kernel_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - EXPECT_EQ(param_name, PI_KERNEL_INFO_CONTEXT) +static ur_result_t redefinedKernelGetInfo(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppropName, UR_KERNEL_INFO_CONTEXT) << "Unexpected kernel info requested"; - auto *Result = reinterpret_cast(param_value); - sycl::detail::pi::PiContext PiCtx = + auto *Result = reinterpret_cast(*params.ppPropValue); + ur_context_handle_t UrContext = detail::getSyclObjImpl(TestContext->Ctx)->getHandleRef(); - *Result = PiCtx; - return PI_SUCCESS; + *Result = UrContext; + return UR_RESULT_SUCCESS; } class KernelInfoTest : public ::testing::Test { public: - KernelInfoTest() : Mock{}, Plt{Mock.getPlatform()} {} + KernelInfoTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - Mock.redefineBefore( - redefinedKernelGetGroupInfo); - Mock.redefineBefore( - redefinedKernelGetInfo); + mock::getCallbacks().set_before_callback("urKernelGetGroupInfo", + &redefinedKernelGetGroupInfo); + mock::getCallbacks().set_before_callback("urKernelGetInfo", + &redefinedKernelGetInfo); } protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; sycl::platform Plt; }; @@ -82,5 +76,5 @@ TEST_F(KernelInfoTest, DISABLED_GetPrivateMemUsage) { // Ctx.get_devices()[0]); EXPECT_EQ(TestContext->PrivateMemSizeCalled, true) << "Expect piKernelGetGroupInfo to be " - << "called with PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE"; + << "called with UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE"; } diff --git a/sycl/unittests/kernel-and-program/KernelRelease.cpp b/sycl/unittests/kernel-and-program/KernelRelease.cpp index a982772db573c..7d0e862b284ee 100644 --- a/sycl/unittests/kernel-and-program/KernelRelease.cpp +++ b/sycl/unittests/kernel-and-program/KernelRelease.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include @@ -29,47 +29,43 @@ struct TestCtx { static std::unique_ptr TestContext; -static pi_result redefinedKernelCreate(pi_program program, - const char *kernel_name, - pi_kernel *ret_kernel) { +static ur_result_t redefinedKernelCreate(void *) { TestContext->KernelReferenceCount = 1; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedKernelRetain(pi_kernel kernel) { +static ur_result_t redefinedKernelRetain(void *) { ++TestContext->KernelReferenceCount; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedKernelRelease(pi_kernel kernel) { +static ur_result_t redefinedKernelRelease(void *) { --TestContext->KernelReferenceCount; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedKernelGetInfo(pi_kernel kernel, - pi_kernel_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - EXPECT_EQ(param_name, PI_KERNEL_INFO_CONTEXT) +static ur_result_t redefinedKernelGetInfo(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppropName, UR_KERNEL_INFO_CONTEXT) << "Unexpected kernel info requested"; - auto *Result = reinterpret_cast(param_value); - sycl::detail::pi::PiContext PiCtx = - detail::getSyclObjImpl(TestContext->Ctx)->getHandleRef(); - *Result = PiCtx; - return PI_SUCCESS; + auto *Result = reinterpret_cast(*params.ppPropValue); + auto UrContext = detail::getSyclObjImpl(TestContext->Ctx)->getHandleRef(); + *Result = UrContext; + return UR_RESULT_SUCCESS; } TEST(KernelReleaseTest, DISABLED_GetKernelRelease) { - sycl::unittest::PiMock Mock; - Mock.redefineBefore(redefinedKernelCreate); - Mock.redefineBefore(redefinedKernelRetain); - Mock.redefineBefore( - redefinedKernelRelease); - Mock.redefineBefore( - redefinedKernelGetInfo); - - context Ctx{Mock.getPlatform().get_devices()[0]}; + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urKernelCreate", + &redefinedKernelCreate); + mock::getCallbacks().set_before_callback("urKernelRetain", + &redefinedKernelRetain); + mock::getCallbacks().set_before_callback("urKernelRelease", + &redefinedKernelRelease); + mock::getCallbacks().set_before_callback("urKernelGetInfo", + &redefinedKernelGetInfo); + + context Ctx{sycl::platform().get_devices()[0]}; TestContext.reset(new TestCtx(Ctx)); // program Prg{Ctx}; diff --git a/sycl/unittests/kernel-and-program/MultipleDevsCache.cpp b/sycl/unittests/kernel-and-program/MultipleDevsCache.cpp index 77744401efdd4..5bcbc3150abef 100644 --- a/sycl/unittests/kernel-and-program/MultipleDevsCache.cpp +++ b/sycl/unittests/kernel-and-program/MultipleDevsCache.cpp @@ -12,8 +12,8 @@ #include "detail/kernel_bundle_impl.hpp" #include "detail/kernel_program_cache.hpp" #include -#include -#include +#include +#include #include @@ -25,86 +25,81 @@ class MultipleDevsCacheTestKernel; MOCK_INTEGRATION_HEADER(MultipleDevsCacheTestKernel) -static sycl::unittest::PiImage Img = +static sycl::unittest::UrImage Img = sycl::unittest::generateDefaultImage({"MultipleDevsCacheTestKernel"}); -static sycl::unittest::PiImageArray<1> ImgArray{&Img}; - -static pi_result redefinedDevicesGetAfter(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, - pi_device *devices, - pi_uint32 *num_devices) { - if (num_devices) { - *num_devices = static_cast(2); - return PI_SUCCESS; +static sycl::unittest::UrImageArray<1> ImgArray{&Img}; + +static ur_result_t redefinedDeviceGetAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) { + **params.ppNumDevices = static_cast(2); + return UR_RESULT_SUCCESS; } - if (num_entries == 2 && devices) { - devices[0] = reinterpret_cast(1111); - devices[1] = reinterpret_cast(2222); + if (*params.pNumEntries == 2 && *params.pphDevices) { + (*params.pphDevices)[0] = reinterpret_cast(1111); + (*params.pphDevices)[1] = reinterpret_cast(2222); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedDeviceGetInfo(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(param_value); - *Result = PI_DEVICE_TYPE_GPU; +static ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_GPU; } - if (param_name == PI_DEVICE_INFO_COMPILER_AVAILABLE) { - auto *Result = reinterpret_cast(param_value); + if (*params.ppropName == UR_DEVICE_INFO_COMPILER_AVAILABLE) { + auto *Result = reinterpret_cast(*params.ppPropValue); *Result = true; } // This mock device has no sub-devices - if (param_name == PI_DEVICE_INFO_PARTITION_PROPERTIES) { - if (param_value_size_ret) { - *param_value_size_ret = 0; + if (*params.ppropName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 0; } } - if (param_name == PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - assert(param_value_size == sizeof(pi_device_affinity_domain)); - if (param_value) { - *static_cast(param_value) = 0; + if (*params.ppropName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { + assert(*params.ppropSize == sizeof(ur_device_affinity_domain_flags_t)); + if (*params.ppPropValue) { + *static_cast(*params.ppPropValue) = + 0; } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } static int RetainCounter = 0; -static pi_result redefinedProgramRetain(pi_program program) { +static ur_result_t redefinedProgramRetain(void *) { ++RetainCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } static int KernelReleaseCounter = 0; -static pi_result redefinedKernelRelease(pi_kernel kernel) { +static ur_result_t redefinedKernelRelease(void *) { ++KernelReleaseCounter; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class MultipleDeviceCacheTest : public ::testing::Test { public: - MultipleDeviceCacheTest() : Mock{}, Plt{Mock.getPlatform()} {} + MultipleDeviceCacheTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - Mock.redefineAfter( - redefinedDevicesGetAfter); - Mock.redefineBefore( - redefinedDeviceGetInfo); - Mock.redefineBefore( - redefinedProgramRetain); - Mock.redefineBefore( - redefinedKernelRelease); + mock::getCallbacks().set_after_callback("urDeviceGet", + &redefinedDeviceGetAfter); + mock::getCallbacks().set_before_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_before_callback("urProgramRetain", + &redefinedProgramRetain); + mock::getCallbacks().set_before_callback("urKernelRelease", + &redefinedKernelRelease); } protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; platform Plt; }; @@ -132,7 +127,7 @@ TEST_F(MultipleDeviceCacheTest, ProgramRetain) { // Because of emulating 2 devices program is retained for each one in // build(). It is also depends on number of device images. This test has one // image, but other tests can create other images. Additional variable is - // added to control count of piProgramRetain calls + // added to control count of urProgramRetain calls auto BundleImpl = getSyclObjImpl(Bundle); // Bundle should only contain a single image, specifically the one with @@ -156,8 +151,8 @@ TEST_F(MultipleDeviceCacheTest, ProgramRetain) { // The kernel creating is called in handler::single_task(). // kernel_bundle::get_kernel() creates a kernel and shares it with created // programs. Also the kernel is retained in kernel_bundle::get_kernel(). A - // kernel is removed from cache if piKernelRelease was called for it, so it + // kernel is removed from cache if urKernelRelease was called for it, so it // will not be removed twice for the other programs. As a result we must - // expect 3 piKernelRelease calls. + // expect 3 urKernelRelease calls. EXPECT_EQ(KernelReleaseCounter, 3) << "Expect 3 piKernelRelease calls"; } diff --git a/sycl/unittests/kernel-and-program/OutOfResources.cpp b/sycl/unittests/kernel-and-program/OutOfResources.cpp index b839618264901..c249e6599ec5a 100644 --- a/sycl/unittests/kernel-and-program/OutOfResources.cpp +++ b/sycl/unittests/kernel-and-program/OutOfResources.cpp @@ -12,8 +12,8 @@ #include "detail/kernel_bundle_impl.hpp" #include "detail/kernel_program_cache.hpp" #include -#include -#include +#include +#include #include @@ -27,56 +27,52 @@ class OutOfResourcesKernel2; MOCK_INTEGRATION_HEADER(OutOfResourcesKernel1) MOCK_INTEGRATION_HEADER(OutOfResourcesKernel2) -static sycl::unittest::PiImage Img[2] = { +static sycl::unittest::UrImage Img[2] = { sycl::unittest::generateDefaultImage({"OutOfResourcesKernel1"}), sycl::unittest::generateDefaultImage({"OutOfResourcesKernel2"})}; -static sycl::unittest::PiImageArray<2> ImgArray{Img}; +static sycl::unittest::UrImageArray<2> ImgArray{Img}; static int nProgramCreate = 0; static volatile bool outOfResourcesToggle = false; static volatile bool outOfHostMemoryToggle = false; -static pi_result redefinedProgramCreate(pi_context context, const void *il, - size_t length, - pi_program *res_program) { +static ur_result_t redefinedProgramCreateWithIL(void *) { ++nProgramCreate; if (outOfResourcesToggle) { outOfResourcesToggle = false; - return PI_ERROR_OUT_OF_RESOURCES; + return UR_RESULT_ERROR_OUT_OF_RESOURCES; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result -redefinedProgramCreateOutOfHostMemory(pi_context context, const void *il, - size_t length, pi_program *res_program) { +static ur_result_t redefinedProgramCreateWithILOutOfHostMemory(void *) { ++nProgramCreate; if (outOfHostMemoryToggle) { outOfHostMemoryToggle = false; - return PI_ERROR_OUT_OF_HOST_MEMORY; + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -TEST(OutOfResourcesTest, piProgramCreate) { - sycl::unittest::PiMock Mock; - Mock.redefineBefore( - redefinedProgramCreate); +TEST(OutOfResourcesTest, urProgramCreate) { + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urProgramCreateWithIL", + &redefinedProgramCreateWithIL); - sycl::platform Plt{Mock.getPlatform()}; + sycl::platform Plt{sycl::platform()}; sycl::context Ctx{Plt}; auto CtxImpl = detail::getSyclObjImpl(Ctx); queue q(Ctx, default_selector_v); int runningTotal = 0; - // Cache is empty, so one piProgramCreate call. + // Cache is empty, so one urProgramCreateWithIL call. q.single_task([] {}); EXPECT_EQ(nProgramCreate, runningTotal += 1); - // Now, we make the next piProgramCreate call fail with - // PI_ERROR_OUT_OF_RESOURCES. The caching mechanism should catch this, - // clear the cache, and retry the piProgramCreate. + // Now, we make the next urProgramCreateWithIL call fail with + // UR_RESULT_ERROR_OUT_OF_RESOURCES. The caching mechanism should catch this, + // clear the cache, and retry the urProgramCreateWithIL. outOfResourcesToggle = true; q.single_task([] {}); EXPECT_FALSE(outOfResourcesToggle); @@ -87,9 +83,9 @@ TEST(OutOfResourcesTest, piProgramCreate) { EXPECT_EQ(Cache.size(), 1U) << "Expected 1 program in the cache"; } - // The next piProgramCreate call will fail with - // PI_ERROR_OUT_OF_RESOURCES. But OutOfResourcesKernel2 is in - // the cache, so we expect no new piProgramCreate calls. + // The next urProgramCreateWithIL call will fail with + // UR_RESULT_ERROR_OUT_OF_RESOURCES. But OutOfResourcesKernel2 is in + // the cache, so we expect no new urProgramCreateWithIL calls. outOfResourcesToggle = true; q.single_task([] {}); EXPECT_TRUE(outOfResourcesToggle); @@ -108,7 +104,7 @@ TEST(OutOfResourcesTest, piProgramCreate) { } // Finally, OutOfResourcesKernel1 will be in the cache, but - // OutOfResourceKenel2 will not, so one more piProgramCreate. + // OutOfResourceKenel2 will not, so one more urProgramCreateWithIL. // Toggle is not set, so this should succeed. q.single_task([] {}); q.single_task([] {}); @@ -120,27 +116,27 @@ TEST(OutOfResourcesTest, piProgramCreate) { } } -TEST(OutOfHostMemoryTest, piProgramCreate) { +TEST(OutOfHostMemoryTest, urProgramCreate) { // Reset to zero. nProgramCreate = 0; - sycl::unittest::PiMock Mock; - Mock.redefineBefore( - redefinedProgramCreateOutOfHostMemory); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback( + "urProgramCreateWithIL", &redefinedProgramCreateWithILOutOfHostMemory); - sycl::platform Plt{Mock.getPlatform()}; + sycl::platform Plt{sycl::platform()}; sycl::context Ctx{Plt}; auto CtxImpl = detail::getSyclObjImpl(Ctx); queue q(Ctx, default_selector_v); int runningTotal = 0; - // Cache is empty, so one piProgramCreate call. + // Cache is empty, so one urProgramCreateWithIL call. q.single_task([] {}); EXPECT_EQ(nProgramCreate, runningTotal += 1); - // Now, we make the next piProgramCreate call fail with - // PI_ERROR_OUT_OF_HOST_MEMORY. The caching mechanism should catch this, - // clear the cache, and retry the piProgramCreate. + // Now, we make the next urProgramCreateWithIL call fail with + // UR_RESULT_ERROR_OUT_OF_HOST_MEMORY. The caching mechanism should catch + // this, clear the cache, and retry the urProgramCreateWithIL. outOfHostMemoryToggle = true; q.single_task([] {}); EXPECT_FALSE(outOfHostMemoryToggle); @@ -151,9 +147,9 @@ TEST(OutOfHostMemoryTest, piProgramCreate) { EXPECT_EQ(Cache.size(), 1U) << "Expected 1 program in the cache"; } - // The next piProgramCreate call will fail with - // PI_ERROR_OUT_OF_HOST_MEMORY. But OutOfResourcesKernel2 is in - // the cache, so we expect no new piProgramCreate calls. + // The next urProgramCreateWithIL call will fail with + // UR_RESULT_ERROR_OUT_OF_HOST_MEMORY. But OutOfResourcesKernel2 is in the + // cache, so we expect no new urProgramCreateWithIL calls. outOfHostMemoryToggle = true; q.single_task([] {}); EXPECT_TRUE(outOfHostMemoryToggle); @@ -172,7 +168,7 @@ TEST(OutOfHostMemoryTest, piProgramCreate) { } // Finally, OutOfResourcesKernel1 will be in the cache, but - // OutOfResourceKenel2 will not, so one more piProgramCreate. + // OutOfResourceKenel2 will not, so one more urProgramCreateWithIL. // Toggle is not set, so this should succeed. q.single_task([] {}); q.single_task([] {}); @@ -186,40 +182,30 @@ TEST(OutOfHostMemoryTest, piProgramCreate) { static int nProgramLink = 0; -static pi_result -redefinedProgramLink(pi_context context, pi_uint32 num_devices, - const pi_device *device_list, const char *options, - pi_uint32 num_input_programs, - const pi_program *input_programs, - void (*pfn_notify)(pi_program program, void *user_data), - void *user_data, pi_program *ret_program) { +static ur_result_t redefinedProgramLink(void *) { ++nProgramLink; if (outOfResourcesToggle) { outOfResourcesToggle = false; - return PI_ERROR_OUT_OF_RESOURCES; + return UR_RESULT_ERROR_OUT_OF_RESOURCES; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedProgramLinkOutOfHostMemory( - pi_context context, pi_uint32 num_devices, const pi_device *device_list, - const char *options, pi_uint32 num_input_programs, - const pi_program *input_programs, - void (*pfn_notify)(pi_program program, void *user_data), void *user_data, - pi_program *ret_program) { +static ur_result_t redefinedProgramLinkOutOfHostMemory(void *) { ++nProgramLink; if (outOfHostMemoryToggle) { outOfHostMemoryToggle = false; - return PI_ERROR_OUT_OF_HOST_MEMORY; + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -TEST(OutOfResourcesTest, piProgramLink) { - sycl::unittest::PiMock Mock; - Mock.redefineBefore(redefinedProgramLink); +TEST(OutOfResourcesTest, urProgramLink) { + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urProgramLinkExp", + &redefinedProgramLink); - sycl::platform Plt{Mock.getPlatform()}; + sycl::platform Plt{sycl::platform()}; sycl::context Ctx{Plt}; auto CtxImpl = detail::getSyclObjImpl(Ctx); queue q(Ctx, default_selector_v); @@ -250,15 +236,15 @@ TEST(OutOfResourcesTest, piProgramLink) { } } -TEST(OutOfHostMemoryTest, piProgramLink) { +TEST(OutOfHostMemoryTest, urProgramLink) { // Reset to zero. nProgramLink = 0; - sycl::unittest::PiMock Mock; - Mock.redefineBefore( - redefinedProgramLinkOutOfHostMemory); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback( + "urProgramLinkExp", &redefinedProgramLinkOutOfHostMemory); - sycl::platform Plt{Mock.getPlatform()}; + sycl::platform Plt{sycl::platform()}; sycl::context Ctx{Plt}; auto CtxImpl = detail::getSyclObjImpl(Ctx); queue q(Ctx, default_selector_v); diff --git a/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp b/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp index 94dc90ae2ca11..c1f4af826c8eb 100644 --- a/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp +++ b/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp @@ -11,7 +11,7 @@ #include "detail/persistent_device_code_cache.hpp" #include #include -#include +#include #include #include #include @@ -54,24 +54,21 @@ std::vector> Progs = { static unsigned char DeviceCodeID = 2; -static pi_result redefinedProgramGetInfoAfter(pi_program program, - pi_program_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_PROGRAM_INFO_NUM_DEVICES) { - auto value = reinterpret_cast(param_value); +static ur_result_t redefinedProgramGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_PROGRAM_INFO_NUM_DEVICES) { + auto value = reinterpret_cast(*params.ppPropValue); *value = Progs[DeviceCodeID].size(); } - if (param_name == PI_PROGRAM_INFO_BINARY_SIZES) { - auto value = reinterpret_cast(param_value); + if (*params.ppropName == UR_PROGRAM_INFO_BINARY_SIZES) { + auto value = reinterpret_cast(*params.ppPropValue); for (size_t i = 0; i < Progs[DeviceCodeID].size(); ++i) value[i] = Progs[DeviceCodeID][i]; } - if (param_name == PI_PROGRAM_INFO_BINARIES) { - auto value = reinterpret_cast(param_value); + if (*params.ppropName == UR_PROGRAM_INFO_BINARIES) { + auto value = reinterpret_cast(*params.ppPropValue); for (size_t i = 0; i < Progs[DeviceCodeID].size(); ++i) { for (int j = 0; j < Progs[DeviceCodeID][i]; ++j) { value[i][j] = i; @@ -79,7 +76,7 @@ static pi_result redefinedProgramGetInfoAfter(pi_program program, } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class PersistentDeviceCodeCache @@ -162,7 +159,7 @@ class PersistentDeviceCodeCache ResetSYCLCacheDirEnv(); } - PersistentDeviceCodeCache() : Mock{}, Plt{Mock.getPlatform()} { + PersistentDeviceCodeCache() : Mock{}, Plt{sycl::platform()} { char *SYCLCacheDir = getenv("SYCL_CACHE_DIR"); if (!SYCLCacheDir) { @@ -173,8 +170,8 @@ class PersistentDeviceCodeCache RootSYCLCacheDir = SYCLCacheDir; Dev = Plt.get_devices()[0]; - Mock.redefineAfter( - redefinedProgramGetInfoAfter); + mock::getCallbacks().set_after_callback("urProgramGetInfo", + &redefinedProgramGetInfoAfter); } /* Helper function for concurent cache item read/write from diffrent number @@ -219,7 +216,7 @@ class PersistentDeviceCodeCache } protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; platform Plt; device Dev; const char *EntryName = "Entry"; @@ -242,7 +239,7 @@ class PersistentDeviceCodeCache /*PropertySetsEnd*/ nullptr}; sycl_device_binary Bin = &BinStruct; detail::RTDeviceBinaryImage Img{Bin}; - sycl::detail::pi::PiProgram NativeProg; + ur_program_handle_t NativeProg; }; /* Checks that key values with \0 symbols are processed correctly diff --git a/sycl/unittests/pi/BackendString.hpp b/sycl/unittests/pi/BackendString.hpp deleted file mode 100644 index ff50480163208..0000000000000 --- a/sycl/unittests/pi/BackendString.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#pragma once - -#include -#include - -namespace pi { -inline std::string GetBackendString(const sycl::detail::PluginPtr &Plugin) { - std::stringstream Str; - for (sycl::backend Backend : - {sycl::backend::opencl, sycl::backend::ext_oneapi_level_zero, - sycl::backend::ext_oneapi_cuda, sycl::backend::ext_oneapi_hip}) { - if (Plugin->hasBackend(Backend)) { - Str << Backend; - } - } - return Str.str(); -} -} // namespace pi diff --git a/sycl/unittests/pi/CMakeLists.txt b/sycl/unittests/pi/CMakeLists.txt deleted file mode 100644 index 861fc41069c7e..0000000000000 --- a/sycl/unittests/pi/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -set(CMAKE_CXX_EXTENSIONS OFF) - -add_sycl_unittest(PiTests OBJECT - PiMock.cpp - PiUtility.cpp - pi_arguments_handler.cpp - piInteropRetain.cpp -) - -add_dependencies(PiTests sycl) -target_include_directories(PiTests PRIVATE SYSTEM ${sycl_inc_dir}) -target_include_directories(PiTests PRIVATE ${sycl_src_dir}/../tools/xpti_helpers) - diff --git a/sycl/unittests/pi/PiMock.cpp b/sycl/unittests/pi/PiMock.cpp deleted file mode 100644 index 02044d9631376..0000000000000 --- a/sycl/unittests/pi/PiMock.cpp +++ /dev/null @@ -1,166 +0,0 @@ -//==--------- PiMock.cpp --- A test for mock helper API's ------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include - -#include - -using namespace sycl; - -static bool GpiProgramBuildRedefineCalled = false; -static bool GpiKernelCreateRedefineCalled = false; -static bool GpiProgramRetainCalled = false; -static bool GpiContextCreateRedefineCalledAfter = false; -static bool GpiQueueCreateRedefineCalledBefore = false; - -pi_result piQueueCreateRedefineBefore(pi_context context, pi_device device, - pi_queue_properties properties, - pi_queue *queue) { - // The context should have been set by the original function - GpiQueueCreateRedefineCalledBefore = *queue == nullptr; - // Returning an error should stop calls to all redefined functions - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piContextCreateRedefineAfter( - const pi_context_properties *properties, pi_uint32 num_devices, - const pi_device *devices, - void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, - void *user_data), - void *user_data, pi_context *ret_context) { - // The context should have been set by the original function - GpiContextCreateRedefineCalledAfter = *ret_context != nullptr; - return PI_SUCCESS; -} - -pi_result piProgramBuildRedefine(pi_program, pi_uint32, const pi_device *, - const char *, void (*)(pi_program, void *), - void *) { - GpiProgramBuildRedefineCalled = true; - return PI_SUCCESS; -} - -pi_result piKernelCreateRedefine(pi_program, const char *, pi_kernel *) { - GpiKernelCreateRedefineCalled = true; - return PI_SUCCESS; -} - -TEST(PiMockTest, ConstructFromQueue) { - sycl::unittest::PiMock Mock; - queue MockQ{Mock.getPlatform().get_devices()[0]}; - queue NormalQ; - - const auto &NormalPiPlugin = - detail::getSyclObjImpl(NormalQ)->getPlugin()->getPiPlugin(); - const auto &MockedQueuePiPlugin = - detail::getSyclObjImpl(MockQ)->getPlugin()->getPiPlugin(); - const auto &PiMockPlugin = - detail::getSyclObjImpl(Mock.getPlatform())->getPlugin()->getPiPlugin(); - EXPECT_EQ(&MockedQueuePiPlugin, &PiMockPlugin) - << "The mocked object and the PiMock instance must share the same plugin"; - EXPECT_EQ(&NormalPiPlugin, &MockedQueuePiPlugin) - << "Normal and mock platforms must share the same plugin"; -} - -TEST(PiMockTest, ConstructFromPlatform) { - sycl::unittest::PiMock Mock; - sycl::platform MockPlatform = Mock.getPlatform(); - platform NormalPlatform(default_selector{}); - - const auto &NormalPiPlugin = - detail::getSyclObjImpl(NormalPlatform)->getPlugin()->getPiPlugin(); - const auto &MockedPlatformPiPlugin = - detail::getSyclObjImpl(MockPlatform)->getPlugin()->getPiPlugin(); - const auto &PiMockPlugin = - detail::getSyclObjImpl(Mock.getPlatform())->getPlugin()->getPiPlugin(); - EXPECT_EQ(&MockedPlatformPiPlugin, &PiMockPlugin) - << "The mocked object and the PiMock instance must share the same plugin"; - EXPECT_EQ(&NormalPiPlugin, &MockedPlatformPiPlugin) - << "Normal and mock platforms must share the same plugin"; -} - -TEST(PiMockTest, RedefineAPI) { - sycl::unittest::PiMock Mock; - const auto &MockPiPlugin = - detail::getSyclObjImpl(Mock.getPlatform())->getPlugin()->getPiPlugin(); - const auto &Table = MockPiPlugin.PiFunctionTable; - - // Pass a function pointer - Mock.redefine(piProgramBuildRedefine); - Table.piProgramBuild(/*pi_program*/ nullptr, /*num_devices=*/0, - /*device_list = */ nullptr, - /*options=*/nullptr, /*pfn_notify=*/nullptr, - /*user_data=*/nullptr); - - EXPECT_TRUE(GpiProgramBuildRedefineCalled) - << "Function redefinition didn't propagate to the mock plugin"; - - // Pass a std::function - Mock.redefine({piKernelCreateRedefine}); - - Table.piKernelCreate(/*pi_program=*/nullptr, /*kernel_name=*/nullptr, - /*pi_kernel=*/nullptr); - EXPECT_TRUE(GpiKernelCreateRedefineCalled) - << "Function redefinition didn't propagate to the mock plugin"; - - // Pass a captureless lambda - auto Lambda = [](pi_program) -> pi_result { - GpiProgramRetainCalled = true; - return PI_SUCCESS; - }; - Mock.redefine(Lambda); - Table.piProgramRetain(/*pi_program=*/nullptr); - - EXPECT_TRUE(GpiProgramRetainCalled) - << "Passing a lambda didn't change the function table entry"; -} - -TEST(PiMockTest, RedefineAfterAPI) { - sycl::unittest::PiMock Mock; - - const auto &MockPiPlugin = - detail::getSyclObjImpl(Mock.getPlatform())->getPlugin()->getPiPlugin(); - const auto &Table = MockPiPlugin.PiFunctionTable; - - // Pass a function pointer - Mock.redefineAfter( - piContextCreateRedefineAfter); - - pi_context PIContext = nullptr; - Table.piContextCreate( - /*pi_context_properties=*/nullptr, /*num_devices=*/0, - /*devices=*/nullptr, /*pfn_notify=*/nullptr, - /*user_data=*/nullptr, &PIContext); - - EXPECT_TRUE(GpiContextCreateRedefineCalledAfter) - << "The additional function is not called after the original one"; -} - -TEST(PiMockTest, RedefineBeforeAPI) { - sycl::unittest::PiMock Mock; - - const auto &MockPiPlugin = - detail::getSyclObjImpl(Mock.getPlatform())->getPlugin()->getPiPlugin(); - const auto &Table = MockPiPlugin.PiFunctionTable; - - // Pass a function pointer - Mock.redefineBefore( - piQueueCreateRedefineBefore); - - pi_queue Queue = nullptr; - Table.piQueueCreate(/*pi_context=*/nullptr, /*pi_device=*/nullptr, - /*pi_queue_properties=*/0, &Queue); - - EXPECT_TRUE(GpiQueueCreateRedefineCalledBefore) - << "The additional function is not called before the original one"; - - EXPECT_TRUE(nullptr == Queue) << "Queue is expected to be non-initialized as " - "the original function should not be called"; -} diff --git a/sycl/unittests/pi/PiUtility.cpp b/sycl/unittests/pi/PiUtility.cpp deleted file mode 100644 index b3f7d44ba2ec1..0000000000000 --- a/sycl/unittests/pi/PiUtility.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//==--------------------- PiUtility.cpp -- check for internal PI utilities -==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include - -#include - -namespace { -using namespace sycl; - -TEST(PiUtilityTest, CheckPiCastScalar) { - std::int32_t I = 42; - std::int64_t L = 1234; - float F = 31.2f; - double D = 4321.1234; - float ItoF = detail::pi::cast(I); - double LtoD = detail::pi::cast(L); - std::int32_t FtoI = detail::pi::cast(F); - std::int32_t DtoL = detail::pi::cast(D); - EXPECT_EQ((std::int32_t)F, FtoI); - EXPECT_EQ((float)I, ItoF); - EXPECT_EQ((std::int64_t)D, DtoL); - EXPECT_EQ((double)L, LtoD); -} - -TEST(PiUtilityTest, CheckPiCastVector) { - std::vector IVec{6, 1, 5, 2, 3, 4}; - std::vector IVecToFVec = detail::pi::cast>(IVec); - ASSERT_EQ(IVecToFVec.size(), IVec.size()); - for (size_t I = 0; I < IVecToFVec.size(); ++I) - EXPECT_EQ(IVecToFVec[I], (float)IVec[I]); -} - -TEST(PiUtilityTest, CheckPiCastOCLEventVector) { - // Current special case for vectors of OpenCL vectors. This may change in the - // future. - std::vector EVec{(cl_event)0}; - pi_native_handle ENativeHandle = detail::pi::cast(EVec); - EXPECT_EQ(ENativeHandle, (pi_native_handle)EVec[0]); -} - -} // namespace diff --git a/sycl/unittests/pi/TestGetPlatforms.hpp b/sycl/unittests/pi/TestGetPlatforms.hpp deleted file mode 100644 index c089bad858a42..0000000000000 --- a/sycl/unittests/pi/TestGetPlatforms.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#pragma once - -#include - -#include -#include -#include - -namespace pi { -inline std::vector getPlatformsWithName(const char *name) { - std::vector platforms = sycl::platform::get_platforms(); - - // Remove platforms that have no devices or doesn't contain the name - auto end = - std::remove_if(platforms.begin(), platforms.end(), - [=](const sycl::platform &platform) -> bool { - const std::string platformName = - platform.get_info(); - return platformName.find(name) == std::string::npos || - platform.get_devices().size() == 0; - }); - platforms.erase(end, platforms.end()); - - return platforms; -} -} // namespace pi \ No newline at end of file diff --git a/sycl/unittests/pi/TestGetPlugin.hpp b/sycl/unittests/pi/TestGetPlugin.hpp deleted file mode 100644 index 774d65c02f420..0000000000000 --- a/sycl/unittests/pi/TestGetPlugin.hpp +++ /dev/null @@ -1,58 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#pragma once - -#include "BackendString.hpp" -#include -#include -#include -#include - -namespace pi { -inline std::optional -initializeAndGet(sycl::backend backend) { - const auto &plugins = sycl::detail::pi::initialize(); - auto it = std::find_if(plugins.begin(), plugins.end(), - [=](sycl::detail::PluginPtr p) -> bool { - return p->hasBackend(backend); - }); - if (it == plugins.end()) { - std::stringstream strstr; - strstr << backend; - std::string msg = strstr.str(); - msg += " PI plugin not found!"; - std::cerr << "Warning: " << msg << " Tests using it will be skipped.\n"; - return std::nullopt; - } - return std::optional(*it); -} - -inline std::vector initializeAndRemoveInvalid() { - auto &plugins = sycl::detail::pi::initialize(); - - auto end = std::remove_if( - plugins.begin(), plugins.end(), - [](const sycl::detail::PluginPtr &plugin) -> bool { - pi_uint32 num = 0; - plugin->call_nocheck( - 0, nullptr, &num); - - bool removePlugin = num <= 0; - - if (removePlugin) { - std::cerr - << "Warning: " - << " PI API plugin returned no platforms via piPlatformsGet. " - "This plugin will be removed from testing.\n"; - } - - return removePlugin; - }); - - plugins.erase(end, plugins.end()); - - return plugins; -} -} // namespace pi diff --git a/sycl/unittests/pi/pi_arguments_handler.cpp b/sycl/unittests/pi/pi_arguments_handler.cpp deleted file mode 100644 index 54f4cc9dfb1c5..0000000000000 --- a/sycl/unittests/pi/pi_arguments_handler.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//==------- pi_arguments_handler.cpp --- A test for XPTI PI args helper ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "pi_arguments_handler.hpp" - -#include - -#include - -TEST(PiArgumentsHandlerTest, CanUnpackArguments) { - sycl::xpti_helpers::PiArgumentsHandler Handler; - - const pi_uint32 NumPlatforms = 42; - pi_platform *Platforms = new pi_platform[NumPlatforms]; - - Handler.set_piPlatformsGet([&](const pi_plugin &, std::optional, - pi_uint32 NP, pi_platform *Plts, - pi_uint32 *Ret) { - EXPECT_EQ(NP, NumPlatforms); - EXPECT_EQ(Platforms, Plts); - EXPECT_EQ(Ret, nullptr); - }); - - constexpr size_t Size = sizeof(pi_uint32) + 2 * sizeof(void *); - std::array Data{0}; - *reinterpret_cast(Data.data()) = NumPlatforms; - *reinterpret_cast(Data.data() + sizeof(pi_uint32)) = - Platforms; - - pi_plugin Plugin{}; - uint32_t ID = static_cast(sycl::detail::PiApiKind::piPlatformsGet); - Handler.handle(ID, Plugin, std::nullopt, Data.data()); - - delete[] Platforms; -} diff --git a/sycl/unittests/pipes/host_pipe_registration.cpp b/sycl/unittests/pipes/host_pipe_registration.cpp index 202c82a79a548..b625b9f5fc5cc 100644 --- a/sycl/unittests/pipes/host_pipe_registration.cpp +++ b/sycl/unittests/pipes/host_pipe_registration.cpp @@ -13,8 +13,8 @@ #include #include #include -#include -#include +#include +#include #include class TestKernel; @@ -29,23 +29,23 @@ class PipeID; using Pipe = sycl::ext::intel::experimental::pipe; -static sycl::unittest::PiImage generateDefaultImage() { +static sycl::unittest::UrImage generateDefaultImage() { using namespace sycl::unittest; sycl::detail::host_pipe_map::add(Pipe::get_host_ptr(), "test_host_pipe_unique_id"); - PiPropertySet PropSet; - PiProperty HostPipeInfo = + UrPropertySet PropSet; + UrProperty HostPipeInfo = makeHostPipeInfo("test_host_pipe_unique_id", sizeof(int)); PropSet.insert(__SYCL_PROPERTY_SET_SYCL_HOST_PIPES, - PiArray{std::move(HostPipeInfo)}); + UrArray{std::move(HostPipeInfo)}); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = makeEmptyKernels({"TestKernel"}); + UrArray Entries = makeEmptyKernels({"TestKernel"}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -56,62 +56,59 @@ static sycl::unittest::PiImage generateDefaultImage() { return Img; } -pi_event READ = reinterpret_cast(0); -pi_event WRITE = reinterpret_cast(1); +ur_event_handle_t READ = reinterpret_cast(0); +ur_event_handle_t WRITE = reinterpret_cast(1); static constexpr int PipeReadVal = 8; static int PipeWriteVal = 0; -pi_result redefinedEnqueueReadHostPipe(pi_queue, pi_program, const char *, - pi_bool, void *ptr, size_t, pi_uint32, - const pi_event *, pi_event *event) { - *event = createDummyHandle(); - *(((int *)ptr)) = PipeReadVal; - return PI_SUCCESS; +ur_result_t redefinedEnqueueReadHostPipe(void *pParams) { + auto params = *static_cast(pParams); + **params.pphEvent = mock::createDummyHandle(); + *(((int *)(*params.ppDst))) = PipeReadVal; + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueWriteHostPipe(pi_queue, pi_program, const char *, - pi_bool, void *ptr, size_t, pi_uint32, - const pi_event *, pi_event *event) { - *event = createDummyHandle(); + +ur_result_t redefinedEnqueueWriteHostPipe(void *pParams) { + auto params = *static_cast(pParams); + **params.pphEvent = mock::createDummyHandle(); PipeWriteVal = 9; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result after_piDeviceGetInfo(pi_device device, pi_device_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +ur_result_t after_urDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); constexpr char MockSupportedExtensions[] = "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program " "cl_intel_program_scope_host_pipe"; - switch (param_name) { - case PI_DEVICE_INFO_EXTENSIONS: { - if (param_value) { - std::ignore = param_value_size; - assert(param_value_size >= sizeof(MockSupportedExtensions)); - std::memcpy(param_value, MockSupportedExtensions, + switch (*params.ppropName) { + case UR_DEVICE_INFO_EXTENSIONS: + if (*params.ppPropValue) { + std::ignore = *params.ppropSize; + assert(*params.ppropSize >= sizeof(MockSupportedExtensions)); + std::memcpy(*params.ppPropValue, MockSupportedExtensions, sizeof(MockSupportedExtensions)); } - if (param_value_size_ret) - *param_value_size_ret = sizeof(MockSupportedExtensions); - return PI_SUCCESS; - } + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(MockSupportedExtensions); + return UR_RESULT_SUCCESS; default:; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -void preparePiMock(unittest::PiMock &Mock) { - Mock.redefine( - redefinedEnqueueReadHostPipe); - Mock.redefine( - redefinedEnqueueWriteHostPipe); +void prepareUrMock(unittest::UrMock<> &Mock) { + mock::getCallbacks().set_replace_callback("urEnqueueReadHostPipe", + &redefinedEnqueueReadHostPipe); + mock::getCallbacks().set_replace_callback("urEnqueueWriteHostPipe", + &redefinedEnqueueWriteHostPipe); } class PipeTest : public ::testing::Test { public: - PipeTest() : Mock{}, Plt{Mock.getPlatform()} {} + PipeTest() : Mock{}, Plt{sycl::platform()} {} protected: void SetUp() override { - preparePiMock(Mock); + prepareUrMock(Mock); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; sycl::queue Q{Ctx, Dev}; @@ -120,19 +117,19 @@ class PipeTest : public ::testing::Test { } protected: - unittest::PiMock Mock; + unittest::UrMock<> Mock; sycl::platform Plt; context ctx; queue q; }; -static sycl::unittest::PiImage Img = generateDefaultImage(); -static sycl::unittest::PiImageArray<1> ImgArray{&Img}; +static sycl::unittest::UrImage Img = generateDefaultImage(); +static sycl::unittest::UrImageArray<1> ImgArray{&Img}; TEST_F(PipeTest, Basic) { // Fake extension - Mock.redefineAfter( - after_piDeviceGetInfo); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); // Device registration @@ -148,27 +145,26 @@ TEST_F(PipeTest, Basic) { } bool EventsWaitFails = true; -pi_result redefinedEventsWait(pi_uint32 num_events, - const pi_event *event_list) { - return EventsWaitFails ? PI_ERROR_UNKNOWN : PI_SUCCESS; +ur_result_t redefinedEventWait(void *) { + return EventsWaitFails ? UR_RESULT_ERROR_UNKNOWN : UR_RESULT_SUCCESS; } -pi_result after_piEventGetInfo(pi_event event, pi_event_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - if (param_value) - *static_cast(param_value) = pi_event_status(-1); - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_event_status); +ur_result_t after_urEventGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = + ur_event_status_t(-1); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_event_status_t); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST_F(PipeTest, NonBlockingOperationFail) { - Mock.redefineAfter( - after_piDeviceGetInfo); - Mock.redefine(redefinedEventsWait); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_replace_callback("urEventWait", &redefinedEventWait); bool Success = false; Pipe::read(q, Success); @@ -179,8 +175,8 @@ TEST_F(PipeTest, NonBlockingOperationFail) { // Test the OpenCL 1.0 case: no error code after waiting. EventsWaitFails = false; - Mock.redefineAfter( - after_piEventGetInfo); + mock::getCallbacks().set_after_callback("urEventGetInfo", + &after_urEventGetInfo); Pipe::read(q, Success); ASSERT_FALSE(Success); diff --git a/sycl/unittests/program_manager/BuildLog.cpp b/sycl/unittests/program_manager/BuildLog.cpp index a1829d4c06e4d..fc4ff8abbbde2 100644 --- a/sycl/unittests/program_manager/BuildLog.cpp +++ b/sycl/unittests/program_manager/BuildLog.cpp @@ -12,9 +12,9 @@ #include #include #include -#include -#include #include +#include +#include #include #include @@ -29,38 +29,37 @@ static constexpr auto WarningLevelEnvVar = "SYCL_RT_WARNING_LEVEL"; static bool LogRequested = false; -static pi_result redefinedProgramGetBuildInfo( - pi_program program, pi_device device, pi_program_build_info param_name, - size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - - if (param_value_size_ret) { - *param_value_size_ret = 1; +static ur_result_t redefinedProgramGetBuildInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 1; } - if (param_value) { - *static_cast(param_value) = '1'; + if (*params.ppPropValue) { + *static_cast(*params.ppPropValue) = '1'; } - if (param_name == PI_PROGRAM_BUILD_INFO_LOG) { + if (*params.ppropName == UR_PROGRAM_BUILD_INFO_LOG) { LogRequested = true; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static void setupCommonTestAPIs(sycl::unittest::PiMock &Mock) { +static void setupCommonTestAPIs(sycl::unittest::UrMock<> &Mock) { using namespace sycl::detail; - Mock.redefineBefore( - redefinedProgramGetBuildInfo); + mock::getCallbacks().set_before_callback("urProgramGetBuildInfo", + &redefinedProgramGetBuildInfo); } TEST(BuildLog, OutputNothingOnLevel1) { + sycl::unittest::UrMock<> mock; using namespace sycl::detail; using namespace sycl::unittest; ScopedEnvVar var(WarningLevelEnvVar, "1", SYCLConfig::reset); - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); setupCommonTestAPIs(Mock); const sycl::device Dev = Plt.get_devices()[0]; @@ -80,13 +79,14 @@ TEST(BuildLog, OutputNothingOnLevel1) { } TEST(BuildLog, OutputLogOnLevel2) { + sycl::unittest::UrMock<> mock; using namespace sycl::detail; using namespace sycl::unittest; ScopedEnvVar var(WarningLevelEnvVar, "2", SYCLConfig::reset); - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); setupCommonTestAPIs(Mock); const sycl::device Dev = Plt.get_devices()[0]; diff --git a/sycl/unittests/program_manager/DynamicLinking.cpp b/sycl/unittests/program_manager/DynamicLinking.cpp index 645b71efd74e1..c8c9b101baf9e 100644 --- a/sycl/unittests/program_manager/DynamicLinking.cpp +++ b/sycl/unittests/program_manager/DynamicLinking.cpp @@ -1,9 +1,9 @@ #include #include -#include -#include #include +#include +#include #include @@ -37,31 +37,32 @@ KERNEL_INFO(AOTCaseKernel) } // namespace sycl namespace { -sycl::unittest::PiArray +sycl::unittest::UrArray createPropertySet(const std::vector &Symbols) { - sycl::unittest::PiPropertySet PropSet; - sycl::unittest::PiArray Props; + sycl::unittest::UrPropertySet PropSet; + sycl::unittest::UrArray Props; for (const std::string &Symbol : Symbols) { - std::vector Storage(sizeof(pi_uint32)); + std::vector Storage(sizeof(uint32_t)); uint32_t Val = 1; auto *DataPtr = reinterpret_cast(&Val); std::uninitialized_copy(DataPtr, DataPtr + sizeof(uint32_t), Storage.data()); - sycl::unittest::PiProperty Prop(Symbol, Storage, SYCL_PROPERTY_TYPE_UINT32); + + sycl::unittest::UrProperty Prop(Symbol, Storage, SYCL_PROPERTY_TYPE_UINT32); Props.push_back(Prop); } return Props; } -sycl::unittest::PiImage +sycl::unittest::UrImage generateImage(std::initializer_list KernelNames, const std::vector &ExportedSymbols, const std::vector &ImportedSymbols, unsigned char Magic, - sycl::detail::pi::PiDeviceBinaryType BinType = + sycl::detail::ur::DeviceBinaryType BinType = SYCL_DEVICE_BINARY_TYPE_SPIRV) { - sycl::unittest::PiPropertySet PropSet; + sycl::unittest::UrPropertySet PropSet; if (!ExportedSymbols.empty()) PropSet.insert(__SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS, createPropertySet(ExportedSymbols)); @@ -70,10 +71,10 @@ generateImage(std::initializer_list KernelNames, createPropertySet(ImportedSymbols)); std::vector Bin{Magic}; - sycl::unittest::PiArray Entries = + sycl::unittest::UrArray Entries = sycl::unittest::makeEmptyKernels(KernelNames); - sycl::unittest::PiImage Img{ + sycl::unittest::UrImage Img{ BinType, __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options @@ -95,7 +96,7 @@ static constexpr unsigned MUTUAL_DEP_PRG_B = 17; static constexpr unsigned AOT_CASE_PRG_NATIVE = 23; static constexpr unsigned AOT_CASE_PRG_DEP_NATIVE = 29; -static sycl::unittest::PiImage Imgs[] = { +static sycl::unittest::UrImage Imgs[] = { generateImage({"BasicCaseKernel"}, {}, {"BasicCaseKernelDep"}, BASIC_CASE_PRG), generateImage({"BasicCaseKernelDep"}, {"BasicCaseKernelDep"}, @@ -119,20 +120,21 @@ static sycl::unittest::PiImage Imgs[] = { AOT_CASE_PRG_DEP_NATIVE, SYCL_DEVICE_BINARY_TYPE_NATIVE)}; // Registers mock devices images in the SYCL RT -static sycl::unittest::PiImageArray<9> ImgArray{Imgs}; +static sycl::unittest::UrImageArray<9> ImgArray{Imgs}; TEST(DynamicLinking, BasicCase) { - auto Mock = setupRuntimeLinkingMock(); + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); CapturedLinkingData.clear(); Q.single_task([=]() {}); - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 3u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 3u); // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramLinkCalls, 1u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( {BASIC_CASE_PRG, BASIC_CASE_PRG_DEP, BASIC_CASE_PRG_DEP_DEP})); // And the linked program should be used to create a kernel. @@ -142,8 +144,8 @@ TEST(DynamicLinking, BasicCase) { TEST(DynamicLinking, UnresolvedDep) { try { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); Q.single_task([=]() {}); FAIL(); @@ -155,17 +157,18 @@ TEST(DynamicLinking, UnresolvedDep) { } TEST(DynamicLinking, MutualDependency) { - auto Mock = setupRuntimeLinkingMock(); + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); CapturedLinkingData.clear(); Q.single_task([=]() {}); - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 2u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 2u); // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramLinkCalls, 1u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( {MUTUAL_DEP_PRG_A, MUTUAL_DEP_PRG_B})); // And the linked program should be used to create a kernel. @@ -177,13 +180,13 @@ TEST(DynamicLinking, MutualDependency) { Q.single_task([=]() {}); // The program contianing this kernel should be taken from the // in-memory cache. - ASSERT_EQ(CapturedLinkingData.NumOfPiProgramCreateCalls, 0u); + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 0u); } TEST(DynamicLinking, AheadOfTime) { try { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); Q.single_task([=]() {}); FAIL(); diff --git a/sycl/unittests/program_manager/SubDevices.cpp b/sycl/unittests/program_manager/SubDevices.cpp index 21dfde831ac41..26d5b4cf148a0 100644 --- a/sycl/unittests/program_manager/SubDevices.cpp +++ b/sycl/unittests/program_manager/SubDevices.cpp @@ -8,88 +8,77 @@ #include -#include -#include +#include +#include #include #include -static pi_device rootDevice; -static pi_device piSubDev1 = (pi_device)0x1; -static pi_device piSubDev2 = (pi_device)0x2; +static ur_device_handle_t rootDevice; +static ur_device_handle_t urSubDev1 = (ur_device_handle_t)0x1; +static ur_device_handle_t urSubDev2 = (ur_device_handle_t)0x2; namespace { -pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_PARTITION_PROPERTIES) { - if (!param_value) { - *param_value_size_ret = 2 * sizeof(pi_device_partition_property); +ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + if (!*params.ppPropValue) { + **params.ppPropSizeRet = 2 * sizeof(ur_device_partition_t); } else { - ((pi_device_partition_property *)param_value)[0] = - PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - ((pi_device_partition_property *)param_value)[1] = - PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + ((ur_device_partition_t *)*params.ppPropValue)[0] = + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + ((ur_device_partition_t *)*params.ppPropValue)[1] = + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; } } - if (param_name == PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - if (!param_value) { - *param_value_size_ret = sizeof(pi_device_affinity_domain); + if (*params.ppropName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { + if (!*params.ppPropValue) { + **params.ppPropSizeRet = sizeof(ur_device_affinity_domain_flags_t); } else { - ((pi_device_affinity_domain *)param_value)[0] = - PI_DEVICE_AFFINITY_DOMAIN_NUMA | - PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; + ((ur_device_affinity_domain_flags_t *)*params.ppPropValue)[0] = + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA | + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; } } - if (param_name == PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES) { - ((pi_uint32 *)param_value)[0] = 2; + if (*params.ppropName == UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES) { + ((uint32_t *)*params.ppPropValue)[0] = 2; } - if (param_name == PI_DEVICE_INFO_PARENT_DEVICE) { - if (device == piSubDev1 || device == piSubDev2) - ((pi_device *)param_value)[0] = rootDevice; + if (*params.ppropName == UR_DEVICE_INFO_PARENT_DEVICE) { + if (*params.phDevice == urSubDev1 || *params.phDevice == urSubDev2) + ((ur_device_handle_t *)*params.ppPropValue)[0] = rootDevice; else - ((pi_device *)param_value)[0] = nullptr; + ((ur_device_handle_t *)*params.ppPropValue)[0] = nullptr; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedDevicePartition( - pi_device Device, const pi_device_partition_property *Properties, - pi_uint32 NumDevices, pi_device *OutDevices, pi_uint32 *OutNumDevices) { - if (OutNumDevices) - *OutNumDevices = 2; - if (OutDevices) { - OutDevices[0] = {}; - OutDevices[1] = {}; +ur_result_t redefinedDevicePartition(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevicesRet) + **params.ppNumDevicesRet = 2; + if (*params.pphSubDevices) { + (*params.pphSubDevices)[0] = {}; + (*params.pphSubDevices)[1] = {}; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedDeviceRetain(pi_device c) { return PI_SUCCESS; } +ur_result_t redefinedDeviceRetain(void *) { return UR_RESULT_SUCCESS; } -pi_result redefinedDeviceRelease(pi_device c) { return PI_SUCCESS; } +ur_result_t redefinedDeviceRelease(void *) { return UR_RESULT_SUCCESS; } -pi_result redefinedProgramBuild( - pi_program prog, pi_uint32, const pi_device *, const char *, - void (*pfn_notify)(pi_program program, void *user_data), void *user_data) { +ur_result_t redefinedProgramBuild(void *) { static int m = 0; m++; // if called more than once return an error if (m > 1) - return PI_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedContextCreate(const pi_context_properties *Properties, - pi_uint32 NumDevices, const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, - const void *PrivateInfo, - size_t CB, void *UserData), - void *UserData, pi_context *RetContext) { - return PI_SUCCESS; -} +ur_result_t redefinedContextCreate(void *) { return UR_RESULT_SUCCESS; } } // anonymous namespace // Check that program is built once for all sub-devices @@ -97,20 +86,20 @@ pi_result redefinedContextCreate(const pi_context_properties *Properties, // context. TEST(SubDevices, DISABLED_BuildProgramForSubdevices) { // Setup Mock APIs - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedDeviceGetInfo); - Mock.redefineBefore( - redefinedDevicePartition); - Mock.redefineBefore( - redefinedDeviceRetain); - Mock.redefineBefore( - redefinedDeviceRelease); - Mock.redefineBefore( - redefinedProgramBuild); - Mock.redefineBefore( - redefinedContextCreate); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_before_callback("urDevicePartition", + &redefinedDevicePartition); + mock::getCallbacks().set_before_callback("urDeviceRetain", + &redefinedDeviceRetain); + mock::getCallbacks().set_before_callback("urDeviceRelease", + &redefinedDeviceRelease); + mock::getCallbacks().set_before_callback("urProgramBuild", + &redefinedProgramBuild); + mock::getCallbacks().set_before_callback("urContextCreate", + &redefinedContextCreate); // Create 2 sub-devices and use first platform device as a root device const sycl::device device = Plt.get_devices()[0]; @@ -119,9 +108,9 @@ TEST(SubDevices, DISABLED_BuildProgramForSubdevices) { // Initialize sub-devices auto PltImpl = sycl::detail::getSyclObjImpl(Plt); auto subDev1 = - std::make_shared(piSubDev1, PltImpl); + std::make_shared(urSubDev1, PltImpl); auto subDev2 = - std::make_shared(piSubDev2, PltImpl); + std::make_shared(urSubDev2, PltImpl); sycl::context Ctx{ {device, sycl::detail::createSyclObjFromImpl(subDev1), sycl::detail::createSyclObjFromImpl(subDev2)}}; @@ -133,12 +122,12 @@ TEST(SubDevices, DISABLED_BuildProgramForSubdevices) { sycl::detail::ProgramManager::getInstance().addImages(&devBinStruct); // Build program via getBuiltPIProgram API - sycl::detail::ProgramManager::getInstance().getBuiltPIProgram( + sycl::detail::ProgramManager::getInstance().getBuiltURProgram( sycl::detail::getSyclObjImpl(Ctx), subDev1, sycl::detail::KernelInfo>::getName()); - // This call should re-use built binary from the cache. If piProgramBuild is + // This call should re-use built binary from the cache. If urProgramBuild is // called again, the test will fail as second call of redefinedProgramBuild - sycl::detail::ProgramManager::getInstance().getBuiltPIProgram( + sycl::detail::ProgramManager::getInstance().getBuiltURProgram( sycl::detail::getSyclObjImpl(Ctx), subDev2, sycl::detail::KernelInfo>::getName()); } diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 319a6509d5e67..5eb6fdb7e8f4e 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -13,8 +13,8 @@ #include #include -#include -#include +#include +#include #include @@ -43,23 +43,23 @@ struct KernelInfo : public unittest::MockKernelInfoBase { } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage generateEAMTestKernelImage() { +static sycl::unittest::UrImage generateEAMTestKernelImage() { using namespace sycl::unittest; // Eliminated arguments are 1st and 3rd. std::vector KernelEAM{0b00000101}; - PiProperty EAMKernelPOI = makeKernelParamOptInfo( + UrProperty EAMKernelPOI = makeKernelParamOptInfo( EAMTestKernelName, EAMTestKernelNumArgs, KernelEAM); - PiArray ImgKPOI{std::move(EAMKernelPOI)}; + UrArray ImgKPOI{std::move(EAMKernelPOI)}; - PiPropertySet PropSet; + UrPropertySet PropSet; PropSet.insert(__SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO, std::move(ImgKPOI)); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = makeEmptyKernels({EAMTestKernelName}); + UrArray Entries = makeEmptyKernels({EAMTestKernelName}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -70,16 +70,16 @@ static sycl::unittest::PiImage generateEAMTestKernelImage() { return Img; } -static sycl::unittest::PiImage generateEAMTestKernel2Image() { +static sycl::unittest::UrImage generateEAMTestKernel2Image() { using namespace sycl::unittest; - PiPropertySet PropSet; + UrPropertySet PropSet; std::vector Bin{6, 7, 8, 9, 10, 11}; // Random data - PiArray Entries = makeEmptyKernels({EAMTestKernel2Name}); + UrArray Entries = makeEmptyKernels({EAMTestKernel2Name}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -90,20 +90,20 @@ static sycl::unittest::PiImage generateEAMTestKernel2Image() { return Img; } -static sycl::unittest::PiImage EAMImg = generateEAMTestKernelImage(); -static sycl::unittest::PiImage EAM2Img = generateEAMTestKernel2Image(); -static sycl::unittest::PiImageArray<1> EAMImgArray{&EAMImg}; -static sycl::unittest::PiImageArray<1> EAM2ImgArray{&EAM2Img}; +static sycl::unittest::UrImage EAMImg = generateEAMTestKernelImage(); +static sycl::unittest::UrImage EAM2Img = generateEAMTestKernel2Image(); +static sycl::unittest::UrImageArray<1> EAMImgArray{&EAMImg}; +static sycl::unittest::UrImageArray<1> EAM2ImgArray{&EAM2Img}; -// pi_program address is used as a key for ProgramManager::NativePrograms -// storage. redefinedProgramLinkCommon makes pi_program address equal to 0x1. +// ur_program_handle_t address is used as a key for ProgramManager::NativePrograms +// storage. redefinedProgramLinkCommon makes ur_program_handle_t address equal to 0x1. // Make sure that size of Bin is different for device images used in these tests // and greater than 1. -inline pi_result redefinedProgramCreateEAM(pi_context, const void *, size_t, - pi_program *ret_program) { - static size_t PiProgramAddr = 2; - *ret_program = reinterpret_cast(PiProgramAddr++); - return PI_SUCCESS; +inline ur_result_t redefinedProgramCreateEAM(void *pParams) { + auto params = *static_cast(pParams); + static size_t UrProgramAddr = 2; + **params.pphProgram = reinterpret_cast(UrProgramAddr++); + return UR_RESULT_SUCCESS; } class MockHandler : public sycl::handler { @@ -164,7 +164,7 @@ const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( auto SyclKernelImpl = sycl::detail::getSyclObjImpl(SyclKernel); std::shared_ptr DeviceImageImpl = SyclKernelImpl->getDeviceImage(); - sycl::detail::pi::PiProgram Program = DeviceImageImpl->get_program_ref(); + ur_program_handle_t Program = DeviceImageImpl->get_ur_program_ref(); EXPECT_TRUE(nullptr == ExecKernel->MSyclKernel || !ExecKernel->MSyclKernel->isCreatedFromSource()); @@ -174,15 +174,15 @@ const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( } // After both kernels are compiled ProgramManager.NativePrograms contains info -// about each pi_program. However, the result of the linkage of these kernels -// isn't stored in ProgramManager.NativePrograms. +// about each UR program handle. However, the result of the linkage of these +// kernels isn't stored in ProgramManager.NativePrograms. // Check that eliminated arg mask can be found for one of kernels in a // kernel bundle after two kernels are compiled and linked. TEST(EliminatedArgMask, KernelBundleWith2Kernels) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedProgramCreateEAM); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urProgramCreateWithIL", + &redefinedProgramCreateEAM); const sycl::device Dev = Plt.get_devices()[0]; sycl::queue Queue{Dev}; diff --git a/sycl/unittests/program_manager/itt_annotations.cpp b/sycl/unittests/program_manager/itt_annotations.cpp index 444b2aaacdaf6..edcb9a9baf88b 100644 --- a/sycl/unittests/program_manager/itt_annotations.cpp +++ b/sycl/unittests/program_manager/itt_annotations.cpp @@ -10,8 +10,8 @@ #include #include -#include -#include +#include +#include #include #include @@ -41,14 +41,17 @@ static void unset_env(const char *name) { bool HasITTEnabled = false; -static pi_result -redefinedProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, - size_t spec_size, - const void *spec_value) { - if (spec_id == sycl::detail::ITTSpecConstId) - HasITTEnabled = true; - - return PI_SUCCESS; +static ur_result_t redefinedProgramSetSpecializationConstants(void *pParams) { + auto params = + *static_cast(pParams); + for (uint32_t SpecConstIndex = 0; SpecConstIndex < *params.pcount; + SpecConstIndex++) { + if ((*params.ppSpecConstants)[SpecConstIndex].id == + sycl::detail::ITTSpecConstId) + HasITTEnabled = true; + } + + return UR_RESULT_SUCCESS; } static void reset() { @@ -62,11 +65,11 @@ TEST(ITTNotify, UseKernelBundle) { reset(); - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore< - sycl::detail::PiApiKind::piextProgramSetSpecializationConstant>( - redefinedProgramSetSpecializationConstant); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback( + "urProgramSetSpecializationConstants", + &redefinedProgramSetSpecializationConstants); const sycl::device Dev = Plt.get_devices()[0]; @@ -90,11 +93,11 @@ TEST(ITTNotify, VarNotSet) { reset(); - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore< - sycl::detail::PiApiKind::piextProgramSetSpecializationConstant>( - redefinedProgramSetSpecializationConstant); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback( + "urProgramSetSpecializationConstants", + &redefinedProgramSetSpecializationConstants); const sycl::device Dev = Plt.get_devices()[0]; diff --git a/sycl/unittests/program_manager/passing_link_and_compile_options.cpp b/sycl/unittests/program_manager/passing_link_and_compile_options.cpp index 8eb1f038b010e..893e8bdb41ee7 100644 --- a/sycl/unittests/program_manager/passing_link_and_compile_options.cpp +++ b/sycl/unittests/program_manager/passing_link_and_compile_options.cpp @@ -11,8 +11,8 @@ #include #include -#include -#include +#include +#include #include @@ -56,25 +56,25 @@ struct KernelInfo : public unittest::MockKernelInfoBase { } // namespace sycl template -static sycl::unittest::PiImage +static sycl::unittest::UrImage generateEAMTestKernelImage(std::string _cmplOptions, std::string _lnkOptions) { using namespace sycl::unittest; std::vector KernelEAM1{0b00000101}; - PiProperty EAMKernelPOI = + UrProperty EAMKernelPOI = makeKernelParamOptInfo(sycl::detail::KernelInfo::getName(), EAMTestKernelNumArgs1, KernelEAM1); - PiArray ImgKPOI{std::move(EAMKernelPOI)}; + UrArray ImgKPOI{std::move(EAMKernelPOI)}; - PiPropertySet PropSet; + UrPropertySet PropSet; PropSet.insert(__SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO, std::move(ImgKPOI)); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = + UrArray Entries = makeEmptyKernels({sycl::detail::KernelInfo::getName()}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec _cmplOptions, // Compile options _lnkOptions, // Link options @@ -84,59 +84,52 @@ generateEAMTestKernelImage(std::string _cmplOptions, std::string _lnkOptions) { return Img; } -inline pi_result redefinedProgramLink(pi_context, pi_uint32, const pi_device *, - const char *_linkOpts, pi_uint32, - const pi_program *, - void (*)(pi_program, void *), void *, - pi_program *) { - assert(_linkOpts != nullptr); - auto add_link_opts = std::string(_linkOpts); +inline ur_result_t redefinedProgramLink(void *pParams) { + auto params = *static_cast(pParams); + assert(*params.ppOptions != nullptr); + auto add_link_opts = std::string(*params.ppOptions); if (!add_link_opts.empty()) { if (!current_link_options.empty()) current_link_options += " "; - current_link_options += std::string(_linkOpts); + current_link_options += std::string(*params.ppOptions); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -inline pi_result redefinedProgramCompile(pi_program, pi_uint32, - const pi_device *, - const char *_compileOpts, pi_uint32, - const pi_program *, const char **, - void (*)(pi_program, void *), void *) { - assert(_compileOpts != nullptr); - auto add_compile_opts = std::string(_compileOpts); +inline ur_result_t redefinedProgramCompile(void *pParams) { + auto params = *static_cast(pParams); + assert(*params.ppOptions != nullptr); + auto add_compile_opts = std::string(*params.ppOptions); if (!add_compile_opts.empty()) { if (!current_compile_options.empty()) current_compile_options += " "; - current_compile_options += std::string(_compileOpts); + current_compile_options += std::string(*params.ppOptions); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -inline pi_result redefinedProgramBuild( - pi_program prog, pi_uint32, const pi_device *, const char *options, - void (*pfn_notify)(pi_program program, void *user_data), void *user_data) { - assert(options != nullptr); - current_build_opts = std::string(options); - return PI_SUCCESS; +inline ur_result_t redefinedProgramBuild(void *pParams) { + auto params = *static_cast(pParams); + assert(*params.ppOptions != nullptr); + current_build_opts = std::string(*params.ppOptions); + return UR_RESULT_SUCCESS; } TEST(Link_Compile_Options, compile_link_Options_Test_empty_options) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedProgramCompile); - Mock.redefineBefore( - redefinedProgramLink); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urProgramCompileExp", + &redefinedProgramCompile); + mock::getCallbacks().set_before_callback("urProgramLinkExp", + &redefinedProgramLink); const sycl::device Dev = Plt.get_devices()[0]; current_link_options.clear(); current_compile_options.clear(); std::string expected_options = ""; - static sycl::unittest::PiImage DevImage = + static sycl::unittest::UrImage DevImage = generateEAMTestKernelImage(expected_options, expected_options); - static sycl::unittest::PiImageArray<1> DevImageArray_{&DevImage}; + static sycl::unittest::UrImageArray<1> DevImageArray_{&DevImage}; auto KernelID_1 = sycl::get_kernel_id(); sycl::queue Queue{Dev}; const sycl::context Ctx = Queue.get_context(); @@ -150,12 +143,12 @@ TEST(Link_Compile_Options, compile_link_Options_Test_empty_options) { } TEST(Link_Compile_Options, compile_link_Options_Test_filled_options) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedProgramCompile); - Mock.redefineBefore( - redefinedProgramLink); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urProgramCompileExp", + &redefinedProgramCompile); + mock::getCallbacks().set_before_callback("urProgramLinkExp", + &redefinedProgramLink); const sycl::device Dev = Plt.get_devices()[0]; current_link_options.clear(); current_compile_options.clear(); @@ -163,11 +156,11 @@ TEST(Link_Compile_Options, compile_link_Options_Test_filled_options) { "-cl-opt-disable -cl-fp32-correctly-rounded-divide-sqrt", expected_link_options_1 = "-cl-denorms-are-zero -cl-no-signed-zeros"; - static sycl::unittest::PiImage DevImage_1 = + static sycl::unittest::UrImage DevImage_1 = generateEAMTestKernelImage(expected_compile_options_1, expected_link_options_1); - static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage_1}; + static sycl::unittest::UrImageArray<1> DevImageArray = {&DevImage_1}; auto KernelID_1 = sycl::get_kernel_id(); sycl::queue Queue{Dev}; const sycl::context Ctx = Queue.get_context(); @@ -185,23 +178,23 @@ TEST(Link_Compile_Options, compile_link_Options_Test_filled_options) { // TODO : Add check for linking 2 device images together when implemented. TEST(Link_Compile_Options, check_sycl_build) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedProgramCompile); - Mock.redefineBefore( - redefinedProgramLink); - Mock.redefineBefore( - redefinedProgramBuild); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urProgramCompileExp", + &redefinedProgramCompile); + mock::getCallbacks().set_before_callback("urProgramLinkExp", + &redefinedProgramLink); + mock::getCallbacks().set_before_callback("urProgramBuildExp", + &redefinedProgramBuild); const sycl::device Dev = Plt.get_devices()[0]; current_link_options.clear(); current_compile_options.clear(); std::string expected_compile_options = "-cl-opt-disable", expected_link_options = "-cl-denorms-are-zero"; - static sycl::unittest::PiImage DevImage = + static sycl::unittest::UrImage DevImage = generateEAMTestKernelImage(expected_compile_options, expected_link_options); - static sycl::unittest::PiImageArray<1> DevImageArray{&DevImage}; + static sycl::unittest::UrImageArray<1> DevImageArray{&DevImage}; auto KernelID = sycl::get_kernel_id(); sycl::context Ctx{Dev}; sycl::queue Queue{Ctx, Dev}; diff --git a/sycl/unittests/queue/CMakeLists.txt b/sycl/unittests/queue/CMakeLists.txt index bf2819c69833c..5317d82354f77 100644 --- a/sycl/unittests/queue/CMakeLists.txt +++ b/sycl/unittests/queue/CMakeLists.txt @@ -7,4 +7,5 @@ add_sycl_unittest(QueueTests OBJECT GetProfilingInfo.cpp ShortcutFunctions.cpp InOrderQueue.cpp + InteropRetain.cpp ) diff --git a/sycl/unittests/queue/DeviceCheck.cpp b/sycl/unittests/queue/DeviceCheck.cpp index f934c8ba333e6..784541d7cb5f3 100644 --- a/sycl/unittests/queue/DeviceCheck.cpp +++ b/sycl/unittests/queue/DeviceCheck.cpp @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#include #include #include #include -#include #include +#include +#include using namespace sycl; @@ -20,50 +20,47 @@ namespace { inline constexpr auto EnableDefaultContextsName = "SYCL_ENABLE_DEFAULT_CONTEXTS"; -pi_device ParentDevice = nullptr; -pi_platform PiPlatform = nullptr; +ur_device_handle_t ParentDevice = nullptr; +ur_platform_handle_t UrPlatform = nullptr; -pi_result redefinedDeviceGetInfoAfter(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_PARTITION_PROPERTIES) { - if (param_value) { +ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + if (*params.ppPropValue) { auto *Result = - reinterpret_cast(param_value); - *Result = PI_DEVICE_PARTITION_EQUALLY; + reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_PARTITION_EQUALLY; } - if (param_value_size_ret) - *param_value_size_ret = sizeof(pi_device_partition_property); - } else if (param_name == PI_DEVICE_INFO_MAX_COMPUTE_UNITS) { - auto *Result = reinterpret_cast(param_value); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_device_partition_t); + } else if (*params.ppropName == UR_DEVICE_INFO_MAX_COMPUTE_UNITS) { + auto *Result = reinterpret_cast(*params.ppPropValue); *Result = 2; - } else if (param_name == PI_DEVICE_INFO_PARENT_DEVICE) { - auto *Result = reinterpret_cast(param_value); - *Result = (device == ParentDevice) ? nullptr : ParentDevice; - } else if (param_name == PI_DEVICE_INFO_PLATFORM) { - auto *Result = reinterpret_cast(param_value); - *Result = PiPlatform; - } else if (param_name == PI_DEVICE_INFO_EXTENSIONS) { - if (param_value_size_ret) { - *param_value_size_ret = 0; + } else if (*params.ppropName == UR_DEVICE_INFO_PARENT_DEVICE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = (*params.phDevice == ParentDevice) ? nullptr : ParentDevice; + } else if (*params.ppropName == UR_DEVICE_INFO_PLATFORM) { + auto *Result = + reinterpret_cast(*params.ppPropValue); + *Result = UrPlatform; + } else if (*params.ppropName == UR_DEVICE_INFO_EXTENSIONS) { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 0; } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedDevicePartitionAfter( - pi_device device, const pi_device_partition_property *properties, - pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices) { - if (out_devices) { - for (size_t I = 0; I < num_devices; ++I) { - out_devices[I] = reinterpret_cast(1000 + I); +ur_result_t redefinedDevicePartitionAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.pphSubDevices) { + for (size_t I = 0; I < *params.pNumDevices; ++I) { + *params.pphSubDevices[I] = reinterpret_cast(1000 + I); } } - if (out_num_devices) - *out_num_devices = num_devices; - return PI_SUCCESS; + if (*params.ppNumDevicesRet) + **params.ppNumDevicesRet = *params.pNumDevices; + return UR_RESULT_SUCCESS; } // Check that the device is verified to be either a member of the context or a @@ -73,17 +70,17 @@ TEST(QueueDeviceCheck, CheckDeviceRestriction) { EnableDefaultContextsName, "1", detail::SYCLConfig::reset); - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); - PiPlatform = detail::getSyclObjImpl(Plt)->getHandleRef(); + UrPlatform = detail::getSyclObjImpl(Plt)->getHandleRef(); context DefaultCtx = Plt.ext_oneapi_get_default_context(); device Dev = DefaultCtx.get_devices()[0]; - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); - Mock.redefineAfter( - redefinedDevicePartitionAfter); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_after_callback("urDevicePartition", + &redefinedDevicePartitionAfter); // Device is a member of the context. { @@ -120,7 +117,7 @@ TEST(QueueDeviceCheck, CheckDeviceRestriction) { { ParentDevice = nullptr; device Device = detail::createSyclObjFromImpl( - std::make_shared(reinterpret_cast(0x01), + std::make_shared(reinterpret_cast(0x01), detail::getSyclObjImpl(Plt))); queue Q{Device}; EXPECT_NE(Q.get_context(), DefaultCtx); diff --git a/sycl/unittests/queue/EventClear.cpp b/sycl/unittests/queue/EventClear.cpp index 9dba15d63be69..2000235b1f15d 100644 --- a/sycl/unittests/queue/EventClear.cpp +++ b/sycl/unittests/queue/EventClear.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include using namespace sycl; @@ -25,28 +25,26 @@ std::unique_ptr TestContext; const int ExpectedEventThreshold = 128; -pi_result redefinedQueueCreateEx(pi_context context, pi_device device, - pi_queue_properties *properties, - pi_queue *queue) { - assert(properties && properties[0] == PI_QUEUE_FLAGS); +ur_result_t redefinedQueueCreate(void *pParams) { + auto params = *static_cast(pParams); + assert(*params.ppProperties); // Use in-order queues to force storing events for calling wait on them, - // rather than calling piQueueFinish. - if (properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { - return PI_ERROR_INVALID_QUEUE_PROPERTIES; + // rather than calling urQueueFinish. + if ((*params.ppProperties)->flags & + UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEventsWait(pi_uint32 num_events, - const pi_event *event_list) { +ur_result_t redefinedEventsWait(void *) { ++TestContext->NEventsWaitedFor; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEventGetInfoAfter(pi_event event, pi_event_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - EXPECT_EQ(param_name, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) +ur_result_t redefinedEventGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppropName, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) << "Unexpected event info requested"; // Report first half of events as complete. // Report second half of events as running. @@ -54,39 +52,41 @@ pi_result redefinedEventGetInfoAfter(pi_event event, pi_event_info param_name, // events are likely to be removed oldest first, and stops removing // at the first non-completed event. static int Counter = 0; - auto *Result = reinterpret_cast(param_value); - *Result = (Counter < (ExpectedEventThreshold / 2)) ? PI_EVENT_COMPLETE - : PI_EVENT_RUNNING; + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = (Counter < (ExpectedEventThreshold / 2)) ? UR_EVENT_STATUS_COMPLETE + : UR_EVENT_STATUS_RUNNING; Counter++; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEventRetain(pi_event event) { +ur_result_t redefinedEventRetain(void *) { ++TestContext->EventReferenceCount; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEventRelease(pi_event event) { +ur_result_t redefinedEventRelease(void *) { --TestContext->EventReferenceCount; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -void preparePiMock(unittest::PiMock &Mock) { - Mock.redefineBefore( - redefinedQueueCreateEx); - Mock.redefineBefore(redefinedEventsWait); - Mock.redefineAfter( - redefinedEventGetInfoAfter); - Mock.redefineBefore(redefinedEventRetain); - Mock.redefineBefore(redefinedEventRelease); +void prepareUrMock(unittest::UrMock<> &Mock) { + mock::getCallbacks().set_before_callback("urQueueCreate", + &redefinedQueueCreate); + mock::getCallbacks().set_before_callback("urEventWait", &redefinedEventsWait); + mock::getCallbacks().set_after_callback("urEventGetInfo", + &redefinedEventGetInfoAfter); + mock::getCallbacks().set_before_callback("urEventRetain", + &redefinedEventRetain); + mock::getCallbacks().set_before_callback("urEventRelease", + &redefinedEventRelease); } // Check that the USM events are cleared from the queue upon call to wait(), // so that they are not waited for multiple times. TEST(QueueEventClear, ClearOnQueueWait) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - preparePiMock(Mock); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + prepareUrMock(Mock); context Ctx{Plt.get_devices()[0]}; TestContext.reset(new TestCtx(Ctx)); @@ -105,9 +105,9 @@ TEST(QueueEventClear, ClearOnQueueWait) { // Check that shared events are cleaned up from the queue once their number // exceeds a threshold. TEST(QueueEventClear, CleanupOnThreshold) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - preparePiMock(Mock); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + prepareUrMock(Mock); context Ctx{Plt.get_devices()[0]}; TestContext.reset(new TestCtx(Ctx)); diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index 13f49b5bafa4d..d74702c2f85d8 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -16,9 +16,9 @@ #include #include -#include -#include #include +#include +#include #include @@ -26,41 +26,35 @@ class InfoTestKernel; MOCK_INTEGRATION_HEADER(InfoTestKernel) -static pi_result -redefinedPiEventGetProfilingInfo(pi_event event, pi_profiling_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - return PI_SUCCESS; +static ur_result_t redefinedUrEventGetProfilingInfo(void *) { + return UR_RESULT_SUCCESS; } -static pi_result redefinedPiDevicesGet(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, - pi_device *devices, - pi_uint32 *num_devices) { +static ur_result_t redefinedUrDeviceGet(void *pParams) { + auto params = *static_cast(pParams); // Host/Device timer syncronization isn't done all the time (cached), so we // need brand new device for some of the testcases. static std::intptr_t device_id = 10; - if (num_devices) - *num_devices = 1; + if (*params.ppNumDevices) + **params.ppNumDevices = 1; - if (devices && num_entries > 0) - devices[0] = reinterpret_cast(++device_id); + if (*params.pphDevices && *params.pNumEntries > 0) + *params.pphDevices[0] = reinterpret_cast(++device_id); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(GetProfilingInfo, normal_pass_without_exception) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedPiEventGetProfilingInfo); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urEventGetProfilingInfo", + &redefinedUrEventGetProfilingInfo); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; - static auto DevImage = + static sycl::unittest::UrImage DevImage = sycl::unittest::generateDefaultImage({"InfoTestKernel"}); - static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage}; + static sycl::unittest::UrImageArray<1> DevImageArray = {&DevImage}; auto KernelID = sycl::get_kernel_id(); sycl::queue Queue{ Ctx, Dev, sycl::property_list{sycl::property::queue::enable_profiling{}}}; @@ -89,15 +83,15 @@ TEST(GetProfilingInfo, normal_pass_without_exception) { } TEST(GetProfilingInfo, command_exception_check) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedPiEventGetProfilingInfo); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urEventGetProfilingInfo", + &redefinedUrEventGetProfilingInfo); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; - static auto DevImage = + static sycl::unittest::UrImage DevImage = sycl::unittest::generateDefaultImage({"InfoTestKernel"}); - static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage}; + static sycl::unittest::UrImageArray<1> DevImageArray = {&DevImage}; auto KernelID = sycl::get_kernel_id(); sycl::queue Queue{Ctx, Dev}; auto KernelBundle = sycl::get_kernel_bundle( @@ -190,15 +184,15 @@ TEST(GetProfilingInfo, exception_check_no_queue) { } TEST(GetProfilingInfo, check_if_now_dead_queue_property_set) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedPiEventGetProfilingInfo); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urEventGetProfilingInfo", + &redefinedUrEventGetProfilingInfo); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; - static auto DevImage = + static sycl::unittest::UrImage DevImage = sycl::unittest::generateDefaultImage({"InfoTestKernel"}); - static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage}; + static sycl::unittest::UrImageArray<1> DevImageArray = {&DevImage}; auto KernelID = sycl::get_kernel_id(); const int globalWIs{512}; sycl::event event; @@ -230,16 +224,16 @@ TEST(GetProfilingInfo, check_if_now_dead_queue_property_set) { } TEST(GetProfilingInfo, check_if_now_dead_queue_property_not_set) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedPiEventGetProfilingInfo); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urEventGetProfilingInfo", + &redefinedUrEventGetProfilingInfo); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; - static auto DevImage = + static sycl::unittest::UrImage DevImage = sycl::unittest::generateDefaultImage({"InfoTestKernel"}); - static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage}; + static sycl::unittest::UrImageArray<1> DevImageArray = {&DevImage}; auto KernelID = sycl::get_kernel_id(); const int globalWIs{512}; sycl::event event; @@ -298,21 +292,20 @@ TEST(GetProfilingInfo, check_if_now_dead_queue_property_not_set) { bool DeviceTimerCalled; -pi_result redefinedPiGetDeviceAndHostTimer(pi_device Device, - uint64_t *DeviceTime, - uint64_t *HostTime) { +ur_result_t redefinedUrGetGlobalTimestamps(void *) { DeviceTimerCalled = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(GetProfilingInfo, check_no_command_submission_time_when_event_profiling_disabled) { using namespace sycl; - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); - Mock.redefine(redefinedPiDevicesGet); - Mock.redefine( - redefinedPiGetDeviceAndHostTimer); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefinedUrDeviceGet); + mock::getCallbacks().set_replace_callback("urDeviceGetGlobalTimestamps", + &redefinedUrGetGlobalTimestamps); device Dev = Plt.get_devices()[0]; context Ctx{Dev}; queue Queue{Ctx, Dev}; @@ -330,11 +323,12 @@ TEST(GetProfilingInfo, // accessor TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor) { using namespace sycl; - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); - Mock.redefine(redefinedPiDevicesGet); - Mock.redefine( - redefinedPiGetDeviceAndHostTimer); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefinedUrDeviceGet); + mock::getCallbacks().set_replace_callback("urDeviceGetGlobalTimestamps", + &redefinedUrGetGlobalTimestamps); device Dev = Plt.get_devices()[0]; context Ctx{Dev}; queue Queue{Ctx, Dev, property::queue::enable_profiling()}; @@ -352,39 +346,35 @@ TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor) { EXPECT_TRUE(DeviceTimerCalled); } -pi_result redefinedFailedPiGetDeviceAndHostTimer(pi_device Device, - uint64_t *DeviceTime, - uint64_t *HostTime) { - return PI_ERROR_INVALID_OPERATION; +ur_result_t redefinedFailedUrGetGlobalTimestamps(void *) { + return UR_RESULT_ERROR_INVALID_OPERATION; } -static pi_result redefinedDeviceGetInfoAcc(pi_device device, - pi_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - if (param_name == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(param_value); - *Result = PI_DEVICE_TYPE_ACC; +static ur_result_t redefinedDeviceGetInfoAcc(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_FPGA; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(GetProfilingInfo, fallback_profiling_PiGetDeviceAndHostTimer_unsupported) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefine(redefinedPiDevicesGet); - Mock.redefineBefore( - redefinedPiEventGetProfilingInfo); - Mock.redefine( - redefinedFailedPiGetDeviceAndHostTimer); - Mock.redefineAfter( - redefinedDeviceGetInfoAcc); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefinedUrDeviceGet); + mock::getCallbacks().set_before_callback("urEventGetProfilingInfo", + &redefinedUrEventGetProfilingInfo); + mock::getCallbacks().set_replace_callback( + "urDeviceGetGlobalTimestamps", &redefinedFailedUrGetGlobalTimestamps); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAcc); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; - static auto DevImage = + static sycl::unittest::UrImage DevImage = sycl::unittest::generateDefaultImage({"InfoTestKernel"}); - static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage}; + static sycl::unittest::UrImageArray<1> DevImageArray = {&DevImage}; auto KernelID = sycl::get_kernel_id(); sycl::queue Queue{ Ctx, Dev, sycl::property_list{sycl::property::queue::enable_profiling{}}}; @@ -410,20 +400,21 @@ TEST(GetProfilingInfo, fallback_profiling_PiGetDeviceAndHostTimer_unsupported) { } TEST(GetProfilingInfo, fallback_profiling_mock_piEnqueueKernelLaunch) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefine(redefinedPiDevicesGet); - Mock.redefineBefore( - redefinedPiEventGetProfilingInfo); - Mock.redefine( - redefinedFailedPiGetDeviceAndHostTimer); - Mock.redefineAfter( - redefinedDeviceGetInfoAcc); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_replace_callback("urDeviceGet", + &redefinedUrDeviceGet); + mock::getCallbacks().set_before_callback("urEventGetProfilingInfo", + &redefinedUrEventGetProfilingInfo); + mock::getCallbacks().set_replace_callback( + "urDeviceGetGlobalTimestamps", &redefinedFailedUrGetGlobalTimestamps); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAcc); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; - static auto DevImage = + static sycl::unittest::UrImage DevImage = sycl::unittest::generateDefaultImage({"InfoTestKernel"}); - static sycl::unittest::PiImageArray<1> DevImageArray = {&DevImage}; + static sycl::unittest::UrImageArray<1> DevImageArray = {&DevImage}; auto KernelID = sycl::get_kernel_id(); sycl::queue Queue{ Ctx, Dev, sycl::property_list{sycl::property::queue::enable_profiling{}}}; diff --git a/sycl/unittests/queue/Hash.cpp b/sycl/unittests/queue/Hash.cpp index 000850743c882..227b07f01d63d 100644 --- a/sycl/unittests/queue/Hash.cpp +++ b/sycl/unittests/queue/Hash.cpp @@ -1,13 +1,13 @@ #include #include -#include +#include #include using namespace sycl; // Checks that the queue hash uses its unique ID. TEST(QueueHash, QueueHashUsesID) { - unittest::PiMock Mock; + unittest::UrMock<> Mock; queue Q; unsigned long long ID = detail::getSyclObjImpl(Q)->getQueueID(); ASSERT_EQ(std::hash{}(ID), std::hash{}(Q)); diff --git a/sycl/unittests/queue/InOrderQueue.cpp b/sycl/unittests/queue/InOrderQueue.cpp index 684eae329d819..4c6405a737a5b 100644 --- a/sycl/unittests/queue/InOrderQueue.cpp +++ b/sycl/unittests/queue/InOrderQueue.cpp @@ -1,27 +1,25 @@ #include -#include +#include #include #include using namespace sycl; static bool InOrderFlagSeen = false; -pi_result piextQueueCreateRedefineBefore(pi_context context, pi_device device, - pi_queue_properties *properties, - pi_queue *queue) { - EXPECT_TRUE(properties != nullptr); - EXPECT_TRUE(properties[0] == PI_QUEUE_FLAGS); - InOrderFlagSeen = - !(properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE); - return PI_SUCCESS; +ur_result_t urQueueCreateRedefineBefore(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_TRUE(*params.ppProperties != nullptr); + InOrderFlagSeen = !((*params.ppProperties)->flags & + UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE); + return UR_RESULT_SUCCESS; } TEST(InOrderQueue, CheckFlagIsPassed) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); - Mock.redefineBefore( - piextQueueCreateRedefineBefore); + mock::getCallbacks().set_before_callback("urQueueCreate", + &urQueueCreateRedefineBefore); EXPECT_FALSE(InOrderFlagSeen); queue q1{}; diff --git a/sycl/unittests/pi/piInteropRetain.cpp b/sycl/unittests/queue/InteropRetain.cpp similarity index 58% rename from sycl/unittests/pi/piInteropRetain.cpp rename to sycl/unittests/queue/InteropRetain.cpp index b76007328c336..c29d3b9e93c5a 100644 --- a/sycl/unittests/pi/piInteropRetain.cpp +++ b/sycl/unittests/queue/InteropRetain.cpp @@ -1,4 +1,4 @@ -//==--------------------- piInteropRetain.cpp -- check proper retain calls -==// +//==--------------------- InteropRetain.cpp -- check proper retain calls ---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,33 +12,34 @@ #include #include -#include +#include namespace { using namespace sycl; static int QueueRetainCalled = 0; -pi_result redefinedQueueRetain(pi_queue Queue) { +ur_result_t redefinedQueueRetain(void *) { ++QueueRetainCalled; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(PiInteropTest, CheckRetain) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); context Ctx{Plt.get_devices()[0]}; - // The queue construction should not call to piQueueRetain. Instead - // piQueueCreate should return the "retained" queue. - Mock.redefineBefore(redefinedQueueRetain); + // The queue construction should not call to urQueueRetain. Instead + // urQueueCreate should return the "retained" queue. + mock::getCallbacks().set_before_callback("urQueueRetain", + &redefinedQueueRetain); queue Q{Ctx, default_selector()}; EXPECT_TRUE(QueueRetainCalled == 0); cl_command_queue OCLQ = get_native(Q); EXPECT_TRUE(QueueRetainCalled == 1); - // The make_queue should not call to piQueueRetain. The - // piextCreateQueueWithNative handle should do the "retain" if needed. + // The make_queue should not call to urQueueRetain. The + // urQueueCreateWithNativeHandle should do the "retain" if needed. queue Q1 = make_queue(OCLQ, Ctx); EXPECT_TRUE(QueueRetainCalled == 1); } diff --git a/sycl/unittests/queue/ShortcutFunctions.cpp b/sycl/unittests/queue/ShortcutFunctions.cpp index c0bf747d753be..f6b8dd99dd2f5 100644 --- a/sycl/unittests/queue/ShortcutFunctions.cpp +++ b/sycl/unittests/queue/ShortcutFunctions.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include @@ -29,61 +29,39 @@ struct TestCtx { static std::unique_ptr TestContext; -pi_result redefinedEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_write, size_t offset, - size_t size, const void *ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +ur_result_t redefinedEnqueueMemBufferWrite(void *) { TestContext->BufferWriteCalled = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, - pi_bool blocking_read, size_t offset, - size_t size, void *ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +ur_result_t redefinedEnqueueMemBufferRead(void *) { TestContext->BufferReadCalled = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueMemBufferCopy(pi_queue command_queue, - pi_mem src_buffer, pi_mem dst_buffer, - size_t src_offset, size_t dst_offset, - size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +ur_result_t redefinedEnqueueMemBufferCopy(void *) { TestContext->BufferCopyCalled = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, - const void *pattern, - size_t pattern_size, size_t offset, - size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +ur_result_t redefinedEnqueueMemBufferFill(void *) { TestContext->BufferFillCalled = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(ShortcutFunctions, ShortcutsCallCorrectPIFunctions) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); - Mock.redefine( - redefinedEnqueueMemBufferWrite); - Mock.redefine( - redefinedEnqueueMemBufferRead); - Mock.redefine( - redefinedEnqueueMemBufferCopy); + mock::getCallbacks().set_replace_callback("urEnqueueMemBufferWrite", + &redefinedEnqueueMemBufferWrite); + mock::getCallbacks().set_replace_callback("urEnqueueMemBufferRead", + &redefinedEnqueueMemBufferRead); + mock::getCallbacks().set_replace_callback("urEnqueueMemBufferCopy", + &redefinedEnqueueMemBufferCopy); - Mock.redefine( - redefinedEnqueueMemBufferFill); + mock::getCallbacks().set_replace_callback("urEnqueueMemBufferFill", + &redefinedEnqueueMemBufferFill); context Ctx(Plt); queue Q{Ctx, default_selector()}; @@ -212,7 +190,7 @@ TEST(ShortcutFunctions, ShortcutsCallCorrectPIFunctions) { Q.update_host(Acc); Q.wait(); - // No PI functions expected. + // No UR functions expected. } // Queue.fill(accessor Dest, T src) diff --git a/sycl/unittests/queue/USM.cpp b/sycl/unittests/queue/USM.cpp index 03ff8c538d7ed..28a6f589b6650 100644 --- a/sycl/unittests/queue/USM.cpp +++ b/sycl/unittests/queue/USM.cpp @@ -1,4 +1,3 @@ -//==--------------- USM.cpp --- dependency chain unit tests ----------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,7 +8,7 @@ #include #include -#include +#include #include @@ -21,51 +20,48 @@ struct { } TestContext; // Dummy event values for bookkeeping -pi_event WAIT = nullptr; -pi_event MEMCPY = nullptr; -pi_event MEMSET = nullptr; +ur_event_handle_t WAIT = nullptr; +ur_event_handle_t MEMCPY = nullptr; +ur_event_handle_t MEMSET = nullptr; template auto getVal(T obj) { return detail::getSyclObjImpl(obj)->getHandleRef(); } -pi_result redefinedEnqueueEventsWaitAfter(pi_queue, pi_uint32 NumDeps, - const pi_event *Deps, - pi_event *Event) { - EXPECT_EQ(NumDeps, TestContext.Deps.size()); - for (size_t i = 0; i < NumDeps; ++i) { - EXPECT_EQ(Deps[i], getVal(TestContext.Deps[i])); +ur_result_t redefinedEnqueueEventsWaitAfter(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pnumEventsInWaitList, TestContext.Deps.size()); + for (size_t i = 0; i < *params.pnumEventsInWaitList; ++i) { + EXPECT_EQ((*params.pphEventWaitList)[i], getVal(TestContext.Deps[i])); } - WAIT = *Event; - return PI_SUCCESS; + WAIT = **params.pphEvent; + return UR_RESULT_SUCCESS; } -pi_result redefinedUSMEnqueueMemcpyAfter(pi_queue, pi_bool, void *, - const void *, size_t, pi_uint32, - const pi_event *, pi_event *Event) { +ur_result_t redefinedUSMEnqueueMemcpyAfter(void *pParams) { + auto params = *static_cast(pParams); // Set MEMCPY to the event produced by the original USMEnqueueMemcpy - MEMCPY = *Event; - return PI_SUCCESS; + MEMCPY = **params.pphEvent; + return UR_RESULT_SUCCESS; } -pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, const void *, size_t, - size_t, pi_uint32, const pi_event *, - pi_event *Event) { +ur_result_t redefinedUSMEnqueueMemFillAfter(void *pParams) { + auto params = *static_cast(pParams); // Set MEMSET to the event produced by the original USMEnqueueMemcpy - MEMSET = *Event; - return PI_SUCCESS; + MEMSET = **params.pphEvent; + return UR_RESULT_SUCCESS; } -// Check that zero-length USM memset/memcpy use piEnqueueEventsWait. +// Check that zero-length USM memset/memcpy use urEnqueueEventsWait. TEST(USM, NoOpPreservesDependencyChain) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineAfter( - redefinedEnqueueEventsWaitAfter); - Mock.redefineAfter( - redefinedUSMEnqueueMemcpyAfter); - Mock.redefineAfter( - redefinedUSMEnqueueMemsetAfter); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_after_callback("urEnqueueEventsWait", + &redefinedEnqueueEventsWaitAfter); + mock::getCallbacks().set_after_callback("urEnqueueUSMMemcpy", + &redefinedUSMEnqueueMemcpyAfter); + mock::getCallbacks().set_after_callback("urEnqueueUSMFill", + &redefinedUSMEnqueueMemFillAfter); context Ctx{Plt.get_devices()[0]}; queue Q{Ctx, default_selector()}; diff --git a/sycl/unittests/queue/Wait.cpp b/sycl/unittests/queue/Wait.cpp index f9ec3fb6083ac..023f07a0a5284 100644 --- a/sycl/unittests/queue/Wait.cpp +++ b/sycl/unittests/queue/Wait.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include @@ -20,60 +20,50 @@ using namespace sycl; struct TestCtx { bool SupportOOO = true; - bool PiQueueFinishCalled = false; + bool UrQueueFinishCalled = false; int NEventsWaitedFor = 0; int EventReferenceCount = 0; }; static TestCtx TestContext; -pi_result redefinedQueueCreateEx(pi_context context, pi_device device, - pi_queue_properties *properties, - pi_queue *queue) { - assert(properties && properties[0] == PI_QUEUE_FLAGS); +ur_result_t redefinedQueueCreate(void *pParams) { + auto params = *static_cast(pParams); if (!TestContext.SupportOOO && - properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { - return PI_ERROR_INVALID_QUEUE_PROPERTIES; + (*params.ppProperties)->flags & + UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, - const void *Pattern, size_t PatternSize, - size_t Count, - pi_uint32 Num_events_in_waitlist, - const pi_event *Events_waitlist, - pi_event *Event) { +ur_result_t redefinedEnqueueUSMFill(void *) { TestContext.EventReferenceCount = 1; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { + +ur_result_t redefinedEnqueueMemBufferFill(void *) { TestContext.EventReferenceCount = 1; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedQueueFinish(pi_queue Queue) { - TestContext.PiQueueFinishCalled = true; - return PI_SUCCESS; +ur_result_t redefinedQueueFinish(void *) { + TestContext.UrQueueFinishCalled = true; + return UR_RESULT_SUCCESS; } -pi_result redefinedEventsWait(pi_uint32 num_events, - const pi_event *event_list) { + +ur_result_t redefinedEventWait(void *) { ++TestContext.NEventsWaitedFor; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEventRetain(pi_event event) { +ur_result_t redefinedEventRetain(void *) { ++TestContext.EventReferenceCount; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEventRelease(pi_event event) { +ur_result_t redefinedEventRelease(void *) { --TestContext.EventReferenceCount; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } event submitTask(queue &Q, buffer &Buf) { @@ -84,18 +74,21 @@ event submitTask(queue &Q, buffer &Buf) { } TEST(QueueWait, QueueWaitTest) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedQueueCreateEx); - Mock.redefineBefore(redefinedQueueFinish); - Mock.redefineBefore( - redefinedUSMEnqueueMemset); - Mock.redefineBefore(redefinedEventsWait); - Mock.redefineBefore( - redefinedEnqueueMemBufferFill); - Mock.redefineBefore(redefinedEventRetain); - Mock.redefineBefore(redefinedEventRelease); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urQueueCreate", + &redefinedQueueCreate); + mock::getCallbacks().set_before_callback("urQueueFinish", + &redefinedQueueFinish); + mock::getCallbacks().set_before_callback("urEnqueueUSMFill", + &redefinedEnqueueUSMFill); + mock::getCallbacks().set_before_callback("urEventWait", &redefinedEventWait); + mock::getCallbacks().set_before_callback("urEnqueueMemBufferFill", + &redefinedEnqueueMemBufferFill); + mock::getCallbacks().set_before_callback("urEventRetain", + &redefinedEventRetain); + mock::getCallbacks().set_before_callback("urEventRelease", + &redefinedEventRelease); context Ctx{Plt.get_devices()[0]}; queue Q{Ctx, default_selector()}; @@ -104,11 +97,11 @@ TEST(QueueWait, QueueWaitTest) { // USM API event TestContext = {}; Q.memset(HostAlloc, 42, 1); - // No need to keep the event since we'll use piQueueFinish. + // No need to keep the event since we'll use urQueueFinish. ASSERT_EQ(TestContext.EventReferenceCount, 0); Q.wait(); ASSERT_EQ(TestContext.NEventsWaitedFor, 0); - ASSERT_TRUE(TestContext.PiQueueFinishCalled); + ASSERT_TRUE(TestContext.UrQueueFinishCalled); // Events with temporary ownership { @@ -119,7 +112,7 @@ TEST(QueueWait, QueueWaitTest) { // Still owned by the execution graph ASSERT_EQ(TestContext.EventReferenceCount, 1); ASSERT_EQ(TestContext.NEventsWaitedFor, 0); - ASSERT_TRUE(TestContext.PiQueueFinishCalled); + ASSERT_TRUE(TestContext.UrQueueFinishCalled); } // Blocked commands @@ -143,7 +136,7 @@ TEST(QueueWait, QueueWaitTest) { Q.wait(); // Only a single event (the last one) should be waited for here. ASSERT_EQ(TestContext.NEventsWaitedFor, 1); - ASSERT_TRUE(TestContext.PiQueueFinishCalled); + ASSERT_TRUE(TestContext.UrQueueFinishCalled); } // Test behaviour for emulating an OOO queue with multiple in-order ones. @@ -156,7 +149,7 @@ TEST(QueueWait, QueueWaitTest) { Q.wait(); ASSERT_EQ(TestContext.EventReferenceCount, 0); ASSERT_EQ(TestContext.NEventsWaitedFor, 1); - ASSERT_FALSE(TestContext.PiQueueFinishCalled); + ASSERT_FALSE(TestContext.UrQueueFinishCalled); } } // namespace diff --git a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp index 4b91b8c3063d3..d0b26bbd7663b 100644 --- a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp +++ b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp @@ -2,17 +2,17 @@ #include "SchedulerTestUtils.hpp" #include -#include #include #include +#include #include using namespace sycl; TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); queue QueueDev(context(Plt), default_selector_v); MockScheduler MS; diff --git a/sycl/unittests/scheduler/AllocaLinking.cpp b/sycl/unittests/scheduler/AllocaLinking.cpp index 9f7da55f7d8b7..b60a5740ab9e0 100644 --- a/sycl/unittests/scheduler/AllocaLinking.cpp +++ b/sycl/unittests/scheduler/AllocaLinking.cpp @@ -9,7 +9,7 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include +#include #include @@ -17,41 +17,39 @@ using namespace sycl; static bool HostUnifiedMemory = false; -static pi_result redefinedDeviceGetInfoAfter(pi_device Device, - pi_device_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - if (ParamName == PI_DEVICE_INFO_HOST_UNIFIED_MEMORY) { - auto *Result = reinterpret_cast(ParamValue); +static ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_HOST_UNIFIED_MEMORY) { + auto *Result = reinterpret_cast(*params.ppPropValue); *Result = HostUnifiedMemory; - } else if (ParamName == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(ParamValue); - *Result = PI_DEVICE_TYPE_CPU; + } else if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_CPU; } // This mock device has no sub-devices - if (ParamName == PI_DEVICE_INFO_PARTITION_PROPERTIES) { - if (ParamValueSizeRet) { - *ParamValueSizeRet = 0; + if (*params.ppropName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 0; } } - if (ParamName == PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - assert(ParamValueSize == sizeof(pi_device_affinity_domain)); - if (ParamValue) { - *static_cast(ParamValue) = 0; + if (*params.ppropName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { + assert(**params.ppPropSizeRet == sizeof(ur_device_affinity_domain_flags_t)); + if (*params.ppPropValue) { + *static_cast(*params.ppPropValue) = + 0; } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST_F(SchedulerTest, AllocaLinking) { HostUnifiedMemory = false; - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0]}; - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0]}; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); sycl::detail::QueueImplPtr QImpl = detail::getSyclObjImpl(Q); MockScheduler MS; diff --git a/sycl/unittests/scheduler/BlockedCommands.cpp b/sycl/unittests/scheduler/BlockedCommands.cpp index e5ab988687493..f25300818a2a1 100644 --- a/sycl/unittests/scheduler/BlockedCommands.cpp +++ b/sycl/unittests/scheduler/BlockedCommands.cpp @@ -9,14 +9,15 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include +#include using namespace sycl; using namespace testing; TEST_F(SchedulerTest, BlockedCommands) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + /* + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; MockCommand MockCmd(detail::getSyclObjImpl(Q)); MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; @@ -46,26 +47,26 @@ TEST_F(SchedulerTest, BlockedCommands) { Res = detail::EnqueueResultT{}; MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - MockCmd.MRetVal = CL_SUCCESS; + MockCmd.MRetVal = UR_RESULT_SUCCESS; Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); ASSERT_TRUE(Enqueued && Res.MResult == detail::EnqueueResultT::SyclEnqueueSuccess) - << "The command is expected to be successfully enqueued.\n"; + << "The command is expected to be successfully enqueued.\n";*/ } TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; MockCommand A(detail::getSyclObjImpl(Q)); A.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; A.MIsBlockable = true; - A.MRetVal = CL_SUCCESS; + A.MRetVal = UR_RESULT_SUCCESS; MockCommand B(detail::getSyclObjImpl(Q)); B.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; B.MIsBlockable = true; - B.MRetVal = CL_SUCCESS; + B.MRetVal = UR_RESULT_SUCCESS; MockCommand C(detail::getSyclObjImpl(Q)); C.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; @@ -74,7 +75,7 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { MockCommand D(detail::getSyclObjImpl(Q)); D.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; D.MIsBlockable = true; - D.MRetVal = CL_SUCCESS; + D.MRetVal = UR_RESULT_SUCCESS; addEdge(&A, &B, nullptr); addEdge(&A, &C, nullptr); @@ -104,8 +105,8 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { } TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; MockCommand A(detail::getSyclObjImpl(Q)); A.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; @@ -113,7 +114,7 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { MockCommand B(detail::getSyclObjImpl(Q)); B.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; - B.MRetVal = CL_OUT_OF_RESOURCES; + B.MRetVal = UR_RESULT_ERROR_OUT_OF_RESOURCES; addEdge(&A, &B, nullptr); @@ -150,18 +151,18 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { // This unit test is for workaround described in GraphProcessor::enqueueCommand // method. TEST_F(SchedulerTest, EnqueueHostDependency) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; MockCommand A(detail::getSyclObjImpl(Q)); A.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; A.MIsBlockable = true; - A.MRetVal = CL_SUCCESS; + A.MRetVal = UR_RESULT_SUCCESS; MockCommand B(detail::getSyclObjImpl(Q)); B.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; B.MIsBlockable = true; - B.MRetVal = CL_SUCCESS; + B.MRetVal = UR_RESULT_SUCCESS; sycl::detail::EventImplPtr DepEvent{ new sycl::detail::event_impl(detail::getSyclObjImpl(Q))}; diff --git a/sycl/unittests/scheduler/Commands.cpp b/sycl/unittests/scheduler/Commands.cpp index 2e686a94f67bf..0732d9e5ecde8 100644 --- a/sycl/unittests/scheduler/Commands.cpp +++ b/sycl/unittests/scheduler/Commands.cpp @@ -8,31 +8,31 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include +#include "ur_mock_helpers.hpp" +#include #include using namespace sycl; -pi_result redefinePiEnqueueEventsWaitWithBarrier(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { +ur_result_t redefineEnqueueEventsWaitWithBarrier(void *pParams) { + auto params = + *static_cast(pParams); - for (pi_uint32 i = 0; i != NumEventsInWaitList; ++i) - EXPECT_NE(EventWaitList[i], nullptr); + for (uint32_t i = 0; i != *params.pnumEventsInWaitList; ++i) + EXPECT_NE((*params.pphEventWaitList)[i], nullptr); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } // Hack that allows to return a context in redefinePiEventGetInfo -sycl::detail::pi::PiContext queue_global_context = nullptr; +ur_context_handle_t queue_global_context = nullptr; -pi_result redefinePiEventGetInfo(pi_event, pi_event_info, size_t, - void *param_value, size_t *) { - *reinterpret_cast(param_value) = +ur_result_t redefineUrEventGetInfo(void *pParams) { + auto params = *static_cast(pParams); + *reinterpret_cast(*params.ppPropValue) = queue_global_context; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } // @@ -47,11 +47,11 @@ pi_result redefinePiEventGetInfo(pi_event, pi_event_info, size_t, // } // TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); - Mock.redefineBefore( - redefinePiEnqueueEventsWaitWithBarrier); + mock::getCallbacks().set_before_callback( + "urEnqueueEventsWaitWithBarrier", &redefineEnqueueEventsWaitWithBarrier); queue Queue{Plt.get_devices()[0]}; sycl::detail::QueueImplPtr QueueImpl = detail::getSyclObjImpl(Queue); @@ -59,17 +59,15 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { queue_global_context = detail::getSyclObjImpl(Queue.get_context())->getHandleRef(); - Mock.redefineBefore( - redefinePiEventGetInfo); + mock::getCallbacks().set_before_callback("urEventGetInfo", + &redefineUrEventGetInfo); auto EmptyEvent = std::make_shared(); - pi_event PIEvent = nullptr; - pi_result Res = mock_piEventCreate(/*context = */ (pi_context)0x1, &PIEvent); - EXPECT_TRUE(PI_SUCCESS == Res); + ur_event_handle_t UREvent = mock::createDummyHandle(); auto Event = - std::make_shared(PIEvent, Queue.get_context()); + std::make_shared(UREvent, Queue.get_context()); using EventList = std::vector; std::vector InputEventWaitLists = { diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index 95a327979d3c3..fdada9b624714 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -8,9 +8,10 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" +#include "ur_mock_helpers.hpp" #include -#include -#include +#include +#include #include @@ -20,65 +21,66 @@ struct TestCtx { queue &Q1; queue &Q2; - std::shared_ptr Ctx1; - std::shared_ptr Ctx2; + // These used to be shared_ptr but that was causing problems due to Mock + // teardown clearing default overrides between tests. + const detail::context_impl &Ctx1; + const detail::context_impl &Ctx2; - pi_event EventCtx1 = nullptr; + ur_event_handle_t EventCtx1 = nullptr; - pi_event EventCtx2 = nullptr; + ur_event_handle_t EventCtx2 = nullptr; bool EventCtx1WasWaited = false; bool EventCtx2WasWaited = false; TestCtx(queue &Queue1, queue &Queue2) - : Q1(Queue1), Q2(Queue2), Ctx1{detail::getSyclObjImpl(Q1.get_context())}, - Ctx2{detail::getSyclObjImpl(Q2.get_context())} { + : Q1(Queue1), Q2(Queue2), + Ctx1(*detail::getSyclObjImpl(Q1.get_context()).get()), + Ctx2(*detail::getSyclObjImpl(Q2.get_context()).get()) { - pi_result Res = mock_piEventCreate((pi_context)0x0, &EventCtx1); - EXPECT_TRUE(PI_SUCCESS == Res); - - Res = mock_piEventCreate((pi_context)0x0, &EventCtx2); - EXPECT_TRUE(PI_SUCCESS == Res); + EventCtx1 = mock::createDummyHandle(); + EventCtx2 = mock::createDummyHandle(); } }; std::unique_ptr TestContext; -pi_result waitFunc(pi_uint32 N, const pi_event *List) { - EXPECT_EQ(N, 1u) << "piEventsWait called for different contexts\n"; +ur_result_t urEventWaitRedefineCheckEvents(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pnumEvents, 1u) + << "urEnqueueEventsWait called for different contexts\n"; - EXPECT_TRUE((TestContext->EventCtx1 == *List) || - (TestContext->EventCtx2 == *List)) - << "piEventsWait called for unknown event"; + EXPECT_TRUE((TestContext->EventCtx1 == **params.pphEventWaitList) || + (TestContext->EventCtx2 == **params.pphEventWaitList)) + << "urEventsWait called for unknown event"; - if (TestContext->EventCtx1 == *List) + if (TestContext->EventCtx1 == **params.pphEventWaitList) TestContext->EventCtx1WasWaited = true; - if (TestContext->EventCtx2 == *List) + if (TestContext->EventCtx2 == **params.pphEventWaitList) TestContext->EventCtx2WasWaited = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize, - void *PV, size_t *PVSizeRet) { - EXPECT_EQ(PName, PI_EVENT_INFO_CONTEXT) << "Unknown param name"; +ur_result_t getEventInfoFunc(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppropName, UR_EVENT_INFO_CONTEXT) << "Unknown param name"; - if (Event == TestContext->EventCtx1) - *reinterpret_cast(PV) = - reinterpret_cast(TestContext->Ctx1->getHandleRef()); - else if (Event == TestContext->EventCtx2) - *reinterpret_cast(PV) = - reinterpret_cast(TestContext->Ctx2->getHandleRef()); + if (*params.phEvent == TestContext->EventCtx1) + *reinterpret_cast(*params.ppPropValue) = + reinterpret_cast(TestContext->Ctx1.getHandleRef()); + else if (*params.phEvent == TestContext->EventCtx2) + *reinterpret_cast(*params.ppPropValue) = + reinterpret_cast(TestContext->Ctx2.getHandleRef()); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } static bool GpiEventsWaitRedefineCalled = false; -pi_result piEventsWaitRedefine(pi_uint32 num_events, - const pi_event *event_list) { +ur_result_t urEventsWaitRedefineCheckCalled(void *) { GpiEventsWaitRedefineCalled = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } class StreamAUXCmdsWait_TestKernel; @@ -100,17 +102,17 @@ struct KernelInfo } // namespace _V1 } // namespace sycl -static sycl::unittest::PiImage generateDefaultImage() { +static sycl::unittest::UrImage generateDefaultImage() { using namespace sycl::unittest; - PiPropertySet PropSet; + UrPropertySet PropSet; addESIMDFlag(PropSet); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data - PiArray Entries = + UrArray Entries = makeEmptyKernels({"StreamAUXCmdsWait_TestKernel"}); - PiImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format + UrImage Img{SYCL_DEVICE_BINARY_TYPE_SPIRV, // Format __SYCL_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec "", // Compile options "", // Link options @@ -121,8 +123,8 @@ static sycl::unittest::PiImage generateDefaultImage() { return Img; } -sycl::unittest::PiImage Img = generateDefaultImage(); -sycl::unittest::PiImageArray<1> ImgArray{&Img}; +sycl::unittest::UrImage Img = generateDefaultImage(); +sycl::unittest::UrImageArray<1> ImgArray{&Img}; class EventImplProxyT : public sycl::detail::event_impl { public: @@ -138,8 +140,8 @@ class QueueImplProxyT : public sycl::detail::queue_impl { TEST_F(SchedulerTest, StreamAUXCmdsWait) { { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); std::shared_ptr QueueImpl = detail::getSyclObjImpl(Q); @@ -173,23 +175,21 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) { } { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::queue Q(Plt.get_devices()[0]); std::shared_ptr QueueImpl = detail::getSyclObjImpl(Q); - Mock.redefineBefore(piEventsWaitRedefine); + mock::getCallbacks().set_before_callback("urEventWait", + &urEventsWaitRedefineCheckCalled); auto QueueImplProxy = std::static_pointer_cast(QueueImpl); - pi_event PIEvent = nullptr; - pi_result Res = - mock_piEventCreate(/*context = */ (pi_context)0x1, &PIEvent); - ASSERT_TRUE(PI_SUCCESS == Res); + ur_event_handle_t UREvent = mock::createDummyHandle(); auto EventImpl = std::make_shared(QueueImpl); - EventImpl->getHandleRef() = PIEvent; + EventImpl->getHandleRef() = UREvent; QueueImplProxy->registerStreamServiceEvent(EventImpl); @@ -201,11 +201,12 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) { } TEST_F(SchedulerTest, CommandsWaitForEvents) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); - Mock.redefineBefore(waitFunc); - Mock.redefineBefore(getEventInfoFunc); + mock::getCallbacks().set_before_callback("urEventWait", + &urEventWaitRedefineCheckEvents); + mock::getCallbacks().set_before_callback("urEventGetInfo", &getEventInfoFunc); context Ctx1{Plt.get_devices()[0]}; queue Q1{Ctx1, default_selector_v}; @@ -225,7 +226,7 @@ TEST_F(SchedulerTest, CommandsWaitForEvents) { Events.push_back(E1); Events.push_back(E2); - pi_event EventResult = nullptr; + ur_event_handle_t EventResult = nullptr; Cmd.waitForEventsCall(nullptr, Events, EventResult); diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index d58d48f2f6ad0..6870e9b4d095b 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -9,9 +9,9 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include #include #include +#include #include @@ -23,7 +23,7 @@ using EventImplPtr = std::shared_ptr; constexpr auto DisableCleanupName = "SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP"; -std::vector> PassedNumEvents; +std::vector> PassedNumEvents; bool CheckTestExecutionRequirements(const platform &plt) { // This test only contains device image for SPIR-V capable devices. @@ -40,7 +40,7 @@ enum TestCGType { KERNEL_TASK = 0x00, HOST_TASK = 0x01 }; class DependsOnTests : public ::testing::Test { protected: void SetUp() { - platform Plt = Mock.getPlatform(); + platform Plt = sycl::platform(); if (!CheckTestExecutionRequirements(Plt)) GTEST_SKIP(); @@ -106,10 +106,10 @@ class DependsOnTests : public ::testing::Test { std::vector BlockedCommands{Cmd2, Cmd3}; VerifyBlockedCommandsEnqueue(Cmd1, BlockedCommands); - // One piEventsWait call: + // One urEventWait call: // kernel2 waits for kernel 1 by sending event list to enqueue launch call // (depending on queue property). Cmd3Event.wait() waits for kernel2 via - // piEventsWait. + // urEventWait. ASSERT_EQ(PassedNumEvents.size(), 1u); auto [EventCount, EventArr] = PassedNumEvents[0]; ASSERT_EQ(EventCount, 1u); @@ -148,7 +148,7 @@ class DependsOnTests : public ::testing::Test { } } - unittest::PiMock Mock; + unittest::UrMock<> Mock; unittest::ScopedEnvVar DisabledCleanup{ DisableCleanupName, "1", detail::SYCLConfig::reset}; @@ -293,33 +293,29 @@ TEST_F(DependsOnTests, EnqueueNoMemObjDoubleKernelDepHost) { VerifyBlockedCommandsEnqueue(Cmd1, BlockedCommands); } -std::vector EventsInWaitList; -pi_result redefinedextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, - void *dst_ptr, const void *src_ptr, - size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - *event = createDummyHandle(); - for (auto i = 0u; i < num_events_in_waitlist; i++) { - EventsInWaitList.push_back(events_waitlist[i]); +std::vector EventsInWaitList; +ur_result_t redefinedextUSMEnqueueMemcpy(void *pParams) { + auto params = *static_cast(pParams); + **params.pphEvent = mock::createDummyHandle(); + for (auto i = 0u; i < *params.pnumEventsInWaitList; i++) { + EventsInWaitList.push_back((*params.pphEventWaitList)[i]); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueEventsWaitWithBarrier( - pi_queue command_queue, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); - for (auto i = 0u; i < num_events_in_wait_list; i++) { - EventsInWaitList.push_back(event_wait_list[i]); +ur_result_t redefinedEnqueueEventsWaitWithBarrier(void *pParams) { + auto params = + *static_cast(pParams); + **params.pphEvent = mock::createDummyHandle(); + for (auto i = 0u; i < *params.pnumEventsInWaitList; i++) { + EventsInWaitList.push_back((*params.pphEventWaitList)[i]); } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { - Mock.redefineBefore( - redefinedextUSMEnqueueMemcpy); + mock::getCallbacks().set_before_callback("urEnqueueUSMMemcpy", + &redefinedextUSMEnqueueMemcpy); sycl::queue Queue = detail::createSyclObjFromImpl(QueueDevImpl); auto HostTaskEvent = @@ -360,8 +356,8 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { } TEST_F(DependsOnTests, BarrierWithWaitList) { - Mock.redefineBefore( - redefinedEnqueueEventsWaitWithBarrier); + mock::getCallbacks().set_before_callback( + "urEnqueueEventsWaitWithBarrier", &redefinedEnqueueEventsWaitWithBarrier); sycl::queue Queue = detail::createSyclObjFromImpl(QueueDevImpl); auto HostTaskEvent = diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 6e3014ce79179..5443099b790f0 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -9,13 +9,13 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include +#include using namespace sycl; TEST_F(SchedulerTest, FailedDependency) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); queue Queue(context(Plt), default_selector_v); detail::Requirement MockReq = getMockRequirement(); diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 186768b3fd823..1650b720e964c 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -9,9 +9,9 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include #include #include +#include #include @@ -28,32 +28,23 @@ using namespace sycl; inline constexpr auto HostUnifiedMemoryName = "SYCL_HOST_UNIFIED_MEMORY"; int val; -static pi_result redefinedEnqueueMemBufferMap( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, - pi_map_flags map_flags, size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event, void **ret_map) { - *event = reinterpret_cast(new int{}); - *ret_map = &val; - return PI_SUCCESS; +static ur_result_t redefinedEnqueueMemBufferMap(void *pParams) { + auto params = *static_cast(pParams); + **params.pphEvent = reinterpret_cast(new int{}); + **params.pppRetMap = &val; + return UR_RESULT_SUCCESS; } -static pi_result redefinedEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, - void *mapped_ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { - *event = reinterpret_cast(new int{}); - return PI_SUCCESS; +static ur_result_t redefinedEnqueueMemUnmap(void *pParams) { + auto params = *static_cast(pParams); + **params.pphEvent = reinterpret_cast(new int{}); + return UR_RESULT_SUCCESS; } -static pi_result redefinedEnqueueMemBufferFill( - pi_queue command_queue, pi_mem buffer, const void *pattern, - size_t pattern_size, size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { - *event = reinterpret_cast(new int{}); - return PI_SUCCESS; +static ur_result_t redefinedEnqueueMemBufferFill(void *pParams) { + auto params = *static_cast(pParams); + **params.pphEvent = reinterpret_cast(new int{}); + return UR_RESULT_SUCCESS; } static void verifyCleanup(detail::MemObjRecord *Record, @@ -164,7 +155,7 @@ static void checkCleanupOnEnqueue(MockScheduler &MS, // Check addCopyBack MockCmd = addNewMockCmds(); LeafMockCmd->getEvent()->getHandleRef() = - reinterpret_cast(new int{}); + reinterpret_cast(new int{}); MS.addCopyBack(&MockReq); verifyCleanup(Record, AllocaCmd, MockCmd, CommandDeleted); @@ -208,14 +199,14 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) { unittest::ScopedEnvVar HostUnifiedMemoryVar{ HostUnifiedMemoryName, "1", detail::SYCLConfig::reset}; - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedEnqueueMemBufferMap); - Mock.redefineBefore( - redefinedEnqueueMemUnmap); - Mock.redefineBefore( - redefinedEnqueueMemBufferFill); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urEnqueueMemBufferMap", + &redefinedEnqueueMemBufferMap); + mock::getCallbacks().set_before_callback("urEnqueueMemUnmap", + &redefinedEnqueueMemUnmap); + mock::getCallbacks().set_before_callback("urEnqueueMemBufferFill", + &redefinedEnqueueMemBufferFill); context Ctx{Plt}; queue Queue{Ctx, default_selector_v}; @@ -278,8 +269,8 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) { // Check that host tasks are cleaned up after completion. TEST_F(SchedulerTest, HostTaskCleanup) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); context Ctx{Plt}; queue Queue{Ctx, default_selector_v}; @@ -317,8 +308,8 @@ struct AttachSchedulerWrapper { // Check that stream buffers are released alongside graph cleanup. TEST_F(SchedulerTest, StreamBufferDeallocation) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); context Ctx{Plt}; queue Queue{Ctx, default_selector_v}; detail::QueueImplPtr QueueImplPtr = detail::getSyclObjImpl(Queue); @@ -366,21 +357,22 @@ class MockAuxResource { bool EventCompleted = false; -pi_result redefinedEventGetInfo(pi_event Event, pi_event_info PName, - size_t PVSize, void *PV, size_t *PVSizeRet) { - EXPECT_EQ(PName, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) +ur_result_t redefinedEventGetInfo(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppropName, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) << "Unknown param name"; - EXPECT_EQ(PVSize, 4u); - *(static_cast(PV)) = - EventCompleted ? PI_EVENT_COMPLETE : PI_EVENT_SUBMITTED; - return PI_SUCCESS; + EXPECT_EQ(*params.ppropSize, 4u); + *(static_cast(*params.ppPropValue)) = + EventCompleted ? UR_EVENT_STATUS_COMPLETE : UR_EVENT_STATUS_SUBMITTED; + return UR_RESULT_SUCCESS; } // Check that auxiliary resources are released alongside graph cleanup. TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) { - unittest::PiMock Mock; - Mock.redefine(redefinedEventGetInfo); - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + mock::getCallbacks().set_replace_callback("urEventGetInfo", + &redefinedEventGetInfo); + platform Plt = sycl::platform(); context Ctx{Plt}; queue Queue{Ctx, default_selector_v}; detail::QueueImplPtr QueueImplPtr = detail::getSyclObjImpl(Queue); diff --git a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp index 69d6c10bf9049..1a2e981c2ae99 100644 --- a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp +++ b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp @@ -9,9 +9,9 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include #include #include +#include #include @@ -40,7 +40,7 @@ enum TestCGType { KERNEL_TASK, HOST_TASK, BARRIER }; class BarrierHandlingWithHostTask : public ::testing::Test { protected: void SetUp() { - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); sycl::context SyclContext(Plt); sycl::device SyclDev = @@ -123,7 +123,7 @@ class BarrierHandlingWithHostTask : public ::testing::Test { Events.push_back(KernelEventImpl); } - sycl::unittest::PiMock Mock; + sycl::unittest::UrMock<> Mock; sycl::unittest::ScopedEnvVar DisabledCleanup{ DisableCleanupName, "1", sycl::detail::SYCLConfig< diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 9ce9a1f944349..7e8bd6dd4c9a0 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -9,8 +9,8 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include #include +#include #include #include @@ -18,59 +18,42 @@ namespace { using namespace sycl; -pi_result redefinedEnqueueMemBufferReadRect( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - EXPECT_EQ(num_events_in_wait_list, 0u); - return PI_SUCCESS; +ur_result_t redefinedEnqueueMemBufferReadRect(void *pParams) { + auto params = + *static_cast(pParams); + EXPECT_EQ(*params.pnumEventsInWaitList, 0u); + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueMemBufferWriteRect( - pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - EXPECT_EQ(num_events_in_wait_list, 0u); - return PI_SUCCESS; +ur_result_t redefinedEnqueueMemBufferWriteRect(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pnumEventsInWaitList, 0u); + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_map, - pi_map_flags map_flags, size_t offset, - size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event, void **ret_map) { - EXPECT_EQ(num_events_in_wait_list, 0u); - return PI_SUCCESS; +ur_result_t redefinedEnqueueMemBufferMap(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pnumEventsInWaitList, 0u); + return UR_RESULT_SUCCESS; } -pi_result redefinedEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, - void *mapped_ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { - EXPECT_EQ(num_events_in_wait_list, 0u); - return PI_SUCCESS; +ur_result_t redefinedEnqueueMemUnmap(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pnumEventsInWaitList, 0u); + return UR_RESULT_SUCCESS; } TEST_F(SchedulerTest, InOrderQueueDeps) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedEnqueueMemBufferReadRect); - Mock.redefineBefore( - redefinedEnqueueMemBufferWriteRect); - Mock.redefineBefore( - redefinedEnqueueMemBufferMap); - Mock.redefineBefore( - redefinedEnqueueMemUnmap); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urEnqueueMemBufferReadRect", + &redefinedEnqueueMemBufferReadRect); + mock::getCallbacks().set_before_callback("urEnqueueMemBufferWriteRect", + &redefinedEnqueueMemBufferWriteRect); + mock::getCallbacks().set_before_callback("urEnqueueMemBufferMap", + &redefinedEnqueueMemBufferMap); + mock::getCallbacks().set_before_callback("urEnqueueMemUnmap", + &redefinedEnqueueMemUnmap); context Ctx{Plt.get_devices()[0]}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; @@ -102,14 +85,14 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { } bool BarrierCalled = false; -pi_event ExpectedEvent = nullptr; -pi_result redefinedEnqueueEventsWaitWithBarrier( - pi_queue command_queue, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - EXPECT_EQ(num_events_in_wait_list, 1u); - EXPECT_EQ(ExpectedEvent, *event_wait_list); +ur_event_handle_t ExpectedEvent = nullptr; +ur_result_t redefinedEnqueueEventsWaitWithBarrier(void *pParams) { + auto params = + *static_cast(pParams); + EXPECT_EQ(*params.pnumEventsInWaitList, 1u); + EXPECT_EQ(ExpectedEvent, **params.pphEventWaitList); BarrierCalled = true; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } sycl::event submitKernel(sycl::queue &Q) { @@ -120,10 +103,10 @@ sycl::event submitKernel(sycl::queue &Q) { TEST_F(SchedulerTest, InOrderQueueIsolatedDeps) { // Check that isolated kernels (i.e. those that don't modify the graph) // are handled properly during filtering. - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedEnqueueEventsWaitWithBarrier); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback( + "urEnqueueEventsWaitWithBarrier", &redefinedEnqueueEventsWaitWithBarrier); context Ctx{Plt.get_devices()[0]}; queue Q1{Ctx, default_selector_v, property::queue::in_order()}; diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 58b28237a4958..c37753735c658 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -9,9 +9,9 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include -#include #include +#include +#include #include #include @@ -24,19 +24,19 @@ using namespace sycl; size_t GEventsWaitCounter = 0; -inline pi_result redefinedEventsWait(pi_uint32 num_events, - const pi_event *event_list) { - if (num_events > 0) { +inline ur_result_t redefinedEventsWait(void *pParams) { + auto params = *static_cast(pParams); + if (*params.pnumEvents > 0) { GEventsWaitCounter++; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { GEventsWaitCounter = 0; - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore(redefinedEventsWait); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urEventWait", &redefinedEventsWait); context Ctx{Plt}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; @@ -53,31 +53,29 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { enum class CommandType { KERNEL = 1, MEMSET = 2 }; std::vector> ExecutedCommands; -inline pi_result customEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *, - pi_uint32 EventsCount, - const pi_event *, pi_event *) { - ExecutedCommands.push_back({CommandType::KERNEL, EventsCount}); - return PI_SUCCESS; +inline ur_result_t customEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); + ExecutedCommands.push_back( + {CommandType::KERNEL, *params.pnumEventsInWaitList}); + return UR_RESULT_SUCCESS; } -inline pi_result customextUSMEnqueueMemset(pi_queue, void *, const void *, - size_t, size_t, - pi_uint32 EventsCount, - const pi_event *, pi_event *) { - ExecutedCommands.push_back({CommandType::MEMSET, EventsCount}); - return PI_SUCCESS; + +inline ur_result_t customEnqueueUSMFill(void *pParams) { + auto params = *static_cast(pParams); + ExecutedCommands.push_back( + {CommandType::MEMSET, *params.pnumEventsInWaitList}); + return UR_RESULT_SUCCESS; } TEST_F(SchedulerTest, InOrderQueueCrossDeps) { ExecutedCommands.clear(); - sycl::unittest::PiMock Mock; - Mock.redefineBefore( - customEnqueueKernelLaunch); - Mock.redefineBefore( - customextUSMEnqueueMemset); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &customEnqueueKernelLaunch); + mock::getCallbacks().set_before_callback("urEnqueueUSMFill", + &customEnqueueUSMFill); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); context Ctx{Plt}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; @@ -124,13 +122,13 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) { TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { ExecutedCommands.clear(); - sycl::unittest::PiMock Mock; - Mock.redefineBefore( - customEnqueueKernelLaunch); - Mock.redefineBefore( - customextUSMEnqueueMemset); + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &customEnqueueKernelLaunch); + mock::getCallbacks().set_before_callback("urEnqueueUSMFill", + &customEnqueueUSMFill); - sycl::platform Plt = Mock.getPlatform(); + sycl::platform Plt = sycl::platform(); context Ctx{Plt}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index 255786a0ad4f0..7131ecafe0c04 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include @@ -70,8 +70,8 @@ class LimitedHandlerSimulation : public LimitedHandler { // Only check events dependency in queue_impl::finalizeHandler TEST_F(SchedulerTest, InOrderQueueSyncCheck) { - sycl::unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + platform Plt = sycl::platform(); const sycl::device Dev = Plt.get_devices()[0]; auto Queue = std::make_shared( diff --git a/sycl/unittests/scheduler/KernelFusion.cpp b/sycl/unittests/scheduler/KernelFusion.cpp index 025f8ece059eb..a3959fb4a94f4 100644 --- a/sycl/unittests/scheduler/KernelFusion.cpp +++ b/sycl/unittests/scheduler/KernelFusion.cpp @@ -9,9 +9,9 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include #include #include +#include #include @@ -74,8 +74,8 @@ bool dependsOnViaEvent(detail::Command *Dependent, detail::Command *Dependee) { } TEST_F(SchedulerTest, CancelKernelFusion) { - unittest::PiMock Mock; - platform Plt = Mock.getPlatform(); + unittest::UrMock<> Mock; + platform Plt = sycl::platform(); if (!CheckTestExecRequirements(Plt)) return; diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index b8aadcfb258f0..7d07dffedf56b 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -11,8 +11,8 @@ #include #include -#include #include +#include #include #include @@ -28,8 +28,8 @@ inline constexpr auto DisableCleanupName = // correctly with dependency tracking when leaf-limit for generic commands is // overflowed. TEST_F(SchedulerTest, LeafLimit) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; // All of the mock commands are owned on the test side, prevent post enqueue // cleanup from deleting some of them. diff --git a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp index 565c3b2a2314c..456dec8f0b5cb 100644 --- a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp +++ b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp @@ -10,8 +10,8 @@ #include "SchedulerTestUtils.hpp" #include -#include #include +#include #include #include @@ -37,7 +37,8 @@ TEST_F(SchedulerTest, LeafLimitDiffContexts) { detail::SYCLConfig::reset}; // Ensure the mock plugin has been initialized prior to selecting a device. - unittest::PiMock::EnsureMockPluginInitialized(); + // unittest::UrMock::EnsureMockPluginInitialized(); + sycl::unittest::UrMock<> Mock; device Device; struct QueueRelatedObjects { diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index e0732926537b0..5e17ac0b42334 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include @@ -44,8 +44,8 @@ std::shared_ptr createEmptyCommand(const Requirement &Req) { } TEST_F(LeavesCollectionTest, PushBack) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; static constexpr size_t GenericCmdsCapacity = 8; @@ -113,8 +113,8 @@ TEST_F(LeavesCollectionTest, PushBack) { } TEST_F(LeavesCollectionTest, Remove) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; static constexpr size_t GenericCmdsCapacity = 8; diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp index b08b211d1e2dc..1a5fa726170b8 100644 --- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp +++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp @@ -9,7 +9,7 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include +#include using namespace sycl; @@ -26,7 +26,7 @@ class MemObjMock : public sycl::detail::SYCLMemObjI { MemObjType getType() const override { return MemObjType::Buffer; } - void *allocateMem(ContextImplPtr, bool, void *, sycl::detail::pi::PiEvent &) { + void *allocateMem(ContextImplPtr, bool, void *, ur_event_handle_t &) { return nullptr; } @@ -51,8 +51,8 @@ static sycl::device getDeviceWithHostUnifiedMemory(sycl::platform &Plt) { } TEST_F(SchedulerTest, LinkedAllocaDependencies) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); sycl::device Dev = getDeviceWithHostUnifiedMemory(Plt); // 1. create two commands: alloca + alloca and link them diff --git a/sycl/unittests/scheduler/MemObjCommandCleanup.cpp b/sycl/unittests/scheduler/MemObjCommandCleanup.cpp index e89f5ac18c517..8bb44e68e1fe1 100644 --- a/sycl/unittests/scheduler/MemObjCommandCleanup.cpp +++ b/sycl/unittests/scheduler/MemObjCommandCleanup.cpp @@ -9,15 +9,15 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include +#include #include using namespace sycl; TEST_F(SchedulerTest, MemObjCommandCleanupAllocaUsers) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; MockScheduler MS; buffer BufA(range<1>(1)); @@ -59,8 +59,8 @@ TEST_F(SchedulerTest, MemObjCommandCleanupAllocaUsers) { } TEST_F(SchedulerTest, MemObjCommandCleanupAllocaDeps) { - sycl::unittest::PiMock Mock; - sycl::queue Q{Mock.getPlatform().get_devices()[0], MAsyncHandler}; + sycl::unittest::UrMock<> Mock; + sycl::queue Q{sycl::platform().get_devices()[0], MAsyncHandler}; MockScheduler MS; buffer Buf(range<1>(1)); diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp index 24a19977844fb..4c242a0c538df 100644 --- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp +++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp @@ -8,8 +8,9 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" +#include "ur_mock_helpers.hpp" -#include +#include #include @@ -18,77 +19,73 @@ using namespace sycl; -static pi_result redefinedDeviceGetInfoAfter(pi_device Device, - pi_device_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - if (ParamName == PI_DEVICE_INFO_HOST_UNIFIED_MEMORY) { - auto *Result = reinterpret_cast(ParamValue); +static ur_result_t redefinedDeviceGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_HOST_UNIFIED_MEMORY) { + auto *Result = reinterpret_cast(*params.ppPropValue); *Result = false; - } else if (ParamName == PI_DEVICE_INFO_TYPE) { - auto *Result = reinterpret_cast<_pi_device_type *>(ParamValue); - *Result = PI_DEVICE_TYPE_CPU; + } else if (*params.ppropName == UR_DEVICE_INFO_TYPE) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = UR_DEVICE_TYPE_CPU; } // This mock device has no sub-devices - if (ParamName == PI_DEVICE_INFO_PARTITION_PROPERTIES) { - if (ParamValueSizeRet) { - *ParamValueSizeRet = 0; + if (*params.ppropName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 0; } } - if (ParamName == PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - assert(ParamValueSize == sizeof(pi_device_affinity_domain)); - if (ParamValue) { - *static_cast(ParamValue) = 0; + if (*params.ppropName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { + assert(*params.ppropSize == sizeof(ur_device_affinity_domain_flags_t)); + if (*params.ppPropValue) { + *static_cast(*params.ppPropValue) = + 0; } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result -redefinedMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, - void *host_ptr, pi_mem *ret_mem, - const pi_mem_properties *properties = nullptr) { - EXPECT_EQ(flags, PI_MEM_FLAGS_ACCESS_RW); - return PI_SUCCESS; +static ur_result_t redefinedMemBufferCreate(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(*params.pflags, UR_MEM_FLAG_READ_WRITE); + return UR_RESULT_SUCCESS; } -static pi_context InteropPiContext = nullptr; -static pi_result redefinedMemGetInfoAfter(pi_mem mem, pi_mem_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - auto *Result = reinterpret_cast(param_value); - *Result = InteropPiContext; - return PI_SUCCESS; - - if (param_name == PI_MEM_CONTEXT) { - auto *Result = reinterpret_cast(param_value); - *Result = InteropPiContext; - } else if (param_name == PI_MEM_SIZE) { - auto *Result = reinterpret_cast(param_value); +static ur_context_handle_t InteropUrContext = nullptr; + +static ur_result_t redefinedMemGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = InteropUrContext; + return UR_RESULT_SUCCESS; + + if (*params.ppropName == UR_MEM_INFO_CONTEXT) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = InteropUrContext; + } else if (*params.ppropName == UR_MEM_INFO_SIZE) { + auto *Result = reinterpret_cast(*params.ppPropValue); *Result = 8; } } -static pi_result -redefinedMemCreateWithNativeHandle(pi_native_handle native_handle, - pi_context context, bool own_native_handle, - pi_mem *mem) { - *mem = detail::pi::cast(native_handle); - return PI_SUCCESS; + +static ur_result_t redefinedMemCreateWithNativeHandle(void *pParams) { + auto params = + *static_cast(pParams); + **params.pphMem = detail::ur::cast(*params.phNativeMem); + return UR_RESULT_SUCCESS; } TEST_F(SchedulerTest, NoHostUnifiedMemory) { - unittest::PiMock Mock; - queue Q{Mock.getPlatform().get_devices()[0]}; - Mock.redefineAfter( - redefinedDeviceGetInfoAfter); - Mock.redefineBefore( - redefinedMemBufferCreate); - Mock.redefineAfter(redefinedMemGetInfoAfter); - Mock.redefineBefore( - redefinedMemCreateWithNativeHandle); + unittest::UrMock<> Mock; + queue Q{sycl::platform().get_devices()[0]}; + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfoAfter); + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &redefinedMemBufferCreate); + mock::getCallbacks().set_after_callback("urMemGetInfo", + &redefinedMemGetInfoAfter); + mock::getCallbacks().set_before_callback("urMemBufferCreateWithNativeHandle", + &redefinedMemCreateWithNativeHandle); sycl::detail::QueueImplPtr QImpl = detail::getSyclObjImpl(Q); MockScheduler MS; @@ -191,16 +188,14 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { } // Check that interoperability memory objects are initialized. { - pi_mem MockInteropBuffer = nullptr; - pi_result PIRes = mock_piMemBufferCreate( - /*pi_context=*/0x0, /*pi_mem_flags=*/PI_MEM_FLAGS_ACCESS_RW, /*size=*/1, - /*host_ptr=*/nullptr, &MockInteropBuffer); - EXPECT_TRUE(PI_SUCCESS == PIRes); + ur_mem_handle_t MockInteropBuffer = + mock::createDummyHandle(); context InteropContext = Q.get_context(); - InteropPiContext = detail::getSyclObjImpl(InteropContext)->getHandleRef(); + InteropUrContext = detail::getSyclObjImpl(InteropContext)->getHandleRef(); auto BufI = std::make_shared( - detail::pi::cast(MockInteropBuffer), Q.get_context(), + detail::ur::cast(MockInteropBuffer), + Q.get_context(), std::make_unique< detail::SYCLMemObjAllocatorHolder, char>>(), /* OwnNativeHandle */ true, event()); diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index 4f1413fcd75a0..3a513ca98079c 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -8,40 +8,40 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" +#include "detail/event_impl.hpp" +#include "ur_mock_helpers.hpp" -#include +#include using namespace sycl; -static pi_queue ExpectedDepQueue = nullptr; +static ur_queue_handle_t ExpectedDepQueue = nullptr; static bool QueueFlushed = false; static bool EventStatusQueried = false; -static pi_event_status EventStatus = PI_EVENT_QUEUED; +static ur_event_status_t EventStatus = UR_EVENT_STATUS_QUEUED; -static pi_result redefinedQueueFlush(pi_queue Queue) { - EXPECT_EQ(ExpectedDepQueue, Queue); +static ur_result_t redefinedQueueFlush(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ(ExpectedDepQueue, *params.phQueue); EXPECT_FALSE(QueueFlushed); QueueFlushed = true; - EventStatus = PI_EVENT_SUBMITTED; - return PI_SUCCESS; + EventStatus = UR_EVENT_STATUS_SUBMITTED; + return UR_RESULT_SUCCESS; } -static pi_result redefinedEventGetInfoAfter(pi_event event, - pi_event_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - EXPECT_NE(event, nullptr); - if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - auto *Status = reinterpret_cast(param_value); +static ur_result_t redefinedEventGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_NE(*params.phEvent, nullptr); + if (*params.ppropName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + auto *Status = reinterpret_cast(*params.ppPropValue); *Status = EventStatus; EventStatusQueried = true; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } static void resetTestCtx() { - EventStatus = PI_EVENT_QUEUED; + EventStatus = UR_EVENT_STATUS_QUEUED; QueueFlushed = false; EventStatusQueried = false; } @@ -52,11 +52,9 @@ static void addDepAndEnqueue(detail::Command *Cmd, MockCommand DepCmd(DepQueue); std::vector ToCleanUp; - pi_event PIEvent = nullptr; - pi_result CallRet = mock_piEventCreate(/*pi_context=*/0x0, &PIEvent); - EXPECT_TRUE(PI_SUCCESS == CallRet); + ur_event_handle_t UREvent = mock::createDummyHandle(); - DepCmd.getEvent()->getHandleRef() = PIEvent; + DepCmd.getEvent()->getHandleRef() = UREvent; (void)Cmd->addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}, ToCleanUp); detail::EnqueueResultT Res; @@ -75,7 +73,7 @@ static void testCommandEnqueue(detail::Command *Cmd, static void testEventStatusCheck(detail::Command *Cmd, detail::QueueImplPtr &DepQueue, detail::Requirement &MockReq, - pi_event_status ReturnedEventStatus) { + ur_event_status_t ReturnedEventStatus) { resetTestCtx(); EventStatus = ReturnedEventStatus; addDepAndEnqueue(Cmd, DepQueue, MockReq); @@ -83,11 +81,12 @@ static void testEventStatusCheck(detail::Command *Cmd, } TEST_F(SchedulerTest, QueueFlushing) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore(redefinedQueueFlush); - Mock.redefineAfter( - redefinedEventGetInfoAfter); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urQueueFlush", + &redefinedQueueFlush); + mock::getCallbacks().set_after_callback("urEventGetInfo", + &redefinedEventGetInfoAfter); context Ctx{Plt}; queue QueueA{Ctx, default_selector_v}; @@ -100,14 +99,10 @@ TEST_F(SchedulerTest, QueueFlushing) { buffer Buf(&val, range<1>(1)); detail::Requirement MockReq = getMockRequirement(Buf); - pi_mem PIBuf = nullptr; - pi_result Ret = mock_piMemBufferCreate(/*pi_context=*/0x0, - PI_MEM_FLAGS_ACCESS_RW, /*size=*/1, - /*host_ptr=*/nullptr, &PIBuf); - EXPECT_TRUE(Ret == PI_SUCCESS); + ur_mem_handle_t URBuf = mock::createDummyHandle(); detail::AllocaCommand AllocaCmd = detail::AllocaCommand(QueueImplA, MockReq); - AllocaCmd.MMemAllocation = PIBuf; + AllocaCmd.MMemAllocation = URBuf; void *MockHostPtr; detail::EnqueueResultT Res; std::vector ToCleanUp; @@ -157,11 +152,9 @@ TEST_F(SchedulerTest, QueueFlushing) { detail::EventImplPtr DepEvent{new detail::event_impl(QueueImplB)}; DepEvent->setContextImpl(QueueImplB->getContextImplPtr()); - pi_event PIEvent = nullptr; - pi_result CallRet = mock_piEventCreate(/*pi_context=*/0x0, &PIEvent); - EXPECT_TRUE(PI_SUCCESS == CallRet); + ur_event_handle_t UREvent = mock::createDummyHandle(); - DepEvent->getHandleRef() = PIEvent; + DepEvent->getHandleRef() = UREvent; (void)Cmd.addDep(DepEvent, ToCleanUp); MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); EXPECT_TRUE(QueueFlushed); @@ -179,11 +172,9 @@ TEST_F(SchedulerTest, QueueFlushing) { DepEvent.reset(new detail::event_impl(TempQueueImpl)); DepEvent->setContextImpl(TempQueueImpl->getContextImplPtr()); - pi_event PIEvent = nullptr; - pi_result CallRet = mock_piEventCreate(/*pi_context=*/0x0, &PIEvent); - EXPECT_TRUE(PI_SUCCESS == CallRet); + ur_event_handle_t UREvent = mock::createDummyHandle(); - DepEvent->getHandleRef() = PIEvent; + DepEvent->getHandleRef() = UREvent; } (void)Cmd.addDep(DepEvent, ToCleanUp); MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); @@ -205,19 +196,15 @@ TEST_F(SchedulerTest, QueueFlushing) { access::mode::read_write}; MockCommand DepCmdA(QueueImplB); - pi_event PIEvent = nullptr; - pi_result CallRet = mock_piEventCreate(/*pi_context=*/0x0, &PIEvent); - EXPECT_TRUE(PI_SUCCESS == CallRet); + ur_event_handle_t UREvent = mock::createDummyHandle(); - DepCmdA.getEvent()->getHandleRef() = PIEvent; + DepCmdA.getEvent()->getHandleRef() = UREvent; (void)Cmd.addDep(detail::DepDesc{&DepCmdA, &MockReq, nullptr}, ToCleanUp); MockCommand DepCmdB(QueueImplB); - PIEvent = nullptr; - CallRet = mock_piEventCreate(/*pi_context=*/0x0, &PIEvent); - EXPECT_TRUE(PI_SUCCESS == CallRet); + UREvent = mock::createDummyHandle(); - DepCmdB.getEvent()->getHandleRef() = PIEvent; + DepCmdB.getEvent()->getHandleRef() = UREvent; (void)Cmd.addDep(detail::DepDesc{&DepCmdB, &MockReq, nullptr}, ToCleanUp); // The check is performed in redefinedQueueFlush MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); @@ -230,11 +217,9 @@ TEST_F(SchedulerTest, QueueFlushing) { access::mode::read_write}; MockCommand DepCmd(QueueImplB); - pi_event PIEvent = nullptr; - pi_result CallRet = mock_piEventCreate(/*pi_context=*/0x0, &PIEvent); - EXPECT_TRUE(PI_SUCCESS == CallRet); + ur_event_handle_t UREvent = mock::createDummyHandle(); - DepCmd.getEvent()->getHandleRef() = PIEvent; + DepCmd.getEvent()->getHandleRef() = UREvent; (void)CmdA.addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}, ToCleanUp); MockScheduler::enqueueCommand(&CmdA, Res, detail::NON_BLOCKING); @@ -250,16 +235,16 @@ TEST_F(SchedulerTest, QueueFlushing) { { detail::MapMemObject CmdA{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, access::mode::read_write}; - testEventStatusCheck(&CmdA, QueueImplB, MockReq, PI_EVENT_SUBMITTED); + testEventStatusCheck(&CmdA, QueueImplB, MockReq, UR_EVENT_STATUS_SUBMITTED); detail::MapMemObject CmdB{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, access::mode::read_write}; - testEventStatusCheck(&CmdB, QueueImplB, MockReq, PI_EVENT_RUNNING); + testEventStatusCheck(&CmdB, QueueImplB, MockReq, UR_EVENT_STATUS_RUNNING); detail::MapMemObject CmdC{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, access::mode::read_write}; - testEventStatusCheck(&CmdC, QueueImplB, MockReq, PI_EVENT_COMPLETE); + testEventStatusCheck(&CmdC, QueueImplB, MockReq, UR_EVENT_STATUS_COMPLETE); } - // Check that nullptr pi_events are handled correctly. + // Check that nullptr UR event handles are handled correctly. { resetTestCtx(); detail::MapMemObject CmdA{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, diff --git a/sycl/unittests/scheduler/RequiredWGSize.cpp b/sycl/unittests/scheduler/RequiredWGSize.cpp index 29b9f8fc2b8ac..3cfa1958ed524 100644 --- a/sycl/unittests/scheduler/RequiredWGSize.cpp +++ b/sycl/unittests/scheduler/RequiredWGSize.cpp @@ -10,8 +10,8 @@ #include #include -#include -#include +#include +#include #include #include @@ -24,37 +24,31 @@ bool KernelGetGroupInfoCalled = false; std::array IncomingLocalSize = {0, 0, 0}; std::array RequiredLocalSize = {0, 0, 0}; -static pi_result redefinedKernelGetGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { +static ur_result_t redefinedKernelGetGroupInfo(void *pParams) { + auto params = *static_cast(pParams); KernelGetGroupInfoCalled = true; - if (param_name == PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE) { - if (param_value_size_ret) { - *param_value_size_ret = 3 * sizeof(size_t); - } else if (param_value) { - auto size = static_cast(param_value); + if (*params.ppropName == UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE) { + if (*params.ppPropSizeRet) { + **params.ppPropSizeRet = 3 * sizeof(size_t); + } else if (*params.ppPropValue) { + auto size = static_cast(*params.ppPropValue); size[0] = RequiredLocalSize[0]; size[1] = RequiredLocalSize[1]; size[2] = RequiredLocalSize[2]; } } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } -static pi_result redefinedEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *LocalSize, - pi_uint32, const pi_event *, - pi_event *) { - if (LocalSize) { - IncomingLocalSize[0] = LocalSize[0]; - IncomingLocalSize[1] = LocalSize[1]; - IncomingLocalSize[2] = LocalSize[2]; +static ur_result_t redefinedEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppLocalWorkSize) { + IncomingLocalSize[0] = (*params.ppLocalWorkSize)[0]; + IncomingLocalSize[1] = (*params.ppLocalWorkSize)[1]; + IncomingLocalSize[2] = (*params.ppLocalWorkSize)[2]; } - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } static void reset() { @@ -64,12 +58,12 @@ static void reset() { } static void performChecks() { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedEnqueueKernelLaunch); - Mock.redefineBefore( - redefinedKernelGetGroupInfo); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunch); + mock::getCallbacks().set_before_callback("urKernelGetGroupInfo", + &redefinedKernelGetGroupInfo); const sycl::device Dev = Plt.get_devices()[0]; sycl::queue Queue{Dev}; diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 97426dd06097c..549f836fd2c45 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -60,7 +60,7 @@ class MockCommand : public sycl::detail::Command { return &MRequirement; }; - cl_int enqueueImp() override { return MRetVal; } + ur_result_t enqueueImp() override { return MRetVal; } MOCK_METHOD3(enqueue, bool(sycl::detail::EnqueueResultT &, sycl::detail::BlockingT, @@ -71,12 +71,12 @@ class MockCommand : public sycl::detail::Command { return sycl::detail::Command::enqueue(EnqueueResult, Blocking, ToCleanUp); } - cl_int MRetVal = CL_SUCCESS; + ur_result_t MRetVal = UR_RESULT_SUCCESS; void waitForEventsCall( std::shared_ptr Queue, std::vector> &RawEvents, - pi_event &Event) { + ur_event_handle_t &Event) { Command::waitForEvents(Queue, RawEvents, Event); } diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index a45a1cb6842cb..852d59a43e123 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -11,8 +11,8 @@ #include #include -#include #include +#include using namespace sycl; diff --git a/sycl/unittests/stream/stream.cpp b/sycl/unittests/stream/stream.cpp index c25e744801f6d..0811abff8cf77 100644 --- a/sycl/unittests/stream/stream.cpp +++ b/sycl/unittests/stream/stream.cpp @@ -8,8 +8,8 @@ #include -#include -#include +#include +#include #include @@ -19,20 +19,18 @@ size_t GBufferCreateCounter = 0; -static pi_result -redefinedMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, - void *host_ptr, pi_mem *ret_mem, - const pi_mem_properties *properties = nullptr) { +static ur_result_t redefinedMemBufferCreate(void *pParams) { + auto params = *static_cast(pParams); ++GBufferCreateCounter; - *ret_mem = nullptr; - return PI_SUCCESS; + **params.pphBuffer = nullptr; + return UR_RESULT_SUCCESS; } TEST(Stream, TestStreamConstructorExceptionNoAllocation) { - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - redefinedMemBufferCreate); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urMemBufferCreate", + &redefinedMemBufferCreate); const sycl::device Dev = Plt.get_devices()[0]; sycl::context Ctx{Dev}; diff --git a/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp b/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp index 79f19504abea1..ca54cf0d908d6 100644 --- a/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp +++ b/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp @@ -9,10 +9,11 @@ #include #include #include -#include +#include #include #include "ThreadUtils.h" +#include "ur_mock_helpers.hpp" namespace { using namespace sycl; @@ -21,30 +22,30 @@ constexpr std::size_t NArgs = 16; constexpr std::size_t ThreadCount = 4; constexpr std::size_t LaunchCount = 8; -pi_uint32 LastArgSet = -1; +uint32_t LastArgSet = -1; std::size_t LastThread = -1; -pi_result redefined_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, - size_t arg_size, const void *arg_value) { - EXPECT_EQ((LastArgSet + 1) % NArgs, arg_index); - LastArgSet = arg_index; - std::size_t ArgValue = *static_cast(arg_value); - if (arg_index == 0) +ur_result_t redefined_urKernelSetArgValue(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ((LastArgSet + 1) % NArgs, *params.pargIndex); + LastArgSet = *params.pargIndex; + std::size_t ArgValue = *static_cast(*params.ppArgValue); + if (*params.pargIndex == 0) LastThread = ArgValue; else EXPECT_EQ(LastThread, ArgValue); - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } TEST(KernelEnqueue, InteropKernel) { - unittest::PiMock Mock; + unittest::UrMock<> Mock; redefineMockForKernelInterop(Mock); - Mock.redefine( - redefined_piKernelSetArg); + mock::getCallbacks().set_replace_callback("urKernelSetArgValue", + &redefined_urKernelSetArgValue); - platform Plt = Mock.getPlatform(); + platform Plt = sycl::platform(); queue Q; - DummyHandleT Handle; + ur_native_handle_t Handle = mock::createDummyHandle(); auto KernelCL = reinterpret_cast::template input_type>(&Handle); auto Kernel = diff --git a/sycl/unittests/ur/CMakeLists.txt b/sycl/unittests/ur/CMakeLists.txt new file mode 100644 index 0000000000000..ce8e0e207c716 --- /dev/null +++ b/sycl/unittests/ur/CMakeLists.txt @@ -0,0 +1,8 @@ +set(CMAKE_CXX_EXTENSIONS OFF) + +add_sycl_unittest(UrTests OBJECT + UrUtility.cpp +) + +add_dependencies(UrTests sycl) +target_include_directories(UrTests PRIVATE SYSTEM ${sycl_inc_dir}) diff --git a/sycl/unittests/ur/UrUtility.cpp b/sycl/unittests/ur/UrUtility.cpp new file mode 100644 index 0000000000000..5c06ad861a765 --- /dev/null +++ b/sycl/unittests/ur/UrUtility.cpp @@ -0,0 +1,26 @@ +//==--------------------- UrUtility.cpp -- check for internal ur utilities -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include + +namespace { +using namespace sycl; + +TEST(UrUtilityTest, CheckUrCastOCLEventVector) { + // Current special case for vectors of OpenCL vectors. This may change in the + // future. + std::vector EVec{(cl_event)0}; + ur_native_handle_t ENativeHandle = detail::ur::cast(EVec); + EXPECT_EQ(ENativeHandle, (ur_native_handle_t)EVec[0]); +} + +} // namespace diff --git a/sycl/unittests/windows/dllmain.cpp b/sycl/unittests/windows/dllmain.cpp index 5a2ebc5e6a421..f99364fe11720 100644 --- a/sycl/unittests/windows/dllmain.cpp +++ b/sycl/unittests/windows/dllmain.cpp @@ -12,8 +12,8 @@ * distinct binary executable. */ -#include -#include +#include +#include #include #include @@ -26,19 +26,23 @@ extern "C" BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, static std::atomic TearDownCalls{0}; -pi_result redefinedTearDown(void *PluginParameter) { +// Before the port this was an override for LoaderTearDown, UR's mock +// functionality can't override loader functions but AdapterRelease is called +// in the runtime in the same place as LoaderTearDown +ur_result_t redefinedAdapterRelease(void *) { fprintf(stderr, "intercepted tear down\n"); ++TearDownCalls; - return PI_SUCCESS; + return UR_RESULT_SUCCESS; } #endif TEST(Windows, DllMainCall) { #ifdef _WIN32 - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore(redefinedTearDown); + sycl::unittest::UrMock<> Mock; + sycl::platform Plt = sycl::platform(); + mock::getCallbacks().set_before_callback("urAdapterRelease", + &redefinedAdapterRelease); // Teardown calls are only expected on sycl.dll library unload, not when // process gets terminated. diff --git a/sycl/unittests/xpti_trace/NodeCreation.cpp b/sycl/unittests/xpti_trace/NodeCreation.cpp index 9bafd6f1e7307..257000e3ec2f4 100644 --- a/sycl/unittests/xpti_trace/NodeCreation.cpp +++ b/sycl/unittests/xpti_trace/NodeCreation.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include #include #include +#include #include @@ -42,7 +42,7 @@ class NodeCreation : public ::testing::Test { "libxptifw.so", [] {}}; unittest::ScopedEnvVar XPTISubscriber{"XPTI_SUBSCRIBERS", "libxptitest_subscriber.so", [] {}}; - sycl::unittest::PiMock MockPlugin; + sycl::unittest::UrMock<> MockPlugin; static constexpr char FileName[] = "NodeCreation.cpp"; static constexpr char FunctionName[] = "TestCaseExecution"; diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index c634f2dfce299..a250f213cb998 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include #include #include +#include #include @@ -26,16 +26,12 @@ XPTI_CALLBACK_API bool queryReceivedNotifications(uint16_t &TraceType, XPTI_CALLBACK_API void resetReceivedNotifications(); XPTI_CALLBACK_API void addAnalyzedTraceType(uint16_t); -inline pi_result redefinedPluginGetLastError(char **message) { - return PI_ERROR_INVALID_VALUE; +inline ur_result_t redefinedAdapterGetLastError(void *) { + return UR_RESULT_ERROR_INVALID_VALUE; } -pi_result redefinedEnqueueKernelLaunch( - pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; +ur_result_t redefinedEnqueueKernelLaunch(void *) { + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } class QueueApiFailures : public ::testing::Test { @@ -70,7 +66,7 @@ class QueueApiFailures : public ::testing::Test { "libxptifw.so", [] {}}; unittest::ScopedEnvVar XPTISubscriber{"XPTI_SUBSCRIBERS", "libxptitest_subscriber.so", [] {}}; - sycl::unittest::PiMock MockPlugin; + sycl::unittest::UrMock<> MockPlugin; static constexpr char FileName[] = "QueueApiFailures.cpp"; static constexpr char FunctionName[] = "TestCaseExecution"; @@ -96,10 +92,10 @@ class QueueApiFailures : public ::testing::Test { }; TEST_F(QueueApiFailures, QueueSubmit) { - MockPlugin.redefine( - redefinedEnqueueKernelLaunch); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; try { @@ -123,10 +119,10 @@ TEST_F(QueueApiFailures, QueueSubmit) { } TEST_F(QueueApiFailures, QueueSingleTask) { - MockPlugin.redefine( - redefinedEnqueueKernelLaunch); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; try { @@ -145,20 +141,15 @@ TEST_F(QueueApiFailures, QueueSingleTask) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, - const void *Pattern, size_t PatternSize, - size_t Count, - pi_uint32 Num_events_in_waitlist, - const pi_event *Events_waitlist, - pi_event *Event) { - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; +ur_result_t redefinedEnqueueUSMFill(void *) { + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } TEST_F(QueueApiFailures, QueueMemset) { - MockPlugin.redefine( - redefinedUSMEnqueueMemset); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueUSMFill", + &redefinedEnqueueUSMFill); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; unsigned char *HostAlloc = (unsigned char *)sycl::malloc_host(1, Q); @@ -179,20 +170,15 @@ TEST_F(QueueApiFailures, QueueMemset) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, - void *dst_ptr, const void *src_ptr, - size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; +ur_result_t redefinedUSMEnqueueMemcpy(void *) { + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } TEST_F(QueueApiFailures, QueueMemcpy) { - MockPlugin.redefine( - redefinedUSMEnqueueMemcpy); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", + &redefinedUSMEnqueueMemcpy); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; unsigned char *HostAllocSrc = (unsigned char *)sycl::malloc_host(1, Q); @@ -216,10 +202,10 @@ TEST_F(QueueApiFailures, QueueMemcpy) { } TEST_F(QueueApiFailures, QueueCopy) { - MockPlugin.redefine( - redefinedUSMEnqueueMemcpy); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", + &redefinedUSMEnqueueMemcpy); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; unsigned char *HostAllocSrc = (unsigned char *)sycl::malloc_host(1, Q); @@ -242,19 +228,11 @@ TEST_F(QueueApiFailures, QueueCopy) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedUSMEnqueueFill(pi_queue Queue, void *Ptr, - const void *Pattern, size_t PatternSize, - size_t Count, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; -} - TEST_F(QueueApiFailures, QueueFill) { - MockPlugin.redefine( - redefinedUSMEnqueueFill); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueUSMFill", + &redefinedEnqueueUSMFill); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; unsigned char *HostAlloc = (unsigned char *)sycl::malloc_host(1, Q); @@ -275,20 +253,15 @@ TEST_F(QueueApiFailures, QueueFill) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -inline pi_result redefinedUSMEnqueuePrefetch(pi_queue queue, const void *ptr, - size_t size, - pi_usm_migration_flags flags, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; +inline ur_result_t redefinedUSMEnqueuePrefetch(void *) { + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } TEST_F(QueueApiFailures, QueuePrefetch) { - MockPlugin.redefine( - redefinedUSMEnqueuePrefetch); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueUSMPrefetch", + &redefinedUSMEnqueuePrefetch); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; unsigned char *HostAlloc = (unsigned char *)sycl::malloc_host(4, Q); @@ -309,18 +282,15 @@ TEST_F(QueueApiFailures, QueuePrefetch) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -inline pi_result redefinedUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, - size_t length, - pi_mem_advice advice, - pi_event *event) { - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; +inline ur_result_t redefinedUSMEnqueueMemAdvise(void *) { + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } TEST_F(QueueApiFailures, QueueMemAdvise) { - MockPlugin.redefine( - redefinedUSMEnqueueMemAdvise); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueUSMAdvise", + &redefinedUSMEnqueueMemAdvise); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; unsigned char *HostAlloc = (unsigned char *)sycl::malloc_host(1, Q); @@ -342,10 +312,10 @@ TEST_F(QueueApiFailures, QueueMemAdvise) { } TEST_F(QueueApiFailures, QueueParallelFor) { - MockPlugin.redefine( - redefinedEnqueueKernelLaunch); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunch); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q; bool ExceptionCaught = false; const int globalWIs{512}; @@ -365,9 +335,8 @@ TEST_F(QueueApiFailures, QueueParallelFor) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -inline pi_result redefinedEventsWait(pi_uint32 num_events, - const pi_event *event_list) { - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; +inline ur_result_t redefinedEventWait(void *) { + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } inline void silentAsyncHandler(exception_list Exceptions) { @@ -375,9 +344,9 @@ inline void silentAsyncHandler(exception_list Exceptions) { } TEST_F(QueueApiFailures, QueueHostTaskWaitFail) { - MockPlugin.redefine(redefinedEventsWait); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urEventWait", &redefinedEventWait); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q(default_selector(), silentAsyncHandler); bool ExceptionCaught = false; event EventToDepend; @@ -414,8 +383,8 @@ TEST_F(QueueApiFailures, QueueHostTaskWaitFail) { } TEST_F(QueueApiFailures, QueueHostTaskFail) { - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); enum ExceptionType { STD_EXCEPTION = 0, SYCL_EXCEPTION }; auto Test = [&](ExceptionType ExType) { sycl::queue Q(default_selector(), silentAsyncHandler); @@ -466,24 +435,20 @@ std::mutex m; std::condition_variable cv; bool EnqueueKernelLaunchCalled = false; -pi_result redefinedEnqueueKernelLaunchWithStatus( - pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { +ur_result_t redefinedEnqueueKernelLaunchWithStatus(void *) { { std::lock_guard lk(m); EnqueueKernelLaunchCalled = true; } cv.notify_one(); - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } TEST_F(QueueApiFailures, QueueKernelAsync) { - MockPlugin.redefine( - redefinedEnqueueKernelLaunchWithStatus); - MockPlugin.redefine( - redefinedPluginGetLastError); + mock::getCallbacks().set_replace_callback( + "urEnqueueKernelLaunch", &redefinedEnqueueKernelLaunchWithStatus); + mock::getCallbacks().set_replace_callback("urAdapterGetLastError", + &redefinedAdapterGetLastError); sycl::queue Q(default_selector(), silentAsyncHandler); bool ExceptionCaught = false; diff --git a/sycl/unittests/xpti_trace/QueueIDCheck.cpp b/sycl/unittests/xpti_trace/QueueIDCheck.cpp index 1baf72b87a59a..3824d433d6057 100644 --- a/sycl/unittests/xpti_trace/QueueIDCheck.cpp +++ b/sycl/unittests/xpti_trace/QueueIDCheck.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include #include #include +#include #include #include @@ -46,7 +46,7 @@ class QueueID : public ::testing::Test { "libxptifw.so", [] {}}; unittest::ScopedEnvVar XPTISubscriber{"XPTI_SUBSCRIBERS", "libxptitest_subscriber.so", [] {}}; - sycl::unittest::PiMock MockPlugin; + sycl::unittest::UrMock<> MockPlugin; static constexpr size_t KernelSize = 1; @@ -65,19 +65,19 @@ class QueueID : public ::testing::Test { } }; -pi_queue QueueHandle = nullptr; -inline pi_result redefinedQueueCreate(pi_context, pi_device, - pi_queue_properties *, pi_queue *queue) { +ur_queue_handle_t QueueHandle = nullptr; +inline ur_result_t redefinedQueueCreate(void *pParams) { + auto params = *static_cast(pParams); QueueHandle = nullptr; - if (queue) - QueueHandle = *queue; - return PI_SUCCESS; + if (*params.pphQueue) + QueueHandle = **params.pphQueue; + return UR_RESULT_SUCCESS; } TEST_F(QueueID, QueueID_QueueCreationAndDestroy) { - sycl::platform Plt{MockPlugin.getPlatform()}; - MockPlugin.redefineAfter( - redefinedQueueCreate); + sycl::platform Plt{sycl::platform()}; + mock::getCallbacks().set_after_callback("urQueueCreate", + &redefinedQueueCreate); sycl::context Context{Plt}; addAnalyzedTraceType(xpti::trace_queue_create); addAnalyzedTraceType(xpti::trace_queue_destroy); diff --git a/sycl/pi_win_proxy_loader/CMakeLists.txt b/sycl/ur_win_proxy_loader/CMakeLists.txt similarity index 70% rename from sycl/pi_win_proxy_loader/CMakeLists.txt rename to sycl/ur_win_proxy_loader/CMakeLists.txt index 297a9dcd59598..107f01991f9da 100644 --- a/sycl/pi_win_proxy_loader/CMakeLists.txt +++ b/sycl/ur_win_proxy_loader/CMakeLists.txt @@ -1,19 +1,19 @@ -project(pi_win_proxy_loader) +project(ur_win_proxy_loader) set_property(SOURCE ${CMAKE_CURRENT_BINARY_DIR}/versioninfo.rc PROPERTY COMPILE_DEFINITIONS - "RC_VERSION_FIELD_1=${SYCL_MAJOR_VERSION}" + "RC_VERSION_FIELD_1=${SYCL_MAJOR_VERSION}" "RC_VERSION_FIELD_2=${SYCL_MAJOR_VERSION}" "RC_VERSION_FIELD_3=${SYCL_PATCH_VERSION}" "RC_VERSION_FIELD_4=${SYCL_DEV_ABI_VERSION}" "RC_FILE_VERSION=\"${SYCL_VERSION_STRING}\"" - "RC_INTERNAL_NAME=\"pi_win_proxy_loader\"" - "RC_PRODUCT_NAME=\"pi_win_proxy_loader\"" + "RC_INTERNAL_NAME=\"ur_win_proxy_loader\"" + "RC_PRODUCT_NAME=\"ur_win_proxy_loader\"" "RC_PRODUCT_VERSION=\"${SYCL_VERSION_STRING}\"" "RC_COPYRIGHT=\"Copyright (C) 2023 Intel Inc. All Rights Reserved\"") configure_file(../../llvm/resources/windows_version_resource.rc ${CMAKE_CURRENT_BINARY_DIR}/versioninfo.rc @ONLY) -add_library(pi_win_proxy_loader SHARED pi_win_proxy_loader.cpp ${CMAKE_CURRENT_BINARY_DIR}/versioninfo.rc) -install(TARGETS pi_win_proxy_loader - RUNTIME DESTINATION "bin" COMPONENT pi_win_proxy_loader +add_library(ur_win_proxy_loader SHARED ur_win_proxy_loader.cpp ${CMAKE_CURRENT_BINARY_DIR}/versioninfo.rc) +install(TARGETS ur_win_proxy_loader + RUNTIME DESTINATION "bin" COMPONENT ur_win_proxy_loader ) if (MSVC) @@ -50,20 +50,18 @@ if (MSVC) set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "") set(CMAKE_CXX_FLAGS_DEBUG "") -# Handle the debug version for the Microsoft compiler as a special case by -# creating a debug version of the static library that uses the flags used by -# the SYCL runtime - add_library(pi_win_proxy_loaderd SHARED pi_win_proxy_loader.cpp ${CMAKE_CURRENT_BINARY_DIR}/versioninfo.rc) - target_compile_options(pi_win_proxy_loaderd PRIVATE ${WINUNLOAD_CXX_FLAGS_DEBUG}) - target_compile_options(pi_win_proxy_loader PRIVATE ${WINUNLOAD_CXX_FLAGS_RELEASE}) - target_link_libraries(pi_win_proxy_loaderd PRIVATE shlwapi) - target_link_libraries(pi_win_proxy_loader PRIVATE shlwapi) + # Handle the debug version for the Microsoft compiler as a special case by + # creating a debug version of the static library that uses the flags used by + # the SYCL runtime + add_library(ur_win_proxy_loaderd SHARED ur_win_proxy_loader.cpp ${CMAKE_CURRENT_BINARY_DIR}/versioninfo.rc) + target_compile_options(ur_win_proxy_loaderd PRIVATE ${WINUNLOAD_CXX_FLAGS_DEBUG}) + target_compile_options(ur_win_proxy_loader PRIVATE ${WINUNLOAD_CXX_FLAGS_RELEASE}) + target_link_libraries(ur_win_proxy_loaderd PRIVATE shlwapi) + target_link_libraries(ur_win_proxy_loader PRIVATE shlwapi) # 0x2000: LOAD_LIBRARY_SAFE_CURRENT_DIRS flag. Using this flag means that loading dependency DLLs # from the current directory is only allowed if it is under a directory in the Safe load list. - target_link_options(pi_win_proxy_loaderd PRIVATE /DEPENDENTLOADFLAG:0x2000) - target_link_options(pi_win_proxy_loader PRIVATE /DEPENDENTLOADFLAG:0x2000) - install(TARGETS pi_win_proxy_loaderd - RUNTIME DESTINATION "bin" COMPONENT pi_win_proxy_loader) + target_link_options(ur_win_proxy_loaderd PRIVATE /DEPENDENTLOADFLAG:0x2000) + target_link_options(ur_win_proxy_loader PRIVATE /DEPENDENTLOADFLAG:0x2000) + install(TARGETS ur_win_proxy_loaderd + RUNTIME DESTINATION "bin" COMPONENT ur_win_proxy_loader) endif() - - diff --git a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp b/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp similarity index 83% rename from sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp rename to sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp index 2376ee638c4d2..7538475eb9616 100644 --- a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp +++ b/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp @@ -1,4 +1,4 @@ -//==------------ pi_win_proxy_loader.cpp - SYCL standard source file ------==// +//==------------ ur_win_proxy_loader.cpp - SYCL standard source file ------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -14,10 +14,10 @@ // (static var destruction or DllMain() can both occur after) // The workaround is this proxy_loader. It is statically linked by the SYCL // library and thus is a real dependency and is not unloaded from memory until -// after SYCL itself is unloaded. It calls LoadLibrary on all the PI Plugins +// after SYCL itself is unloaded. It calls LoadLibrary on all the UR adapters // that SYCL will use during its initialization, which ensures that those plugin // DLLs are not unloaded until after. -// Note that this property is not transitive. If any of the PI DLLs in turn +// Note that this property is not transitive. If any of the UR DLLs in turn // dynamically load some other DLL during their lifecycle there is no guarantee // that the "grandchild" won't be unloaded early. They would need to employ a // similar approach. @@ -39,7 +39,7 @@ #include #include -#include "pi_win_proxy_loader.hpp" +#include "ur_win_proxy_loader.hpp" #ifdef _WIN32 @@ -83,22 +83,22 @@ std::wstring getCurrentDSODir() { return Path; } -// these are cribbed from include/sycl/detail/pi.hpp +// these are cribbed from include/sycl/detail/ur.hpp // a new plugin must be added to both places. #ifdef _MSC_VER -#define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" -#define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" -#define __SYCL_HIP_PLUGIN_NAME "pi_hip.dll" -#define __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME "pi_unified_runtime.dll" -#define __SYCL_NATIVE_CPU_PLUGIN_NAME "pi_native_cpu.dll" +#define __SYCL_UNIFIED_RUNTIME_LOADER_NAME "ur_loader.dll" +#define __SYCL_OPENCL_ADAPTER_NAME "ur_adapter_opencl.dll" +#define __SYCL_LEVEL_ZERO_ADAPTER_NAME "ur_adapter_level_zero.dll" +#define __SYCL_CUDA_ADAPTER_NAME "ur_adapter_cuda.dll" +#define __SYCL_HIP_ADAPTER_NAME "ur_adapter_hip.dll" +#define __SYCL_NATIVE_CPU_ADAPTER_NAME "ur_adapter_native_cpu.dll" #else // llvm-mingw -#define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dll" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dll" -#define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dll" -#define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" -#define __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME "libpi_unified_runtime.dll" -#define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dll" +#define __SYCL_UNIFIED_RUNTIME_LOADER_NAME "libur_loader.dll" +#define __SYCL_OPENCL_ADAPTER_NAME "libur_adapter_opencl.dll" +#define __SYCL_LEVEL_ZERO_ADAPTER_NAME "libur_adapter_level_zero.dll" +#define __SYCL_CUDA_ADAPTER_NAME "libur_adapter_cuda.dll" +#define __SYCL_HIP_ADAPTER_NAME "libur_adapter_hip.dll" +#define __SYCL_NATIVE_CPU_ADAPTER_NAME "libur_adapter_native_cpu.dll" #endif // ------------------------------------ @@ -127,7 +127,7 @@ void preloadLibraries() { assert(false && "Failed to update DLL search path"); } - // this path duplicates sycl/detail/pi.cpp:initializePlugins + // this path duplicates sycl/detail/ur.cpp:initializePlugins std::filesystem::path LibSYCLDir(getCurrentDSODir()); MapT &dllMap = getDllMap(); @@ -142,12 +142,12 @@ void preloadLibraries() { auto path = LibSYCLDir / pluginName; dllMap.emplace(path, LoadLibraryEx(path.wstring().c_str(), NULL, flags)); }; - loadPlugin(__SYCL_OPENCL_PLUGIN_NAME); - loadPlugin(__SYCL_LEVEL_ZERO_PLUGIN_NAME); - loadPlugin(__SYCL_CUDA_PLUGIN_NAME); - loadPlugin(__SYCL_HIP_PLUGIN_NAME); - loadPlugin(__SYCL_UNIFIED_RUNTIME_PLUGIN_NAME); - loadPlugin(__SYCL_NATIVE_CPU_PLUGIN_NAME); + loadPlugin(__SYCL_UNIFIED_RUNTIME_LOADER_NAME); + loadPlugin(__SYCL_OPENCL_ADAPTER_NAME); + loadPlugin(__SYCL_LEVEL_ZERO_ADAPTER_NAME); + loadPlugin(__SYCL_CUDA_ADAPTER_NAME); + loadPlugin(__SYCL_HIP_ADAPTER_NAME); + loadPlugin(__SYCL_NATIVE_CPU_ADAPTER_NAME); // Restore system error handling. (void)SetErrorMode(SavedMode); @@ -208,14 +208,14 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module switch (fdwReason) { case DLL_PROCESS_ATTACH: if (PrintPiTrace) - std::cout << "---> DLL_PROCESS_ATTACH pi_win_proxy_loader.dll\n" + std::cout << "---> DLL_PROCESS_ATTACH ur_win_proxy_loader.dll\n" << std::endl; preloadLibraries(); break; case DLL_PROCESS_DETACH: if (PrintPiTrace) - std::cout << "---> DLL_PROCESS_DETACH pi_win_proxy_loader.dll\n" + std::cout << "---> DLL_PROCESS_DETACH ur_win_proxy_loader.dll\n" << std::endl; break; case DLL_THREAD_ATTACH: diff --git a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.hpp b/sycl/ur_win_proxy_loader/ur_win_proxy_loader.hpp similarity index 88% rename from sycl/pi_win_proxy_loader/pi_win_proxy_loader.hpp rename to sycl/ur_win_proxy_loader/ur_win_proxy_loader.hpp index e1ab2b69611c7..5c1b887fe11f4 100644 --- a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.hpp +++ b/sycl/ur_win_proxy_loader/ur_win_proxy_loader.hpp @@ -1,4 +1,4 @@ -//==------------ pi_win_proxy_loader.hpp - SYCL standard header file ------==// +//==------------ ur_win_proxy_loader.hpp - SYCL standard header file ------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.