From 2276f329723ef946e73efa2c9ab03062e9642e92 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Wed, 25 Oct 2023 17:16:40 -0600 Subject: [PATCH] Mergeback 6.0 fixes into develop (#306) * Separate gfx942 specific code (#289) Co-authored-by: Stanley Tsang * Split rocm-cmake dependency out before hip include (#293) * Split rocm-cmake dependency out before hip include * Update comments * Fix cpp-check reported issues Fixed a number of issues that static analysis picked up: - Made some functions const since they don't modify member state - Made some parameters const, since they're never modified - Fixes for several benchmark/test functions - Removed unused variable declarations - Added missing input data transfer from host to device - Added some member variables to constructor initializer list - Added override keyword in several places - Fixed up item placeholders in some printf statements * Fix cpp-check reported issues * Removed host-to-device data transfer from memcpy benchmark. Since this benchmark only tests memcpy performance between device buffers, we don't really need to copy data into these from the host.
* update googlebenchmark version (#302) * Avoid a segmentation fault when clearing cached blocks (#297) (#305) Co-authored-by: Tom Benson --------- Co-authored-by: Eiden Yoshida <47196116+eidenyoshida@users.noreply.github.com> Co-authored-by: Lauren Wrubleski Co-authored-by: Wayne Franz Co-authored-by: Tom Benson --- CMakeLists.txt | 14 +++-- cmake/Dependencies.cmake | 34 +----------- cmake/ROCmCMakeBuildToolsDependency.cmake | 53 +++++++++++++++++++ .../backend/rocprim/thread/thread_load.hpp | 7 ++- .../backend/rocprim/thread/thread_store.hpp | 9 +++- 5 files changed, 76 insertions(+), 41 deletions(-) create mode 100644 cmake/ROCmCMakeBuildToolsDependency.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 88557bb9..930a4790 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,11 +64,9 @@ set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -# Find and verify HIP. -include(VerifyCompiler) - -# Get dependencies (required here to get rocm-cmake) -include(Dependencies) +# rocm-cmake has to be included early so that it's available to set GPU_TARGETS +# If hip is included prior to setting that then it defaults to building only for the current architecture +include(ROCmCMakeBuildToolsDependency) # Setup GPU targets for rocm platform if(NOT (CMAKE_CXX_COMPILER MATCHES ".*nvcc$" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) @@ -87,6 +85,12 @@ if(NOT (CMAKE_CXX_COMPILER MATCHES ".*nvcc$" OR "${CMAKE_CXX_COMPILER_ID}" STREQ endif() endif() +# Find and verify HIP. 
+include(VerifyCompiler) + +# Get dependencies (except rocm-cmake, included earlier) +include(Dependencies) + if(BUILD_ADDRESS_SANITIZER) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index df9ccb67..3653f492 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -126,7 +126,7 @@ if(USER_BUILD_BENCHMARK) FetchContent_Declare( googlebench GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v1.6.1 + GIT_TAG v1.8.0 ) FetchContent_MakeAvailable(googlebench) if(NOT TARGET benchmark::benchmark) @@ -137,29 +137,6 @@ if(USER_BUILD_BENCHMARK) endif() endif(USER_BUILD_BENCHMARK) -if(NOT DEPENDENCIES_FORCE_DOWNLOAD) - find_package(ROCM 0.7.3 CONFIG QUIET PATHS "${ROCM_ROOT}") -endif() -if(NOT ROCM_FOUND) - message(STATUS "ROCm CMake not found. Fetching...") - # We don't really want to consume the build and test targets of ROCm CMake. - # CMake 3.18 allows omitting them, even though there's a CMakeLists.txt in source root. 
- if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) - set(SOURCE_SUBDIR_ARG SOURCE_SUBDIR "DISABLE ADDING TO BUILD") - else() - set(SOURCE_SUBDIR_ARG) - endif() - FetchContent_Declare( - rocm-cmake - URL https://github.com/RadeonOpenCompute/rocm-cmake/archive/refs/tags/rocm-5.2.0.tar.gz - ${SOURCE_SUBDIR_ARG} - ) - FetchContent_MakeAvailable(rocm-cmake) - find_package(ROCM CONFIG REQUIRED NO_DEFAULT_PATH PATHS "${rocm-cmake_SOURCE_DIR}") -else() - find_package(ROCM 0.7.3 CONFIG REQUIRED PATHS "${ROCM_ROOT}") -endif() - # CUB (only for CUDA platform) if(HIP_COMPILER STREQUAL "nvcc") @@ -256,12 +233,3 @@ else() unset(BUILD_SHARED_LIBS CACHE ) endif() set(ROCM_WARN_TOOLCHAIN_VAR ${USER_ROCM_WARN_TOOLCHAIN_VAR} CACHE BOOL "") - -include(ROCMSetupVersion) -include(ROCMCreatePackage) -include(ROCMInstallTargets) -include(ROCMPackageConfigHelpers) -include(ROCMInstallSymlinks) -include(ROCMHeaderWrapper) -include(ROCMCheckTargetIds) -include(ROCMClients) diff --git a/cmake/ROCmCMakeBuildToolsDependency.cmake b/cmake/ROCmCMakeBuildToolsDependency.cmake new file mode 100644 index 00000000..bdb0d468 --- /dev/null +++ b/cmake/ROCmCMakeBuildToolsDependency.cmake @@ -0,0 +1,53 @@ +# MIT License +# +# Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +if(NOT DEPENDENCIES_FORCE_DOWNLOAD) + find_package(ROCM 0.7.3 CONFIG QUIET PATHS "${ROCM_ROOT}") +endif() +if(NOT ROCM_FOUND) + message(STATUS "ROCm CMake not found. Fetching...") + # We don't really want to consume the build and test targets of ROCm CMake. + # CMake 3.18 allows omitting them, even though there's a CMakeLists.txt in source root. + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + set(SOURCE_SUBDIR_ARG SOURCE_SUBDIR "DISABLE ADDING TO BUILD") + else() + set(SOURCE_SUBDIR_ARG) + endif() + FetchContent_Declare( + rocm-cmake + URL https://github.com/RadeonOpenCompute/rocm-cmake/archive/refs/tags/rocm-5.2.0.tar.gz + ${SOURCE_SUBDIR_ARG} + ) + FetchContent_MakeAvailable(rocm-cmake) + find_package(ROCM CONFIG REQUIRED NO_DEFAULT_PATH PATHS "${rocm-cmake_SOURCE_DIR}") +else() + find_package(ROCM 0.7.3 CONFIG REQUIRED PATHS "${ROCM_ROOT}") +endif() + +include(ROCMSetupVersion) +include(ROCMCreatePackage) +include(ROCMInstallTargets) +include(ROCMPackageConfigHelpers) +include(ROCMInstallSymlinks) +include(ROCMHeaderWrapper) +include(ROCMCheckTargetIds) +include(ROCMClients) diff --git a/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp b/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp index 76579e70..d3fc38b4 100644 --- a/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp +++ b/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp @@ -83,11 +83,16 @@ HIPCUB_DEVICE __forceinline__ T AsmThreadLoad(void * ptr) 
HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, uint64_t, uint64_t, flat_load_dwordx2, v, wait_cmd); \ HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, double, uint64_t, flat_load_dwordx2, v, wait_cmd); -#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) +#if defined(__gfx940__) || defined(__gfx941__) HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CA, "sc0", ""); HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CG, "sc1", ""); HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CV, "sc0 sc1", "vmcnt"); HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_VOLATILE, "sc0 sc1", "vmcnt"); +#elif defined(__gfx942__) +HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CA, "sc0", ""); +HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CG, "sc0 nt", ""); +HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CV, "sc0", "vmcnt"); +HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_VOLATILE, "sc0", "vmcnt"); #else HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CA, "glc", ""); HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CG, "glc slc", ""); diff --git a/hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp b/hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp index 7ac86798..7e7e00ef 100644 --- a/hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp +++ b/hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp @@ -83,11 +83,16 @@ HIPCUB_DEVICE __forceinline__ void AsmThreadStore(void * ptr, T val) HIPCUB_ASM_THREAD_STORE(cache_modifier, llvm_cache_modifier, uint64_t, uint64_t, flat_store_dwordx2, v, wait_cmd); \ HIPCUB_ASM_THREAD_STORE(cache_modifier, llvm_cache_modifier, double, uint64_t, flat_store_dwordx2, v, wait_cmd); -#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) -HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WB, "sc0 sc1", ""); // TODO: gfx942 validation +#if defined(__gfx940__) || defined(__gfx941__) +HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WB, "sc0 sc1", ""); HIPCUB_ASM_THREAD_STORE_GROUP(STORE_CG, "sc0 sc1", ""); HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WT, "sc0 sc1", "vmcnt"); HIPCUB_ASM_THREAD_STORE_GROUP(STORE_VOLATILE, "sc0 sc1", "vmcnt"); 
+#elif defined(__gfx942__) +HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WB, "sc0", ""); +HIPCUB_ASM_THREAD_STORE_GROUP(STORE_CG, "sc0 nt", ""); +HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WT, "sc0", "vmcnt"); +HIPCUB_ASM_THREAD_STORE_GROUP(STORE_VOLATILE, "sc0", "vmcnt"); #else HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WB, "glc", ""); HIPCUB_ASM_THREAD_STORE_GROUP(STORE_CG, "glc slc", "");