Skip to content

Commit

Permalink
Mergeback 6.0 fixes into develop (#306)
Browse files Browse the repository at this point in the history
* Separate gfx942 specific code (#289)

Co-authored-by: Stanley Tsang <stanley.tsang@amd.com>

* Split rocm-cmake dependency out before hip include (#293)

* Split rocm-cmake dependency out before hip include

* Update comments

* Fix cpp-check reported issues

Fixed a number of issues that static analysis picked up:
  - Made some functions const since they don't modify member state
  - Made some parameters const, since they're never modified
  - Fixes for several benchmark/test functions
    - Removed unused variable declarations
    - Added missing input data transfer from host to device
    - Added some member variables to constructor initializer list
    - Added override keyword in several places
    - Fixed up item placeholders in some printf statements

* Fix cpp-check reported issues

* Removed host-to-device data transfer from memcpy benchmark.
Since this benchmark only tests memcpy performance between device buffers,
we don't really need to copy data into these from the host.

* update googlebenchmark version (#302)

* Avoid a segmentation fault when clearing cached blocks (#297) (#305)

Co-authored-by: Tom Benson <benson31@llnl.gov>

---------

Co-authored-by: Eiden Yoshida <47196116+eidenyoshida@users.noreply.github.com>
Co-authored-by: Lauren Wrubleski <Lauren.Wrubleski@amd.com>
Co-authored-by: Wayne Franz <wayfranz@amd.com>
Co-authored-by: Tom Benson <benson31@llnl.gov>
  • Loading branch information
5 people authored Oct 25, 2023
1 parent 3952cf2 commit 2276f32
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 41 deletions.
14 changes: 9 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,9 @@ set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Find and verify HIP.
include(VerifyCompiler)

# Get dependencies (required here to get rocm-cmake)
include(Dependencies)
# rocm-cmake has to be included early so that it's available to set GPU_TARGETS.
# If hip is included before GPU_TARGETS is set, it defaults to building only for the current architecture.
include(ROCmCMakeBuildToolsDependency)

# Setup GPU targets for rocm platform
if(NOT (CMAKE_CXX_COMPILER MATCHES ".*nvcc$" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"))
Expand All @@ -87,6 +85,12 @@ if(NOT (CMAKE_CXX_COMPILER MATCHES ".*nvcc$" OR "${CMAKE_CXX_COMPILER_ID}" STREQ
endif()
endif()

# Find and verify HIP.
include(VerifyCompiler)

# Get dependencies (except rocm-cmake, included earlier)
include(Dependencies)

if(BUILD_ADDRESS_SANITIZER)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan")
Expand Down
34 changes: 1 addition & 33 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ if(USER_BUILD_BENCHMARK)
FetchContent_Declare(
googlebench
GIT_REPOSITORY https://github.com/google/benchmark.git
GIT_TAG v1.6.1
GIT_TAG v1.8.0
)
FetchContent_MakeAvailable(googlebench)
if(NOT TARGET benchmark::benchmark)
Expand All @@ -137,29 +137,6 @@ if(USER_BUILD_BENCHMARK)
endif()
endif(USER_BUILD_BENCHMARK)

if(NOT DEPENDENCIES_FORCE_DOWNLOAD)
find_package(ROCM 0.7.3 CONFIG QUIET PATHS "${ROCM_ROOT}")
endif()
if(NOT ROCM_FOUND)
message(STATUS "ROCm CMake not found. Fetching...")
# We don't really want to consume the build and test targets of ROCm CMake.
# CMake 3.18 allows omitting them, even though there's a CMakeLists.txt in source root.
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
set(SOURCE_SUBDIR_ARG SOURCE_SUBDIR "DISABLE ADDING TO BUILD")
else()
set(SOURCE_SUBDIR_ARG)
endif()
FetchContent_Declare(
rocm-cmake
URL https://github.com/RadeonOpenCompute/rocm-cmake/archive/refs/tags/rocm-5.2.0.tar.gz
${SOURCE_SUBDIR_ARG}
)
FetchContent_MakeAvailable(rocm-cmake)
find_package(ROCM CONFIG REQUIRED NO_DEFAULT_PATH PATHS "${rocm-cmake_SOURCE_DIR}")
else()
find_package(ROCM 0.7.3 CONFIG REQUIRED PATHS "${ROCM_ROOT}")
endif()

# CUB (only for CUDA platform)
if(HIP_COMPILER STREQUAL "nvcc")

Expand Down Expand Up @@ -256,12 +233,3 @@ else()
unset(BUILD_SHARED_LIBS CACHE )
endif()
set(ROCM_WARN_TOOLCHAIN_VAR ${USER_ROCM_WARN_TOOLCHAIN_VAR} CACHE BOOL "")

include(ROCMSetupVersion)
include(ROCMCreatePackage)
include(ROCMInstallTargets)
include(ROCMPackageConfigHelpers)
include(ROCMInstallSymlinks)
include(ROCMHeaderWrapper)
include(ROCMCheckTargetIds)
include(ROCMClients)
53 changes: 53 additions & 0 deletions cmake/ROCmCMakeBuildToolsDependency.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# MIT License
#
# Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Locate the ROCm CMake build tools (the ROCM config package) and load the
# ROCM* helper modules used by the rest of the build.
#
# Variables read:
#   DEPENDENCIES_FORCE_DOWNLOAD - when true, the initial search for an
#                                 installed ROCM package is skipped.
#   ROCM_ROOT                   - additional search location handed to
#                                 find_package(ROCM).
#
# If ROCM is not found, the rocm-cmake sources are fetched and the package is
# resolved from the fetched source directory instead.

if(NOT DEPENDENCIES_FORCE_DOWNLOAD)
  find_package(ROCM 0.7.3 CONFIG QUIET PATHS "${ROCM_ROOT}")
endif()
if(NOT ROCM_FOUND)
  message(STATUS "ROCm CMake not found. Fetching...")
  # The FetchContent_* commands only exist once the FetchContent module has
  # been loaded. Nothing else in this file loads it, so include it here rather
  # than relying on whichever file happened to be included before this one.
  include(FetchContent)
  # We don't really want to consume the build and test targets of ROCm CMake.
  # CMake 3.18 allows omitting them, even though there's a CMakeLists.txt in source root.
  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
    set(SOURCE_SUBDIR_ARG SOURCE_SUBDIR "DISABLE ADDING TO BUILD")
  else()
    set(SOURCE_SUBDIR_ARG)
  endif()
  FetchContent_Declare(
    rocm-cmake
    URL https://github.com/RadeonOpenCompute/rocm-cmake/archive/refs/tags/rocm-5.2.0.tar.gz
    ${SOURCE_SUBDIR_ARG}
  )
  FetchContent_MakeAvailable(rocm-cmake)
  # Resolve the package strictly from the fetched sources (no default paths).
  find_package(ROCM CONFIG REQUIRED NO_DEFAULT_PATH PATHS "${rocm-cmake_SOURCE_DIR}")
else()
  # Repeat the lookup as REQUIRED (non-quiet) now that the package is known to exist.
  find_package(ROCM 0.7.3 CONFIG REQUIRED PATHS "${ROCM_ROOT}")
endif()

# Helper modules provided by the ROCM package located above.
include(ROCMSetupVersion)
include(ROCMCreatePackage)
include(ROCMInstallTargets)
include(ROCMPackageConfigHelpers)
include(ROCMInstallSymlinks)
include(ROCMHeaderWrapper)
include(ROCMCheckTargetIds)
include(ROCMClients)
7 changes: 6 additions & 1 deletion hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,16 @@ HIPCUB_DEVICE __forceinline__ T AsmThreadLoad(void * ptr)
HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, uint64_t, uint64_t, flat_load_dwordx2, v, wait_cmd); \
HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, double, uint64_t, flat_load_dwordx2, v, wait_cmd);

#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#if defined(__gfx940__) || defined(__gfx941__)
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CA, "sc0", "");
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CG, "sc1", "");
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CV, "sc0 sc1", "vmcnt");
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_VOLATILE, "sc0 sc1", "vmcnt");
#elif defined(__gfx942__)
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CA, "sc0", "");
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CG, "sc0 nt", "");
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CV, "sc0", "vmcnt");
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_VOLATILE, "sc0", "vmcnt");
#else
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CA, "glc", "");
HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CG, "glc slc", "");
Expand Down
9 changes: 7 additions & 2 deletions hipcub/include/hipcub/backend/rocprim/thread/thread_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,16 @@ HIPCUB_DEVICE __forceinline__ void AsmThreadStore(void * ptr, T val)
HIPCUB_ASM_THREAD_STORE(cache_modifier, llvm_cache_modifier, uint64_t, uint64_t, flat_store_dwordx2, v, wait_cmd); \
HIPCUB_ASM_THREAD_STORE(cache_modifier, llvm_cache_modifier, double, uint64_t, flat_store_dwordx2, v, wait_cmd);

#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WB, "sc0 sc1", ""); // TODO: gfx942 validation
#if defined(__gfx940__) || defined(__gfx941__)
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WB, "sc0 sc1", "");
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_CG, "sc0 sc1", "");
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WT, "sc0 sc1", "vmcnt");
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_VOLATILE, "sc0 sc1", "vmcnt");
#elif defined(__gfx942__)
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WB, "sc0", "");
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_CG, "sc0 nt", "");
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WT, "sc0", "vmcnt");
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_VOLATILE, "sc0", "vmcnt");
#else
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_WB, "glc", "");
HIPCUB_ASM_THREAD_STORE_GROUP(STORE_CG, "glc slc", "");
Expand Down

0 comments on commit 2276f32

Please sign in to comment.