Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rebase HIP 6.x + Update hip-tests #796

Merged
merged 33 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
84cd3d4
CHIP_USE_EXTERNAL_HIP_TESTS ON
pvelesko Mar 9, 2024
a297110
create hipconfig symlink
pvelesko Mar 9, 2024
c51a120
update HIPCC - passthrough --genco
pvelesko Mar 9, 2024
35987cd
Rebase HIP on develop 6.x
pvelesko Mar 10, 2024
d56e619
remove simple_kernel sample
pvelesko Mar 10, 2024
403284f
update HIPCC submodule
pvelesko Mar 10, 2024
4c8f00a
add hipStreamAttachMemAsync to CHIPBindings
pvelesko Mar 11, 2024
53b9892
placeholders for cooperative groups
pvelesko Mar 11, 2024
6e85c23
CHIPBindings
pvelesko Mar 11, 2024
1a8aa46
add -gdwarf-4
pvelesko Mar 11, 2024
f6cb210
update CHIPBindings
pvelesko Mar 11, 2024
e39a6b7
__managed__ keyword
pvelesko Mar 11, 2024
dcb79f4
update hip-tests
pvelesko Mar 11, 2024
d873b66
expectedArgs.cpp test file
pvelesko Mar 11, 2024
a6c2bd5
devicelib __chip__fns32
pvelesko Mar 13, 2024
2db8685
update hip-tests
pvelesko Mar 13, 2024
b265f08
Fix cmake HIP_VERSION
pvelesko Mar 13, 2024
97c4ff8
Cmake - build tests
pvelesko Mar 13, 2024
154b4c0
update hip-tests
pvelesko Mar 14, 2024
cf2e07c
tests cmake cleanup
pvelesko Mar 15, 2024
c5e4808
HipLowerZeroLegthArrays: process PtrToint constexprs
linehill Mar 21, 2024
4f8dcc6
copy in old tests
pvelesko Apr 1, 2024
5b00b1b
CHIP_USE_EXTERNAL_HIP_TESTS OFF
pvelesko Apr 1, 2024
99d54d6
disable tests which are now incompatible with HIP 6.x
pvelesko Apr 1, 2024
05da6d9
remove inline keyword to suppress warnings
pvelesko Apr 1, 2024
d1d3166
linter adjust
pvelesko Apr 2, 2024
3977e97
PR comments
pvelesko Apr 2, 2024
366d99e
document that coop groups are unimplemented
pvelesko Apr 2, 2024
79180bc
clang-format main
pvelesko Apr 2, 2024
31daec5
device-side placeholders for malloc, free, wall_clock
pvelesko Apr 2, 2024
6d9d9da
update Features.md
pvelesko Apr 2, 2024
e8d3ec9
fmt
pvelesko Apr 2, 2024
13d7590
adjust clang-tidy
pvelesko Apr 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .clang-tidy
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
Checks: 'readability-identifier-naming'
Checks: '-*,readability-*,modernize-*,clang-analyzer-*'
WarningsAsErrors: ''
HeaderFilterRegex: './src/.*'
AnalyzeTemporaryDtors: false
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/clang-tidy-format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
files-changed-only: true
lines-changed-only: true
extensions: 'cc,hh'
tidy-checks: 'readability-identifier-naming'
tidy-checks: '-*,readability-*,modernize-*,clang-analyzer-*'
- name: Fail fast?!
if: steps.linter.outputs.checks-failed > 0
run: exit 1
Expand Down
29 changes: 13 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ add_compile_options(-Wno-format-extra-args -mf16c)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-duplicate-decl-specifier \
-Wno-tautological-constant-compare -Wno-c++20-extensions -Wno-unused-result \
-Wno-delete-abstract-non-virtual-dtor -Wno-deprecated-declarations -Wunused-command-line-argument")
-Wno-delete-abstract-non-virtual-dtor -Wno-deprecated-declarations -Wunused-command-line-argument -gdwarf-4")

# end temporary
cmake_minimum_required(VERSION 3.20 FATAL_ERROR)
Expand Down Expand Up @@ -217,6 +217,8 @@ option(CHIP_USE_EXTERNAL_HIP_TESTS "Use Catch2 tests from the hip-tests submodul
option(CHIP_ENABLE_NON_COMPLIANT_DEVICELIB_CODE "Enable non-compliant devicelib code such as calling LLVM builtins from inside kernel code. Enables certain unsigned long devicelib func variants" OFF)
option(CHIP_FAST_MATH "Use native_ OpenCL functions which are fast but their precision is implementation defined" OFF)
option(CHIP_USE_INTEL_USM "When enabled, cl_intel_unified_shared_memory extension, when available, will be used for HIP allocations in the OpenCL backend" ON)
option(CATCH2_DISCOVER_TESTS_COMPILE_TIME "Discover the tests at compile time" ON)

# This mitigation might be necessary on some systems with an older runtime.
# This mitigation makes memory resident (disable swapping) on the GPU
# This has a significant impact on the cost of a GPU malloc
Expand Down Expand Up @@ -355,6 +357,7 @@ add_dependencies(CHIP hipcc.bin hipconfig.bin)

file(MAKE_DIRECTORY ${HIPCC_BUILD_PATH})
file(CREATE_LINK hipcc.bin ${HIPCC_BUILD_PATH}/hipcc SYMBOLIC)
file(CREATE_LINK hipconfig.bin ${HIPCC_BUILD_PATH}/hipconfig SYMBOLIC)

add_subdirectory(bin)

Expand Down Expand Up @@ -566,7 +569,9 @@ set(_versionInfo "# Auto-generated by cmake\n")
set(HIP_VERSION_MAJOR 5)
set(HIP_VERSION_MINOR 1)
set(HIP_VERSION_PATCH 0)
set(HIP_VERSION_PATCH_GITHASH 0) # TODO
set(HIP_VERSION_GITHASH 0)
set(HIP_VERSION_PATCH_GITHASH "${HIP_VERSION_PATCH}.${HIP_VERSION_GITHASH}")
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}-${HIP_VERSION_PATCH_GITHASH}")

add_to_config(_versionInfo HIP_VERSION_MAJOR "${HIP_VERSION_MAJOR}")
add_to_config(_versionInfo HIP_VERSION_MINOR "${HIP_VERSION_MINOR}")
Expand Down Expand Up @@ -700,21 +705,13 @@ SET(HIP_PATH ${CMAKE_BINARY_DIR}) # pick up build sources, not install sources

set(SAVED_WARN_DEPRECATED ${CMAKE_WARN_DEPRECATED})
set(CMAKE_WARN_DEPRECATED OFF)
if(CHIP_USE_EXTERNAL_HIP_TESTS)
add_subdirectory(hip-tests/catch catch)
else()
add_subdirectory(HIP/tests/catch catch)
endif()
set(CMAKE_WARN_DEPRECATED ${SAVED_WARN_DEPRECATED})

if(CHIP_BUILD_TESTS)
add_subdirectory(tests/cuda)
add_subdirectory(tests/devicelib)
add_subdirectory(tests/hiprtc)
add_subdirectory(tests/fromLibCeed)
add_subdirectory(tests/compiler)
add_subdirectory(tests/runtime)
add_subdirectory(tests/post-install)
if(CHIP_USE_EXTERNAL_HIP_TESTS)
add_subdirectory(hip-tests/catch catch)
else()
add_subdirectory(HIP/tests/catch catch)
endif()
add_subdirectory(tests)
endif()

if(CHIP_BUILD_SAMPLES)
Expand Down
2 changes: 1 addition & 1 deletion HIP
Submodule HIP updated 288 files
2 changes: 1 addition & 1 deletion HIPCC
Submodule HIPCC updated 1 files
+3 −0 src/hipBin_spirv.h
78 changes: 78 additions & 0 deletions bitcode/devicelib.cl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,84 @@
#error __opencl_c_generic_address_space needed!
#endif

NOOPT void* device_malloc(unsigned int size) {return (void*)0;};
NOOPT void device_free(void* ptr) {};

// Given a 32/64-bit value exec mask and an integer value base (between 0 and WAVEFRONT_SIZE),
// find the n-th (given by offset) set bit in the exec mask from the base bit, and return the bit position.
// If not found, return -1.
// In HIP long long is 64-bit integer. In OpenCL it's 128-bit integer.
EXPORT int __chip__fns64(unsigned long int mask, unsigned int base, int offset) {
unsigned long int temp_mask = mask;
int temp_offset = offset;

if (offset == 0) {
temp_mask &= (1 << base);
temp_offset = 1;
}
else if (offset < 0) {
temp_mask = __builtin_bitreverse64(mask);
base = 63 - base;
temp_offset = -offset;
}

temp_mask = temp_mask & ((~0ULL) << base);
if (__builtin_popcountll(temp_mask) < temp_offset)
return -1;
int total = 0;
for (int i = 0x20; i > 0; i >>= 1) {
unsigned long int temp_mask_lo = temp_mask & ((1ULL << i) - 1);
int pcnt = __builtin_popcountll(temp_mask_lo);
if (pcnt < temp_offset) {
temp_mask = temp_mask >> i;
temp_offset -= pcnt;
total += i;
}
else {
temp_mask = temp_mask_lo;
}
}
if (offset < 0)
return 63 - total;
else
return total;
}

EXPORT int __chip__fns32(unsigned long int mask, unsigned int base, int offset) {
unsigned long int temp_mask = mask;
int temp_offset = offset;
if (offset == 0) {
temp_mask &= (1 << base);
temp_offset = 1;
}
else if (offset < 0) {
temp_mask = __builtin_bitreverse64(mask);
base = 63 - base;
temp_offset = -offset;
}
temp_mask = temp_mask & ((~0ULL) << base);
if (__builtin_popcountll(temp_mask) < temp_offset)
return -1;
int total = 0;
for (int i = 0x20; i > 0; i >>= 1) {
unsigned long int temp_mask_lo = temp_mask & ((1ULL << i) - 1);
int pcnt = __builtin_popcountll(temp_mask_lo);
if (pcnt < temp_offset) {
temp_mask = temp_mask >> i;
temp_offset -= pcnt;
total += i;
}
else {
temp_mask = temp_mask_lo;
}
}
if (offset < 0)
return 63 - total;
else
return total;
}


EXPORT unsigned /* long */ long int
__chip_umul64hi(unsigned /* long */ long int x,
unsigned /* long */ long int y) {
Expand Down
4 changes: 4 additions & 0 deletions docs/Features.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ CUDA features not present in HIP are unsupported unless explicitly stated otherw

* few memory APIs (hipMemPrefetchAsync, hipMemAdvise)

* Cooperative Groups API

* Device-side malloc/free

#### partially supported

* Texture Objects of 1D/2D type are supported; 3D, LOD, Grad,
Expand Down
2 changes: 1 addition & 1 deletion hip-tests
31 changes: 23 additions & 8 deletions include/hip/devicelib/integer/int_intrinsics.hh
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,33 @@ __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int offset,
return __chip_bitinsert_u32(src0, src1, offset, width);
}

extern "C" __device__ uint64_t __chip_bitinsert_u64(uint64_t src0,
uint64_t src1,
uint64_t offset,
uint64_t width);
extern "C++" inline __device__ uint64_t __bitinsert_u64(uint64_t src0,
uint64_t src1,
uint64_t offset,
uint64_t width) {
extern "C" __device__ unsigned long long int
__chip_bitinsert_u64(unsigned long long int src0, unsigned long long int src1,
unsigned long long int offset,
unsigned long long int width);
extern "C++" inline __device__ unsigned long long int
__bitinsert_u64(unsigned long long int src0, unsigned long long int src1,
unsigned long long int offset, unsigned long long int width) {
return __chip_bitinsert_u64(src0, src1, offset, width);
}
#endif // CHIP_ENABLE_NON_COMPLIANT_DEVICELIB_CODE

// int was replaced with int
// int64_t was replaced with long long int
extern "C" __device__ int __chip__fns32(unsigned long long int mask,
unsigned int base, int offset);
extern "C++" inline __device__ int __fns32(unsigned long long int mask,
unsigned int base, int offset) {
return __chip__fns32(mask, base, offset);
}

extern "C" __device__ int __chip__fns64(unsigned long long int mask,
unsigned int base, int offset);
extern "C++" inline __device__ int __fns64(unsigned long long int mask,
unsigned int base, int offset) {
return __chip__fns64(mask, base, offset);
}

extern "C" __device__ unsigned int __chip_brev(unsigned int x); // Custom
extern "C++" inline __device__ unsigned int __brev(unsigned int x) {
return __chip_brev(x);
Expand Down
130 changes: 65 additions & 65 deletions include/hip/devicelib/type_casting_intrinsics.hh
Original file line number Diff line number Diff line change
Expand Up @@ -25,50 +25,50 @@

#include <hip/devicelib/macros.hh>

extern "C++" inline __device__ float __double2float_rd(double x);
extern "C++" inline __device__ float __double2float_rn(double x);
extern "C++" inline __device__ float __double2float_ru(double x);
extern "C++" inline __device__ float __double2float_rz(double x);
extern "C++" inline __device__ int __double2hiint(double x);
extern "C++" inline __device__ int __double2int_rd(double x);
extern "C++" inline __device__ int __double2int_rn(double x);
extern "C++" inline __device__ int __double2int_ru(double x);
extern "C++" inline __device__ int __double2int_rz(double x);
extern "C++" inline __device__ long long int __double2ll_rd(double x);
extern "C++" inline __device__ long long int __double2ll_rn(double x);
extern "C++" inline __device__ long long int __double2ll_ru(double x);
extern "C++" inline __device__ long long int __double2ll_rz(double x);
extern "C++" inline __device__ int __double2loint(double x);
extern "C++" inline __device__ unsigned int __double2uint_rd(double x);
extern "C++" inline __device__ unsigned int __double2uint_rn(double x);
extern "C++" inline __device__ unsigned int __double2uint_ru(double x);
extern "C++" inline __device__ unsigned int __double2uint_rz(double x);
extern "C++" inline __device__ unsigned long long int __double2ull_rd(double x);
extern "C++" inline __device__ unsigned long long int __double2ull_rn(double x);
extern "C++" inline __device__ unsigned long long int __double2ull_ru(double x);
extern "C++" inline __device__ unsigned long long int __double2ull_rz(double x);
extern "C++" __device__ float __double2float_rd(double x);
extern "C++" __device__ float __double2float_rn(double x);
extern "C++" __device__ float __double2float_ru(double x);
extern "C++" __device__ float __double2float_rz(double x);
extern "C++" __device__ int __double2hiint(double x);
extern "C++" __device__ int __double2int_rd(double x);
extern "C++" __device__ int __double2int_rn(double x);
extern "C++" __device__ int __double2int_ru(double x);
extern "C++" __device__ int __double2int_rz(double x);
extern "C++" __device__ long long int __double2ll_rd(double x);
extern "C++" __device__ long long int __double2ll_rn(double x);
extern "C++" __device__ long long int __double2ll_ru(double x);
extern "C++" __device__ long long int __double2ll_rz(double x);
extern "C++" __device__ int __double2loint(double x);
extern "C++" __device__ unsigned int __double2uint_rd(double x);
extern "C++" __device__ unsigned int __double2uint_rn(double x);
extern "C++" __device__ unsigned int __double2uint_ru(double x);
extern "C++" __device__ unsigned int __double2uint_rz(double x);
extern "C++" __device__ unsigned long long int __double2ull_rd(double x);
extern "C++" __device__ unsigned long long int __double2ull_rn(double x);
extern "C++" __device__ unsigned long long int __double2ull_ru(double x);
extern "C++" __device__ unsigned long long int __double2ull_rz(double x);

extern "C" __device__ long long int __chip_double_as_longlong(double x);
extern "C++" inline __device__ long long int __double_as_longlong(double x) {
return __chip_double_as_longlong(x);
}

extern "C++" inline __device__ int __float2int_rd(float x);
extern "C++" inline __device__ int __float2int_rn(float x);
extern "C++" inline __device__ int __float2int_ru(float);
extern "C++" inline __device__ int __float2int_rz(float x);
extern "C++" inline __device__ long long int __float2ll_rd(float x);
extern "C++" inline __device__ long long int __float2ll_rn(float x);
extern "C++" inline __device__ long long int __float2ll_ru(float x);
extern "C++" inline __device__ long long int __float2ll_rz(float x);
extern "C++" inline __device__ unsigned int __float2uint_rd(float x);
extern "C++" inline __device__ unsigned int __float2uint_rn(float x);
extern "C++" inline __device__ unsigned int __float2uint_ru(float x);
extern "C++" inline __device__ unsigned int __float2uint_rz(float x);
extern "C++" inline __device__ unsigned long long int __float2ull_rd(float x);
extern "C++" inline __device__ unsigned long long int __float2ull_rn(float x);
extern "C++" inline __device__ unsigned long long int __float2ull_ru(float x);
extern "C++" inline __device__ unsigned long long int __float2ull_rz(float x);
extern "C++" __device__ int __float2int_rd(float x);
extern "C++" __device__ int __float2int_rn(float x);
extern "C++" __device__ int __float2int_ru(float);
extern "C++" __device__ int __float2int_rz(float x);
extern "C++" __device__ long long int __float2ll_rd(float x);
extern "C++" __device__ long long int __float2ll_rn(float x);
extern "C++" __device__ long long int __float2ll_ru(float x);
extern "C++" __device__ long long int __float2ll_rz(float x);
extern "C++" __device__ unsigned int __float2uint_rd(float x);
extern "C++" __device__ unsigned int __float2uint_rn(float x);
extern "C++" __device__ unsigned int __float2uint_ru(float x);
extern "C++" __device__ unsigned int __float2uint_rz(float x);
extern "C++" __device__ unsigned long long int __float2ull_rd(float x);
extern "C++" __device__ unsigned long long int __float2ull_rn(float x);
extern "C++" __device__ unsigned long long int __float2ull_ru(float x);
extern "C++" __device__ unsigned long long int __float2ull_rz(float x);

extern "C" __device__ int __chip_float_as_int(float x);
extern "C++" inline __device__ int __float_as_int(float x) {
Expand All @@ -80,50 +80,50 @@ extern "C++" inline __device__ unsigned int __float_as_uint(float x) {
return __chip_float_as_uint(x);
}

extern "C++" inline __device__ double __hiloint2double(int hi, int lo);
extern "C++" inline __device__ double __int2double_rn(int x);
extern "C++" inline __device__ float __int2float_rd(int x);
extern "C++" inline __device__ float __int2float_rn(int x);
extern "C++" inline __device__ float __int2float_ru(int x);
extern "C++" inline __device__ float __int2float_rz(int x);
extern "C++" __device__ double __hiloint2double(int hi, int lo);
extern "C++" __device__ double __int2double_rn(int x);
extern "C++" __device__ float __int2float_rd(int x);
extern "C++" __device__ float __int2float_rn(int x);
extern "C++" __device__ float __int2float_ru(int x);
extern "C++" __device__ float __int2float_rz(int x);

extern "C" __device__ float __chip_int_as_float(int x);
extern "C++" inline __device__ float __int_as_float(int x) {
return __chip_int_as_float(x);
}

extern "C++" inline __device__ double __ll2double_rd(long long int x);
extern "C++" inline __device__ double __ll2double_rn(long long int x);
extern "C++" inline __device__ double __ll2double_ru(long long int x);
extern "C++" inline __device__ double __ll2double_rz(long long int x);
extern "C++" inline __device__ float __ll2float_rd(long long int x);
extern "C++" inline __device__ float __ll2float_rn(long long int x);
extern "C++" inline __device__ float __ll2float_ru(long long int x);
extern "C++" inline __device__ float __ll2float_rz(long long int x);
extern "C++" __device__ double __ll2double_rd(long long int x);
extern "C++" __device__ double __ll2double_rn(long long int x);
extern "C++" __device__ double __ll2double_ru(long long int x);
extern "C++" __device__ double __ll2double_rz(long long int x);
extern "C++" __device__ float __ll2float_rd(long long int x);
extern "C++" __device__ float __ll2float_rn(long long int x);
extern "C++" __device__ float __ll2float_ru(long long int x);
extern "C++" __device__ float __ll2float_rz(long long int x);

extern "C" __device__ double __chip_longlong_as_double(long long int x);
extern "C++" inline __device__ double __longlong_as_double(long long int x) {
return __chip_longlong_as_double(x);
}

extern "C++" inline __device__ double __uint2double_rn(unsigned int x);
extern "C++" inline __device__ float __uint2float_rd(unsigned int x);
extern "C++" inline __device__ float __uint2float_rn(unsigned int x);
extern "C++" inline __device__ float __uint2float_ru(unsigned int x);
extern "C++" inline __device__ float __uint2float_rz(unsigned int x);
extern "C++" __device__ double __uint2double_rn(unsigned int x);
extern "C++" __device__ float __uint2float_rd(unsigned int x);
extern "C++" __device__ float __uint2float_rn(unsigned int x);
extern "C++" __device__ float __uint2float_ru(unsigned int x);
extern "C++" __device__ float __uint2float_rz(unsigned int x);

extern "C" __device__ float __chip_uint_as_float(uint x);
extern "C++" inline __device__ float __uint_as_float(unsigned int x) {
return __chip_uint_as_float(x);
}

extern "C++" inline __device__ double __ull2double_rd(unsigned long long int x);
extern "C++" inline __device__ double __ull2double_rn(unsigned long long int x);
extern "C++" inline __device__ double __ull2double_ru(unsigned long long int x);
extern "C++" inline __device__ double __ull2double_rz(unsigned long long int x);
extern "C++" inline __device__ float __ull2float_rd(unsigned long long int x);
extern "C++" inline __device__ float __ull2float_rn(unsigned long long int x);
extern "C++" inline __device__ float __ull2float_ru(unsigned long long int x);
extern "C++" inline __device__ float __ull2float_rz(unsigned long long int x);
extern "C++" __device__ double __ull2double_rd(unsigned long long int x);
extern "C++" __device__ double __ull2double_rn(unsigned long long int x);
extern "C++" __device__ double __ull2double_ru(unsigned long long int x);
extern "C++" __device__ double __ull2double_rz(unsigned long long int x);
extern "C++" __device__ float __ull2float_rd(unsigned long long int x);
extern "C++" __device__ float __ull2float_rn(unsigned long long int x);
extern "C++" __device__ float __ull2float_ru(unsigned long long int x);
extern "C++" __device__ float __ull2float_rz(unsigned long long int x);

#endif // include guard
Loading
Loading