Skip to content

Commit

Permalink
Merge pull request #427 from ROCm/release-staging/rocm-rel-6.3
Browse files Browse the repository at this point in the history
F8 support sync with staging
  • Loading branch information
cgmillette authored Oct 10, 2024
2 parents c82ba19 + 9e208d3 commit 3caf60a
Show file tree
Hide file tree
Showing 32 changed files with 1,584 additions and 1,261 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,13 @@ else()
endif()

# Workaround applied to both the sanitizer and the regular configuration:
# CMAKE_NO_BUILTIN_CHRPATH tells CMake not to use its built-in binary editor
# to rewrite runtime library search paths at install time.
# TODO: Remove next line when rocm-cmake fix is available
set(CMAKE_NO_BUILTIN_CHRPATH ON)

# Populate DEFAULT_AMDGPU_TARGETS from a candidate list that depends on whether
# the address sanitizer build is enabled.
# NOTE(review): rocm_check_target_ids comes from rocm-cmake; presumably it keeps
# only the candidates the active compiler accepts - confirm against rocm-cmake.
if(ADDRESS_SANITIZER_ENABLED)
  # Sanitizer builds: only the xnack+ variants are offered.
  rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
    TARGETS "gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+")
else()
  # Regular builds: gfx9, gfx11 and gfx12 candidates.
  rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
    TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
endif()
Expand Down
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
rocm-docs-core==1.7.1
rocm-docs-core==1.7.2
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ requests==2.32.3
# via
# pygithub
# sphinx
rocm-docs-core==1.7.1
rocm-docs-core==1.7.2
# via -r requirements.in
smmap==5.0.1
# via gitdb
Expand Down
50 changes: 27 additions & 23 deletions library/include/rocwmma/internal/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
///
/// Architecture support
/// Guaranteed symbols:
/// ROCWMMA_DEVICE_COMPILE
/// ROCWMMA_ARCH_GFX908
/// ROCWMMA_ARCH_GFX90A
/// ROCWMMA_ARCH_GFX940
Expand All @@ -44,28 +45,35 @@
/// compiler pass, and all other macros rely on their definition.
///
/// Device compiler pass: https://rocm.docs.amd.com/projects/HIP/en/latest/user_guide/hip_porting_guide.html#identifying-current-compilation-pass-host-or-device
#if defined(__gfx908__)
#if defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__
#define ROCWMMA_DEVICE_COMPILE 1
#else
#define ROCWMMA_DEVICE_COMPILE 0
#endif
#if defined(__gfx908__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX908 __gfx908__
#elif defined(__gfx90a__)
#elif defined(__gfx90a__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX90A __gfx90a__
#elif defined(__gfx940__)
#elif defined(__gfx940__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX940 __gfx940__
#elif defined(__gfx941__)
#elif defined(__gfx941__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX941 __gfx941__
#elif defined(__gfx942__)
#elif defined(__gfx942__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX942 __gfx942__
#elif defined(__gfx1100__)
#elif defined(__gfx1100__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX1100 __gfx1100__
#elif defined(__gfx1101__)
#elif defined(__gfx1101__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX1101 __gfx1101__
#elif defined(__gfx1102__)
#elif defined(__gfx1102__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX1102 __gfx1102__
#elif defined(__gfx1200__)
#elif defined(__gfx1200__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX1200 __gfx1200__
#elif defined(__gfx1201__)
#elif defined(__gfx1201__) && ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_GFX1201 __gfx1201__
#else
#elif !ROCWMMA_DEVICE_COMPILE
#define ROCWMMA_ARCH_HOST 1
#else
static_assert(0, "Unsupported architecture");
#endif

#if !defined(ROCWMMA_ARCH_GFX908)
Expand Down Expand Up @@ -112,6 +120,7 @@
/// ROCWMMA_WAVE32_MODE
/// ROCWMMA_BLOCK_DIM_16_SUPPORTED
/// ROCWMMA_BLOCK_DIM_32_SUPPORTED
/// ROCWMMA_ARCH_GFX94X
///
#if ROCWMMA_ARCH_GFX908 || ROCWMMA_ARCH_GFX90A || ROCWMMA_ARCH_GFX940 || ROCWMMA_ARCH_GFX941 \
|| ROCWMMA_ARCH_GFX942
Expand All @@ -133,6 +142,10 @@
#define ROCWMMA_BLOCK_DIM_16_SUPPORTED 1
#endif

/// Umbrella flag: defined as 1 when any of ROCWMMA_ARCH_GFX940,
/// ROCWMMA_ARCH_GFX941 or ROCWMMA_ARCH_GFX942 evaluates to non-zero.
#if ROCWMMA_ARCH_GFX940 || ROCWMMA_ARCH_GFX941 || ROCWMMA_ARCH_GFX942
#define ROCWMMA_ARCH_GFX94X 1
#endif

#if !defined(ROCWMMA_ARCH_GFX9)
#define ROCWMMA_ARCH_GFX9 0
#endif
Expand All @@ -154,17 +167,8 @@
#if !defined(ROCWMMA_BLOCK_DIM_32_SUPPORTED)
#define ROCWMMA_BLOCK_DIM_32_SUPPORTED 0
#endif

///
/// Architecture datatypes configuration.
/// FP8_NANOO only supported on gfx940, gfx941 and gfx942
/// Guaranteed symbols:
/// ROCWMMA_ENABLE_FP8_NANOO
///
#if (ROCWMMA_ARCH_GFX940 || ROCWMMA_ARCH_GFX941 || ROCWMMA_ARCH_GFX942)
#define ROCWMMA_USE_FP8_NANOO 1
#else
#define ROCWMMA_USE_FP8_NANOO 0
#if !defined(ROCWMMA_ARCH_GFX94X)
#define ROCWMMA_ARCH_GFX94X 0
#endif

#if defined(NDEBUG)
Expand All @@ -187,7 +191,7 @@ static_assert(!(bool)(ROCWMMA_WAVE32_MODE) && (bool)(ROCWMMA_WAVE64_MODE),
"rocWMMA supports only wave64 for gfx9 arch");
static_assert((bool)(ROCWMMA_BLOCK_DIM_16_SUPPORTED) && (bool)(ROCWMMA_BLOCK_DIM_32_SUPPORTED),
"rocWMMA requires block size of 16 and 32 for gfx9 arch");
#endif
#endif

#if ROCWMMA_ARCH_GFX11
static_assert((bool)(ROCWMMA_WAVE32_MODE) && !(bool)(ROCWMMA_WAVE64_MODE),
Expand Down
Loading

0 comments on commit 3caf60a

Please sign in to comment.