release/20.x: [Clang] Add handlers for 'match_any' and 'match_all' to gpuintrin.h
(#127504)
#127704
base: release/20.x
Conversation
…llvm#127504)

Summary: These helpers are very useful but currently absent. They allow the user to get a bitmask representing the matches within the warp. I have made an executive decision to drop the `predicate` return from `match_all`, because it is easily testable with `match_all() == __activemask()`.

(cherry picked from commit 9a584b0)
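For readers wondering about the dropped `predicate`, here is a minimal sketch of the equivalence the summary relies on. It assumes a device-side translation unit where gpuintrin.h is available; `all_lanes_match` is a hypothetical helper for illustration, not part of the patch:

```c
#include <gpuintrin.h>

// Hypothetical helper: reconstructs the `predicate` output that CUDA's
// __match_all_sync() provides. __gpu_match_all_u32() returns the full
// lane mask when every active lane holds the same value and 0 otherwise,
// so comparing its result against the active lane mask yields the predicate.
static inline int all_lanes_match(uint32_t x) {
  uint64_t mask = __gpu_lane_mask();
  return __gpu_match_all_u32(mask, x) == mask;
}
```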
@jhuber6 What do you think about merging this PR to the release branch?
@llvm/pr-subscribers-libc Author: None (llvmbot) Changes: Backport 9a584b0, requested by @jhuber6. Full diff: https://github.com/llvm/llvm-project/pull/127704.diff (5 files affected):
diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h
index 9dad99ffe9439..355e75d0b2d42 100644
--- a/clang/lib/Headers/amdgpuintrin.h
+++ b/clang/lib/Headers/amdgpuintrin.h
@@ -162,6 +162,62 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __lo, __width));
}
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
+ uint32_t __match_mask = 0;
+
+ bool __done = 0;
+ while (__gpu_ballot(__lane_mask, !__done)) {
+ if (!__done) {
+ uint32_t __first = __gpu_read_first_lane_u32(__lane_mask, __x);
+ if (__first == __x) {
+ __match_mask = __gpu_lane_mask();
+ __done = 1;
+ }
+ }
+ }
+ __gpu_sync_lane(__lane_mask);
+ return __match_mask;
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
+ uint64_t __match_mask = 0;
+
+ bool __done = 0;
+ while (__gpu_ballot(__lane_mask, !__done)) {
+ if (!__done) {
+ uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
+ if (__first == __x) {
+ __match_mask = __gpu_lane_mask();
+ __done = 1;
+ }
+ }
+ }
+ __gpu_sync_lane(__lane_mask);
+ return __match_mask;
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
+ uint32_t __first = __gpu_read_first_lane_u32(__lane_mask, __x);
+ uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
+ __gpu_sync_lane(__lane_mask);
+ return __ballot == __gpu_lane_mask() ? __gpu_lane_mask() : 0ull;
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
+ uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
+ uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
+ __gpu_sync_lane(__lane_mask);
+ return __ballot == __gpu_lane_mask() ? __gpu_lane_mask() : 0ull;
+}
+
// Returns true if the flat pointer points to AMDGPU 'shared' memory.
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) {
return __builtin_amdgcn_is_shared((void [[clang::address_space(0)]] *)((
diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h
index 40fa2edebe975..f857a87b5f4c7 100644
--- a/clang/lib/Headers/nvptxintrin.h
+++ b/clang/lib/Headers/nvptxintrin.h
@@ -13,6 +13,10 @@
#error "This file is intended for NVPTX targets or offloading to NVPTX"
#endif
+#ifndef __CUDA_ARCH__
+#define __CUDA_ARCH__ 0
+#endif
+
#include <stdint.h>
#if !defined(__cplusplus)
@@ -168,6 +172,76 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width));
}
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
+ // Newer targets can use the dedicated CUDA support.
+ if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
+ return __nvvm_match_any_sync_i32(__lane_mask, __x);
+
+ uint32_t __match_mask = 0;
+ bool __done = 0;
+ while (__gpu_ballot(__lane_mask, !__done)) {
+ if (!__done) {
+ uint32_t __first = __gpu_read_first_lane_u32(__lane_mask, __x);
+ if (__first == __x) {
+ __match_mask = __gpu_lane_mask();
+ __done = 1;
+ }
+ }
+ }
+ return __match_mask;
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
+ // Newer targets can use the dedicated CUDA support.
+ if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
+ return __nvvm_match_any_sync_i64(__lane_mask, __x);
+
+ uint64_t __match_mask = 0;
+
+ bool __done = 0;
+ while (__gpu_ballot(__lane_mask, !__done)) {
+ if (!__done) {
+ uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
+ if (__first == __x) {
+ __match_mask = __gpu_lane_mask();
+ __done = 1;
+ }
+ }
+ }
+ __gpu_sync_lane(__lane_mask);
+ return __match_mask;
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
+ // Newer targets can use the dedicated CUDA support.
+ int predicate;
+ if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
+ return __nvvm_match_all_sync_i32p(__lane_mask, __x, &predicate);
+
+ uint32_t __first = __gpu_read_first_lane_u32(__lane_mask, __x);
+ uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
+ return __ballot == __gpu_lane_mask() ? __gpu_lane_mask() : 0ull;
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
+ // Newer targets can use the dedicated CUDA support.
+ int predicate;
+ if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
+ return __nvvm_match_all_sync_i64p(__lane_mask, __x, &predicate);
+
+ uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
+ uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
+ return __ballot == __gpu_lane_mask() ? __gpu_lane_mask() : 0ull;
+}
+
// Returns true if the flat pointer points to CUDA 'shared' memory.
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) {
return __nvvm_isspacep_shared(ptr);
diff --git a/libc/src/__support/GPU/utils.h b/libc/src/__support/GPU/utils.h
index 323c003f1ff07..0fd3a6498b865 100644
--- a/libc/src/__support/GPU/utils.h
+++ b/libc/src/__support/GPU/utils.h
@@ -92,6 +92,14 @@ LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x,
return __gpu_shuffle_idx_u32(lane_mask, idx, x, width);
}
+LIBC_INLINE uint64_t match_any(uint64_t lane_mask, uint32_t x) {
+ return __gpu_match_any_u32(lane_mask, x);
+}
+
+LIBC_INLINE uint64_t match_all(uint64_t lane_mask, uint32_t x) {
+ return __gpu_match_all_u32(lane_mask, x);
+}
+
[[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); }
LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
diff --git a/libc/test/integration/src/__support/GPU/CMakeLists.txt b/libc/test/integration/src/__support/GPU/CMakeLists.txt
index 68bbc3849bc7e..e066830f6cc0d 100644
--- a/libc/test/integration/src/__support/GPU/CMakeLists.txt
+++ b/libc/test/integration/src/__support/GPU/CMakeLists.txt
@@ -18,3 +18,12 @@ add_integration_test(
LOADER_ARGS
--threads 64
)
+
+add_integration_test(
+ match_test
+ SUITE libc-support-gpu-tests
+ SRCS
+ match.cpp
+ LOADER_ARGS
+ --threads 64
+)
diff --git a/libc/test/integration/src/__support/GPU/match.cpp b/libc/test/integration/src/__support/GPU/match.cpp
new file mode 100644
index 0000000000000..0eadb1364eec7
--- /dev/null
+++ b/libc/test/integration/src/__support/GPU/match.cpp
@@ -0,0 +1,35 @@
+//===-- Test for the match operations on the GPU --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/GPU/utils.h"
+#include "test/IntegrationTest/test.h"
+
+using namespace LIBC_NAMESPACE;
+
+// Test to ensure that match any / match all work.
+static void test_match() {
+ uint64_t mask = gpu::get_lane_mask();
+ EXPECT_EQ(1ull << gpu::get_lane_id(),
+ gpu::match_any(mask, gpu::get_lane_id()));
+ EXPECT_EQ(mask, gpu::match_any(mask, 1));
+
+ uint64_t expected = gpu::get_lane_id() < 16 ? 0xffff : 0xffff0000;
+ EXPECT_EQ(expected, gpu::match_any(mask, gpu::get_lane_id() < 16));
+ EXPECT_EQ(mask, gpu::match_all(mask, 1));
+ EXPECT_EQ(0ull, gpu::match_all(mask, gpu::get_lane_id()));
+}
+
+TEST_MAIN(int argc, char **argv, char **envp) {
+ if (gpu::get_thread_id() >= gpu::get_lane_size())
+ return 0;
+
+ test_match();
+
+ return 0;
+}
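As a usage illustration beyond the test above (a sketch, not part of the patch; it assumes `cpp::popcount`-style helpers such as `cpp::countr_zero` from the already-included bit.h header, and `process_key` is a hypothetical name), `match_any` makes it easy to elect one leader lane per distinct key within a warp:

```cpp
#include "src/__support/CPP/bit.h"
#include "src/__support/GPU/utils.h"

using namespace LIBC_NAMESPACE;

// Hypothetical example: group the warp's lanes by `key` and let the
// lowest-numbered lane of each group act on behalf of the whole group.
static void process_key(uint32_t key) {
  uint64_t mask = gpu::get_lane_mask();
  // Bitmask of every lane whose `key` matches this lane's `key`.
  uint64_t peers = gpu::match_any(mask, key);
  // The group leader is the lowest set bit among the matching lanes.
  if (gpu::get_lane_id() == static_cast<uint32_t>(cpp::countr_zero(peers))) {
    // e.g., issue one atomic update covering cpp::popcount(peers) lanes.
  }
}
```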
@llvm/pr-subscribers-backend-amdgpu, @llvm/pr-subscribers-backend-x86, and @llvm/pr-subscribers-clang received the same notification and diff as above.
Approving my own patch feels like a conflict of interest.
@shiltian What do you think about backporting this?
This looks like a new feature, so I'm not sure we need to backport it.
@jhuber6 Why do you want to backport this, and what's the impact if we don't?
Sorry, #127703 is the actually important one and I forgot to cherry-pick it; it fixes a test and incorrect behavior. I figured that if I was backporting that one I could merge this as well, but if that's too much then it's not a big deal.
Backport 9a584b0
Requested by: @jhuber6