From e99aa4a500b0f271b327432d404a9c1f72c6850a Mon Sep 17 00:00:00 2001
From: Amr Hesham
Date: Wed, 28 Aug 2024 22:23:24 +0200
Subject: [PATCH 01/10] [clang][HLSL] Update DXIL/SPIRV hybrid CodeGen tests to use temp var (#105930)

Update all hybrid DXIL/SPIRV codegen tests to use a temp variable
representing the interchange target.

Fixes: #105710
---
 .../builtins/RWBuffer-constructor.hlsl        |   2 +-
 clang/test/CodeGenHLSL/builtins/all.hlsl      | 268 ++++++------------
 clang/test/CodeGenHLSL/builtins/any.hlsl      | 264 ++++++-----------
 clang/test/CodeGenHLSL/builtins/frac.hlsl     |  84 +++---
 clang/test/CodeGenHLSL/builtins/lerp.hlsl     |  58 ++--
 .../test/CodeGenHLSL/builtins/normalize.hlsl  |  73 ++---
 clang/test/CodeGenHLSL/builtins/rsqrt.hlsl    |  84 +++---
 .../semantics/DispatchThreadID.hlsl           |  14 +-
 8 files changed, 307 insertions(+), 540 deletions(-)

diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
index e51eac7f57c2d3..baddfcf2cf1d52 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
@@ -9,4 +9,4 @@ RWBuffer Buf;
 // CHECK: store ptr %[[HandleRes]], ptr %h, align 4
 
 // CHECK-SPIRV: %[[HandleRes:[0-9]+]] = call ptr @llvm.spv.create.handle(i8 1)
-// CHECK-SPIRV: store ptr %[[HandleRes]], ptr %h, align 8
+// CHECK-SPIRV: store ptr %[[HandleRes]], ptr %h, align 8
\ No newline at end of file
diff --git a/clang/test/CodeGenHLSL/builtins/all.hlsl b/clang/test/CodeGenHLSL/builtins/all.hlsl
index b48daa287480ff..39f364c5953d60 100644
--- a/clang/test/CodeGenHLSL/builtins/all.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/all.hlsl
@@ -1,277 +1,193 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK
+// RUN: --check-prefixes=CHECK,NATIVE_HALF \
+// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
-// RUN: -o - | FileCheck %s --check-prefixes=CHECK,SPIR_NO_HALF,SPIR_CHECK
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK \
+// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN: --check-prefixes=CHECK,NATIVE_HALF,DXIL_NATIVE_HALF,DXIL_CHECK
+// RUN: --check-prefixes=CHECK,NATIVE_HALF \
+// RUN: -DFNATTRS=noundef -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_NO_HALF,DXIL_CHECK
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK \
+// RUN: -DFNATTRS=noundef -DTARGET=dx
 
 #ifdef __HLSL_ENABLE_16_BIT
-// DXIL_NATIVE_HALF: define noundef i1 @
-// SPIR_NATIVE_HALF: define spir_func noundef i1 @
-// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.i16
-// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.i16
+// NATIVE_HALF: define [[FNATTRS]] i1 @
+// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.i16
 // NATIVE_HALF: ret i1 %hlsl.all
 bool test_all_int16_t(int16_t p0) { return all(p0); }
-// DXIL_NATIVE_HALF: define noundef i1 @
-// SPIR_NATIVE_HALF: define spir_func noundef i1 @
-//
DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v2i16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v2i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2i16 // NATIVE_HALF: ret i1 %hlsl.all bool test_all_int16_t2(int16_t2 p0) { return all(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v3i16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v3i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3i16 // NATIVE_HALF: ret i1 %hlsl.all bool test_all_int16_t3(int16_t3 p0) { return all(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v4i16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v4i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4i16 // NATIVE_HALF: ret i1 %hlsl.all bool test_all_int16_t4(int16_t4 p0) { return all(p0); } - -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.i16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.i16 // NATIVE_HALF: ret i1 %hlsl.all bool test_all_uint16_t(uint16_t p0) { return all(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v2i16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v2i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2i16 // NATIVE_HALF: ret i1 %hlsl.all bool test_all_uint16_t2(uint16_t2 p0) { return all(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v3i16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v3i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3i16 // NATIVE_HALF: ret i1 %hlsl.all bool test_all_uint16_t3(uint16_t3 p0) { return all(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v4i16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v4i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4i16 // NATIVE_HALF: ret i1 %hlsl.all bool test_all_uint16_t4(uint16_t4 p0) { return all(p0); } #endif // __HLSL_ENABLE_16_BIT -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.f16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.f16 -// DXIL_NO_HALF: %hlsl.all = call i1 @llvm.dx.all.f32 -// SPIR_NO_HALF: %hlsl.all = call i1 @llvm.spv.all.f32 +// CHECK: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.f16 +// NO_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.f32 // CHECK: ret i1 %hlsl.all bool test_all_half(half p0) { return all(p0); } - -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v2f16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 
@llvm.spv.all.v2f16 -// DXIL_NO_HALF: %hlsl.all = call i1 @llvm.dx.all.v2f32 -// SPIR_NO_HALF: %hlsl.all = call i1 @llvm.spv.all.v2f32 +// CHECK: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2f16 +// NO_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2f32 // CHECK: ret i1 %hlsl.all bool test_all_half2(half2 p0) { return all(p0); } - -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v3f16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v3f16 -// DXIL_NO_HALF: %hlsl.all = call i1 @llvm.dx.all.v3f32 -// SPIR_NO_HALF: %hlsl.all = call i1 @llvm.spv.all.v3f32 +// CHECK: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3f16 +// NO_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3f32 // CHECK: ret i1 %hlsl.all bool test_all_half3(half3 p0) { return all(p0); } - -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v4f16 -// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v4f16 -// DXIL_NO_HALF: %hlsl.all = call i1 @llvm.dx.all.v4f32 -// SPIR_NO_HALF: %hlsl.all = call i1 @llvm.spv.all.v4f32 +// CHECK: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4f16 +// NO_HALF: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4f32 // CHECK: ret i1 %hlsl.all bool test_all_half4(half4 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.f32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.f32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.f32 // CHECK: ret i1 %hlsl.all bool test_all_float(float p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2f32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2f32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2f32 // CHECK: ret i1 %hlsl.all bool test_all_float2(float2 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3f32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3f32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3f32 // CHECK: ret i1 %hlsl.all bool test_all_float3(float3 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4f32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4f32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4f32 // CHECK: ret i1 %hlsl.all bool test_all_float4(float4 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.f64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.f64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.f64 // CHECK: ret i1 %hlsl.all bool test_all_double(double p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2f64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2f64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call 
i1 @llvm.[[TARGET]].all.v2f64 // CHECK: ret i1 %hlsl.all bool test_all_double2(double2 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3f64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3f64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3f64 // CHECK: ret i1 %hlsl.all bool test_all_double3(double3 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4f64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4f64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4f64 // CHECK: ret i1 %hlsl.all bool test_all_double4(double4 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.i32 // CHECK: ret i1 %hlsl.all bool test_all_int(int p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2i32 // CHECK: ret i1 %hlsl.all bool test_all_int2(int2 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3i32 // CHECK: ret i1 %hlsl.all bool test_all_int3(int3 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4i32 // CHECK: ret i1 %hlsl.all bool test_all_int4(int4 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.i32 // CHECK: ret i1 %hlsl.all bool test_all_uint(uint p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2i32 // CHECK: ret i1 %hlsl.all bool test_all_uint2(uint2 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3i32 // CHECK: ret i1 %hlsl.all bool test_all_uint3(uint3 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i32 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i32 
+// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4i32 // CHECK: ret i1 %hlsl.all bool test_all_uint4(uint4 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.i64 // CHECK: ret i1 %hlsl.all bool test_all_int64_t(int64_t p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2i64 // CHECK: ret i1 %hlsl.all bool test_all_int64_t2(int64_t2 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3i64 // CHECK: ret i1 %hlsl.all bool test_all_int64_t3(int64_t3 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4i64 // CHECK: ret i1 %hlsl.all bool test_all_int64_t4(int64_t4 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.i64 // CHECK: ret i1 %hlsl.all bool test_all_uint64_t(uint64_t p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2i64 // CHECK: ret i1 %hlsl.all bool test_all_uint64_t2(uint64_t2 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3i64 // CHECK: ret i1 %hlsl.all bool test_all_uint64_t3(uint64_t3 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i64 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4i64 // CHECK: ret i1 %hlsl.all bool test_all_uint64_t4(uint64_t4 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i1 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i1 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.i1 // CHECK: ret i1 %hlsl.all bool test_all_bool(bool p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// 
DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i1 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i1 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v2i1 // CHECK: ret i1 %hlsl.all bool test_all_bool2(bool2 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i1 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i1 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v3i1 // CHECK: ret i1 %hlsl.all bool test_all_bool3(bool3 p0) { return all(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i1 -// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i1 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.all = call i1 @llvm.[[TARGET]].all.v4i1 // CHECK: ret i1 %hlsl.all bool test_all_bool4(bool4 p0) { return all(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/any.hlsl b/clang/test/CodeGenHLSL/builtins/any.hlsl index 84584281a3b7d2..3d9d8e9e689ed0 100644 --- a/clang/test/CodeGenHLSL/builtins/any.hlsl +++ b/clang/test/CodeGenHLSL/builtins/any.hlsl @@ -1,304 +1,224 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,SPIR_NO_HALF,SPIR_CHECK +// RUN: -o - | FileCheck %s --check-prefixes=CHECK \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,NATIVE_HALF,DXIL_NATIVE_HALF,DXIL_CHECK +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_NO_HALF,DXIL_CHECK +// RUN: -o - | FileCheck %s --check-prefixes=CHECK \ +// RUN: -DFNATTRS=noundef -DTARGET=dx #ifdef __HLSL_ENABLE_16_BIT -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.i16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.i16 // NATIVE_HALF: ret i1 %hlsl.any bool test_any_int16_t(int16_t p0) { return any(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.v2i16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v2i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2i16 // NATIVE_HALF: ret i1 %hlsl.any bool test_any_int16_t2(int16_t2 p0) { return any(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 
@llvm.dx.any.v3i16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v3i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3i16 // NATIVE_HALF: ret i1 %hlsl.any bool test_any_int16_t3(int16_t3 p0) { return any(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.v4i16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v4i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4i16 // NATIVE_HALF: ret i1 %hlsl.any bool test_any_int16_t4(int16_t4 p0) { return any(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.i16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.i16 // NATIVE_HALF: ret i1 %hlsl.any bool test_any_uint16_t(uint16_t p0) { return any(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.v2i16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v2i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2i16 // NATIVE_HALF: ret i1 %hlsl.any bool test_any_uint16_t2(uint16_t2 p0) { return any(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.v3i16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v3i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3i16 // NATIVE_HALF: ret i1 %hlsl.any bool test_any_uint16_t3(uint16_t3 p0) { return any(p0); } -// DXIL_NATIVE_HALF: define noundef i1 @ -// SPIR_NATIVE_HALF: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.v4i16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v4i16 +// NATIVE_HALF: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4i16 // NATIVE_HALF: ret i1 %hlsl.any bool test_any_uint16_t4(uint16_t4 p0) { return any(p0); } #endif // __HLSL_ENABLE_16_BIT -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.f16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.f16 -// DXIL_NO_HALF: %hlsl.any = call i1 @llvm.dx.any.f32 -// SPIR_NO_HALF: %hlsl.any = call i1 @llvm.spv.any.f32 +// CHECK: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.f16 +// NO_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.f32 // CHECK: ret i1 %hlsl.any bool test_any_half(half p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.v2f16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v2f16 -// DXIL_NO_HALF: %hlsl.any = call i1 @llvm.dx.any.v2f32 -// SPIR_NO_HALF: %hlsl.any = call i1 @llvm.spv.any.v2f32 +// CHECK: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2f16 +// NO_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2f32 // CHECK: ret i1 %hlsl.any bool test_any_half2(half2 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// 
DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.v3f16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v3f16 -// DXIL_NO_HALF: %hlsl.any = call i1 @llvm.dx.any.v3f32 -// SPIR_NO_HALF: %hlsl.any = call i1 @llvm.spv.any.v3f32 +// CHECK: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3f16 +// NO_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3f32 // CHECK: ret i1 %hlsl.any bool test_any_half3(half3 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_NATIVE_HALF: %hlsl.any = call i1 @llvm.dx.any.v4f16 -// SPIR_NATIVE_HALF: %hlsl.any = call i1 @llvm.spv.any.v4f16 -// DXIL_NO_HALF: %hlsl.any = call i1 @llvm.dx.any.v4f32 -// SPIR_NO_HALF: %hlsl.any = call i1 @llvm.spv.any.v4f32 +// CHECK: define [[FNATTRS]] i1 @ +// NATIVE_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4f16 +// NO_HALF: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4f32 // CHECK: ret i1 %hlsl.any bool test_any_half4(half4 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.f32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.f32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.f32 // CHECK: ret i1 %hlsl.any bool test_any_float(float p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v2f32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v2f32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2f32 // CHECK: ret i1 %hlsl.any bool test_any_float2(float2 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v3f32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v3f32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3f32 // CHECK: ret i1 %hlsl.any bool test_any_float3(float3 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v4f32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v4f32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4f32 // CHECK: ret i1 %hlsl.any bool test_any_float4(float4 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.f64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.f64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.f64 // CHECK: ret i1 %hlsl.any bool test_any_double(double p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v2f64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v2f64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2f64 // CHECK: ret i1 %hlsl.any bool test_any_double2(double2 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v3f64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v3f64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3f64 // CHECK: ret i1 %hlsl.any bool test_any_double3(double3 p0) { 
return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v4f64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v4f64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4f64 // CHECK: ret i1 %hlsl.any bool test_any_double4(double4 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.i32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.i32 // CHECK: ret i1 %hlsl.any bool test_any_int(int p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v2i32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v2i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2i32 // CHECK: ret i1 %hlsl.any bool test_any_int2(int2 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v3i32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v3i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3i32 // CHECK: ret i1 %hlsl.any bool test_any_int3(int3 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v4i32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v4i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4i32 // CHECK: ret i1 %hlsl.any bool test_any_int4(int4 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.i32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.i32 // CHECK: ret i1 %hlsl.any bool test_any_uint(uint p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v2i32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v2i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2i32 // CHECK: ret i1 %hlsl.any bool test_any_uint2(uint2 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v3i32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v3i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3i32 // CHECK: ret i1 %hlsl.any bool test_any_uint3(uint3 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v4i32 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v4i32 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4i32 // CHECK: ret i1 %hlsl.any bool test_any_uint4(uint4 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.i64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.i64 // CHECK: 
ret i1 %hlsl.any bool test_any_int64_t(int64_t p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v2i64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v2i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2i64 // CHECK: ret i1 %hlsl.any bool test_any_int64_t2(int64_t2 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v3i64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v3i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3i64 // CHECK: ret i1 %hlsl.any bool test_any_int64_t3(int64_t3 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v4i64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v4i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4i64 // CHECK: ret i1 %hlsl.any bool test_any_int64_t4(int64_t4 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.i64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.i64 // CHECK: ret i1 %hlsl.any bool test_any_uint64_t(uint64_t p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v2i64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v2i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2i64 // CHECK: ret i1 %hlsl.any bool test_any_uint64_t2(uint64_t2 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v3i64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v3i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3i64 // CHECK: ret i1 %hlsl.any bool test_any_uint64_t3(uint64_t3 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v4i64 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v4i64 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4i64 // CHECK: ret i1 %hlsl.any bool test_any_uint64_t4(uint64_t4 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.i1 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.i1 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.i1 // CHECK: ret i1 %hlsl.any bool test_any_bool(bool p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v2i1 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v2i1 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v2i1 // CHECK: ret i1 %hlsl.any bool test_any_bool2(bool2 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v3i1 -// SPIR_CHECK: %hlsl.any = call i1 
@llvm.spv.any.v3i1 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v3i1 // CHECK: ret i1 %hlsl.any bool test_any_bool3(bool3 p0) { return any(p0); } -// DXIL_CHECK: define noundef i1 @ -// SPIR_CHECK: define spir_func noundef i1 @ -// DXIL_CHECK: %hlsl.any = call i1 @llvm.dx.any.v4i1 -// SPIR_CHECK: %hlsl.any = call i1 @llvm.spv.any.v4i1 +// CHECK: define [[FNATTRS]] i1 @ +// CHECK: %hlsl.any = call i1 @llvm.[[TARGET]].any.v4i1 // CHECK: ret i1 %hlsl.any bool test_any_bool4(bool4 p0) { return any(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/frac.hlsl b/clang/test/CodeGenHLSL/builtins/frac.hlsl index b457f5c2787918..f0fbba978c0237 100644 --- a/clang/test/CodeGenHLSL/builtins/frac.hlsl +++ b/clang/test/CodeGenHLSL/builtins/frac.hlsl @@ -1,84 +1,64 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,SPIR_CHECK,NATIVE_HALF,SPIR_NATIVE_HALF +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,SPIR_CHECK,NO_HALF,SPIR_NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv -// DXIL_NATIVE_HALF: define noundef half @ -// SPIR_NATIVE_HALF: define spir_func noundef half @ -// DXIL_NATIVE_HALF: %hlsl.frac = call half @llvm.dx.frac.f16( -// SPIR_NATIVE_HALF: %hlsl.frac = call half @llvm.spv.frac.f16( +// NATIVE_HALF: define [[FNATTRS]] half @ +// NATIVE_HALF: %hlsl.frac = call half @llvm.[[TARGET]].frac.f16( // NATIVE_HALF: ret half %hlsl.frac -// DXIL_NO_HALF: define noundef float @ -// SPIR_NO_HALF: define spir_func noundef float @ -// DXIL_NO_HALF: %hlsl.frac = call float @llvm.dx.frac.f32( -// SPIR_NO_HALF: %hlsl.frac = call float @llvm.spv.frac.f32( +// NO_HALF: define [[FNATTRS]] float @ +// NO_HALF: %hlsl.frac = call float @llvm.[[TARGET]].frac.f32( // NO_HALF: ret float %hlsl.frac half test_frac_half(half p0) { return frac(p0); } -// DXIL_NATIVE_HALF: define noundef <2 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @ -// DXIL_NATIVE_HALF: %hlsl.frac = call <2 x half> @llvm.dx.frac.v2f16 -// SPIR_NATIVE_HALF: %hlsl.frac = call <2 x half> @llvm.spv.frac.v2f16 +// NATIVE_HALF: define [[FNATTRS]] <2 x half> @ +// NATIVE_HALF: %hlsl.frac = call <2 x half> @llvm.[[TARGET]].frac.v2f16 // NATIVE_HALF: ret <2 x half> %hlsl.frac -// DXIL_NO_HALF: define noundef <2 x float> @ -// SPIR_NO_HALF: define spir_func noundef <2 x float> @ -// DXIL_NO_HALF: %hlsl.frac = call <2 x float> @llvm.dx.frac.v2f32( -// SPIR_NO_HALF: 
%hlsl.frac = call <2 x float> @llvm.spv.frac.v2f32( +// NO_HALF: define [[FNATTRS]] <2 x float> @ +// NO_HALF: %hlsl.frac = call <2 x float> @llvm.[[TARGET]].frac.v2f32( // NO_HALF: ret <2 x float> %hlsl.frac half2 test_frac_half2(half2 p0) { return frac(p0); } -// DXIL_NATIVE_HALF: define noundef <3 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @ -// DXIL_NATIVE_HALF: %hlsl.frac = call <3 x half> @llvm.dx.frac.v3f16 -// SPIR_NATIVE_HALF: %hlsl.frac = call <3 x half> @llvm.spv.frac.v3f16 +// NATIVE_HALF: define [[FNATTRS]] <3 x half> @ +// NATIVE_HALF: %hlsl.frac = call <3 x half> @llvm.[[TARGET]].frac.v3f16 // NATIVE_HALF: ret <3 x half> %hlsl.frac -// DXIL_NO_HALF: define noundef <3 x float> @ -// SPIR_NO_HALF: define spir_func noundef <3 x float> @ -// DXIL_NO_HALF: %hlsl.frac = call <3 x float> @llvm.dx.frac.v3f32( -// SPIR_NO_HALF: %hlsl.frac = call <3 x float> @llvm.spv.frac.v3f32( +// NO_HALF: define [[FNATTRS]] <3 x float> @ +// NO_HALF: %hlsl.frac = call <3 x float> @llvm.[[TARGET]].frac.v3f32( // NO_HALF: ret <3 x float> %hlsl.frac half3 test_frac_half3(half3 p0) { return frac(p0); } -// DXIL_NATIVE_HALF: define noundef <4 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @ -// DXIL_NATIVE_HALF: %hlsl.frac = call <4 x half> @llvm.dx.frac.v4f16 -// SPIR_NATIVE_HALF: %hlsl.frac = call <4 x half> @llvm.spv.frac.v4f16 +// NATIVE_HALF: define [[FNATTRS]] <4 x half> @ +// NATIVE_HALF: %hlsl.frac = call <4 x half> @llvm.[[TARGET]].frac.v4f16 // NATIVE_HALF: ret <4 x half> %hlsl.frac -// DXIL_NO_HALF: define noundef <4 x float> @ -// SPIR_NO_HALF: define spir_func noundef <4 x float> @ -// DXIL_NO_HALF: %hlsl.frac = call <4 x float> @llvm.dx.frac.v4f32( -// SPIR_NO_HALF: %hlsl.frac = call <4 x float> @llvm.spv.frac.v4f32( +// NO_HALF: define [[FNATTRS]] <4 x float> @ +// NO_HALF: %hlsl.frac = call <4 x float> @llvm.[[TARGET]].frac.v4f32( // NO_HALF: ret <4 x float> %hlsl.frac half4 test_frac_half4(half4 p0) { return frac(p0); } -// DXIL_CHECK: define noundef float @ -// SPIR_CHECK: define spir_func noundef float @ -// DXIL_CHECK: %hlsl.frac = call float @llvm.dx.frac.f32( -// SPIR_CHECK: %hlsl.frac = call float @llvm.spv.frac.f32( +// CHECK: define [[FNATTRS]] float @ +// CHECK: %hlsl.frac = call float @llvm.[[TARGET]].frac.f32( // CHECK: ret float %hlsl.frac float test_frac_float(float p0) { return frac(p0); } -// DXIL_CHECK: define noundef <2 x float> @ -// SPIR_CHECK: define spir_func noundef <2 x float> @ -// DXIL_CHECK: %hlsl.frac = call <2 x float> @llvm.dx.frac.v2f32 -// SPIR_CHECK: %hlsl.frac = call <2 x float> @llvm.spv.frac.v2f32 +// CHECK: define [[FNATTRS]] <2 x float> @ +// CHECK: %hlsl.frac = call <2 x float> @llvm.[[TARGET]].frac.v2f32 // CHECK: ret <2 x float> %hlsl.frac float2 test_frac_float2(float2 p0) { return frac(p0); } -// DXIL_CHECK: define noundef <3 x float> @ -// SPIR_CHECK: define spir_func noundef <3 x float> @ -// DXIL_CHECK: %hlsl.frac = call <3 x float> @llvm.dx.frac.v3f32 -// SPIR_CHECK: %hlsl.frac = call <3 x float> @llvm.spv.frac.v3f32 +// CHECK: define [[FNATTRS]] <3 x float> @ +// CHECK: %hlsl.frac = call <3 x float> @llvm.[[TARGET]].frac.v3f32 // CHECK: ret <3 x float> %hlsl.frac float3 test_frac_float3(float3 p0) { return frac(p0); } -// DXIL_CHECK: define noundef <4 x float> @ -// SPIR_CHECK: define spir_func noundef <4 x float> @ -// DXIL_CHECK: %hlsl.frac = call <4 x float> @llvm.dx.frac.v4f32 -// SPIR_CHECK: %hlsl.frac = call <4 x float> @llvm.spv.frac.v4f32 +// CHECK: define [[FNATTRS]] <4 x float> @ +// 
CHECK: %hlsl.frac = call <4 x float> @llvm.[[TARGET]].frac.v4f32 // CHECK: ret <4 x float> %hlsl.frac float4 test_frac_float4(float4 p0) { return frac(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/lerp.hlsl b/clang/test/CodeGenHLSL/builtins/lerp.hlsl index 53ac24dd456930..298d157da00a35 100644 --- a/clang/test/CodeGenHLSL/builtins/lerp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/lerp.hlsl @@ -1,88 +1,76 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv - -// DXIL_NATIVE_HALF: %hlsl.lerp = call half @llvm.dx.lerp.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}) -// SPIR_NATIVE_HALF: %hlsl.lerp = call half @llvm.spv.lerp.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}) +// NATIVE_HALF: %hlsl.lerp = call half @llvm.[[TARGET]].lerp.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}) // NATIVE_HALF: ret half %hlsl.lerp -// DXIL_NO_HALF: %hlsl.lerp = call float @llvm.dx.lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) -// SPIR_NO_HALF: %hlsl.lerp = call float @llvm.spv.lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) +// NO_HALF: %hlsl.lerp = call float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) // NO_HALF: ret float %hlsl.lerp half test_lerp_half(half p0) { return lerp(p0, p0, p0); } -// DXIL_NATIVE_HALF: %hlsl.lerp = call <2 x half> @llvm.dx.lerp.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}) -// SPIR_NATIVE_HALF: %hlsl.lerp = call <2 x half> @llvm.spv.lerp.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}) +// NATIVE_HALF: %hlsl.lerp = call <2 x half> @llvm.[[TARGET]].lerp.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}) // NATIVE_HALF: ret <2 x half> %hlsl.lerp -// DXIL_NO_HALF: %hlsl.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) -// SPIR_NO_HALF: %hlsl.lerp = call <2 x float> @llvm.spv.lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) +// NO_HALF: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) // NO_HALF: ret <2 x float> %hlsl.lerp half2 test_lerp_half2(half2 p0) { return lerp(p0, p0, p0); } -// DXIL_NATIVE_HALF: %hlsl.lerp = call <3 x half> @llvm.dx.lerp.v3f16(<3 x 
half> %{{.*}}, <3 x half> %{{.*}}, <3 x half> %{{.*}}) -// SPIR_NATIVE_HALF: %hlsl.lerp = call <3 x half> @llvm.spv.lerp.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}}, <3 x half> %{{.*}}) +// NATIVE_HALF: %hlsl.lerp = call <3 x half> @llvm.[[TARGET]].lerp.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}}, <3 x half> %{{.*}}) // NATIVE_HALF: ret <3 x half> %hlsl.lerp -// DXIL_NO_HALF: %hlsl.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) -// SPIR_NO_HALF: %hlsl.lerp = call <3 x float> @llvm.spv.lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) +// NO_HALF: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) // NO_HALF: ret <3 x float> %hlsl.lerp half3 test_lerp_half3(half3 p0) { return lerp(p0, p0, p0); } -// DXIL_NATIVE_HALF: %hlsl.lerp = call <4 x half> @llvm.dx.lerp.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x half> %{{.*}}) -// SPIR_NATIVE_HALF: %hlsl.lerp = call <4 x half> @llvm.spv.lerp.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x half> %{{.*}}) +// NATIVE_HALF: %hlsl.lerp = call <4 x half> @llvm.[[TARGET]].lerp.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x half> %{{.*}}) // NATIVE_HALF: ret <4 x half> %hlsl.lerp -// DXIL_NO_HALF: %hlsl.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) -// SPIR_NO_HALF: %hlsl.lerp = call <4 x float> @llvm.spv.lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) +// NO_HALF: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // NO_HALF: ret <4 x float> %hlsl.lerp half4 test_lerp_half4(half4 p0) { return lerp(p0, p0, p0); } -// DXIL_CHECK: %hlsl.lerp = call float @llvm.dx.lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) -// SPIR_CHECK: %hlsl.lerp = call float @llvm.spv.lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) +// CHECK: %hlsl.lerp = call float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) // CHECK: ret float %hlsl.lerp float test_lerp_float(float p0) { return lerp(p0, p0, p0); } -// DXIL_CHECK: %hlsl.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) -// SPIR_CHECK: %hlsl.lerp = call <2 x float> @llvm.spv.lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) +// CHECK: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) // CHECK: ret <2 x float> %hlsl.lerp float2 test_lerp_float2(float2 p0) { return lerp(p0, p0, p0); } -// DXIL_CHECK: %hlsl.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) -// SPIR_CHECK: %hlsl.lerp = call <3 x float> @llvm.spv.lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) +// CHECK: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) // CHECK: ret <3 x float> %hlsl.lerp float3 test_lerp_float3(float3 p0) { return lerp(p0, p0, p0); } -// DXIL_CHECK: %hlsl.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) -// SPIR_CHECK: %hlsl.lerp = call <4 x float> @llvm.spv.lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) +// CHECK: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, 
<4 x float> %{{.*}}) // CHECK: ret <4 x float> %hlsl.lerp float4 test_lerp_float4(float4 p0) { return lerp(p0, p0, p0); } // CHECK: %[[b:.*]] = load <2 x float>, ptr %p1.addr, align 8 // CHECK: %[[c:.*]] = load <2 x float>, ptr %p1.addr, align 8 -// DXIL_CHECK: %hlsl.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %splat.splat, <2 x float> %[[b]], <2 x float> %[[c]]) -// SPIR_CHECK: %hlsl.lerp = call <2 x float> @llvm.spv.lerp.v2f32(<2 x float> %splat.splat, <2 x float> %[[b]], <2 x float> %[[c]]) +// CHECK: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %splat.splat, <2 x float> %[[b]], <2 x float> %[[c]]) // CHECK: ret <2 x float> %hlsl.lerp float2 test_lerp_float2_splat(float p0, float2 p1) { return lerp(p0, p1, p1); } // CHECK: %[[b:.*]] = load <3 x float>, ptr %p1.addr, align 16 // CHECK: %[[c:.*]] = load <3 x float>, ptr %p1.addr, align 16 -// DXIL_CHECK: %hlsl.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %splat.splat, <3 x float> %[[b]], <3 x float> %[[c]]) -// SPIR_CHECK: %hlsl.lerp = call <3 x float> @llvm.spv.lerp.v3f32(<3 x float> %splat.splat, <3 x float> %[[b]], <3 x float> %[[c]]) +// CHECK: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %splat.splat, <3 x float> %[[b]], <3 x float> %[[c]]) // CHECK: ret <3 x float> %hlsl.lerp float3 test_lerp_float3_splat(float p0, float3 p1) { return lerp(p0, p1, p1); } // CHECK: %[[b:.*]] = load <4 x float>, ptr %p1.addr, align 16 // CHECK: %[[c:.*]] = load <4 x float>, ptr %p1.addr, align 16 -// DXIL_CHECK: %hlsl.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %splat.splat, <4 x float> %[[b]], <4 x float> %[[c]]) -// SPIR_CHECK: %hlsl.lerp = call <4 x float> @llvm.spv.lerp.v4f32(<4 x float> %splat.splat, <4 x float> %[[b]], <4 x float> %[[c]]) +// CHECK: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %splat.splat, <4 x float> %[[b]], <4 x float> %[[c]]) // CHECK: ret <4 x float> %hlsl.lerp float4 test_lerp_float4_splat(float p0, float4 p1) { return lerp(p0, p1, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/normalize.hlsl b/clang/test/CodeGenHLSL/builtins/normalize.hlsl index 213959e77e7e1e..83ad607c14a607 100644 --- a/clang/test/CodeGenHLSL/builtins/normalize.hlsl +++ b/clang/test/CodeGenHLSL/builtins/normalize.hlsl @@ -1,60 +1,52 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s 
--check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv -// DXIL_NATIVE_HALF: define noundef half @ -// SPIR_NATIVE_HALF: define spir_func noundef half @ -// DXIL_NATIVE_HALF: call half @llvm.dx.normalize.f16(half -// SPIR_NATIVE_HALF: call half @llvm.spv.normalize.f16(half -// DXIL_NO_HALF: call float @llvm.dx.normalize.f32(float -// SPIR_NO_HALF: call float @llvm.spv.normalize.f32(float +// NATIVE_HALF: define [[FNATTRS]] half @ +// NATIVE_HALF: call half @llvm.[[TARGET]].normalize.f16(half +// NO_HALF: call float @llvm.[[TARGET]].normalize.f32(float // NATIVE_HALF: ret half // NO_HALF: ret float half test_normalize_half(half p0) { return normalize(p0); } -// DXIL_NATIVE_HALF: define noundef <2 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @ -// DXIL_NATIVE_HALF: call <2 x half> @llvm.dx.normalize.v2f16(<2 x half> -// SPIR_NATIVE_HALF: call <2 x half> @llvm.spv.normalize.v2f16(<2 x half> -// DXIL_NO_HALF: call <2 x float> @llvm.dx.normalize.v2f32(<2 x float> -// SPIR_NO_HALF: call <2 x float> @llvm.spv.normalize.v2f32(<2 x float> +// NATIVE_HALF: define [[FNATTRS]] <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].normalize.v2f16(<2 x half> +// NO_HALF: call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float> // NATIVE_HALF: ret <2 x half> %hlsl.normalize // NO_HALF: ret <2 x float> %hlsl.normalize half2 test_normalize_half2(half2 p0) { return normalize(p0); } -// DXIL_NATIVE_HALF: define noundef <3 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @ -// DXIL_NATIVE_HALF: call <3 x half> @llvm.dx.normalize.v3f16(<3 x half> -// SPIR_NATIVE_HALF: call <3 x half> @llvm.spv.normalize.v3f16(<3 x half> -// DXIL_NO_HALF: call <3 x float> @llvm.dx.normalize.v3f32(<3 x float> -// SPIR_NO_HALF: call <3 x float> @llvm.spv.normalize.v3f32(<3 x float> +// NATIVE_HALF: define [[FNATTRS]] <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].normalize.v3f16(<3 x half> +// NO_HALF: call <3 x float> @llvm.[[TARGET]].normalize.v3f32(<3 x float> // NATIVE_HALF: ret <3 x half> %hlsl.normalize // NO_HALF: ret <3 x float> %hlsl.normalize half3 test_normalize_half3(half3 p0) { return normalize(p0); } -// DXIL_NATIVE_HALF: define noundef <4 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @ -// DXIL_NATIVE_HALF: call <4 x half> @llvm.dx.normalize.v4f16(<4 x half> -// SPIR_NATIVE_HALF: call <4 x half> @llvm.spv.normalize.v4f16(<4 x half> -// DXIL_NO_HALF: call <4 x float> @llvm.dx.normalize.v4f32(<4 x float> -// SPIR_NO_HALF: call <4 x float> @llvm.spv.normalize.v4f32(<4 x float> +// NATIVE_HALF: define [[FNATTRS]] <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].normalize.v4f16(<4 x half> +// NO_HALF: call <4 x float> @llvm.[[TARGET]].normalize.v4f32(<4 x float> // NATIVE_HALF: ret <4 x half> %hlsl.normalize // NO_HALF: ret <4 x float> %hlsl.normalize half4 test_normalize_half4(half4 p0) @@ -62,37 +54,30 @@ half4 test_normalize_half4(half4 p0) return normalize(p0); } -// DXIL_CHECK: define noundef float @ -// SPIR_CHECK: define spir_func noundef float @ -// DXIL_CHECK: call float @llvm.dx.normalize.f32(float -// SPIR_CHECK: call float @llvm.spv.normalize.f32(float +// CHECK: define [[FNATTRS]] float @ +// CHECK: call float @llvm.[[TARGET]].normalize.f32(float // CHECK: ret float float test_normalize_float(float p0) { return normalize(p0); } -// DXIL_CHECK: define noundef <2 x float> @ -// SPIR_CHECK: 
define spir_func noundef <2 x float> @ -// DXIL_CHECK: %hlsl.normalize = call <2 x float> @llvm.dx.normalize.v2f32( -// SPIR_CHECK: %hlsl.normalize = call <2 x float> @llvm.spv.normalize.v2f32(<2 x float> +// CHECK: define [[FNATTRS]] <2 x float> @ +// CHECK: %hlsl.normalize = call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float> + // CHECK: ret <2 x float> %hlsl.normalize float2 test_normalize_float2(float2 p0) { return normalize(p0); } -// DXIL_CHECK: define noundef <3 x float> @ -// SPIR_CHECK: define spir_func noundef <3 x float> @ -// DXIL_CHECK: %hlsl.normalize = call <3 x float> @llvm.dx.normalize.v3f32( -// SPIR_CHECK: %hlsl.normalize = call <3 x float> @llvm.spv.normalize.v3f32(<3 x float> +// CHECK: define [[FNATTRS]] <3 x float> @ +// CHECK: %hlsl.normalize = call <3 x float> @llvm.[[TARGET]].normalize.v3f32( // CHECK: ret <3 x float> %hlsl.normalize float3 test_normalize_float3(float3 p0) { return normalize(p0); } -// DXIL_CHECK: define noundef <4 x float> @ -// SPIR_CHECK: define spir_func noundef <4 x float> @ -// DXIL_CHECK: %hlsl.normalize = call <4 x float> @llvm.dx.normalize.v4f32( -// SPIR_CHECK: %hlsl.normalize = call <4 x float> @llvm.spv.normalize.v4f32( +// CHECK: define [[FNATTRS]] <4 x float> @ +// CHECK: %hlsl.normalize = call <4 x float> @llvm.[[TARGET]].normalize.v4f32( // CHECK: ret <4 x float> %hlsl.normalize float4 test_length_float4(float4 p0) { diff --git a/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl b/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl index bb96ad8ea0fc6e..b1b53fc187da67 100644 --- a/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl +++ b/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl @@ -1,84 +1,64 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ +// RUN: -DFNATTRS=noundef -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,SPIR_CHECK,NATIVE_HALF,SPIR_NATIVE_HALF +// RUN: --check-prefixes=CHECK,NATIVE_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,SPIR_CHECK,NO_HALF,SPIR_NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ +// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv -// DXIL_NATIVE_HALF: define noundef half @ -// SPIR_NATIVE_HALF: define spir_func noundef half @ -// DXIL_NATIVE_HALF: %hlsl.rsqrt = call half @llvm.dx.rsqrt.f16( -// SPIR_NATIVE_HALF: %hlsl.rsqrt = call half @llvm.spv.rsqrt.f16( +// NATIVE_HALF: define [[FNATTRS]] half @ +// NATIVE_HALF: %hlsl.rsqrt = call half @llvm.[[TARGET]].rsqrt.f16( // NATIVE_HALF: ret half %hlsl.rsqrt -// DXIL_NO_HALF: define noundef float @ -// SPIR_NO_HALF: define spir_func noundef float @ -// DXIL_NO_HALF: %hlsl.rsqrt = call float 
@llvm.dx.rsqrt.f32( -// SPIR_NO_HALF: %hlsl.rsqrt = call float @llvm.spv.rsqrt.f32( +// NO_HALF: define [[FNATTRS]] float @ +// NO_HALF: %hlsl.rsqrt = call float @llvm.[[TARGET]].rsqrt.f32( // NO_HALF: ret float %hlsl.rsqrt half test_rsqrt_half(half p0) { return rsqrt(p0); } -// DXIL_NATIVE_HALF: define noundef <2 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @ -// DXIL_NATIVE_HALF: %hlsl.rsqrt = call <2 x half> @llvm.dx.rsqrt.v2f16 -// SPIR_NATIVE_HALF: %hlsl.rsqrt = call <2 x half> @llvm.spv.rsqrt.v2f16 +// NATIVE_HALF: define [[FNATTRS]] <2 x half> @ +// NATIVE_HALF: %hlsl.rsqrt = call <2 x half> @llvm.[[TARGET]].rsqrt.v2f16 // NATIVE_HALF: ret <2 x half> %hlsl.rsqrt -// DXIL_NO_HALF: define noundef <2 x float> @ -// SPIR_NO_HALF: define spir_func noundef <2 x float> @ -// DXIL_NO_HALF: %hlsl.rsqrt = call <2 x float> @llvm.dx.rsqrt.v2f32( -// SPIR_NO_HALF: %hlsl.rsqrt = call <2 x float> @llvm.spv.rsqrt.v2f32( +// NO_HALF: define [[FNATTRS]] <2 x float> @ +// NO_HALF: %hlsl.rsqrt = call <2 x float> @llvm.[[TARGET]].rsqrt.v2f32( // NO_HALF: ret <2 x float> %hlsl.rsqrt half2 test_rsqrt_half2(half2 p0) { return rsqrt(p0); } -// DXIL_NATIVE_HALF: define noundef <3 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @ -// DXIL_NATIVE_HALF: %hlsl.rsqrt = call <3 x half> @llvm.dx.rsqrt.v3f16 -// SPIR_NATIVE_HALF: %hlsl.rsqrt = call <3 x half> @llvm.spv.rsqrt.v3f16 +// NATIVE_HALF: define [[FNATTRS]] <3 x half> @ +// NATIVE_HALF: %hlsl.rsqrt = call <3 x half> @llvm.[[TARGET]].rsqrt.v3f16 // NATIVE_HALF: ret <3 x half> %hlsl.rsqrt -// DXIL_NO_HALF: define noundef <3 x float> @ -// SPIR_NO_HALF: define spir_func noundef <3 x float> @ -// DXIL_NO_HALF: %hlsl.rsqrt = call <3 x float> @llvm.dx.rsqrt.v3f32( -// SPIR_NO_HALF: %hlsl.rsqrt = call <3 x float> @llvm.spv.rsqrt.v3f32( +// NO_HALF: define [[FNATTRS]] <3 x float> @ +// NO_HALF: %hlsl.rsqrt = call <3 x float> @llvm.[[TARGET]].rsqrt.v3f32( // NO_HALF: ret <3 x float> %hlsl.rsqrt half3 test_rsqrt_half3(half3 p0) { return rsqrt(p0); } -// DXIL_NATIVE_HALF: define noundef <4 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @ -// DXIL_NATIVE_HALF: %hlsl.rsqrt = call <4 x half> @llvm.dx.rsqrt.v4f16 -// SPIR_NATIVE_HALF: %hlsl.rsqrt = call <4 x half> @llvm.spv.rsqrt.v4f16 +// NATIVE_HALF: define [[FNATTRS]] <4 x half> @ +// NATIVE_HALF: %hlsl.rsqrt = call <4 x half> @llvm.[[TARGET]].rsqrt.v4f16 // NATIVE_HALF: ret <4 x half> %hlsl.rsqrt -// DXIL_NO_HALF: define noundef <4 x float> @ -// SPIR_NO_HALF: define spir_func noundef <4 x float> @ -// DXIL_NO_HALF: %hlsl.rsqrt = call <4 x float> @llvm.dx.rsqrt.v4f32( -// SPIR_NO_HALF: %hlsl.rsqrt = call <4 x float> @llvm.spv.rsqrt.v4f32( +// NO_HALF: define [[FNATTRS]] <4 x float> @ +// NO_HALF: %hlsl.rsqrt = call <4 x float> @llvm.[[TARGET]].rsqrt.v4f32( // NO_HALF: ret <4 x float> %hlsl.rsqrt half4 test_rsqrt_half4(half4 p0) { return rsqrt(p0); } -// DXIL_CHECK: define noundef float @ -// SPIR_CHECK: define spir_func noundef float @ -// DXIL_CHECK: %hlsl.rsqrt = call float @llvm.dx.rsqrt.f32( -// SPIR_CHECK: %hlsl.rsqrt = call float @llvm.spv.rsqrt.f32( +// CHECK: define [[FNATTRS]] float @ +// CHECK: %hlsl.rsqrt = call float @llvm.[[TARGET]].rsqrt.f32( // CHECK: ret float %hlsl.rsqrt float test_rsqrt_float(float p0) { return rsqrt(p0); } -// DXIL_CHECK: define noundef <2 x float> @ -// SPIR_CHECK: define spir_func noundef <2 x float> @ -// DXIL_CHECK: %hlsl.rsqrt = call <2 x float> @llvm.dx.rsqrt.v2f32 -// SPIR_CHECK: %hlsl.rsqrt = call <2 x float> 
@llvm.spv.rsqrt.v2f32 +// CHECK: define [[FNATTRS]] <2 x float> @ +// CHECK: %hlsl.rsqrt = call <2 x float> @llvm.[[TARGET]].rsqrt.v2f32 // CHECK: ret <2 x float> %hlsl.rsqrt float2 test_rsqrt_float2(float2 p0) { return rsqrt(p0); } -// DXIL_CHECK: define noundef <3 x float> @ -// SPIR_CHECK: define spir_func noundef <3 x float> @ -// DXIL_CHECK: %hlsl.rsqrt = call <3 x float> @llvm.dx.rsqrt.v3f32 -// SPIR_CHECK: %hlsl.rsqrt = call <3 x float> @llvm.spv.rsqrt.v3f32 +// CHECK: define [[FNATTRS]] <3 x float> @ +// CHECK: %hlsl.rsqrt = call <3 x float> @llvm.[[TARGET]].rsqrt.v3f32 // CHECK: ret <3 x float> %hlsl.rsqrt float3 test_rsqrt_float3(float3 p0) { return rsqrt(p0); } -// DXIL_CHECK: define noundef <4 x float> @ -// SPIR_CHECK: define spir_func noundef <4 x float> @ -// DXIL_CHECK: %hlsl.rsqrt = call <4 x float> @llvm.dx.rsqrt.v4f32 -// SPIR_CHECK: %hlsl.rsqrt = call <4 x float> @llvm.spv.rsqrt.v4f32 +// CHECK: define [[FNATTRS]] <4 x float> @ +// CHECK: %hlsl.rsqrt = call <4 x float> @llvm.[[TARGET]].rsqrt.v4f32 // CHECK: ret <4 x float> %hlsl.rsqrt float4 test_rsqrt_float4(float4 p0) { return rsqrt(p0); } diff --git a/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl b/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl index 2004a9d894a579..59c1620334d0e3 100644 --- a/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl +++ b/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl @@ -1,22 +1,20 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -DTARGET=dx +// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV -DTARGET=spv // Make sure SV_DispatchThreadID translated into dx.thread.id. 
// CHECK: define void @foo() -// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.dx.thread.id(i32 0) -// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.spv.thread.id(i32 0) +// CHECK-DXIL: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0) +// CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0) // CHECK: call void @{{.*}}foo{{.*}}(i32 %[[#ID]]) [shader("compute")] [numthreads(8,8,1)] void foo(uint Idx : SV_DispatchThreadID) {} // CHECK: define void @bar() -// CHECK-DXIL: %[[#ID_X:]] = call i32 @llvm.dx.thread.id(i32 0) -// CHECK-SPIRV: %[[#ID_X:]] = call i32 @llvm.spv.thread.id(i32 0) +// CHECK: %[[#ID_X:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0) // CHECK: %[[#ID_X_:]] = insertelement <2 x i32> poison, i32 %[[#ID_X]], i64 0 -// CHECK-DXIL: %[[#ID_Y:]] = call i32 @llvm.dx.thread.id(i32 1) -// CHECK-SPIRV: %[[#ID_Y:]] = call i32 @llvm.spv.thread.id(i32 1) +// CHECK: %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id(i32 1) // CHECK: %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1 // CHECK-DXIL: call void @{{.*}}bar{{.*}}(<2 x i32> %[[#ID_XY]]) [shader("compute")] From 17b7a9da46cef85b1a00b574c18c5f8cd5a761e1 Mon Sep 17 00:00:00 2001 From: Angel Zhang Date: Wed, 28 Aug 2024 16:29:32 -0400 Subject: [PATCH 02/10] [mlir][spirv] Add an argmax integration test with `mlir-vulkan-runner` (#106426) This PR adds an integration test for an argmax kernel with `mlir-vulkan-runner`. This test exercises the `convert-to-spirv` pass (landed in #95942) and demonstrates that we can use SPIR-V ops as "intrinsics" among higher-level dialects. The support for `index` dialect in `mlir-vulkan-runner` is also added. --- mlir/test/mlir-vulkan-runner/argmax.mlir | 109 ++++++++++++++++++ mlir/tools/mlir-vulkan-runner/CMakeLists.txt | 1 + .../mlir-vulkan-runner/mlir-vulkan-runner.cpp | 4 +- .../llvm-project-overlay/mlir/BUILD.bazel | 1 + 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 mlir/test/mlir-vulkan-runner/argmax.mlir diff --git a/mlir/test/mlir-vulkan-runner/argmax.mlir b/mlir/test/mlir-vulkan-runner/argmax.mlir new file mode 100644 index 00000000000000..d30c1cb5b58bdc --- /dev/null +++ b/mlir/test/mlir-vulkan-runner/argmax.mlir @@ -0,0 +1,109 @@ +// RUN: mlir-vulkan-runner %s \ +// RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ +// RUN: --entry-point-result=void | FileCheck %s + +// This kernel computes the argmax (index of the maximum element) from an array +// of integers. Each thread computes a lane maximum using a single `scf.for`. +// Then `gpu.subgroup_reduce` is used to find the maximum across the entire +// subgroup, which is then used by SPIR-V subgroup ops to compute the argmax +// of the entire input array. Note that this kernel only works if we have a +// single workgroup. 
+ +// CHECK: [15] +module attributes { + gpu.container_module, + spirv.target_env = #spirv.target_env< + #spirv.vce, #spirv.resource_limits<>> +} { + gpu.module @kernels { + gpu.func @kernel_argmax(%input : memref<128xi32>, %output : memref<1xi32>, %total_count_buf : memref<1xi32>) kernel + attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { + %idx0 = arith.constant 0 : index + %idx1 = arith.constant 1 : index + + %total_count = memref.load %total_count_buf[%idx0] : memref<1xi32> + %lane_count_idx = gpu.subgroup_size : index + %lane_count_i32 = index.castu %lane_count_idx : index to i32 + %lane_id_idx = gpu.thread_id x + %lane_id_i32 = index.castu %lane_id_idx : index to i32 + %lane_res_init = arith.constant 0 : i32 + %lane_max_init = memref.load %input[%lane_id_idx] : memref<128xi32> + %num_batches_i32 = arith.divui %total_count, %lane_count_i32 : i32 + %num_batches_idx = index.castu %num_batches_i32 : i32 to index + + %lane_res, %lane_max = scf.for %iter = %idx1 to %num_batches_idx step %idx1 + iter_args(%lane_res_iter = %lane_res_init, %lane_max_iter = %lane_max_init) -> (i32, i32) { + %iter_i32 = index.castu %iter : index to i32 + %mul = arith.muli %lane_count_i32, %iter_i32 : i32 + %idx_i32 = arith.addi %mul, %lane_id_i32 : i32 + %idx = index.castu %idx_i32 : i32 to index + %elem = memref.load %input[%idx] : memref<128xi32> + %gt = arith.cmpi sgt, %elem, %lane_max_iter : i32 + %lane_res_next = arith.select %gt, %idx_i32, %lane_res_iter : i32 + %lane_max_next = arith.select %gt, %elem, %lane_max_iter : i32 + scf.yield %lane_res_next, %lane_max_next : i32, i32 + } + + %subgroup_max = gpu.subgroup_reduce maxsi %lane_max : (i32) -> (i32) + %eq = arith.cmpi eq, %lane_max, %subgroup_max : i32 + %ballot = spirv.GroupNonUniformBallot %eq : vector<4xi32> + %lsb = spirv.GroupNonUniformBallotFindLSB %ballot : vector<4xi32>, i32 + %cond = arith.cmpi eq, %lsb, %lane_id_i32 : i32 + + scf.if %cond { + memref.store %lane_res, %output[%idx0] : memref<1xi32> + } + + gpu.return + } + } + + func.func @main() { + // Allocate 3 buffers. + %in_buf = memref.alloc() : memref<128xi32> + %out_buf = memref.alloc() : memref<1xi32> + %total_count_buf = memref.alloc() : memref<1xi32> + + // Constants. + %cst0 = arith.constant 0 : i32 + %idx0 = arith.constant 0 : index + %idx1 = arith.constant 1 : index + %idx16 = arith.constant 16 : index + %idx32 = arith.constant 32 : index + %idx48 = arith.constant 48 : index + %idx64 = arith.constant 64 : index + %idx80 = arith.constant 80 : index + %idx96 = arith.constant 96 : index + %idx112 = arith.constant 112 : index + + // Initialize input buffer. + %in_vec = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32> + vector.store %in_vec, %in_buf[%idx0] : memref<128xi32>, vector<16xi32> + vector.store %in_vec, %in_buf[%idx16] : memref<128xi32>, vector<16xi32> + vector.store %in_vec, %in_buf[%idx32] : memref<128xi32>, vector<16xi32> + vector.store %in_vec, %in_buf[%idx48] : memref<128xi32>, vector<16xi32> + vector.store %in_vec, %in_buf[%idx64] : memref<128xi32>, vector<16xi32> + vector.store %in_vec, %in_buf[%idx80] : memref<128xi32>, vector<16xi32> + vector.store %in_vec, %in_buf[%idx96] : memref<128xi32>, vector<16xi32> + vector.store %in_vec, %in_buf[%idx112] : memref<128xi32>, vector<16xi32> + + // Initialize output buffer. + %out_buf2 = memref.cast %out_buf : memref<1xi32> to memref + call @fillResource1DInt(%out_buf2, %cst0) : (memref, i32) -> () + + // Total number of scalars. 
+ %total_count = arith.constant 128 : i32 + %total_count_buf2 = memref.cast %total_count_buf : memref<1xi32> to memref + call @fillResource1DInt(%total_count_buf2, %total_count) : (memref, i32) -> () + + // Launch kernel function and print output. + gpu.launch_func @kernels::@kernel_argmax + blocks in (%idx1, %idx1, %idx1) threads in (%idx32, %idx1, %idx1) + args(%in_buf : memref<128xi32>, %out_buf : memref<1xi32>, %total_count_buf : memref<1xi32>) + %out_buf3 = memref.cast %out_buf2 : memref to memref<*xi32> + call @printMemrefI32(%out_buf3) : (memref<*xi32>) -> () + return + } + func.func private @fillResource1DInt(%0 : memref, %1 : i32) + func.func private @printMemrefI32(%ptr : memref<*xi32>) +} diff --git a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt index 26d6caacb0a7b1..36ec946b168715 100644 --- a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt +++ b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt @@ -57,6 +57,7 @@ if (MLIR_ENABLE_VULKAN_RUNNER) MLIRExecutionEngine MLIRFuncDialect MLIRGPUDialect + MLIRIndexDialect MLIRIR MLIRJitRunner MLIRLLVMDialect diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp index 2dd539ef83481f..bd34165574c8d2 100644 --- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp +++ b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp @@ -24,6 +24,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" +#include "mlir/Dialect/Index/IR/IndexDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" @@ -110,7 +111,8 @@ int main(int argc, char **argv) { registry.insert(); + mlir::memref::MemRefDialect, mlir::vector::VectorDialect, + mlir::index::IndexDialect>(); mlir::registerBuiltinDialectTranslation(registry); mlir::registerLLVMDialectTranslation(registry); diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 9d3fc5651f2073..8256eddf06047c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10001,6 +10001,7 @@ cc_binary( ":GPUToSPIRV", ":GPUToVulkanTransforms", ":GPUTransforms", + ":IndexDialect", ":LLVMCommonConversion", ":LLVMDialect", ":LLVMIRTransforms", From 38b252aa45abad53d7c07c666569b174a215d94d Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Wed, 28 Aug 2024 13:34:35 -0700 Subject: [PATCH 03/10] Disable ThreadPlanSingleThreadTimeout during step over breakpoint (#104532) This PR fixes another race condition in https://github.com/llvm/llvm-project/pull/90930. The failure was found by @labath with this log: https://paste.debian.net/hidden/30235a5c/: ``` dotest_wrapper. < 15> send packet: $z0,224505,1#65 ... 
b-remote.async>  <  22> send packet: $vCont;s:p1dcf.1dcf#4c
intern-state     GDBRemoteClientBase::Lock::Lock sent packet: \x03
b-remote.async>  < 818> read packet: $T13thread:p1dcf.1dcf;name:a.out;threads:1dcf,1dd2;jstopinfo:5b7b226e616d65223a22612e6f7574222c22726561736f6e223a227369676e616c222c227369676e616c223a31392c22746964223a373633317d2c7b226e616d65223a22612e6f7574222c22746964223a373633347d5d;thread-pcs:0000000000224505,00007f4e4302119a;00:0000000000000000;01:0000000000000000;02:0100000000000000;03:0000000000000000;04:9084997dfc7f0000;05:a8742a0000000000;06:b084997dfc7f0000;07:6084997dfc7f0000;08:0000000000000000;09:00d7e5424e7f0000;0a:d0d9e5424e7f0000;0b:0202000000000000;0c:80cc290000000000;0d:d8cc1c434e7f0000;0e:2886997dfc7f0000;0f:0100000000000000;10:0545220000000000;11:0602000000000000;12:3300000000000000;13:0000000000000000;14:0000000000000000;15:2b00000000000000;16:80fbe5424e7f0000;17:0000000000000000;18:0000000000000000;19:0000000000000000;reason:signal;#b9
```

It shows that an async interrupt "\x03" was sent immediately after the `vCont;s`
single step over the breakpoint at address `0x224505` (which was disabled before
vCont), and that the subsequent stop was still at the original PC (0x224505),
i.e. the PC had not moved forward.

The investigation shows that the failure happens when the timeout is short and
the async interrupt is sent to lldb-server immediately after vCont: ptrace()
resumes the debuggee and the async interrupt then stops it right away, so the
debuggee never gets a chance to execute and move the PC. It therefore enters
the stopped state immediately at the original PC. `ThreadPlanStepOverBreakpoint`
does not expect the PC to stay in place and reports a stop at the original
location.

To fix this, the PR prevents `ThreadPlanSingleThreadTimeout` from being created
during `ThreadPlanStepOverBreakpoint` by introducing a new
`SupportsResumeOthers()` method, for which `ThreadPlanStepOverBreakpoint`
returns false. This makes sense because we should never resume other threads
while stepping over a breakpoint anyway; otherwise other threads might miss the
breakpoint.

---------

Co-authored-by: jeffreytan81
---
 lldb/include/lldb/Target/ThreadPlan.h                   | 8 +++++++-
 lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h | 1 +
 lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp    | 6 ++++++
 lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp     | 7 +++++++
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/lldb/include/lldb/Target/ThreadPlan.h b/lldb/include/lldb/Target/ThreadPlan.h
index c336b6bb37df1b..d6da484f4fc137 100644
--- a/lldb/include/lldb/Target/ThreadPlan.h
+++ b/lldb/include/lldb/Target/ThreadPlan.h
@@ -385,7 +385,13 @@ class ThreadPlan : public std::enable_shared_from_this,
   virtual void SetStopOthers(bool new_value);
 
   virtual bool StopOthers();
-
+
+  // Returns true if the thread plan supports ThreadPlanSingleThreadTimeout to
+  // resume other threads after timeout. If the thread plan returns false it
+  // will prevent ThreadPlanSingleThreadTimeout from being created when this
+  // thread plan is alive.
+ virtual bool SupportsResumeOthers() { return true; } + virtual bool ShouldRunBeforePublicStop() { return false; } // This is the wrapper for DoWillResume that does generic ThreadPlan logic, diff --git a/lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h b/lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h index 1f3aff45c49abe..0da8dbf44ffd8a 100644 --- a/lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h +++ b/lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h @@ -23,6 +23,7 @@ class ThreadPlanStepOverBreakpoint : public ThreadPlan { void GetDescription(Stream *s, lldb::DescriptionLevel level) override; bool ValidatePlan(Stream *error) override; bool ShouldStop(Event *event_ptr) override; + bool SupportsResumeOthers() override; bool StopOthers() override; lldb::StateType GetPlanRunState() override; bool WillStop() override; diff --git a/lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp b/lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp index 806ba95c508b7c..71be81365a2668 100644 --- a/lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp +++ b/lldb/source/Target/ThreadPlanSingleThreadTimeout.cpp @@ -76,6 +76,9 @@ void ThreadPlanSingleThreadTimeout::PushNewWithTimeout(Thread &thread, if (!thread.GetCurrentPlan()->StopOthers()) return; + if (!thread.GetCurrentPlan()->SupportsResumeOthers()) + return; + auto timeout_plan = new ThreadPlanSingleThreadTimeout(thread, info); ThreadPlanSP thread_plan_sp(timeout_plan); auto status = thread.QueueThreadPlan(thread_plan_sp, @@ -102,6 +105,9 @@ void ThreadPlanSingleThreadTimeout::ResumeFromPrevState(Thread &thread, if (!thread.GetCurrentPlan()->StopOthers()) return; + if (!thread.GetCurrentPlan()->SupportsResumeOthers()) + return; + auto timeout_plan = new ThreadPlanSingleThreadTimeout(thread, info); ThreadPlanSP thread_plan_sp(timeout_plan); auto status = thread.QueueThreadPlan(thread_plan_sp, diff --git a/lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp b/lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp index f88a2b895931cd..3602527a9231b2 100644 --- a/lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp +++ b/lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp @@ -103,6 +103,13 @@ bool ThreadPlanStepOverBreakpoint::ShouldStop(Event *event_ptr) { bool ThreadPlanStepOverBreakpoint::StopOthers() { return true; } +// This thread plan does a single instruction step over a breakpoint instruction +// and needs to not resume other threads, so return false to stop the +// ThreadPlanSingleThreadTimeout from timing out and trying to resume all +// threads. If all threads gets resumed before we disable, single step and +// re-enable the breakpoint, we can miss breakpoints on other threads. 
+bool ThreadPlanStepOverBreakpoint::SupportsResumeOthers() { return false; } + StateType ThreadPlanStepOverBreakpoint::GetPlanRunState() { return eStateStepping; } From 0281339159e6ef0c30acbf146e9c3b06482191c1 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 28 Aug 2024 13:40:34 -0700 Subject: [PATCH 04/10] Revert "[CodeGen] Use MachineInstr::{all_uses,all_defs} (NFC)" (#106451) Reverts llvm/llvm-project#106404 Breaks: https://lab.llvm.org/buildbot/#/builders/169/builds/2590 https://lab.llvm.org/buildbot/#/builders/164/builds/2454 --- .../CodeGen/MachineConvergenceVerifier.cpp | 4 +++- llvm/lib/CodeGen/MachineInstr.cpp | 20 ++++++++++++------- llvm/lib/CodeGen/RegAllocFast.cpp | 4 +++- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp b/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp index ac6b04a202c533..3d3c55faa82465 100644 --- a/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp +++ b/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp @@ -51,7 +51,9 @@ GenericConvergenceVerifier::findAndCheckConvergenceTokenUsed( const MachineRegisterInfo &MRI = Context.getFunction()->getRegInfo(); const MachineInstr *TokenDef = nullptr; - for (const MachineOperand &MO : MI.all_uses()) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse()) + continue; Register OpReg = MO.getReg(); if (!OpReg.isVirtual()) continue; diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 7f81aeb545d328..f21910ee3a444a 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1041,9 +1041,10 @@ unsigned MachineInstr::getBundleSize() const { /// Returns true if the MachineInstr has an implicit-use operand of exactly /// the given register (not considering sub/super-registers). bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const { - for (const MachineOperand &MO : all_uses()) - if (MO.isImplicit() && MO.getReg() == Reg) + for (const MachineOperand &MO : operands()) { + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg) return true; + } return false; } @@ -1263,8 +1264,10 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { /// clearKillInfo - Clears kill flags on all operands. /// void MachineInstr::clearKillInfo() { - for (MachineOperand &MO : all_uses()) - MO.setIsKill(false); + for (MachineOperand &MO : operands()) { + if (MO.isReg() && MO.isUse()) + MO.setIsKill(false); + } } void MachineInstr::substituteRegister(Register FromReg, Register ToReg, @@ -1546,9 +1549,12 @@ bool MachineInstr::isLoadFoldBarrier() const { /// allDefsAreDead - Return true if all the defs of this instruction are dead. 
/// bool MachineInstr::allDefsAreDead() const { - for (const MachineOperand &MO : all_defs()) + for (const MachineOperand &MO : operands()) { + if (!MO.isReg() || MO.isUse()) + continue; if (!MO.isDead()) return false; + } return true; } @@ -2057,8 +2063,8 @@ void MachineInstr::clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo) { if (!Reg.isPhysical()) RegInfo = nullptr; - for (MachineOperand &MO : all_uses()) { - if (!MO.isKill()) + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; Register OpReg = MO.getReg(); if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg) diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index a0a8a8897af7f2..6babd5a3f1f96f 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1563,7 +1563,9 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { bool ReArrangedImplicitMOs = true; while (ReArrangedImplicitMOs) { ReArrangedImplicitMOs = false; - for (MachineOperand &MO : MI.all_uses()) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse()) + continue; Register Reg = MO.getReg(); if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) continue; From 53d95f3056199b4a9668104c63080d4c94926162 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Wed, 28 Aug 2024 13:52:07 -0700 Subject: [PATCH 05/10] AMDGPU: Rename fail.llvm.fptrunc.round.ll to llvm.fptrunc.round.err.ll (#106452) Also correct the suffix of the intrinsic --- .../{fail.llvm.fptrunc.round.ll => llvm.fptrunc.round.err.ll} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename llvm/test/CodeGen/AMDGPU/{fail.llvm.fptrunc.round.ll => llvm.fptrunc.round.err.ll} (77%) diff --git a/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll similarity index 77% rename from llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll rename to llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll index 9fa3eb22a554a8..4bcd0cf5e6a0e5 100644 --- a/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll @@ -4,9 +4,9 @@ define amdgpu_gs void @test_fptrunc_round_f64(double %a, ptr addrspace(1) %out) { ; SDAG-FAIL: LLVM ERROR: Cannot select ; GISEL-FAIL: unable to legalize instruction - %res = call half @llvm.fptrunc.round.f64(double %a, metadata !"round.upward") + %res = call half @llvm.fptrunc.round.f16.f64(double %a, metadata !"round.upward") store half %res, ptr addrspace(1) %out, align 4 ret void } -declare half @llvm.fptrunc.round.f64(double, metadata) +declare half @llvm.fptrunc.round.f16.f64(double, metadata) From eb9c49c900f43aa79811f80847c97c6596197430 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 28 Aug 2024 13:53:07 -0700 Subject: [PATCH 06/10] [LTO] Make getImportType a proper function (NFC) (#106450) I'm planning to reduce the memory footprint of ThinLTO indexing by changing ImportMapTy. A look-up of the import type will involve data private to ImportMapTy, so it must be done by a member function of ImportMapTy. This patch turns getImportType into a member function so that a subsequent "real" change will just have to update the implementation of the function in place. 
--- .../llvm/Transforms/IPO/FunctionImport.h | 4 ++++ llvm/lib/Transforms/IPO/FunctionImport.cpp | 24 +++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index 78932c12e76ff8..b7280c56be9cc8 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -143,6 +143,10 @@ class FunctionImporter { // order. SmallVector getSourceModules() const; + std::optional + getImportType(const FunctionsToImportTy &GUIDToImportType, + GlobalValue::GUID GUID) const; + const ImportMapTyImpl &getImportMap() const { return ImportMap; } private: diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 6227b085f13a60..7a60ae51f02cb4 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -359,6 +359,15 @@ FunctionImporter::ImportMapTy::getSourceModules() const { return Modules; } +std::optional +FunctionImporter::ImportMapTy::getImportType( + const FunctionsToImportTy &GUIDToImportType, GlobalValue::GUID GUID) const { + auto Iter = GUIDToImportType.find(GUID); + if (Iter == GUIDToImportType.end()) + return std::nullopt; + return Iter->second; +} + /// Import globals referenced by a function or other globals that are being /// imported, if importing such global is possible. class GlobalsImporter final { @@ -1800,15 +1809,6 @@ Expected FunctionImporter::importFunctions( IRMover Mover(DestModule); - auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType, - GlobalValue::GUID GUID) - -> std::optional { - auto Iter = GUIDToImportType.find(GUID); - if (Iter == GUIDToImportType.end()) - return std::nullopt; - return Iter->second; - }; - // Do the actual import of functions now, one Module at a time for (const auto &Name : ImportList.getSourceModules()) { // Get the module for the import @@ -1835,7 +1835,7 @@ Expected FunctionImporter::importFunctions( if (!F.hasName()) continue; auto GUID = F.getGUID(); - auto MaybeImportType = getImportType(ImportGUIDs, GUID); + auto MaybeImportType = ImportList.getImportType(ImportGUIDs, GUID); bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition; LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not") @@ -1871,7 +1871,7 @@ Expected FunctionImporter::importFunctions( if (!GV.hasName()) continue; auto GUID = GV.getGUID(); - auto MaybeImportType = getImportType(ImportGUIDs, GUID); + auto MaybeImportType = ImportList.getImportType(ImportGUIDs, GUID); bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition; LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not") @@ -1891,7 +1891,7 @@ Expected FunctionImporter::importFunctions( if (!GA.hasName() || isa(GA.getAliaseeObject())) continue; auto GUID = GA.getGUID(); - auto MaybeImportType = getImportType(ImportGUIDs, GUID); + auto MaybeImportType = ImportList.getImportType(ImportGUIDs, GUID); bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition; LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not") From 26c582bb452eadc3870c56e8eae24feb354a2edf Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 28 Aug 2024 15:08:44 -0600 Subject: [PATCH 07/10] [DXIL] Don't generate per-variable guards for DirectX (#106096) Thread init guards are generated for local static variables when using the Microsoft CXX ABI. 
This ABI is also used for HLSL generation, but DXIL doesn't need the
corresponding _Init_thread_header/footer calls and doesn't really have a way to
handle them in its output targets. This modifies the language options when the
target is DXIL to exclude this, so that those calls won't be generated and an
alternate guardvar method that is compatible with the usage is used instead.

Done to facilitate testing for #89806, but isn't really related.
---
 clang/lib/Basic/Targets/DirectX.h             |  7 ++++
 clang/test/CodeGenHLSL/static-local-ctor.hlsl | 37 +++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/static-local-ctor.hlsl

diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h
index a084e2823453fc..cf7ea5e83503dc 100644
--- a/clang/lib/Basic/Targets/DirectX.h
+++ b/clang/lib/Basic/Targets/DirectX.h
@@ -94,6 +94,13 @@ class LLVM_LIBRARY_VISIBILITY DirectXTargetInfo : public TargetInfo {
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
   }
+
+  void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
+    TargetInfo::adjust(Diags, Opts);
+    // The static values this addresses do not apply outside of the same thread
+    // This protection is neither available nor needed
+    Opts.ThreadsafeStatics = false;
+  }
 };
 
 } // namespace targets
diff --git a/clang/test/CodeGenHLSL/static-local-ctor.hlsl b/clang/test/CodeGenHLSL/static-local-ctor.hlsl
new file mode 100644
index 00000000000000..f55f6808672dea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/static-local-ctor.hlsl
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s
+
+// Verify that no per-variable _Init_thread instructions are emitted for non-trivial static locals.
+// These would normally be emitted by the MicrosoftCXXABI, but the DirectX backend should exclude them.
+// Instead, check that the guard variable operations that protect the constructor initialization
+ +RWBuffer buf[10]; + +void InitBuf(RWBuffer buf) { + for (unsigned int i = 0; i < 100; i++) + buf[i] = 0; +} + +// CHECK-NOT: _Init_thread_epoch +// CHECK: define internal void @"?main@@YAXXZ" +// CHECK-NEXT: entry: +// CHECK-NEXT: [[Tmp1:%.*]] = alloca %"class.hlsl::RWBuffer" +// CHECK-NEXT: [[Tmp2:%.*]] = load i32, ptr +// CHECK-NEXT: [[Tmp3:%.*]] = and i32 [[Tmp2]], 1 +// CHECK-NEXT: [[Tmp4:%.*]] = icmp eq i32 [[Tmp3]], 0 +// CHECK-NEXT: br i1 [[Tmp4]] +// CHECK-NOT: _Init_thread_header +// CHECK: init: +// CHECK-NEXT: = or i32 [[Tmp2]], 1 +// CHECK-NOT: _Init_thread_footer + + +[shader("compute")] +[numthreads(1,1,1)] +void main() { + // A non-trivially constructed static local will get checks to verify that it is generated just once + static RWBuffer mybuf; + mybuf = buf[0]; + InitBuf(mybuf); +} + From 18c79ca3607bfe9cc6fd083186f3b462f5abff7e Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Wed, 28 Aug 2024 14:23:44 -0700 Subject: [PATCH 08/10] [LV][NFC] Remove unnecessary space in comment --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cb104c4ed2d03d..56f51e14a6eba9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4467,7 +4467,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, if (EphemeralRecipes.contains(&R)) continue; // Continue early if the recipe is considered to not produce a vector - // result. Note that this includes VPInstruction where some opcodes may + // result. Note that this includes VPInstruction where some opcodes may // produce a vector, to preserve existing behavior as VPInstructions model // aspects not directly mapped to existing IR instructions. switch (R.getVPDefID()) { From 1bc7057a8eb7400dfbb1fc8335efa41abab9884e Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Wed, 28 Aug 2024 17:27:06 -0400 Subject: [PATCH 09/10] Revert "[mlir][spirv] Add an argmax integration test with `mlir-vulkan-runner`" (#106457) Reverts llvm/llvm-project#106426. This caused failures on nvidia: https://lab.llvm.org/buildbot/#/builders/138/builds/2853 --- mlir/test/mlir-vulkan-runner/argmax.mlir | 109 ------------------ mlir/tools/mlir-vulkan-runner/CMakeLists.txt | 1 - .../mlir-vulkan-runner/mlir-vulkan-runner.cpp | 4 +- .../llvm-project-overlay/mlir/BUILD.bazel | 1 - 4 files changed, 1 insertion(+), 114 deletions(-) delete mode 100644 mlir/test/mlir-vulkan-runner/argmax.mlir diff --git a/mlir/test/mlir-vulkan-runner/argmax.mlir b/mlir/test/mlir-vulkan-runner/argmax.mlir deleted file mode 100644 index d30c1cb5b58bdc..00000000000000 --- a/mlir/test/mlir-vulkan-runner/argmax.mlir +++ /dev/null @@ -1,109 +0,0 @@ -// RUN: mlir-vulkan-runner %s \ -// RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ -// RUN: --entry-point-result=void | FileCheck %s - -// This kernel computes the argmax (index of the maximum element) from an array -// of integers. Each thread computes a lane maximum using a single `scf.for`. -// Then `gpu.subgroup_reduce` is used to find the maximum across the entire -// subgroup, which is then used by SPIR-V subgroup ops to compute the argmax -// of the entire input array. Note that this kernel only works if we have a -// single workgroup. 
- -// CHECK: [15] -module attributes { - gpu.container_module, - spirv.target_env = #spirv.target_env< - #spirv.vce, #spirv.resource_limits<>> -} { - gpu.module @kernels { - gpu.func @kernel_argmax(%input : memref<128xi32>, %output : memref<1xi32>, %total_count_buf : memref<1xi32>) kernel - attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { - %idx0 = arith.constant 0 : index - %idx1 = arith.constant 1 : index - - %total_count = memref.load %total_count_buf[%idx0] : memref<1xi32> - %lane_count_idx = gpu.subgroup_size : index - %lane_count_i32 = index.castu %lane_count_idx : index to i32 - %lane_id_idx = gpu.thread_id x - %lane_id_i32 = index.castu %lane_id_idx : index to i32 - %lane_res_init = arith.constant 0 : i32 - %lane_max_init = memref.load %input[%lane_id_idx] : memref<128xi32> - %num_batches_i32 = arith.divui %total_count, %lane_count_i32 : i32 - %num_batches_idx = index.castu %num_batches_i32 : i32 to index - - %lane_res, %lane_max = scf.for %iter = %idx1 to %num_batches_idx step %idx1 - iter_args(%lane_res_iter = %lane_res_init, %lane_max_iter = %lane_max_init) -> (i32, i32) { - %iter_i32 = index.castu %iter : index to i32 - %mul = arith.muli %lane_count_i32, %iter_i32 : i32 - %idx_i32 = arith.addi %mul, %lane_id_i32 : i32 - %idx = index.castu %idx_i32 : i32 to index - %elem = memref.load %input[%idx] : memref<128xi32> - %gt = arith.cmpi sgt, %elem, %lane_max_iter : i32 - %lane_res_next = arith.select %gt, %idx_i32, %lane_res_iter : i32 - %lane_max_next = arith.select %gt, %elem, %lane_max_iter : i32 - scf.yield %lane_res_next, %lane_max_next : i32, i32 - } - - %subgroup_max = gpu.subgroup_reduce maxsi %lane_max : (i32) -> (i32) - %eq = arith.cmpi eq, %lane_max, %subgroup_max : i32 - %ballot = spirv.GroupNonUniformBallot %eq : vector<4xi32> - %lsb = spirv.GroupNonUniformBallotFindLSB %ballot : vector<4xi32>, i32 - %cond = arith.cmpi eq, %lsb, %lane_id_i32 : i32 - - scf.if %cond { - memref.store %lane_res, %output[%idx0] : memref<1xi32> - } - - gpu.return - } - } - - func.func @main() { - // Allocate 3 buffers. - %in_buf = memref.alloc() : memref<128xi32> - %out_buf = memref.alloc() : memref<1xi32> - %total_count_buf = memref.alloc() : memref<1xi32> - - // Constants. - %cst0 = arith.constant 0 : i32 - %idx0 = arith.constant 0 : index - %idx1 = arith.constant 1 : index - %idx16 = arith.constant 16 : index - %idx32 = arith.constant 32 : index - %idx48 = arith.constant 48 : index - %idx64 = arith.constant 64 : index - %idx80 = arith.constant 80 : index - %idx96 = arith.constant 96 : index - %idx112 = arith.constant 112 : index - - // Initialize input buffer. - %in_vec = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32> - vector.store %in_vec, %in_buf[%idx0] : memref<128xi32>, vector<16xi32> - vector.store %in_vec, %in_buf[%idx16] : memref<128xi32>, vector<16xi32> - vector.store %in_vec, %in_buf[%idx32] : memref<128xi32>, vector<16xi32> - vector.store %in_vec, %in_buf[%idx48] : memref<128xi32>, vector<16xi32> - vector.store %in_vec, %in_buf[%idx64] : memref<128xi32>, vector<16xi32> - vector.store %in_vec, %in_buf[%idx80] : memref<128xi32>, vector<16xi32> - vector.store %in_vec, %in_buf[%idx96] : memref<128xi32>, vector<16xi32> - vector.store %in_vec, %in_buf[%idx112] : memref<128xi32>, vector<16xi32> - - // Initialize output buffer. - %out_buf2 = memref.cast %out_buf : memref<1xi32> to memref - call @fillResource1DInt(%out_buf2, %cst0) : (memref, i32) -> () - - // Total number of scalars. 
- %total_count = arith.constant 128 : i32 - %total_count_buf2 = memref.cast %total_count_buf : memref<1xi32> to memref - call @fillResource1DInt(%total_count_buf2, %total_count) : (memref, i32) -> () - - // Launch kernel function and print output. - gpu.launch_func @kernels::@kernel_argmax - blocks in (%idx1, %idx1, %idx1) threads in (%idx32, %idx1, %idx1) - args(%in_buf : memref<128xi32>, %out_buf : memref<1xi32>, %total_count_buf : memref<1xi32>) - %out_buf3 = memref.cast %out_buf2 : memref to memref<*xi32> - call @printMemrefI32(%out_buf3) : (memref<*xi32>) -> () - return - } - func.func private @fillResource1DInt(%0 : memref, %1 : i32) - func.func private @printMemrefI32(%ptr : memref<*xi32>) -} diff --git a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt index 36ec946b168715..26d6caacb0a7b1 100644 --- a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt +++ b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt @@ -57,7 +57,6 @@ if (MLIR_ENABLE_VULKAN_RUNNER) MLIRExecutionEngine MLIRFuncDialect MLIRGPUDialect - MLIRIndexDialect MLIRIR MLIRJitRunner MLIRLLVMDialect diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp index bd34165574c8d2..2dd539ef83481f 100644 --- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp +++ b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp @@ -24,7 +24,6 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" -#include "mlir/Dialect/Index/IR/IndexDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" @@ -111,8 +110,7 @@ int main(int argc, char **argv) { registry.insert(); + mlir::memref::MemRefDialect, mlir::vector::VectorDialect>(); mlir::registerBuiltinDialectTranslation(registry); mlir::registerLLVMDialectTranslation(registry); diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 8256eddf06047c..9d3fc5651f2073 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10001,7 +10001,6 @@ cc_binary( ":GPUToSPIRV", ":GPUToVulkanTransforms", ":GPUTransforms", - ":IndexDialect", ":LLVMCommonConversion", ":LLVMDialect", ":LLVMIRTransforms", From ee0d70633872a30175cf29f81de7b2dbf771d708 Mon Sep 17 00:00:00 2001 From: yronglin Date: Thu, 29 Aug 2024 06:10:30 +0800 Subject: [PATCH 10/10] [clang][bytecode] Implement constexpr vector unary operators +, -, ~, ! (#105996) Implement constexpr vector unary operators +, -, ~ and ! . - Follow the current constant interpreter. All of our boolean operations on vector types should be '-1' for the 'truth' type. - Move the following functions from `Sema` to `ASTContext`, because we used it in new interpreter. 
```C++ QualType GetSignedVectorType(QualType V); QualType GetSignedSizelessVectorType(QualType V); ``` --------- Signed-off-by: yronglin --- clang/lib/AST/ByteCode/Compiler.cpp | 106 ++++++++++++++++++ clang/lib/AST/ByteCode/Compiler.h | 6 + clang/test/AST/ByteCode/constexpr-vectors.cpp | 90 +++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 clang/test/AST/ByteCode/constexpr-vectors.cpp diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 0fc942a4f1bc4f..6a77323d939791 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -4991,6 +4991,8 @@ bool Compiler::VisitUnaryOperator(const UnaryOperator *E) { const Expr *SubExpr = E->getSubExpr(); if (SubExpr->getType()->isAnyComplexType()) return this->VisitComplexUnaryOperator(E); + if (SubExpr->getType()->isVectorType()) + return this->VisitVectorUnaryOperator(E); std::optional T = classify(SubExpr->getType()); switch (E->getOpcode()) { @@ -5312,6 +5314,110 @@ bool Compiler::VisitComplexUnaryOperator(const UnaryOperator *E) { return true; } +template +bool Compiler::VisitVectorUnaryOperator(const UnaryOperator *E) { + const Expr *SubExpr = E->getSubExpr(); + assert(SubExpr->getType()->isVectorType()); + + if (DiscardResult) + return this->discard(SubExpr); + + auto UnaryOp = E->getOpcode(); + if (UnaryOp != UO_Plus && UnaryOp != UO_Minus && UnaryOp != UO_LNot && + UnaryOp != UO_Not) + return this->emitInvalid(E); + + // Nothing to do here. + if (UnaryOp == UO_Plus) + return this->delegate(SubExpr); + + if (!Initializing) { + std::optional LocalIndex = allocateLocal(SubExpr); + if (!LocalIndex) + return false; + if (!this->emitGetPtrLocal(*LocalIndex, E)) + return false; + } + + // The offset of the temporary, if we created one. + unsigned SubExprOffset = + this->allocateLocalPrimitive(SubExpr, PT_Ptr, true, false); + if (!this->visit(SubExpr)) + return false; + if (!this->emitSetLocal(PT_Ptr, SubExprOffset, E)) + return false; + + const auto *VecTy = SubExpr->getType()->getAs(); + PrimType ElemT = classifyVectorElementType(SubExpr->getType()); + auto getElem = [=](unsigned Offset, unsigned Index) -> bool { + if (!this->emitGetLocal(PT_Ptr, Offset, E)) + return false; + return this->emitArrayElemPop(ElemT, Index, E); + }; + + switch (UnaryOp) { + case UO_Minus: + for (unsigned I = 0; I != VecTy->getNumElements(); ++I) { + if (!getElem(SubExprOffset, I)) + return false; + if (!this->emitNeg(ElemT, E)) + return false; + if (!this->emitInitElem(ElemT, I, E)) + return false; + } + break; + case UO_LNot: { // !x + // In C++, the logic operators !, &&, || are available for vectors. !v is + // equivalent to v == 0. + // + // The result of the comparison is a vector of the same width and number of + // elements as the comparison operands with a signed integral element type. + // + // https://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html + QualType ResultVecTy = E->getType(); + PrimType ResultVecElemT = + classifyPrim(ResultVecTy->getAs()->getElementType()); + for (unsigned I = 0; I != VecTy->getNumElements(); ++I) { + if (!getElem(SubExprOffset, I)) + return false; + // operator ! on vectors returns -1 for 'truth', so negate it. 
+ if (!this->emitPrimCast(ElemT, PT_Bool, Ctx.getASTContext().BoolTy, E)) + return false; + if (!this->emitInv(E)) + return false; + if (!this->emitPrimCast(PT_Bool, ElemT, VecTy->getElementType(), E)) + return false; + if (!this->emitNeg(ElemT, E)) + return false; + if (ElemT != ResultVecElemT && + !this->emitPrimCast(ElemT, ResultVecElemT, ResultVecTy, E)) + return false; + if (!this->emitInitElem(ResultVecElemT, I, E)) + return false; + } + break; + } + case UO_Not: // ~x + for (unsigned I = 0; I != VecTy->getNumElements(); ++I) { + if (!getElem(SubExprOffset, I)) + return false; + if (ElemT == PT_Bool) { + if (!this->emitInv(E)) + return false; + } else { + if (!this->emitComp(ElemT, E)) + return false; + } + if (!this->emitInitElem(ElemT, I, E)) + return false; + } + break; + default: + llvm_unreachable("Unsupported unary operators should be handled up front"); + } + return true; +} + template bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { if (DiscardResult) diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index 5acfe3c41796c4..939cc0dae3546f 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -139,6 +139,7 @@ class Compiler : public ConstStmtVisitor, bool>, bool VisitGNUNullExpr(const GNUNullExpr *E); bool VisitCXXThisExpr(const CXXThisExpr *E); bool VisitUnaryOperator(const UnaryOperator *E); + bool VisitVectorUnaryOperator(const UnaryOperator *E); bool VisitComplexUnaryOperator(const UnaryOperator *E); bool VisitDeclRefExpr(const DeclRefExpr *E); bool VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E); @@ -349,6 +350,11 @@ class Compiler : public ConstStmtVisitor, bool>, return *this->classify(ElemType); } + PrimType classifyVectorElementType(QualType T) const { + assert(T->isVectorType()); + return *this->classify(T->getAs()->getElementType()); + } + bool emitComplexReal(const Expr *SubExpr); bool emitComplexBoolCast(const Expr *E); bool emitComplexComparison(const Expr *LHS, const Expr *RHS, diff --git a/clang/test/AST/ByteCode/constexpr-vectors.cpp b/clang/test/AST/ByteCode/constexpr-vectors.cpp new file mode 100644 index 00000000000000..a738cfe617a0e0 --- /dev/null +++ b/clang/test/AST/ByteCode/constexpr-vectors.cpp @@ -0,0 +1,90 @@ +// RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=c++14 -fsyntax-only -verify +// RUN: %clang_cc1 %s -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter -std=c++14 -fsyntax-only -verify + +using FourCharsVecSize __attribute__((vector_size(4))) = char; +using FourIntsVecSize __attribute__((vector_size(16))) = int; +using FourLongLongsVecSize __attribute__((vector_size(32))) = long long; +using FourFloatsVecSize __attribute__((vector_size(16))) = float; +using FourDoublesVecSize __attribute__((vector_size(32))) = double; +using FourI128VecSize __attribute__((vector_size(64))) = __int128; + +using FourCharsExtVec __attribute__((ext_vector_type(4))) = char; +using FourIntsExtVec __attribute__((ext_vector_type(4))) = int; +using FourI128ExtVec __attribute__((ext_vector_type(4))) = __int128; + +// Only int vs float makes a difference here, so we only need to test 1 of each. +// Test Char to make sure the mixed-nature of shifts around char is evident. 
+void CharUsage() { + constexpr auto H = FourCharsVecSize{-1, -1, 0, -1}; + constexpr auto InvH = -H; + static_assert(InvH[0] == 1 && InvH[1] == 1 && InvH[2] == 0 && InvH[3] == 1, ""); + + constexpr auto ae = ~FourCharsVecSize{1, 2, 10, 20}; + static_assert(ae[0] == -2 && ae[1] == -3 && ae[2] == -11 && ae[3] == -21, ""); + + constexpr auto af = !FourCharsVecSize{0, 1, 8, -1}; + static_assert(af[0] == -1 && af[1] == 0 && af[2] == 0 && af[3] == 0, ""); +} + +void CharExtVecUsage() { + constexpr auto H = FourCharsExtVec{-1, -1, 0, -1}; + constexpr auto InvH = -H; + static_assert(InvH[0] == 1 && InvH[1] == 1 && InvH[2] == 0 && InvH[3] == 1, ""); + + constexpr auto ae = ~FourCharsExtVec{1, 2, 10, 20}; + static_assert(ae[0] == -2 && ae[1] == -3 && ae[2] == -11 && ae[3] == -21, ""); + + constexpr auto af = !FourCharsExtVec{0, 1, 8, -1}; + static_assert(af[0] == -1 && af[1] == 0 && af[2] == 0 && af[3] == 0, ""); +} + +void FloatUsage() { + constexpr auto Y = FourFloatsVecSize{1.200000e+01, 1.700000e+01, -1.000000e+00, -1.000000e+00}; + constexpr auto Z = -Y; + static_assert(Z[0] == -1.200000e+01 && Z[1] == -1.700000e+01 && Z[2] == 1.000000e+00 && Z[3] == 1.000000e+00, ""); + + // Operator ~ is illegal on floats. + constexpr auto ae = ~FourFloatsVecSize{0, 1, 8, -1}; // expected-error {{invalid argument type}} + + constexpr auto af = !FourFloatsVecSize{0, 1, 8, -1}; + static_assert(af[0] == -1 && af[1] == 0 && af[2] == 0 && af[3] == 0, ""); +} + +void FloatVecUsage() { + constexpr auto Y = FourFloatsVecSize{1.200000e+01, 1.700000e+01, -1.000000e+00, -1.000000e+00}; + constexpr auto Z = -Y; + static_assert(Z[0] == -1.200000e+01 && Z[1] == -1.700000e+01 && Z[2] == 1.000000e+00 && Z[3] == 1.000000e+00, ""); + + // Operator ~ is illegal on floats. + constexpr auto ae = ~FourFloatsVecSize{0, 1, 8, -1}; // expected-error {{invalid argument type}} + + constexpr auto af = !FourFloatsVecSize{0, 1, 8, -1}; + static_assert(af[0] == -1 && af[1] == 0 && af[2] == 0 && af[3] == 0, ""); +} + +void I128Usage() { + // Operator ~ is illegal on floats, so no test for that. + constexpr auto c = ~FourI128VecSize{1, 2, 10, 20}; + static_assert(c[0] == -2 && c[1] == -3 && c[2] == -11 && c[3] == -21, ""); + + constexpr auto d = !FourI128VecSize{0, 1, 8, -1}; + static_assert(d[0] == -1 && d[1] == 0 && d[2] == 0 && d[3] == 0, ""); +} + +void I128VecUsage() { + // Operator ~ is illegal on floats, so no test for that. + constexpr auto c = ~FourI128ExtVec{1, 2, 10, 20}; + static_assert(c[0] == -2 && c[1] == -3 && c[2] == -11 && c[3] == -21, ""); + + constexpr auto d = !FourI128ExtVec{0, 1, 8, -1}; + static_assert(d[0] == -1 && d[1] == 0 && d[2] == 0 && d[3] == 0, ""); +} + +using FourBoolsExtVec __attribute__((ext_vector_type(4))) = bool; +void BoolVecUsage() { + constexpr auto j = !FourBoolsExtVec{true, false, true, false}; + static_assert(j[0] == false && j[1] == true && j[2] == false && j[3] == true, ""); + + constexpr auto k = ~FourBoolsExtVec{true, false, true, false}; + static_assert(k[0] == false && k[1] == true && k[2] == false && k[3] == true, ""); +}
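
A condensed view of the element-wise semantics the new test above checks, as a
minimal sketch assuming a recent Clang (the `v4i` alias and the function name
are illustrative, not taken from the patch): unary `-` and `~` operate per
element, and `!` compares each element with zero, yielding -1 for elements
equal to zero and 0 otherwise.

```c++
// Sketch only: condensed from the constexpr-vectors.cpp test added above.
// Both the current evaluator and the experimental bytecode interpreter are
// expected to fold these at compile time (e.g. clang++ -std=c++14 -fsyntax-only).
using v4i __attribute__((vector_size(16))) = int;

void sketch() {
  constexpr v4i a = {1, -2, 0, 7};

  constexpr auto neg = -a;   // element-wise negation: {-1, 2, 0, -7}
  static_assert(neg[0] == -1 && neg[1] == 2 && neg[2] == 0 && neg[3] == -7, "");

  constexpr auto inv = ~a;   // element-wise bitwise not: {-2, 1, -1, -8}
  static_assert(inv[0] == -2 && inv[1] == 1 && inv[2] == -1 && inv[3] == -8, "");

  constexpr auto lnot = !a;  // element-wise logical not, -1 encodes true: {0, 0, -1, 0}
  static_assert(lnot[0] == 0 && lnot[1] == 0 && lnot[2] == -1 && lnot[3] == 0, "");
}
```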