Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HLSL][SPIRV]Add SPIRV generation for HLSL dot #104656

Merged
merged 8 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 20 additions & 27 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18471,22 +18471,14 @@ llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
return Arg;
}

Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
if (QT->hasFloatingRepresentation()) {
switch (elementCount) {
case 2:
return Intrinsic::dx_dot2;
case 3:
return Intrinsic::dx_dot3;
case 4:
return Intrinsic::dx_dot4;
}
}
if (QT->hasSignedIntegerRepresentation())
return Intrinsic::dx_sdot;

assert(QT->hasUnsignedIntegerRepresentation());
return Intrinsic::dx_udot;
// Return dot product intrinsic that corresponds to the QT scalar type
Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
if (QT->isFloatingType())
return RT.getFDotIntrinsic();
if (QT->isSignedIntegerType())
return RT.getSDotIntrinsic();
assert(QT->isUnsignedIntegerType());
return RT.getUDotIntrinsic();
}

Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Expand Down Expand Up @@ -18529,37 +18521,38 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *Op1 = EmitScalarExpr(E->getArg(1));
llvm::Type *T0 = Op0->getType();
llvm::Type *T1 = Op1->getType();

// If the arguments are scalars, just emit a multiply
if (!T0->isVectorTy() && !T1->isVectorTy()) {
if (T0->isFloatingPointTy())
return Builder.CreateFMul(Op0, Op1, "dx.dot");
return Builder.CreateFMul(Op0, Op1, "hlsl.dot");

if (T0->isIntegerTy())
return Builder.CreateMul(Op0, Op1, "dx.dot");
return Builder.CreateMul(Op0, Op1, "hlsl.dot");

// Bools should have been promoted
llvm_unreachable(
"Scalar dot product is only supported on ints and floats.");
}
// For vectors, validate types and emit the appropriate intrinsic

// A VectorSplat should have happened
assert(T0->isVectorTy() && T1->isVectorTy() &&
"Dot product of vector and scalar is not supported.");

// A vector sext or sitofp should have happened
assert(T0->getScalarType() == T1->getScalarType() &&
"Dot product of vectors need the same element types.");

auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
[[maybe_unused]] auto *VecTy1 =
E->getArg(1)->getType()->getAs<VectorType>();
// A HLSLVectorTruncation should have happend

assert(VecTy0->getElementType() == VecTy1->getElementType() &&
"Dot product of vectors need the same element types.");

assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
"Dot product requires vectors to be of the same size.");

return Builder.CreateIntrinsic(
/*ReturnType=*/T0->getScalarType(),
getDotProductIntrinsic(E->getArg(0)->getType(),
VecTy0->getNumElements()),
ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
} break;
case Builtin::BI__builtin_hlsl_lerp: {
Value *X = EmitScalarExpr(E->getArg(0));
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)

//===----------------------------------------------------------------------===//
// End of reserved area for HLSL intrinsic getters.
Expand Down
12 changes: 6 additions & 6 deletions clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

// CHECK-LABEL: builtin_bool_to_float_type_promotion
// CHECK: %conv1 = uitofp i1 %loadedv to double
// CHECK: %dx.dot = fmul double %conv, %conv1
// CHECK: %conv2 = fptrunc double %dx.dot to float
// CHECK: %hlsl.dot = fmul double %conv, %conv1
// CHECK: %conv2 = fptrunc double %hlsl.dot to float
// CHECK: ret float %conv2
float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
return __builtin_hlsl_dot ( p0, p1 );
Expand All @@ -12,8 +12,8 @@ float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
// CHECK-LABEL: builtin_bool_to_float_arg1_type_promotion
// CHECK: %conv = uitofp i1 %loadedv to double
// CHECK: %conv1 = fpext float %1 to double
// CHECK: %dx.dot = fmul double %conv, %conv1
// CHECK: %conv2 = fptrunc double %dx.dot to float
// CHECK: %hlsl.dot = fmul double %conv, %conv1
// CHECK: %conv2 = fptrunc double %hlsl.dot to float
// CHECK: ret float %conv2
float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
return __builtin_hlsl_dot ( p0, p1 );
Expand All @@ -22,8 +22,8 @@ float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
// CHECK-LABEL: builtin_dot_int_to_float_promotion
// CHECK: %conv = fpext float %0 to double
// CHECK: %conv1 = sitofp i32 %1 to double
// CHECK: dx.dot = fmul double %conv, %conv1
// CHECK: %conv2 = fptrunc double %dx.dot to float
// CHECK: dot = fmul double %conv, %conv1
// CHECK: %conv2 = fptrunc double %hlsl.dot to float
// CHECK: ret float %conv2
float builtin_dot_int_to_float_promotion ( float p0, int p1 ) {
return __builtin_hlsl_dot ( p0, p1 );
Expand Down
207 changes: 109 additions & 98 deletions clang/test/CodeGenHLSL/builtins/dot.hlsl
Original file line number Diff line number Diff line change
@@ -1,161 +1,172 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF
// RUN: --check-prefixes=CHECK,DXCHECK,NATIVE_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXCHECK,NO_HALF

#ifdef __HLSL_ENABLE_16_BIT
// NATIVE_HALF: %dx.dot = mul i16 %0, %1
// NATIVE_HALF: ret i16 %dx.dot
int16_t test_dot_short(int16_t p0, int16_t p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.sdot.v2i16(<2 x i16> %0, <2 x i16> %1)
// NATIVE_HALF: ret i16 %dx.dot
int16_t test_dot_short2(int16_t2 p0, int16_t2 p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.sdot.v3i16(<3 x i16> %0, <3 x i16> %1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'd like to see some spv tests so we can see @llvm.spv.sdot, @llvm.spv.udot, and @llvm.spv.fdot get emitted\tested.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if its cleaner to do this in another file im fine with that too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

// NATIVE_HALF: ret i16 %dx.dot
int16_t test_dot_short3(int16_t3 p0, int16_t3 p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.sdot.v4i16(<4 x i16> %0, <4 x i16> %1)
// NATIVE_HALF: ret i16 %dx.dot
int16_t test_dot_short4(int16_t4 p0, int16_t4 p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = mul i16 %0, %1
// NATIVE_HALF: ret i16 %dx.dot
uint16_t test_dot_ushort(uint16_t p0, uint16_t p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.udot.v2i16(<2 x i16> %0, <2 x i16> %1)
// NATIVE_HALF: ret i16 %dx.dot
uint16_t test_dot_ushort2(uint16_t2 p0, uint16_t2 p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.udot.v3i16(<3 x i16> %0, <3 x i16> %1)
// NATIVE_HALF: ret i16 %dx.dot
uint16_t test_dot_ushort3(uint16_t3 p0, uint16_t3 p1) { return dot(p0, p1); }
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,SPVCHECK,NATIVE_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,SPVCHECK,NO_HALF

// NATIVE_HALF: %dx.dot = call i16 @llvm.dx.udot.v4i16(<4 x i16> %0, <4 x i16> %1)
// NATIVE_HALF: ret i16 %dx.dot
uint16_t test_dot_ushort4(uint16_t4 p0, uint16_t4 p1) { return dot(p0, p1); }
#endif

// CHECK: %dx.dot = mul i32 %0, %1
// CHECK: ret i32 %dx.dot
// CHECK: %hlsl.dot = mul i32
// CHECK: ret i32 %hlsl.dot
int test_dot_int(int p0, int p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i32 @llvm.dx.sdot.v2i32(<2 x i32> %0, <2 x i32> %1)
// CHECK: ret i32 %dx.dot
// Capture the expected interchange format so not every check needs to be duplicated
// DXCHECK: %hlsl.dot = call i32 @llvm.[[ICF:dx]].sdot.v2i32(<2 x i32>
// SPVCHECK: %hlsl.dot = call i32 @llvm.[[ICF:spv]].sdot.v2i32(<2 x i32>
// CHECK: ret i32 %hlsl.dot
int test_dot_int2(int2 p0, int2 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i32 @llvm.dx.sdot.v3i32(<3 x i32> %0, <3 x i32> %1)
// CHECK: ret i32 %dx.dot
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].sdot.v3i32(<3 x i32>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opted to chop off the rest instead of delve into regexpamancy because that's what other tests in this directory have done.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm i like this [[ICF]], hmm we should probably put something in the backlog to update all the tests to do this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I filed an issue: #105710

// CHECK: ret i32 %hlsl.dot
int test_dot_int3(int3 p0, int3 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i32 @llvm.dx.sdot.v4i32(<4 x i32> %0, <4 x i32> %1)
// CHECK: ret i32 %dx.dot
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].sdot.v4i32(<4 x i32>
// CHECK: ret i32 %hlsl.dot
int test_dot_int4(int4 p0, int4 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = mul i32 %0, %1
// CHECK: ret i32 %dx.dot
// CHECK: %hlsl.dot = mul i32
// CHECK: ret i32 %hlsl.dot
uint test_dot_uint(uint p0, uint p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i32 @llvm.dx.udot.v2i32(<2 x i32> %0, <2 x i32> %1)
// CHECK: ret i32 %dx.dot
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].udot.v2i32(<2 x i32>
// CHECK: ret i32 %hlsl.dot
uint test_dot_uint2(uint2 p0, uint2 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i32 @llvm.dx.udot.v3i32(<3 x i32> %0, <3 x i32> %1)
// CHECK: ret i32 %dx.dot
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].udot.v3i32(<3 x i32>
// CHECK: ret i32 %hlsl.dot
uint test_dot_uint3(uint3 p0, uint3 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i32 @llvm.dx.udot.v4i32(<4 x i32> %0, <4 x i32> %1)
// CHECK: ret i32 %dx.dot
// CHECK: %hlsl.dot = call i32 @llvm.[[ICF]].udot.v4i32(<4 x i32>
// CHECK: ret i32 %hlsl.dot
uint test_dot_uint4(uint4 p0, uint4 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = mul i64 %0, %1
// CHECK: ret i64 %dx.dot
// CHECK: %hlsl.dot = mul i64
// CHECK: ret i64 %hlsl.dot
int64_t test_dot_long(int64_t p0, int64_t p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i64 @llvm.dx.sdot.v2i64(<2 x i64> %0, <2 x i64> %1)
// CHECK: ret i64 %dx.dot
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].sdot.v2i64(<2 x i64>
// CHECK: ret i64 %hlsl.dot
int64_t test_dot_long2(int64_t2 p0, int64_t2 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i64 @llvm.dx.sdot.v3i64(<3 x i64> %0, <3 x i64> %1)
// CHECK: ret i64 %dx.dot
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].sdot.v3i64(<3 x i64>
// CHECK: ret i64 %hlsl.dot
int64_t test_dot_long3(int64_t3 p0, int64_t3 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i64 @llvm.dx.sdot.v4i64(<4 x i64> %0, <4 x i64> %1)
// CHECK: ret i64 %dx.dot
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].sdot.v4i64(<4 x i64>
// CHECK: ret i64 %hlsl.dot
int64_t test_dot_long4(int64_t4 p0, int64_t4 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = mul i64 %0, %1
// CHECK: ret i64 %dx.dot
// CHECK: %hlsl.dot = mul i64
// CHECK: ret i64 %hlsl.dot
uint64_t test_dot_ulong(uint64_t p0, uint64_t p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i64 @llvm.dx.udot.v2i64(<2 x i64> %0, <2 x i64> %1)
// CHECK: ret i64 %dx.dot
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].udot.v2i64(<2 x i64>
// CHECK: ret i64 %hlsl.dot
uint64_t test_dot_ulong2(uint64_t2 p0, uint64_t2 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i64 @llvm.dx.udot.v3i64(<3 x i64> %0, <3 x i64> %1)
// CHECK: ret i64 %dx.dot
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].udot.v3i64(<3 x i64>
// CHECK: ret i64 %hlsl.dot
uint64_t test_dot_ulong3(uint64_t3 p0, uint64_t3 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call i64 @llvm.dx.udot.v4i64(<4 x i64> %0, <4 x i64> %1)
// CHECK: ret i64 %dx.dot
// CHECK: %hlsl.dot = call i64 @llvm.[[ICF]].udot.v4i64(<4 x i64>
// CHECK: ret i64 %hlsl.dot
uint64_t test_dot_ulong4(uint64_t4 p0, uint64_t4 p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = fmul half %0, %1
// NATIVE_HALF: ret half %dx.dot
// NO_HALF: %dx.dot = fmul float %0, %1
// NO_HALF: ret float %dx.dot
#ifdef __HLSL_ENABLE_16_BIT
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed somewhere to capture the dx/spv indicator that was run early and every time, so I moved these out of the way.

// NATIVE_HALF: %hlsl.dot = mul i16
// NATIVE_HALF: ret i16 %hlsl.dot
int16_t test_dot_short(int16_t p0, int16_t p1) { return dot(p0, p1); }

// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].sdot.v2i16(<2 x i16>
// NATIVE_HALF: ret i16 %hlsl.dot
int16_t test_dot_short2(int16_t2 p0, int16_t2 p1) { return dot(p0, p1); }

// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].sdot.v3i16(<3 x i16>
// NATIVE_HALF: ret i16 %hlsl.dot
int16_t test_dot_short3(int16_t3 p0, int16_t3 p1) { return dot(p0, p1); }

// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].sdot.v4i16(<4 x i16>
// NATIVE_HALF: ret i16 %hlsl.dot
int16_t test_dot_short4(int16_t4 p0, int16_t4 p1) { return dot(p0, p1); }

// NATIVE_HALF: %hlsl.dot = mul i16
// NATIVE_HALF: ret i16 %hlsl.dot
uint16_t test_dot_ushort(uint16_t p0, uint16_t p1) { return dot(p0, p1); }

// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].udot.v2i16(<2 x i16>
// NATIVE_HALF: ret i16 %hlsl.dot
uint16_t test_dot_ushort2(uint16_t2 p0, uint16_t2 p1) { return dot(p0, p1); }

// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].udot.v3i16(<3 x i16>
// NATIVE_HALF: ret i16 %hlsl.dot
uint16_t test_dot_ushort3(uint16_t3 p0, uint16_t3 p1) { return dot(p0, p1); }

// NATIVE_HALF: %hlsl.dot = call i16 @llvm.[[ICF]].udot.v4i16(<4 x i16>
// NATIVE_HALF: ret i16 %hlsl.dot
uint16_t test_dot_ushort4(uint16_t4 p0, uint16_t4 p1) { return dot(p0, p1); }
#endif

// NATIVE_HALF: %hlsl.dot = fmul half
// NATIVE_HALF: ret half %hlsl.dot
// NO_HALF: %hlsl.dot = fmul float
// NO_HALF: ret float %hlsl.dot
half test_dot_half(half p0, half p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = call half @llvm.dx.dot2.v2f16(<2 x half> %0, <2 x half> %1)
// NATIVE_HALF: ret half %dx.dot
// NO_HALF: %dx.dot = call float @llvm.dx.dot2.v2f32(<2 x float> %0, <2 x float> %1)
// NO_HALF: ret float %dx.dot
// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v2f16(<2 x half>
// NATIVE_HALF: ret half %hlsl.dot
// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float>
// NO_HALF: ret float %hlsl.dot
half test_dot_half2(half2 p0, half2 p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = call half @llvm.dx.dot3.v3f16(<3 x half> %0, <3 x half> %1)
// NATIVE_HALF: ret half %dx.dot
// NO_HALF: %dx.dot = call float @llvm.dx.dot3.v3f32(<3 x float> %0, <3 x float> %1)
// NO_HALF: ret float %dx.dot
// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v3f16(<3 x half>
// NATIVE_HALF: ret half %hlsl.dot
// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float>
// NO_HALF: ret float %hlsl.dot
half test_dot_half3(half3 p0, half3 p1) { return dot(p0, p1); }

// NATIVE_HALF: %dx.dot = call half @llvm.dx.dot4.v4f16(<4 x half> %0, <4 x half> %1)
// NATIVE_HALF: ret half %dx.dot
// NO_HALF: %dx.dot = call float @llvm.dx.dot4.v4f32(<4 x float> %0, <4 x float> %1)
// NO_HALF: ret float %dx.dot
// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v4f16(<4 x half>
// NATIVE_HALF: ret half %hlsl.dot
// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float>
// NO_HALF: ret float %hlsl.dot
half test_dot_half4(half4 p0, half4 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = fmul float %0, %1
// CHECK: ret float %dx.dot
// CHECK: %hlsl.dot = fmul float
// CHECK: ret float %hlsl.dot
float test_dot_float(float p0, float p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call float @llvm.dx.dot2.v2f32(<2 x float> %0, <2 x float> %1)
// CHECK: ret float %dx.dot
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float>
// CHECK: ret float %hlsl.dot
float test_dot_float2(float2 p0, float2 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call float @llvm.dx.dot3.v3f32(<3 x float> %0, <3 x float> %1)
// CHECK: ret float %dx.dot
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float>
// CHECK: ret float %hlsl.dot
float test_dot_float3(float3 p0, float3 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call float @llvm.dx.dot4.v4f32(<4 x float> %0, <4 x float> %1)
// CHECK: ret float %dx.dot
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float>
// CHECK: ret float %hlsl.dot
float test_dot_float4(float4 p0, float4 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call float @llvm.dx.dot2.v2f32(<2 x float> %splat.splat, <2 x float> %1)
// CHECK: ret float %dx.dot
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float> %splat.splat, <2 x float>
// CHECK: ret float %hlsl.dot
float test_dot_float2_splat(float p0, float2 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call float @llvm.dx.dot3.v3f32(<3 x float> %splat.splat, <3 x float> %1)
// CHECK: ret float %dx.dot
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float> %splat.splat, <3 x float>
// CHECK: ret float %hlsl.dot
float test_dot_float3_splat(float p0, float3 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = call float @llvm.dx.dot4.v4f32(<4 x float> %splat.splat, <4 x float> %1)
// CHECK: ret float %dx.dot
// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float> %splat.splat, <4 x float>
// CHECK: ret float %hlsl.dot
float test_dot_float4_splat(float p0, float4 p1) { return dot(p0, p1); }

// CHECK: %dx.dot = fmul double %0, %1
// CHECK: ret double %dx.dot
// CHECK: %hlsl.dot = fmul double
// CHECK: ret double %hlsl.dot
double test_dot_double(double p0, double p1) { return dot(p0, p1); }
Loading
Loading