From 3f3e208428a867139f777fe49f7dd7bd648e1617 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon@farzon.org>
Date: Mon, 29 Jan 2024 20:23:30 -0500
Subject: [PATCH] [HLSL]  Implementation of tan intrinsic HLSL has a tan
 builtin defined here:
 https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-tan
 Currently HLSL builtins redirect to clang builtins. However there is no tan
 clang builtin today.

This change implements #70082

Changes:
1. Create an llvm tan intrinsic.
2. Then expose that intrinsic to hlsl.
---
 clang/docs/LanguageExtensions.rst             |  1 +
 clang/include/clang/Basic/Builtins.td         |  6 ++
 clang/lib/CodeGen/CGBuiltin.cpp               |  4 +-
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      | 32 +++++++++++
 clang/lib/Sema/SemaChecking.cpp               |  1 +
 .../test/CodeGen/builtins-elementwise-math.c  | 16 ++++++
 .../CodeGen/strictfp-elementwise-bulitins.cpp | 10 ++++
 clang/test/CodeGenHLSL/builtins/tan.hlsl      | 56 +++++++++++++++++++
 clang/test/Sema/aarch64-sve-vector-trig-ops.c |  6 ++
 clang/test/Sema/builtins-elementwise-math.c   | 21 +++++++
 clang/test/Sema/riscv-rvv-vector-trig-ops.c   |  6 ++
 .../SemaCXX/builtins-elementwise-math.cpp     |  7 +++
 llvm/include/llvm/IR/Intrinsics.td            |  1 +
 13 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/tan.hlsl

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index c1420079f75118..60404602c8b358 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -665,6 +665,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
  T __builtin_elementwise_exp2(T x)           returns the base-2 exponential, 2^x, of the specified value      floating point types
 
  T __builtin_elementwise_sqrt(T x)           return the square root of a floating-point number                floating point types
+ T __builtin_elementwise_tan(T x)            return the tangent of x interpreted as an angle in radians       floating point types
  T __builtin_elementwise_roundeven(T x)      round x to the nearest integer value in floating point format,   floating point types
                                              rounding halfway cases to even (that is, to the nearest value
                                              that is an even integer), regardless of the current rounding
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 1af01fe0d700c9..bc6b02af8ea60c 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1284,6 +1284,12 @@ def ElementwiseSqrt : Builtin {
   let Prototype = "void(...)";
 }
 
+def ElementwiseTan : Builtin {
+  let Spellings = ["__builtin_elementwise_tan"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ElementwiseTrunc : Builtin {
   let Spellings = ["__builtin_elementwise_trunc"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f3ab5ad7b08ec8..f586e16f01d3db 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3682,7 +3682,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_sin:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
-
+  case Builtin::BI__builtin_elementwise_tan:
+    return RValue::get(
+        emitUnaryBuiltin(*this, E, llvm::Intrinsic::tan, "elt.tan"));
   case Builtin::BI__builtin_elementwise_trunc:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index da153d8f8e0349..03fcb121850424 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -581,6 +581,38 @@ float sqrt(float In);
 _HLSL_BUILTIN_ALIAS(__builtin_sqrt)
 double sqrt(double In);
 
+//===----------------------------------------------------------------------===//
+// tan builtins
+//===----------------------------------------------------------------------===//
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half tan(half);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half2 tan(half2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half3 tan(half3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half4 tan(half4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float tan(float);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float2 tan(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float3 tan(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float4 tan(float4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+double tan(double);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+double2 tan(double2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+double3 tan(double3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+double4 tan(double4);
+
 //===----------------------------------------------------------------------===//
 // trunc builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 502b24bcdf8b42..f49fb4b467176f 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2782,6 +2782,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_nearbyint:
   case Builtin::BI__builtin_elementwise_sin:
   case Builtin::BI__builtin_elementwise_sqrt:
+  case Builtin::BI__builtin_elementwise_tan:
   case Builtin::BI__builtin_elementwise_trunc:
   case Builtin::BI__builtin_elementwise_canonicalize: {
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 1c667e5bff1eab..1b5466abd347d7 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -604,6 +604,22 @@ void test_builtin_elementwise_sqrt(float f1, float f2, double d1, double d2,
   vf2 = __builtin_elementwise_sqrt(vf1);
 }
 
+void test_builtin_elementwise_tan(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_tan(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT:  call float @llvm.tan.f32(float [[F1]])
+  f2 = __builtin_elementwise_tan(f1);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.tan.f64(double [[D1]])
+  d2 = __builtin_elementwise_tan(d1);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.tan.v4f32(<4 x float> [[VF1]])
+  vf2 = __builtin_elementwise_tan(vf1);
+}
+
 void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                     float4 vf1, float4 vf2) {
   // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
diff --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
index fdf865ebbe8911..c72d5949916911 100644
--- a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
+++ b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -187,6 +187,16 @@ float4 strict_elementwise_sqrt(float4 a) {
   return __builtin_elementwise_sqrt(a);
 }
 
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_tanDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_TAN:%.*]] = tail call <4 x float> @llvm.tan.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_TAN]]
+//
+float4 strict_elementwise_tan(float4 a) {
+  return __builtin_elementwise_tan(a);
+}
+
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
diff --git a/clang/test/CodeGenHLSL/builtins/tan.hlsl b/clang/test/CodeGenHLSL/builtins/tan.hlsl
new file mode 100644
index 00000000000000..88206eee146b0a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/tan.hlsl
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -D__HLSL_ENABLE_16_BIT -o - | FileCheck %s --check-prefix=NO_HALF
+
+// CHECK: define noundef half @
+// CHECK: call half @llvm.tan.f16(
+// NO_HALF: define noundef float @"?test_tan_half@@YA$halff@$halff@@Z"(
+// NO_HALF: call float @llvm.tan.f32(
+half test_tan_half ( half p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <2 x half> @
+// CHECK: call <2 x half> @llvm.tan.v2f16
+// NO_HALF: define noundef <2 x float> @"?test_tan_float2@@YAT?$__vector@M$01@__clang@@T12@@Z"(
+// NO_HALF: call <2 x float> @llvm.tan.v2f32(
+half2 test_tan_half2 ( half2 p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <3 x half> @
+// CHECK: call <3 x half> @llvm.tan.v3f16
+// NO_HALF: define noundef <3 x float> @"?test_tan_float3@@YAT?$__vector@M$02@__clang@@T12@@Z"(
+// NO_HALF: call <3 x float> @llvm.tan.v3f32(
+half3 test_tan_half3 ( half3 p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <4 x half> @
+// CHECK: call <4 x half> @llvm.tan.v4f16
+// NO_HALF: define noundef <4 x float> @"?test_tan_float4@@YAT?$__vector@M$03@__clang@@T12@@Z"(
+// NO_HALF: call <4 x float> @llvm.tan.v4f32(
+half4 test_tan_half4 ( half4 p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK: define noundef float @
+// CHECK: call float @llvm.tan.f32(
+float test_tan_float ( float p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <2 x float> @
+// CHECK: call <2 x float> @llvm.tan.v2f32
+float2 test_tan_float2 ( float2 p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <3 x float> @
+// CHECK: call <3 x float> @llvm.tan.v3f32
+float3 test_tan_float3 ( float3 p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <4 x float> @
+// CHECK: call <4 x float> @llvm.tan.v4f32
+float4 test_tan_float4 ( float4 p0 ) {
+  return tan ( p0 );
+}
diff --git a/clang/test/Sema/aarch64-sve-vector-trig-ops.c b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
index 7ca941f578c70d..5039599d43ad81 100644
--- a/clang/test/Sema/aarch64-sve-vector-trig-ops.c
+++ b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
@@ -16,3 +16,9 @@ svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) {
   return __builtin_elementwise_cos(v);
   // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
 }
+
+svfloat32_t test_tan_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_tan(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index e7b36285fa7dcf..a4430f186fd58c 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -622,6 +622,27 @@ void test_builtin_elementwise_sqrt(int i, float f, double d, float4 v, int3 iv,
   // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
 
+void test_builtin_elementwise_tan(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+
+  struct Foo s = __builtin_elementwise_tan(f);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+  i = __builtin_elementwise_tan();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_tan(i);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'int')}}
+
+  i = __builtin_elementwise_tan(f, f);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_tan(u);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned int')}}
+
+  uv = __builtin_elementwise_tan(uv);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+}
+
 void test_builtin_elementwise_trunc(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
 
   struct Foo s = __builtin_elementwise_trunc(f);
diff --git a/clang/test/Sema/riscv-rvv-vector-trig-ops.c b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
index a457e484860602..ee4c596c431841 100644
--- a/clang/test/Sema/riscv-rvv-vector-trig-ops.c
+++ b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
@@ -17,3 +17,9 @@ vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) {
   return __builtin_elementwise_cos(v);
   // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
 }
+
+vfloat32mf2_t test_tan_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_tan(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
index 44a44ab055e997..356d00139eb3c5 100644
--- a/clang/test/SemaCXX/builtins-elementwise-math.cpp
+++ b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -118,6 +118,13 @@ void test_builtin_elementwise_sqrt() {
   static_assert(!is_const<decltype(__builtin_elementwise_sqrt(b))>::value);
 }
 
+void test_builtin_elementwise_tan() {
+  const float a = 42.0;
+  float b = 42.3;
+  static_assert(!is_const<decltype(__builtin_elementwise_tan(a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_tan(b))>::value);
+}
+
 void test_builtin_elementwise_log() {
   const float a = 42.0;
   float b = 42.3;
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 3c19c7b063652c..78214cfd8e56fb 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1010,6 +1010,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
   def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_anyint_ty]>;
   def int_sin  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_cos  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_tan  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_pow  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
                            [LLVMMatchType<0>, LLVMMatchType<0>]>;
   def int_log  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;