diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 0477fa8434bb40..402189769c20c8 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -143,6 +143,18 @@ TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("tan", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tan", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("tanf", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanf", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v") + TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4), "_ZGV_LLVM_N4vv") @@ -304,6 +316,22 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("tan", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tan", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tan", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("tanf", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanf", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("tanf", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v") + TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8), "_ZGV_LLVM_N8vv") @@ -1238,6 +1266,13 @@ TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd8_tan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd4_tan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd2_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("asin", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 9c2c882975f72f..eafb52dfd5543a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1338,7 +1338,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { #endif report_fatal_error("Do not know how to expand the result of this " "operator!"); - + // clang-format off case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; @@ -1408,9 +1408,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; case ISD::STRICT_FTAN: - case ISD::FTAN: - ExpandFloatRes_FTAN(N, Lo, Hi); - break; + case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; @@ -1420,6 +1418,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; case ISD::STRICT_FREM: case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; + // clang-format on } // If Lo/Hi is null, the sub-method took care of registering results etc. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 42597386e49acf..7b388aa140e522 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -617,6 +617,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue &Lo, SDValue &Hi); void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, SDValue &Lo, SDValue &Hi); + // clang-format off void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -649,10 +650,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + // clang-format on // Float Operand Expansion. bool ExpandFloatOperand(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4c51df1e374e29..4ea989232031fc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -650,6 +650,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // non-optsize case. setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + // clang-format off for (auto VT : { MVT::f32, MVT::f64 }) { // Use ANDPD to simulate FABS. setOperationAction(ISD::FABS, VT, Custom); @@ -668,8 +669,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSIN , VT, Expand); setOperationAction(ISD::FCOS , VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); - setOperationAction(ISD::FTAN, VT, Expand); + setOperationAction(ISD::FTAN , VT, Expand); } + // clang-format on // Half type will be promoted by default. setF16Action(MVT::f16, Promote); @@ -741,10 +743,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // We don't support sin/cos/fmod + // clang-format off setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FTAN, MVT::f32, Expand); + setOperationAction(ISD::FTAN , MVT::f32, Expand); + // clang-format on if (UseX87) { // Always expand sin/cos functions even though x87 has an instruction. diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/llvm.tan.ll index 407b8a5e20297c..9b040cca478e9f 100644 --- a/llvm/test/CodeGen/X86/llvm.tan.ll +++ b/llvm/test/CodeGen/X86/llvm.tan.ll @@ -1,22 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -define half @use_tanf16(half %a) { +define half @use_tanf16(half %a) nounwind { ; CHECK-LABEL: use_tanf16: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: callq __extendhfsf2@PLT ; CHECK-NEXT: callq tanf@PLT ; CHECK-NEXT: callq __truncsfhf2@PLT ; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq %x = call half @llvm.tan.f16(half %a) ret half %x } -define float @use_tanf32(float %a) { +define float @use_tanf32(float %a) nounwind { ; CHECK-LABEL: use_tanf32: ; CHECK: # %bb.0: ; CHECK-NEXT: jmp tanf@PLT # TAILCALL @@ -24,7 +22,7 @@ define float @use_tanf32(float %a) { ret float %x } -define double @use_tanf64(double %a) { +define double @use_tanf64(double %a) nounwind { ; CHECK-LABEL: use_tanf64: ; CHECK: # %bb.0: ; CHECK-NEXT: jmp tan@PLT # TAILCALL @@ -32,7 +30,15 @@ define double @use_tanf64(double %a) { ret double %x } -define fp128 @use_tanfp128(fp128 %a) { +define double @use_tanf80(double %a) nounwind { +; CHECK-LABEL: use_tanf80: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp tan@PLT # TAILCALL + %x = call double @llvm.tan.f80(double %a) + ret double %x +} + +define fp128 @use_tanfp128(fp128 %a) nounwind { ; CHECK-LABEL: use_tanfp128: ; CHECK: # %bb.0: ; CHECK-NEXT: jmp tanf128@PLT # TAILCALL @@ -40,14 +46,12 @@ define fp128 @use_tanfp128(fp128 %a) { ret fp128 %x } -define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) { +define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) nounwind { ; CHECK-LABEL: use_tanppc_fp128: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: callq tanl@PLT ; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq %x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a) ret ppc_fp128 %x diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll index 8d2820a245d952..1627292732b6a8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll @@ -15,6 +15,11 @@ declare float @cosf(float) #0 declare double @llvm.cos.f64(double) #0 declare float @llvm.cos.f32(float) #0 +declare double @tan(double) #0 +declare float @tanf(float) #0 +declare double @llvm.tan.f64(double) #0 +declare float @llvm.tan.f32(float) #0 + declare double @pow(double, double) #0 declare float @powf(float, float) #0 declare double @llvm.pow.f64(double, double) #0 @@ -264,6 +269,114 @@ for.end: ret void } +define void @tan_f64(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @tan_f64( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @tan(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @tan_f32(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @tan_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @tanf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @tan_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.tan.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @tan_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @tan_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.tan.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) { ; CHECK-LABEL: @pow_f64( ; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll index 038852f55f4558..67a2cf2b80e70c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll @@ -356,6 +356,117 @@ for.end: ; preds = %for.body !132 = !{!"llvm.loop.vectorize.width", i32 8} !133 = !{!"llvm.loop.vectorize.enable", i1 true} +define void @tan_f64(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_tan(<2 x double> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @tan(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: + ret void +} + +!141 = distinct !{!141, !142, !143} +!142 = !{!"llvm.loop.vectorize.width", i32 2} +!143 = !{!"llvm.loop.vectorize.enable", i1 true} + + +define void @tan_f32(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_tanf(<8 x float> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @tanf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: + ret void +} + +!151 = distinct !{!151, !152, !153} +!152 = !{!"llvm.loop.vectorize.width", i32 8} +!153 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @tan_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64_intrinsic( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_tan(<2 x double> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.tan.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: + ret void +} + +!161 = distinct !{!161, !162, !163} +!162 = !{!"llvm.loop.vectorize.width", i32 2} +!163 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @tan_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32_intrinsic( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_tanf(<8 x float> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.tan.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + +for.end: + ret void +} + + + +!171 = distinct !{!171, !172, !173} +!172 = !{!"llvm.loop.vectorize.width", i32 8} +!173 = !{!"llvm.loop.vectorize.enable", i1 true} + attributes #0 = { nounwind readnone } declare double @sin(double) #0 @@ -366,6 +477,10 @@ declare double @cos(double) #0 declare float @cosf(float) #0 declare double @llvm.cos.f64(double) #0 declare float @llvm.cos.f32(float) #0 +declare double @tan(double) #0 +declare float @tanf(float) #0 +declare double @llvm.tan.f64(double) #0 +declare float @llvm.tan.f32(float) #0 declare float @expf(float) #0 declare float @powf(float, float) #0 declare float @llvm.exp.f32(float) #0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll index 005557d7445caa..2e78e3632feab6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll @@ -230,6 +230,52 @@ for.end: ret void } +define void @tan_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_tan4(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.tan.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @tan_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @tan_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_tanf4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.tan.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) { ; CHECK-LABEL: @pow_f64( ; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll index 2e78a96a44b747..27038f3a24b664 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll @@ -406,6 +406,31 @@ for.end: ; preds = %for.body, %entry ret void } +;CHECK-LABEL: @tan_f32_intrinsic( +;CHECK: vtanf{{.*}}<4 x float> +;CHECK: ret void +declare float @llvm.tan.f32(float) nounwind readnone +define void @tan_f32_intrinsic(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %call = tail call float @llvm.tan.f32(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv + store float %call, ptr %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + ;CHECK-LABEL: @asin_f32( ;CHECK: vasinf{{.*}}<4 x float> ;CHECK: ret void