Skip to content

Commit

Permalink
address pr comments
Browse files Browse the repository at this point in the history
  • Loading branch information
farzonl committed May 28, 2024
1 parent 4d4d087 commit ae988cd
Show file tree
Hide file tree
Showing 9 changed files with 359 additions and 16 deletions.
35 changes: 35 additions & 0 deletions llvm/include/llvm/Analysis/VecFuncs.def
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8), "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("tan", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("tan", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v")

TLI_DEFINE_VECFUNC("tanf", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v")

TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4), "_ZGV_LLVM_N4vv")

Expand Down Expand Up @@ -304,6 +316,22 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8), "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16), "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("tan", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("tan", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tan", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("tanf", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("tanf", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8), "_ZGV_LLVM_N8vv")
Expand Down Expand Up @@ -1238,6 +1266,13 @@ TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd8_tan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd4_tan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd2_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")

TLI_DEFINE_VECFUNC("asin", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
Expand Down
7 changes: 3 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1338,7 +1338,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to expand the result of this "
"operator!");

// clang-format off
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
Expand Down Expand Up @@ -1408,9 +1408,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FSUB:
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
case ISD::STRICT_FTAN:
case ISD::FTAN:
ExpandFloatRes_FTAN(N, Lo, Hi);
break;
case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
Expand All @@ -1420,6 +1418,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
// clang-format on
}

// If Lo/Hi is null, the sub-method took care of registering results etc.
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
SDValue &Lo, SDValue &Hi);
// clang-format off
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
Expand Down Expand Up @@ -649,10 +650,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
// clang-format on

// Float Operand Expansion.
bool ExpandFloatOperand(SDNode *N, unsigned OpNo);
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// non-optsize case.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

// clang-format off
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
Expand All @@ -668,8 +669,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FTAN, VT, Expand);
setOperationAction(ISD::FTAN , VT, Expand);
}
// clang-format on

// Half type will be promoted by default.
setF16Action(MVT::f16, Promote);
Expand Down Expand Up @@ -741,10 +743,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

// We don't support sin/cos/fmod
// clang-format off
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FTAN, MVT::f32, Expand);
setOperationAction(ISD::FTAN , MVT::f32, Expand);
// clang-format on

if (UseX87) {
// Always expand sin/cos functions even though x87 has an instruction.
Expand Down
22 changes: 13 additions & 9 deletions llvm/test/CodeGen/X86/llvm.tan.ll
Original file line number Diff line number Diff line change
@@ -1,53 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s

define half @use_tanf16(half %a) {
define half @use_tanf16(half %a) nounwind {
; CHECK-LABEL: use_tanf16:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: callq tanf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%x = call half @llvm.tan.f16(half %a)
ret half %x
}

define float @use_tanf32(float %a) {
define float @use_tanf32(float %a) nounwind {
; CHECK-LABEL: use_tanf32:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp tanf@PLT # TAILCALL
%x = call float @llvm.tan.f32(float %a)
ret float %x
}

define double @use_tanf64(double %a) {
define double @use_tanf64(double %a) nounwind {
; CHECK-LABEL: use_tanf64:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp tan@PLT # TAILCALL
%x = call double @llvm.tan.f64(double %a)
ret double %x
}

define fp128 @use_tanfp128(fp128 %a) {
define double @use_tanf80(double %a) nounwind {
; CHECK-LABEL: use_tanf80:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp tan@PLT # TAILCALL
%x = call double @llvm.tan.f80(double %a)
ret double %x
}

define fp128 @use_tanfp128(fp128 %a) nounwind {
; CHECK-LABEL: use_tanfp128:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp tanf128@PLT # TAILCALL
%x = call fp128 @llvm.tan.f128(fp128 %a)
ret fp128 %x
}

define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) {
define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) nounwind {
; CHECK-LABEL: use_tanppc_fp128:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq tanl@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a)
ret ppc_fp128 %x
Expand Down
113 changes: 113 additions & 0 deletions llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0

declare double @tan(double) #0
declare float @tanf(float) #0
declare double @llvm.tan.f64(double) #0
declare float @llvm.tan.f32(float) #0

declare double @pow(double, double) #0
declare float @powf(float, float) #0
declare double @llvm.pow.f64(double, double) #0
Expand Down Expand Up @@ -264,6 +269,114 @@ for.end:
ret void
}

define void @tan_f64(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @tan_f64(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to double
%call = tail call double @tan(double %conv)
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
store double %call, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret void
}

define void @tan_f32(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @tan_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to float
%call = tail call float @tanf(float %conv)
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
store float %call, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret void
}

define void @tan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to double
%call = tail call double @llvm.tan.f64(double %conv)
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
store double %call, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret void
}

define void @tan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @tan_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to float
%call = tail call float @llvm.tan.f32(float %conv)
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
store float %call, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret void
}

define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
Expand Down
Loading

0 comments on commit ae988cd

Please sign in to comment.