Skip to content

Commit

Permalink
[WebAssembly] Remove saturating fp-to-int target intrinsics
Browse files Browse the repository at this point in the history
Use the target-independent @llvm.fptosi.sat and @llvm.fptoui.sat intrinsics instead.
This includes removing the intrinsics for i32x4.trunc_sat_zero_f64x2_{s,u},
which are now represented in IR as a saturating truncation to a v2i32 followed by
a concatenation with a zero vector.

Differential Revision: https://reviews.llvm.org/D100596
  • Loading branch information
tlively authored and alexcrichton committed Apr 19, 2021
1 parent 6ffbea3 commit 05751c8
Show file tree
Hide file tree
Showing 12 changed files with 220 additions and 717 deletions.
24 changes: 16 additions & 8 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16756,8 +16756,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
{ResT, Src->getType()});
Function *Callee =
CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
return Builder.CreateCall(Callee, {Src});
}
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
Expand All @@ -16767,8 +16767,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
{ResT, Src->getType()});
Function *Callee =
CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
return Builder.CreateCall(Callee, {Src});
}
case WebAssembly::BI__builtin_wasm_min_f32:
Expand Down Expand Up @@ -17164,14 +17164,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
unsigned IntNo;
switch (BuiltinID) {
case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
IntNo = Intrinsic::wasm_trunc_sat_zero_signed;
IntNo = Intrinsic::fptosi_sat;
break;
case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4:
IntNo = Intrinsic::wasm_trunc_sat_zero_unsigned;
IntNo = Intrinsic::fptoui_sat;
break;
}
Function *Callee = CGM.getIntrinsic(IntNo);
return Builder.CreateCall(Callee, Vec);
llvm::Type *SrcT = Vec->getType();
llvm::Type *TruncT =
SrcT->getWithNewType(llvm::IntegerType::get(getLLVMContext(), 32));
Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
Value *Trunc = Builder.CreateCall(Callee, Vec);
Value *Splat = Builder.CreateVectorSplat(2, Builder.getInt32(0));
Value *ConcatMask =
llvm::ConstantVector::get({Builder.getInt32(0), Builder.getInt32(1),
Builder.getInt32(2), Builder.getInt32(3)});
return Builder.CreateShuffleVector(Trunc, Splat, ConcatMask);
}
case WebAssembly::BI__builtin_wasm_demote_zero_f64x2_f32x4: {
Value *Vec = EmitScalarExpr(E->getArg(0));
Expand Down
30 changes: 16 additions & 14 deletions clang/test/CodeGen/builtins-wasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,49 +123,49 @@ long long trunc_u_i64_f64(double f) {

int trunc_saturate_s_i32_f32(float f) {
return __builtin_wasm_trunc_saturate_s_i32_f32(f);
// WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float %f)
// WEBASSEMBLY: call i32 @llvm.fptosi.sat.i32.f32(float %f)
// WEBASSEMBLY-NEXT: ret
}

int trunc_saturate_u_i32_f32(float f) {
return __builtin_wasm_trunc_saturate_u_i32_f32(f);
// WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float %f)
// WEBASSEMBLY: call i32 @llvm.fptoui.sat.i32.f32(float %f)
// WEBASSEMBLY-NEXT: ret
}

int trunc_saturate_s_i32_f64(double f) {
return __builtin_wasm_trunc_saturate_s_i32_f64(f);
// WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double %f)
// WEBASSEMBLY: call i32 @llvm.fptosi.sat.i32.f64(double %f)
// WEBASSEMBLY-NEXT: ret
}

int trunc_saturate_u_i32_f64(double f) {
return __builtin_wasm_trunc_saturate_u_i32_f64(f);
// WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double %f)
// WEBASSEMBLY: call i32 @llvm.fptoui.sat.i32.f64(double %f)
// WEBASSEMBLY-NEXT: ret
}

long long trunc_saturate_s_i64_f32(float f) {
return __builtin_wasm_trunc_saturate_s_i64_f32(f);
// WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float %f)
// WEBASSEMBLY: call i64 @llvm.fptosi.sat.i64.f32(float %f)
// WEBASSEMBLY-NEXT: ret
}

long long trunc_saturate_u_i64_f32(float f) {
return __builtin_wasm_trunc_saturate_u_i64_f32(f);
// WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float %f)
// WEBASSEMBLY: call i64 @llvm.fptoui.sat.i64.f32(float %f)
// WEBASSEMBLY-NEXT: ret
}

long long trunc_saturate_s_i64_f64(double f) {
return __builtin_wasm_trunc_saturate_s_i64_f64(f);
// WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double %f)
// WEBASSEMBLY: call i64 @llvm.fptosi.sat.i64.f64(double %f)
// WEBASSEMBLY-NEXT: ret
}

long long trunc_saturate_u_i64_f64(double f) {
return __builtin_wasm_trunc_saturate_u_i64_f64(f);
// WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double %f)
// WEBASSEMBLY: call i64 @llvm.fptoui.sat.i64.f64(double %f)
// WEBASSEMBLY-NEXT: ret
}

Expand Down Expand Up @@ -852,13 +852,13 @@ f64x2 sqrt_f64x2(f64x2 x) {

i32x4 trunc_saturate_s_i32x4_f32x4(f32x4 f) {
return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(f);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float> %f)
// WEBASSEMBLY: call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f)
// WEBASSEMBLY-NEXT: ret
}

i32x4 trunc_saturate_u_i32x4_f32x4(f32x4 f) {
return __builtin_wasm_trunc_saturate_u_i32x4_f32x4(f);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.unsigned.v4i32.v4f32(<4 x float> %f)
// WEBASSEMBLY: call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f)
// WEBASSEMBLY-NEXT: ret
}

Expand Down Expand Up @@ -892,14 +892,16 @@ u16x8 narrow_u_i16x8_i32x4(u32x4 low, u32x4 high) {

i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) {
return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x)
// WEBASSEMBLY: ret
// WEBASSEMBLY: %0 = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %x)
// WEBASSEMBLY: %1 = shufflevector <2 x i32> %0, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// WEBASSEMBLY: ret <4 x i32> %1
}

u32x4 trunc_sat_zero_u_f64x2_i32x4(f64x2 x) {
return __builtin_wasm_trunc_sat_zero_u_f64x2_i32x4(x);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.unsigned(<2 x double> %x)
// WEBASSEMBLY: ret
// WEBASSEMBLY: %0 = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %x)
// WEBASSEMBLY: %1 = shufflevector <2 x i32> %0, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// WEBASSEMBLY: ret <4 x i32> %1
}

f32x4 wasm_demote_zero_f64x2_f32x4(f64x2 x) {
Expand Down
6 changes: 0 additions & 6 deletions llvm/include/llvm/IR/IntrinsicsWebAssembly.td
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,6 @@ def int_wasm_extadd_pairwise_unsigned :
[IntrNoMem, IntrSpeculatable]>;

// TODO: Remove these if possible if they are merged to the spec.
def int_wasm_trunc_sat_zero_signed :
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_trunc_sat_zero_unsigned :
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_demote_zero :
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
Expand Down
24 changes: 4 additions & 20 deletions llvm/lib/Analysis/ConstantFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1493,8 +1493,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
// WebAssembly float semantics are always known
case Intrinsic::wasm_trunc_signed:
case Intrinsic::wasm_trunc_unsigned:
case Intrinsic::wasm_trunc_saturate_signed:
case Intrinsic::wasm_trunc_saturate_unsigned:
return true;

// Floating point operations cannot be folded in strictfp functions in
Expand Down Expand Up @@ -1896,17 +1894,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
APFloat U = Op->getValueAPF();

if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
IntrinsicID == Intrinsic::wasm_trunc_unsigned ||
IntrinsicID == Intrinsic::wasm_trunc_saturate_signed ||
IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned) {

bool Saturating = IntrinsicID == Intrinsic::wasm_trunc_saturate_signed ||
IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned;
bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed ||
IntrinsicID == Intrinsic::wasm_trunc_saturate_signed;
IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;

if (U.isNaN())
return Saturating ? ConstantInt::get(Ty, 0) : nullptr;
return nullptr;

unsigned Width = Ty->getIntegerBitWidth();
APSInt Int(Width, !Signed);
Expand All @@ -1917,15 +1909,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (Status == APFloat::opOK || Status == APFloat::opInexact)
return ConstantInt::get(Ty, Int);

if (!Saturating)
return nullptr;

if (U.isNegative())
return Signed ? ConstantInt::get(Ty, APInt::getSignedMinValue(Width))
: ConstantInt::get(Ty, APInt::getMinValue(Width));
else
return Signed ? ConstantInt::get(Ty, APInt::getSignedMaxValue(Width))
: ConstantInt::get(Ty, APInt::getMaxValue(Width));
return nullptr;
}

if (IntrinsicID == Intrinsic::fptoui_sat ||
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyISD.def
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ HANDLE_NODETYPE(EXTEND_HIGH_S)
HANDLE_NODETYPE(EXTEND_HIGH_U)
HANDLE_NODETYPE(CONVERT_LOW_S)
HANDLE_NODETYPE(CONVERT_LOW_U)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_S)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_U)
HANDLE_NODETYPE(THROW)
HANDLE_NODETYPE(CATCH)
HANDLE_NODETYPE(MEMORY_COPY)
Expand Down
77 changes: 77 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(Op, T, Expand);
}

if (Subtarget->hasNontrappingFPToInt())
for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
for (auto T : {MVT::i32, MVT::i64})
setOperationAction(Op, T, Custom);

// SIMD-specific configuration
if (Subtarget->hasSIMD128()) {
// Hoist bitcasts out of shuffles
Expand All @@ -134,6 +139,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);

// Combine concat of fp_to_{s,u}int_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
setTargetDAGCombine(ISD::CONCAT_VECTORS);

// Support saturating add for i8x16 and i16x8
for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
for (auto T : {MVT::v16i8, MVT::v8i16})
Expand Down Expand Up @@ -198,6 +206,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
{ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
for (auto T : {MVT::v2i64, MVT::v2f64})
setOperationAction(Op, T, Expand);

// But saturating fp_to_int conversions are
for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
setOperationAction(Op, MVT::v4i32, Custom);
}

// As a special case, these operators use the type to mean the type to
Expand Down Expand Up @@ -1233,6 +1245,9 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
case ISD::SRA:
case ISD::SRL:
return LowerShift(Op, DAG);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return LowerFP_TO_INT_SAT(Op, DAG);
}
}

Expand Down Expand Up @@ -1949,6 +1964,21 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}

/// Custom lowering hook for ISD::FP_TO_SINT_SAT and ISD::FP_TO_UINT_SAT.
///
/// Operand 1 of the node is read as a constant and treated as the saturation
/// width in bits. The node is handed back unchanged when it is either
///   * a scalar i32 or i64 result with a saturation width of 32 or 64, or
///   * a v4i32 result with a saturation width of 32.
/// Every other result-type/width combination yields an empty SDValue.
///
/// \param Op  the saturating fp-to-int node to lower.
/// \param DAG not used by this hook.
/// \returns \p Op for the accepted combinations, otherwise SDValue().
SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  const EVT ResT = Op.getValueType();
  const uint64_t Width = Op.getConstantOperandVal(1);

  // Scalar case: i32/i64 results saturating to 32 or 64 bits.
  const bool IsScalarResult = ResT == MVT::i32 || ResT == MVT::i64;
  if (IsScalarResult && (Width == 32 || Width == 64))
    return Op;

  // Vector case: only v4i32 saturating to 32 bits is accepted.
  if (ResT == MVT::v4i32 && Width == 32)
    return Op;

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Custom DAG combine hooks
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -2037,6 +2067,8 @@ performVectorConvertLowCombine(SDNode *N,
if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return SDValue();
auto Source = Extract.getOperand(0);
if (Source.getValueType() != MVT::v4i32)
return SDValue();
auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
if (IndexNode == nullptr)
return SDValue();
Expand All @@ -2058,6 +2090,49 @@ performVectorConvertLowCombine(SDNode *N,
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

/// DAG combine for ISD::CONCAT_VECTORS that recognizes
///
///   (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
///
/// and replaces it with (i32x4.trunc_sat_f64x2_zero_{s,u} $x), i.e. a
/// WebAssemblyISD::TRUNC_SAT_ZERO_{S,U} node that takes $x directly.
/// Returns an empty SDValue whenever the pattern does not match.
static SDValue
performVectorTruncSatLowCombine(SDNode *N,
                                TargetLowering::DAGCombinerInfo &DCI) {
  assert(N->getOpcode() == ISD::CONCAT_VECTORS);

  // The combined node is only formed for a v4i32 result.
  const EVT ResVT = N->getValueType(0);
  if (ResVT != MVT::v4i32)
    return SDValue();

  // First operand: a saturating fp-to-int conversion with constant operand 32.
  SDValue Conv = N->getOperand(0);
  const unsigned ConvOpc = Conv.getOpcode();
  const bool IsSigned = ConvOpc == ISD::FP_TO_SINT_SAT;
  if (!IsSigned && ConvOpc != ISD::FP_TO_UINT_SAT)
    return SDValue();
  if (Conv.getConstantOperandVal(1) != 32)
    return SDValue();

  // The value being converted must be a v2f64.
  SDValue Source = Conv.getOperand(0);
  if (Source.getValueType() != MVT::v2f64)
    return SDValue();

  // Second operand: a build_vector that is a constant splat of zero.
  auto *ZeroCandidate = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  if (!ZeroCandidate)
    return SDValue();
  APInt SplatBits, UndefBits;
  unsigned SplatWidth;
  bool SawUndef;
  if (!ZeroCandidate->isConstantSplat(SplatBits, UndefBits, SplatWidth,
                                      SawUndef))
    return SDValue();
  if (SplatBits != 0)
    return SDValue();

  // Signedness of the conversion selects the target node.
  auto &DAG = DCI.DAG;
  const unsigned NewOpc = IsSigned ? WebAssemblyISD::TRUNC_SAT_ZERO_S
                                   : WebAssemblyISD::TRUNC_SAT_ZERO_U;
  return DAG.getNode(NewOpc, SDLoc(N), ResVT, Source);
}

SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
Expand All @@ -2072,5 +2147,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performVectorConvertLowCombine(N, DCI);
case ISD::CONCAT_VECTORS:
return performVectorTruncSatLowCombine(N, DCI);
}
}
1 change: 1 addition & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;

// Custom DAG combine hooks
SDValue
Expand Down
26 changes: 9 additions & 17 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -96,23 +96,15 @@ defm I64_TRUNC_U_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
"i64.trunc_sat_f64_u", 0xfc07>,
Requires<[HasNontrappingFPToInt]>;

// Lower llvm.wasm.trunc.saturate.* to saturating instructions
def : Pat<(int_wasm_trunc_saturate_signed F32:$src),
(I32_TRUNC_S_SAT_F32 F32:$src)>;
def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src),
(I32_TRUNC_U_SAT_F32 F32:$src)>;
def : Pat<(int_wasm_trunc_saturate_signed F64:$src),
(I32_TRUNC_S_SAT_F64 F64:$src)>;
def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src),
(I32_TRUNC_U_SAT_F64 F64:$src)>;
def : Pat<(int_wasm_trunc_saturate_signed F32:$src),
(I64_TRUNC_S_SAT_F32 F32:$src)>;
def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src),
(I64_TRUNC_U_SAT_F32 F32:$src)>;
def : Pat<(int_wasm_trunc_saturate_signed F64:$src),
(I64_TRUNC_S_SAT_F64 F64:$src)>;
def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src),
(I64_TRUNC_U_SAT_F64 F64:$src)>;
// Support the explicitly saturating operations as well.
def : Pat<(fp_to_sint_sat F32:$src, (i32 32)), (I32_TRUNC_S_SAT_F32 F32:$src)>;
def : Pat<(fp_to_uint_sat F32:$src, (i32 32)), (I32_TRUNC_U_SAT_F32 F32:$src)>;
def : Pat<(fp_to_sint_sat F64:$src, (i32 32)), (I32_TRUNC_S_SAT_F64 F64:$src)>;
def : Pat<(fp_to_uint_sat F64:$src, (i32 32)), (I32_TRUNC_U_SAT_F64 F64:$src)>;
def : Pat<(fp_to_sint_sat F32:$src, (i32 64)), (I64_TRUNC_S_SAT_F32 F32:$src)>;
def : Pat<(fp_to_uint_sat F32:$src, (i32 64)), (I64_TRUNC_U_SAT_F32 F32:$src)>;
def : Pat<(fp_to_sint_sat F64:$src, (i32 64)), (I64_TRUNC_S_SAT_F64 F64:$src)>;
def : Pat<(fp_to_uint_sat F64:$src, (i32 64)), (I64_TRUNC_U_SAT_F64 F64:$src)>;

// Conversion from floating point to integer pseudo-instructions which don't
// trap on overflow or invalid.
Expand Down
Loading

0 comments on commit 05751c8

Please sign in to comment.