diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 44a301ecc99280..78faadb30d9eb5 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -964,6 +964,13 @@ class APFloat : public APFloatBase {
     return Val;
   }
 
+  /// Factory for Positive and Negative One.
+  ///
+  /// \param Negative True iff the number should be negative.
+  static APFloat getOne(const fltSemantics &Sem, bool Negative = false) {
+    return APFloat(Sem, Negative ? -1 : 1);
+  }
+
   /// Factory for Positive and Negative Infinity.
   ///
   /// \param Negative True iff the number should be negative.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e3bd4ea3ffd90e..4fcbe08e4b2b98 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17262,26 +17262,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   if (SDValue V = combineRepeatedFPDivisors(N))
     return V;
 
-  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
-    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
-    if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
-      // Compute the reciprocal 1.0 / c2.
-      const APFloat &N1APF = N1CFP->getValueAPF();
-      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
-      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
-      // Only do the transform if the reciprocal is a legal fp immediate that
-      // isn't too nasty (eg NaN, denormal, ...).
-      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
-          (!LegalOperations ||
-           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
-           // backend)... we should handle this gracefully after Legalize.
-           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
-           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
-        return DAG.getNode(ISD::FMUL, DL, VT, N0,
-                           DAG.getConstantFP(Recip, DL, VT));
-    }
+  // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
+  // the loss is acceptable with AllowReciprocal.
+  if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
+    // Compute the reciprocal 1.0 / c2.
+    const APFloat &N1APF = N1CFP->getValueAPF();
+    APFloat Recip = APFloat::getOne(N1APF.getSemantics());
+    APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+    // Only do the transform if the reciprocal is a legal fp immediate that
+    // isn't too nasty (eg NaN, denormal, ...).
+    if (((st == APFloat::opOK && !Recip.isDenormal()) ||
+         (st == APFloat::opInexact &&
+          (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
+        (!LegalOperations ||
+         // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+         // backend)... we should handle this gracefully after Legalize.
+         // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+         TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+         TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
+      return DAG.getNode(ISD::FMUL, DL, VT, N0,
+                         DAG.getConstantFP(Recip, DL, VT));
+  }
 
+  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
     // If this FDIV is part of a reciprocal square root, it may be folded
     // into a target-specific square root estimate instruction.
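// [Illustration, not part of the patch] A minimal sketch of the exactness
// check the new combine relies on: compute 1.0/C with APFloat and treat the
// fold as precision-preserving only when the division is exact and the
// reciprocal is not denormal. The helper name below is made up.
static bool hasExactNormalReciprocal(const llvm::APFloat &C) {
  llvm::APFloat Recip = llvm::APFloat::getOne(C.getSemantics());
  llvm::APFloat::opStatus St =
      Recip.divide(C, llvm::APFloat::rmNearestTiesToEven);
  return St == llvm::APFloat::opOK && !Recip.isDenormal();
}
// For example, C = 2.0 or 0.25 has an exact, normal reciprocal in any binary
// format, so the fdiv folds unconditionally; C = 3.0 yields opInexact and the
// fold still requires 'arcp' or unsafe-fp-math.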
if (N1.getOpcode() == ISD::FSQRT) { diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll index 296be831da7622..7056a4d28fed39 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -412,10 +412,10 @@ define half @scvtf_f16_i32_7(i32 %int) { ; CHECK-NO16-LABEL: scvtf_f16_i32_7: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: scvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -432,10 +432,10 @@ define half @scvtf_f16_i32_15(i32 %int) { ; CHECK-NO16-LABEL: scvtf_f16_i32_15: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: scvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -452,10 +452,10 @@ define half @scvtf_f16_i64_7(i64 %long) { ; CHECK-NO16-LABEL: scvtf_f16_i64_7: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: scvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -472,10 +472,10 @@ define half @scvtf_f16_i64_15(i64 %long) { ; CHECK-NO16-LABEL: scvtf_f16_i64_15: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: scvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -574,10 +574,10 @@ define half @ucvtf_f16_i32_7(i32 %int) { ; CHECK-NO16-LABEL: ucvtf_f16_i32_7: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: ucvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -594,10 +594,10 @@ define half @ucvtf_f16_i32_15(i32 %int) { ; CHECK-NO16-LABEL: ucvtf_f16_i32_15: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: ucvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -614,10 +614,10 @@ define half @ucvtf_f16_i64_7(i64 %long) { ; CHECK-NO16-LABEL: ucvtf_f16_i64_7: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: ucvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -634,10 +634,10 @@ define half @ucvtf_f16_i64_15(i64 %long) { ; CHECK-NO16-LABEL: ucvtf_f16_i64_15: ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: ucvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: 
movi v0.2s, #56, lsl #24 ; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fdiv s0, s1, s0 +; CHECK-NO16-NEXT: fmul s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/fdiv-const.ll b/llvm/test/CodeGen/AArch64/fdiv-const.ll index 5a8f7338434011..7aa89db71adfeb 100644 --- a/llvm/test/CodeGen/AArch64/fdiv-const.ll +++ b/llvm/test/CodeGen/AArch64/fdiv-const.ll @@ -4,8 +4,8 @@ define float @divf32_2(float %a) nounwind { ; CHECK-LABEL: divf32_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, #2.00000000 -; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fmov s1, #0.50000000 +; CHECK-NEXT: fmul s0, s0, s1 ; CHECK-NEXT: ret %r = fdiv float %a, 2.0 ret float %r @@ -46,8 +46,8 @@ define float @divf32_p75_arcp(float %a) nounwind { define half @divf16_2(half %a) nounwind { ; CHECK-LABEL: divf16_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov h1, #2.00000000 -; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fmov h1, #0.50000000 +; CHECK-NEXT: fmul h0, h0, h1 ; CHECK-NEXT: ret %r = fdiv half %a, 2.0 ret half %r @@ -67,9 +67,9 @@ define half @divf16_32768(half %a) nounwind { define half @divf16_32768_arcp(half %a) nounwind { ; CHECK-LABEL: divf16_32768_arcp: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #512 // =0x200 +; CHECK-NEXT: mov w8, #30720 // =0x7800 ; CHECK-NEXT: fmov h1, w8 -; CHECK-NEXT: fmul h0, h0, h1 +; CHECK-NEXT: fdiv h0, h0, h1 ; CHECK-NEXT: ret %r = fdiv arcp half %a, 32768.0 ret half %r @@ -78,8 +78,8 @@ define half @divf16_32768_arcp(half %a) nounwind { define double @divf64_2(double %a) nounwind { ; CHECK-LABEL: divf64_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, #2.00000000 -; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fmov d1, #0.50000000 +; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: ret %r = fdiv double %a, 2.0 ret double %r @@ -88,8 +88,8 @@ define double @divf64_2(double %a) nounwind { define <4 x float> @divv4f32_2(<4 x float> %a) nounwind { ; CHECK-LABEL: divv4f32_2: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #64, lsl #24 -; CHECK-NEXT: fdiv v0.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v1.4s, #63, lsl #24 +; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %r = fdiv <4 x float> %a, ret <4 x float> %r @@ -141,9 +141,8 @@ define <4 x float> @divv4f32_24816(<4 x float> %a) nounwind { define @divnxv4f32_2( %a) nounwind { ; CHECK-LABEL: divnxv4f32_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z1.s, #2.00000000 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 ; CHECK-NEXT: ret %r = fdiv %a, splat (float 2.0) ret %r diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll index 4192745abd3477..98276b68481a10 100644 --- a/llvm/test/CodeGen/AArch64/frem-power2.ll +++ b/llvm/test/CodeGen/AArch64/frem-power2.ll @@ -5,11 +5,12 @@ define float @frem2(float %x) { ; CHECK-SD-LABEL: frem2: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov s1, #2.00000000 +; CHECK-SD-NEXT: fmov s1, #0.50000000 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s1, s2, s1, s0 +; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s1, s1, s2, s0 ; CHECK-SD-NEXT: mvni v2.4s, #128, lsl #24 ; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 @@ -27,10 +28,11 @@ entry: define float @frem2_nsz(float %x) { ; CHECK-SD-LABEL: frem2_nsz: ; 
CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov s1, #2.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: fmov s1, #0.50000000 +; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem2_nsz: @@ -65,10 +67,11 @@ define float @frem2_abs(float %x) { ; CHECK-SD-LABEL: frem2_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: fmov s1, #2.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: fmov s1, #0.50000000 +; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem2_abs: @@ -85,9 +88,9 @@ entry: define half @hrem2_nsz(half %x) { ; CHECK-SD-LABEL: hrem2_nsz: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov h1, #2.00000000 +; CHECK-SD-NEXT: fmov h1, #0.50000000 ; CHECK-SD-NEXT: fmov h2, #-2.00000000 -; CHECK-SD-NEXT: fdiv h1, h0, h1 +; CHECK-SD-NEXT: fmul h1, h0, h1 ; CHECK-SD-NEXT: frintz h1, h1 ; CHECK-SD-NEXT: fmadd h0, h1, h2, h0 ; CHECK-SD-NEXT: ret @@ -112,10 +115,11 @@ entry: define double @drem2_nsz(double %x) { ; CHECK-SD-LABEL: drem2_nsz: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov d1, #2.00000000 -; CHECK-SD-NEXT: fdiv d2, d0, d1 -; CHECK-SD-NEXT: frintz d2, d2 -; CHECK-SD-NEXT: fmsub d0, d2, d1, d0 +; CHECK-SD-NEXT: fmov d1, #0.50000000 +; CHECK-SD-NEXT: fmov d2, #-2.00000000 +; CHECK-SD-NEXT: fmul d1, d0, d1 +; CHECK-SD-NEXT: frintz d1, d1 +; CHECK-SD-NEXT: fmadd d0, d1, d2, d0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: drem2_nsz: @@ -176,10 +180,11 @@ entry: define float @fremm2_nsz(float %x) { ; CHECK-SD-LABEL: fremm2_nsz: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fmov s1, #-2.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: fmov s1, #-0.50000000 +; CHECK-SD-NEXT: fmov s2, #2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fremm2_nsz: @@ -195,10 +200,11 @@ define float @frem4_abs(float %x) { ; CHECK-SD-LABEL: frem4_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: fmov s1, #4.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: fmov s1, #0.25000000 +; CHECK-SD-NEXT: fmov s2, #-4.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem4_abs: @@ -216,10 +222,12 @@ define float @frem16_abs(float %x) { ; CHECK-SD-LABEL: frem16_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: fmov s1, #16.00000000 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: mov w8, #1031798784 // =0x3d800000 +; CHECK-SD-NEXT: fmov s2, #-16.00000000 +; CHECK-SD-NEXT: fmov s1, w8 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem16_abs: @@ -237,11 +245,13 @@ define float @frem4294967296_abs(float %x) { ; CHECK-SD-LABEL: 
frem4294967296_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-SD-NEXT: mov w8, #796917760 // =0x2f800000 ; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: mov w8, #-813694976 // =0xcf800000 +; CHECK-SD-NEXT: fmov s2, w8 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem4294967296_abs: @@ -260,11 +270,13 @@ define float @frem1152921504606846976_abs(float %x) { ; CHECK-SD-LABEL: frem1152921504606846976_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000 +; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000 ; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: mov w8, #-578813952 // =0xdd800000 +; CHECK-SD-NEXT: fmov s2, w8 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem1152921504606846976_abs: @@ -283,11 +295,13 @@ define float @frem4611686018427387904_abs(float %x) { ; CHECK-SD-LABEL: frem4611686018427387904_abs: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000 +; CHECK-SD-NEXT: mov w8, #545259520 // =0x20800000 ; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: mov w8, #-562036736 // =0xde800000 +; CHECK-SD-NEXT: fmov s2, w8 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem4611686018427387904_abs: @@ -305,11 +319,12 @@ entry: define float @frem9223372036854775808_abs(float %x) { ; CHECK-SD-LABEL: frem9223372036854775808_abs: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v1.2s, #95, lsl #24 +; CHECK-SD-NEXT: movi v1.2s, #32, lsl #24 ; CHECK-SD-NEXT: fabs s0, s0 -; CHECK-SD-NEXT: fdiv s2, s0, s1 -; CHECK-SD-NEXT: frintz s2, s2 -; CHECK-SD-NEXT: fmsub s0, s2, s1, s0 +; CHECK-SD-NEXT: movi v2.2s, #223, lsl #24 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem9223372036854775808_abs: @@ -326,11 +341,12 @@ entry: define <4 x float> @frem2_vec(<4 x float> %x) { ; CHECK-SD-LABEL: frem2_vec: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24 +; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24 +; CHECK-SD-NEXT: movi v2.4s, #64, lsl #24 ; CHECK-SD-NEXT: mov v3.16b, v0.16b -; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: frintz v2.4s, v2.4s -; CHECK-SD-NEXT: fmls v3.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: frintz v1.4s, v1.4s +; CHECK-SD-NEXT: fmls v3.4s, v2.4s, v1.4s ; CHECK-SD-NEXT: mvni v1.4s, #128, lsl #24 ; CHECK-SD-NEXT: bit v0.16b, v3.16b, v1.16b ; CHECK-SD-NEXT: ret @@ -387,10 +403,11 @@ entry: define <4 x float> @frem2_nsz_vec(<4 x float> %x) { ; CHECK-SD-LABEL: frem2_nsz_vec: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24 -; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: frintz v2.4s, v2.4s -; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24 +; 
CHECK-SD-NEXT: movi v2.4s, #64, lsl #24 +; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: frintz v1.4s, v1.4s +; CHECK-SD-NEXT: fmls v0.4s, v2.4s, v1.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem2_nsz_vec: @@ -445,12 +462,14 @@ entry: define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) { ; CHECK-SD-LABEL: frem1152921504606846976_absv: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000 +; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000 ; CHECK-SD-NEXT: fabs v0.4s, v0.4s ; CHECK-SD-NEXT: dup v1.4s, w8 -; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: frintz v2.4s, v2.4s -; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000 +; CHECK-SD-NEXT: dup v2.4s, w8 +; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: frintz v1.4s, v1.4s +; CHECK-SD-NEXT: fmls v0.4s, v2.4s, v1.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem1152921504606846976_absv: diff --git a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll index 5386ef425dcb58..64d4a0cf785013 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll @@ -17,7 +17,7 @@ define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x, ; GCN-NEXT: s_mov_b32 exec_lo, s2 ; GCN-NEXT: s_cbranch_execz .LBB0_2 ; GCN-NEXT: ; %bb.1: ; %if -; GCN-NEXT: s_mov_b32 s2, 2.0 +; GCN-NEXT: s_mov_b32 s2, 0x40400000 ; GCN-NEXT: v_div_scale_f32 v1, s3, s2, s2, v0 ; GCN-NEXT: v_rcp_f32_e64 v2, v1 ; GCN-NEXT: s_mov_b32 s3, 1.0 @@ -39,7 +39,7 @@ entry: br i1 %cc, label %if, label %end if: - %v.if = fdiv float %v, 2.0 + %v.if = fdiv float %v, 3.0 br label %end end: diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll index 71c2c09c0105c0..63ecd9fec78839 100644 --- a/llvm/test/CodeGen/ARM/frem-power2.ll +++ b/llvm/test/CodeGen/ARM/frem-power2.ll @@ -14,26 +14,28 @@ define float @frem4(float %x) { ; ; CHECK-FP-LABEL: frem4: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-FP-NEXT: vmov s2, r0 +; CHECK-FP-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-FP-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-FP-NEXT: vmov s4, r0 ; CHECK-FP-NEXT: lsrs r0, r0, #31 -; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-FP-NEXT: vrintz.f32 s4, s4 -; CHECK-FP-NEXT: vfms.f32 s2, s4, s0 -; CHECK-FP-NEXT: vmov r1, s2 +; CHECK-FP-NEXT: vmul.f32 s0, s4, s0 +; CHECK-FP-NEXT: vrintz.f32 s0, s0 +; CHECK-FP-NEXT: vfma.f32 s4, s0, s2 +; CHECK-FP-NEXT: vmov r1, s4 ; CHECK-FP-NEXT: bfi r1, r0, #31, #1 ; CHECK-FP-NEXT: mov r0, r1 ; CHECK-FP-NEXT: bx lr ; ; CHECK-M33-LABEL: frem4: ; CHECK-M33: @ %bb.0: @ %entry -; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-M33-NEXT: vmov s2, r0 +; CHECK-M33-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-M33-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-M33-NEXT: vmov s4, r0 ; CHECK-M33-NEXT: lsrs r0, r0, #31 -; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-M33-NEXT: vrintz.f32 s4, s4 -; CHECK-M33-NEXT: vmls.f32 s2, s4, s0 -; CHECK-M33-NEXT: vmov r1, s2 +; CHECK-M33-NEXT: vmul.f32 s0, s4, s0 +; CHECK-M33-NEXT: vrintz.f32 s0, s0 +; CHECK-M33-NEXT: vmla.f32 s4, s0, s2 +; CHECK-M33-NEXT: vmov r1, s4 ; CHECK-M33-NEXT: bfi r1, r0, #31, #1 ; CHECK-M33-NEXT: mov r0, r1 ; CHECK-M33-NEXT: bx lr @@ -53,22 +55,24 @@ define float @frem4_nsz(float %x) { ; ; CHECK-FP-LABEL: frem4_nsz: ; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-FP-NEXT: vmov 
s2, r0 -; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-FP-NEXT: vrintz.f32 s4, s4 -; CHECK-FP-NEXT: vfms.f32 s2, s4, s0 -; CHECK-FP-NEXT: vmov r0, s2 +; CHECK-FP-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-FP-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-FP-NEXT: vmov s4, r0 +; CHECK-FP-NEXT: vmul.f32 s0, s4, s0 +; CHECK-FP-NEXT: vrintz.f32 s0, s0 +; CHECK-FP-NEXT: vfma.f32 s4, s0, s2 +; CHECK-FP-NEXT: vmov r0, s4 ; CHECK-FP-NEXT: bx lr ; ; CHECK-M33-LABEL: frem4_nsz: ; CHECK-M33: @ %bb.0: @ %entry -; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00 -; CHECK-M33-NEXT: vmov s2, r0 -; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0 -; CHECK-M33-NEXT: vrintz.f32 s4, s4 -; CHECK-M33-NEXT: vmls.f32 s2, s4, s0 -; CHECK-M33-NEXT: vmov r0, s2 +; CHECK-M33-NEXT: vmov.f32 s0, #2.500000e-01 +; CHECK-M33-NEXT: vmov.f32 s2, #-4.000000e+00 +; CHECK-M33-NEXT: vmov s4, r0 +; CHECK-M33-NEXT: vmul.f32 s0, s4, s0 +; CHECK-M33-NEXT: vrintz.f32 s0, s0 +; CHECK-M33-NEXT: vmla.f32 s4, s0, s2 +; CHECK-M33-NEXT: vmov r0, s4 ; CHECK-M33-NEXT: bx lr entry: %fmod = frem nsz float %x, 4.0 diff --git a/llvm/test/CodeGen/ARM/vdiv_combine.ll b/llvm/test/CodeGen/ARM/vdiv_combine.ll index 988844661085ed..899487f9efb2c8 100644 --- a/llvm/test/CodeGen/ARM/vdiv_combine.ll +++ b/llvm/test/CodeGen/ARM/vdiv_combine.ll @@ -5,10 +5,7 @@ define arm_aapcs_vfpcc <2 x float> @t1(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d0, d0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -20,10 +17,7 @@ entry: define arm_aapcs_vfpcc <2 x float> @t2(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.u32 d2, d0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.u32 d0, d0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float> @@ -56,17 +50,10 @@ entry: define arm_aapcs_vfpcc <2 x float> @t4(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vldr s2, LCPI3_0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d16, d0 +; CHECK-NEXT: vmov.i32 d17, #0x2f000000 +; CHECK-NEXT: vmul.f32 d0, d16, d17 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI3_0: -; CHECK-NEXT: .long 0x50000000 @ float 8.58993459E+9 -; CHECK-NEXT: .end_data_region entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> %div.i = fdiv <2 x float> %vcvt.i, @@ -77,17 +64,8 @@ entry: define arm_aapcs_vfpcc <2 x float> @t5(<2 x i32> %vecinit2.i) nounwind { ; CHECK-LABEL: t5: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcvt.f32.s32 d2, d0 -; CHECK-NEXT: vldr s2, LCPI4_0 -; CHECK-NEXT: vdiv.f32 s1, s5, s2 -; CHECK-NEXT: vdiv.f32 s0, s4, s2 +; CHECK-NEXT: vcvt.f32.s32 d0, d0, #32 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI4_0: -; CHECK-NEXT: .long 0x4f800000 @ float 4.2949673E+9 -; CHECK-NEXT: .end_data_region entry: %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> %div.i = fdiv <2 x float> %vcvt.i, @@ -98,12 +76,7 @@ entry: define arm_aapcs_vfpcc <4 x float> @t6(<4 x i32> %vecinit6.i) nounwind { ; CHECK-LABEL: t6: ; CHECK: @ %bb.0: @ 
%entry -; CHECK-NEXT: vmov.f32 s4, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float> @@ -115,12 +88,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) { ; CHECK-LABEL: fix_unsigned_i16_to_float: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovl.u16 q8, d0 -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.u32 q2, q8 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.u32 q0, q8, #1 ; CHECK-NEXT: bx lr %conv = uitofp <4 x i16> %in to <4 x float> %shift = fdiv <4 x float> %conv, @@ -131,12 +99,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) { ; CHECK-LABEL: fix_signed_i16_to_float: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovl.s16 q8, d0 -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q8 -; CHECK-NEXT: vdiv.f32 s3, s11, s4 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 +; CHECK-NEXT: vcvt.f32.s32 q0, q8, #1 ; CHECK-NEXT: bx lr %conv = sitofp <4 x i16> %in to <4 x float> %shift = fdiv <4 x float> %conv, @@ -152,13 +115,12 @@ define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float(<2 x i64> %in) { ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl ___floatundisf ; CHECK-NEXT: vmov r2, r1, d8 -; CHECK-NEXT: vmov s18, r0 -; CHECK-NEXT: vmov.f32 s16, #2.000000e+00 +; CHECK-NEXT: vmov s19, r0 +; CHECK-NEXT: vmov.i32 d8, #0x3f000000 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl ___floatundisf -; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: vdiv.f32 s1, s18, s16 -; CHECK-NEXT: vdiv.f32 s0, s2, s16 +; CHECK-NEXT: vmov s18, r0 +; CHECK-NEXT: vmul.f32 d0, d9, d8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {lr} ; CHECK-NEXT: bx lr @@ -177,13 +139,13 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) { ; CHECK-NEXT: bl ___floatundidf ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: vmov.f64 d8, #2.000000e+00 +; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: bl ___floatundidf ; CHECK-NEXT: vmov d16, r0, r1 -; CHECK-NEXT: vdiv.f64 d1, d9, d8 -; CHECK-NEXT: vdiv.f64 d0, d16, d8 +; CHECK-NEXT: vmul.f64 d1, d9, d8 +; CHECK-NEXT: vmul.f64 d0, d16, d8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {lr} ; CHECK-NEXT: bx lr @@ -196,19 +158,8 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) { define arm_aapcs_vfpcc <8 x float> @test7(<8 x i32> %in) nounwind { ; CHECK-LABEL: test7: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.f32 s12, #8.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q4, q0 -; CHECK-NEXT: vcvt.f32.s32 q2, q1 -; CHECK-NEXT: vdiv.f32 s3, s19, s12 -; CHECK-NEXT: vdiv.f32 s7, s11, s12 -; CHECK-NEXT: vdiv.f32 s2, s18, s12 -; CHECK-NEXT: vdiv.f32 s6, s10, s12 -; CHECK-NEXT: vdiv.f32 s1, s17, s12 -; CHECK-NEXT: vdiv.f32 s5, s9, s12 -; CHECK-NEXT: vdiv.f32 s0, s16, s12 -; CHECK-NEXT: vdiv.f32 s4, s8, s12 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3 +; CHECK-NEXT: vcvt.f32.s32 q1, q1, #3 ; CHECK-NEXT: bx lr entry: %vcvt.i = sitofp <8 x i32> %in to <8 x float> @@ -220,19 +171,8 @@ entry: define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) 
{ ; CHECK-LABEL: test8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s4, #2.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 -; CHECK-NEXT: vldr s3, LCPI11_0 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #1 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI11_0: -; CHECK-NEXT: .long 0x7fc00000 @ float NaN -; CHECK-NEXT: .end_data_region %vcvt.i = sitofp <4 x i32> %in to <4 x float> %div.i = fdiv <4 x float> %vcvt.i, ret <4 x float> %div.i @@ -241,19 +181,8 @@ define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) { define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) { ; CHECK-LABEL: test_illegal_int_to_fp: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s4, #4.000000e+00 -; CHECK-NEXT: vcvt.f32.s32 q2, q0 -; CHECK-NEXT: vdiv.f32 s2, s10, s4 -; CHECK-NEXT: vdiv.f32 s1, s9, s4 -; CHECK-NEXT: vdiv.f32 s0, s8, s4 -; CHECK-NEXT: vldr s3, LCPI12_0 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .data_region -; CHECK-NEXT: LCPI12_0: -; CHECK-NEXT: .long 0x7fc00000 @ float NaN -; CHECK-NEXT: .end_data_region %conv = sitofp <3 x i32> %in to <3 x float> %res = fdiv <3 x float> %conv, ret <3 x float> %res diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll index 47ac8848a437d8..ac65a1112be439 100644 --- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -1367,7 +1367,7 @@ define void @bcast_unfold_fdiv_v16f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB42_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1386,7 +1386,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp %tmp4 = load <16 x float>, ptr %tmp2, align 4 - %tmp5 = fdiv <16 x float> %tmp4, + %tmp5 = fdiv <16 x float> %tmp4, store <16 x float> %tmp5, ptr %tmp2, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1400,7 +1400,7 @@ define void @bcast_unfold_fdiv_v8f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB43_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1419,7 +1419,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp %tmp4 = load <8 x float>, ptr %tmp2, align 4 - %tmp5 = fdiv <8 x float> %tmp4, + %tmp5 = fdiv <8 x float> %tmp4, store <8 x float> %tmp5, ptr %tmp2, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1433,7 +1433,7 @@ define void @bcast_unfold_fdiv_v4f32(ptr 
nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB44_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1451,7 +1451,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp %tmp4 = load <4 x float>, ptr %tmp2, align 4 - %tmp5 = fdiv <4 x float> %tmp4, + %tmp5 = fdiv <4 x float> %tmp4, store <4 x float> %tmp5, ptr %tmp2, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1465,7 +1465,7 @@ define void @bcast_unfold_fdiv_v8f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB45_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1484,7 +1484,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp %tmp4 = load <8 x double>, ptr %tmp2, align 8 - %tmp5 = fdiv <8 x double> %tmp4, + %tmp5 = fdiv <8 x double> %tmp4, store <8 x double> %tmp5, ptr %tmp2, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1498,7 +1498,7 @@ define void @bcast_unfold_fdiv_v4f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0] +; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB46_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1517,7 +1517,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp %tmp4 = load <4 x double>, ptr %tmp2, align 8 - %tmp5 = fdiv <4 x double> %tmp4, + %tmp5 = fdiv <4 x double> %tmp4, store <4 x double> %tmp5, ptr %tmp2, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 @@ -1531,7 +1531,7 @@ define void @bcast_unfold_fdiv_v2f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3.0E+0,3.0E+0] ; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB47_1: # %bb1 @@ -1550,7 +1550,7 @@ bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp %tmp4 = load <2 x double>, ptr %tmp2, align 8 - %tmp5 = fdiv <2 x double> %tmp4, + %tmp5 = fdiv <2 x double> %tmp4, store <2 x double> %tmp5, ptr %tmp2, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 diff --git a/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll index 33a7ec9bfc7944..ba09ba8b6402be 100644 --- a/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll +++ b/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll @@ -14,7 +14,7 @@ define double @unsafe_fp_math_default0(double %x) { ; SAFE: divsd ; UNSAFE: 
mulsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } @@ -22,7 +22,7 @@ define double @unsafe_fp_math_default0(double %x) { define double @unsafe_fp_math_off(double %x) #0 { ; SAFE: divsd ; UNSAFE: divsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } @@ -31,7 +31,7 @@ define double @unsafe_fp_math_default1(double %x) { ; With unsafe math enabled, can change this div to a mul. ; SAFE: divsd ; UNSAFE: mulsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } @@ -39,7 +39,7 @@ define double @unsafe_fp_math_default1(double %x) { define double @unsafe_fp_math_on(double %x) #1 { ; SAFE: mulsd ; UNSAFE: mulsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div } @@ -48,7 +48,7 @@ define double @unsafe_fp_math_default2(double %x) { ; With unsafe math enabled, can change this div to a mul. ; SAFE: divsd ; UNSAFE: mulsd - %div = fdiv double %x, 2.0 + %div = fdiv double %x, 3.0 ret double %div }
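A side note on the test updates above, with a minimal sketch (assuming the patch's new APFloat::getOne is available): the X86 change-unsafe-fp-math.ll and avx512-broadcast-unfold.ll divisors move from 2.0 to 3.0 because 1.0/2.0 is exact and now folds even without unsafe math, whereas 1.0/3.0 remains inexact; and the AArch64 divf16_32768_arcp case stops folding because 1.0/32768.0 is exact but denormal in half precision. The program below is illustrative only.

#include "llvm/ADT/APFloat.h"
#include <cassert>
using namespace llvm;

int main() {
  // 1.0/3.0 is inexact in binary floating point, so an fdiv by 3.0 still
  // needs 'arcp' or unsafe-fp-math before it can become an fmul.
  APFloat RecipThree = APFloat::getOne(APFloat::IEEEsingle());
  assert(RecipThree.divide(APFloat(3.0f), APFloat::rmNearestTiesToEven) ==
         APFloat::opInexact);

  // In half precision, 1.0/32768.0 == 2^-15 is exact but denormal (the
  // smallest normal half is 2^-14), so the new condition rejects the fold.
  APFloat RecipHalf = APFloat::getOne(APFloat::IEEEhalf());
  APFloat::opStatus St = RecipHalf.divide(
      APFloat(APFloat::IEEEhalf(), "32768.0"), APFloat::rmNearestTiesToEven);
  assert(St == APFloat::opOK && RecipHalf.isDenormal());
  return 0;
}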