From 1c0b3d51e5a81103c28f6d30dbf75f43b97c506f Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 2 Feb 2024 21:40:40 +0000 Subject: [PATCH] [ARM] Switch to soft promoting half types. (#80440) The traditional promotion is known to generate wrong code. Fixes #73805. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 +- llvm/lib/Target/ARM/ARMISelLowering.h | 4 + llvm/test/CodeGen/ARM/aes-erratum-fix.ll | 1098 ++++++++--------- llvm/test/CodeGen/ARM/arm-half-promote.ll | 155 ++- llvm/test/CodeGen/ARM/fp16-args.ll | 40 - llvm/test/CodeGen/ARM/fp16-instructions.ll | 166 +-- llvm/test/CodeGen/ARM/fp16-promote.ll | 146 +-- llvm/test/CodeGen/ARM/llvm.exp10.ll | 63 +- llvm/test/CodeGen/ARM/llvm.frexp.ll | 59 +- .../vecreduce-fadd-legalization-soft-float.ll | 64 +- .../ARM/vecreduce-fadd-legalization-strict.ll | 3 - .../vecreduce-fmax-legalization-soft-float.ll | 55 +- .../vecreduce-fmin-legalization-soft-float.ll | 55 +- .../vecreduce-fmul-legalization-soft-float.ll | 32 +- .../ARM/vecreduce-fmul-legalization-strict.ll | 3 - llvm/test/CodeGen/Thumb2/mve-vabd.ll | 30 +- 16 files changed, 929 insertions(+), 1048 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index bf8c877a547cdf..b5c4a8a322ea7f 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -9055,7 +9055,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget); if (getTypeAction(*DAG.getContext(), EltVT) == - TargetLowering::TypePromoteFloat) { + TargetLowering::TypeSoftPromoteHalf) { // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32, // but the type system will try to do that if we don't intervene. // Reinterpret any such vector-element insertion as one with the @@ -9065,7 +9065,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits()); assert(getTypeAction(*DAG.getContext(), IEltVT) != - TargetLowering::TypePromoteFloat); + TargetLowering::TypeSoftPromoteHalf); SDValue VecIn = Op.getOperand(0); EVT VecVT = VecIn.getValueType(); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index d61a68780e3e1a..b13ddf697cb806 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -762,6 +762,10 @@ class VectorType; ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator = nullptr) const override; + bool softPromoteHalfType() const override { return true; } + + bool useFPRegsForHalfType() const override { return true; } + protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll index f9b62df37ff329..9c2da345956d1a 100644 --- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll +++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll @@ -1355,102 +1355,89 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FIX-NOSCHED-NEXT: .pad #24 -; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24 +; CHECK-FIX-NOSCHED-NEXT: .pad #12 +; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_3 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1] -; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17 -; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16] -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0] -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r3 -; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #8] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7 -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6 -; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3] +; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] +; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2] ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16 -; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1] ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r10, r5 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1] ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB36_4 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_2: -; CHECK-FIX-NOSCHED-NEXT: vmov r4, r6, d1 -; CHECK-FIX-NOSCHED-NEXT: vmov r0, r3, d0 -; CHECK-FIX-NOSCHED-NEXT: lsr r5, r4, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r1, r6, #16 -; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6 -; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r12, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: uxth r9, r4 -; CHECK-FIX-NOSCHED-NEXT: uxth r6, r3 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0] ; CHECK-FIX-NOSCHED-NEXT: b .LBB36_5 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_3: -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #14] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #8] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #6] -; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #10] -; CHECK-FIX-NOSCHED-NEXT: ldrh r10, [r2] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #4] +; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32] +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32] +; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32] +; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0] ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2] +; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1] ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1] ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_2 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_4: -; CHECK-FIX-NOSCHED-NEXT: vmov r5, r3, d1 -; CHECK-FIX-NOSCHED-NEXT: mov r4, r7 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d0[1] -; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d0[0]}, [r1:16] -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d0[0] -; CHECK-FIX-NOSCHED-NEXT: uxth r9, r5 -; CHECK-FIX-NOSCHED-NEXT: uxth r11, r3 -; CHECK-FIX-NOSCHED-NEXT: uxth r6, r7 -; CHECK-FIX-NOSCHED-NEXT: lsr r12, r7, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r1, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r5, r5, #16 -; CHECK-FIX-NOSCHED-NEXT: mov r7, r4 -; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3] +; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1] ; CHECK-FIX-NOSCHED-NEXT: .LBB36_5: -; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r6, r12, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r3, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r9, r5, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r1, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1 +; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1 +; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] -; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24 +; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr: @@ -1460,94 +1447,79 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-CORTEX-FIX-NEXT: .pad #24 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: beq .LBB36_3 +; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1: ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] -; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8 -; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16] -; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d18[0] -; CHECK-CORTEX-FIX-NEXT: uxth r7, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1] -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: uxth r7, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: vmov r3, r6, d17 -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: uxth r7, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r11, r6 -; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16 -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: bne .LBB36_4 +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: mov r3, r6 +; CHECK-CORTEX-FIX-NEXT: b .LBB36_3 ; CHECK-CORTEX-FIX-NEXT: .LBB36_2: -; CHECK-CORTEX-FIX-NEXT: vmov r1, r7, d0 -; CHECK-CORTEX-FIX-NEXT: uxth r0, r1 -; CHECK-CORTEX-FIX-NEXT: uxth r6, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r12, r7, #16 -; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16 -; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: mov r0, r3 -; CHECK-CORTEX-FIX-NEXT: vmov r7, r3, d1 -; CHECK-CORTEX-FIX-NEXT: uxth r10, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r5, r7, #16 -; CHECK-CORTEX-FIX-NEXT: uxth lr, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16 -; CHECK-CORTEX-FIX-NEXT: mov r3, r0 -; CHECK-CORTEX-FIX-NEXT: b .LBB36_5 -; CHECK-CORTEX-FIX-NEXT: .LBB36_3: -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2] -; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r2, #12] -; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #14] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #2] +; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32] +; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32] +; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32] +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #4] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2] +; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #6] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #8] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0] +; CHECK-CORTEX-FIX-NEXT: .LBB36_3: +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3] ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2 -; CHECK-CORTEX-FIX-NEXT: .LBB36_4: -; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0 -; CHECK-CORTEX-FIX-NEXT: vmov.32 r5, d0[1] -; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16] -; CHECK-CORTEX-FIX-NEXT: uxth r6, r5 -; CHECK-CORTEX-FIX-NEXT: lsr r12, r5, #16 -; CHECK-CORTEX-FIX-NEXT: vmov r5, r7, d1 -; CHECK-CORTEX-FIX-NEXT: vmov.32 r1, d16[0] -; CHECK-CORTEX-FIX-NEXT: uxth r10, r5 -; CHECK-CORTEX-FIX-NEXT: lsr r5, r5, #16 -; CHECK-CORTEX-FIX-NEXT: uxth lr, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r0, r1 -; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16 -; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: beq .LBB36_5 +; CHECK-CORTEX-FIX-NEXT: @ %bb.4: +; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1] +; CHECK-CORTEX-FIX-NEXT: b .LBB36_6 ; CHECK-CORTEX-FIX-NEXT: .LBB36_5: +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0] +; CHECK-CORTEX-FIX-NEXT: .LBB36_6: +; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r4, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r10, r5, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16 +; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3] ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r1, r3, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16 +; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11 -; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r9, lsl #16 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9 +; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6 @@ -1604,210 +1576,179 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1 ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FIX-NOSCHED-NEXT: .pad #24 -; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24 -; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0 +; CHECK-FIX-NOSCHED-NEXT: .pad #12 +; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_2 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1] -; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17 -; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0] -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7 -; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3 -; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6 -; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16 -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16 -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5 -; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill ; CHECK-FIX-NOSCHED-NEXT: b .LBB37_3 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_2: -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6] -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2] +; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32] +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32] +; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #4 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r2:32] +; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #12 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r2:32] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1] ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #10] -; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #4] -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[0] +; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_3: +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d3[3] ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d3[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d3[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d3[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d2[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d2[2] ; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_5 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.4: -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r6, d2[1] -; CHECK-FIX-NOSCHED-NEXT: mov r3, r2 -; CHECK-FIX-NOSCHED-NEXT: mov r2, r7 -; CHECK-FIX-NOSCHED-NEXT: vmov r4, r7, d3 -; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r12 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d2[0] -; CHECK-FIX-NOSCHED-NEXT: uxth r5, r6 -; CHECK-FIX-NOSCHED-NEXT: lsr r12, r6, #16 -; CHECK-FIX-NOSCHED-NEXT: uxth r10, r4 -; CHECK-FIX-NOSCHED-NEXT: uxth r11, r7 -; CHECK-FIX-NOSCHED-NEXT: lsr r9, r7, #16 -; CHECK-FIX-NOSCHED-NEXT: mov r7, r2 -; CHECK-FIX-NOSCHED-NEXT: mov r2, r3 -; CHECK-FIX-NOSCHED-NEXT: lsr r4, r4, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1] ; CHECK-FIX-NOSCHED-NEXT: b .LBB37_6 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_5: -; CHECK-FIX-NOSCHED-NEXT: vmov r3, r6, d3 -; CHECK-FIX-NOSCHED-NEXT: vmov r0, r5, d2 -; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r9, r6, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r12, r5, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16 -; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6 -; CHECK-FIX-NOSCHED-NEXT: uxth r10, r3 -; CHECK-FIX-NOSCHED-NEXT: uxth r5, r5 +; CHECK-FIX-NOSCHED-NEXT: mov r0, lr +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d2[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1] +; CHECK-FIX-NOSCHED-NEXT: vmov s0, lr +; CHECK-FIX-NOSCHED-NEXT: mov lr, r0 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_6: -; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0 +; CHECK-FIX-NOSCHED-NEXT: vmov r6, s2 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r12, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r6, r6, r8, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r12, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r4, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r6 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r0, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r4, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r2, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r9, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r9, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, lr, r7, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] -; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24 +; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val: ; CHECK-CORTEX-FIX: @ %bb.0: ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-CORTEX-FIX-NEXT: .pad #28 -; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #28 -; CHECK-CORTEX-FIX-NEXT: vmov r2, s0 +; CHECK-CORTEX-FIX-NEXT: .pad #12 +; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #12 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2 +; CHECK-CORTEX-FIX-NEXT: beq .LBB37_3 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1: ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] -; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1] -; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2 -; CHECK-CORTEX-FIX-NEXT: vmov.32 r7, d16[0] -; CHECK-CORTEX-FIX-NEXT: uxth r6, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16 -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: uxth r7, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 -; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #24] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: vmov r3, r7, d17 -; CHECK-CORTEX-FIX-NEXT: uxth r6, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r11, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16 -; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #16] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: b .LBB37_3 +; CHECK-CORTEX-FIX-NEXT: vmov.f32 s2, s0 +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3] +; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[0] +; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[1] +; CHECK-CORTEX-FIX-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 +; CHECK-CORTEX-FIX-NEXT: bne .LBB37_4 ; CHECK-CORTEX-FIX-NEXT: .LBB37_2: -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1] -; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r1, #12] -; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #14] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #24] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #2] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #4] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d2[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3] +; CHECK-CORTEX-FIX-NEXT: vmov s0, lr +; CHECK-CORTEX-FIX-NEXT: b .LBB37_5 +; CHECK-CORTEX-FIX-NEXT: .LBB37_3: +; CHECK-CORTEX-FIX-NEXT: add r2, r1, #8 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r1:32] +; CHECK-CORTEX-FIX-NEXT: add r3, r1, #4 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r2:32] +; CHECK-CORTEX-FIX-NEXT: add r2, r1, #12 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r3:32] +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r2:32] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #6] +; CHECK-CORTEX-FIX-NEXT: vmov s2, r2 +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #8] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #10] -; CHECK-CORTEX-FIX-NEXT: .LBB37_3: -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: beq .LBB37_5 -; CHECK-CORTEX-FIX-NEXT: @ %bb.4: -; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d2[1] -; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r2 -; CHECK-CORTEX-FIX-NEXT: vmov r4, r6, d3 -; CHECK-CORTEX-FIX-NEXT: uxth r10, r4 -; CHECK-CORTEX-FIX-NEXT: lsr r4, r4, #16 -; CHECK-CORTEX-FIX-NEXT: uxth lr, r6 -; CHECK-CORTEX-FIX-NEXT: lsr r8, r6, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r5, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16 -; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[0] -; CHECK-CORTEX-FIX-NEXT: uxth r0, r2 -; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16 -; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: b .LBB37_6 +; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2 +; CHECK-CORTEX-FIX-NEXT: .LBB37_4: +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3] ; CHECK-CORTEX-FIX-NEXT: .LBB37_5: -; CHECK-CORTEX-FIX-NEXT: vmov r2, r3, d2 -; CHECK-CORTEX-FIX-NEXT: uxth r0, r2 -; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r5, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16 -; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: mov r0, r7 -; CHECK-CORTEX-FIX-NEXT: vmov r6, r7, d3 -; CHECK-CORTEX-FIX-NEXT: uxth r10, r6 -; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16 -; CHECK-CORTEX-FIX-NEXT: uxth lr, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16 -; CHECK-CORTEX-FIX-NEXT: mov r7, r0 -; CHECK-CORTEX-FIX-NEXT: .LBB37_6: -; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r7, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r11, r6, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r7, r10, lsl #16 +; CHECK-CORTEX-FIX-NEXT: ldm sp, {r6, r7} @ 8-byte Folded Reload +; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r4, lsl #16 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r10, r4, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r2, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r2, r3, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #24] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r6, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r3 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r2 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11 -; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r9, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r9, r2, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r7, r6, lsl #16 +; CHECK-CORTEX-FIX-NEXT: vmov r7, s2 +; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16 +; CHECK-CORTEX-FIX-NEXT: vmov r6, s0 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r7 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r4 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r0 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], lr +; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r8, lsl #16 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r4 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r5 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r2 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r3 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r5 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] -; CHECK-CORTEX-FIX-NEXT: add sp, sp, #28 +; CHECK-CORTEX-FIX-NEXT: add sp, sp, #12 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} br i1 %0, label %5, label %11 @@ -3567,102 +3508,89 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FIX-NOSCHED-NEXT: .pad #24 -; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24 +; CHECK-FIX-NOSCHED-NEXT: .pad #12 +; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_3 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1] -; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17 -; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16] -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0] -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r3 -; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #8] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7 -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6 -; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3] +; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] +; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2] ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16 -; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1] ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r10, r5 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1] ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB82_4 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_2: -; CHECK-FIX-NOSCHED-NEXT: vmov r4, r6, d1 -; CHECK-FIX-NOSCHED-NEXT: vmov r0, r3, d0 -; CHECK-FIX-NOSCHED-NEXT: lsr r5, r4, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r1, r6, #16 -; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6 -; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r12, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: uxth r9, r4 -; CHECK-FIX-NOSCHED-NEXT: uxth r6, r3 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0] ; CHECK-FIX-NOSCHED-NEXT: b .LBB82_5 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_3: -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #14] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #8] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #6] -; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #10] -; CHECK-FIX-NOSCHED-NEXT: ldrh r10, [r2] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #4] +; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32] +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32] +; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32] +; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0] ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2] +; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1] ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1] ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_2 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_4: -; CHECK-FIX-NOSCHED-NEXT: vmov r5, r3, d1 -; CHECK-FIX-NOSCHED-NEXT: mov r4, r7 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d0[1] -; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d0[0]}, [r1:16] -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d0[0] -; CHECK-FIX-NOSCHED-NEXT: uxth r9, r5 -; CHECK-FIX-NOSCHED-NEXT: uxth r11, r3 -; CHECK-FIX-NOSCHED-NEXT: uxth r6, r7 -; CHECK-FIX-NOSCHED-NEXT: lsr r12, r7, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r1, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r5, r5, #16 -; CHECK-FIX-NOSCHED-NEXT: mov r7, r4 -; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3] +; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1] ; CHECK-FIX-NOSCHED-NEXT: .LBB82_5: -; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r6, r12, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r3, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r9, r5, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r1, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1 +; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1 +; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] -; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24 +; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr: @@ -3672,94 +3600,79 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < ; CHECK-CORTEX-FIX-NEXT: .pad #24 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: beq .LBB82_3 +; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1: ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] -; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8 -; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16] -; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d18[0] -; CHECK-CORTEX-FIX-NEXT: uxth r7, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1] -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: uxth r7, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: vmov r3, r6, d17 -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: uxth r7, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r11, r6 -; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16 -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: bne .LBB82_4 +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: mov r3, r6 +; CHECK-CORTEX-FIX-NEXT: b .LBB82_3 ; CHECK-CORTEX-FIX-NEXT: .LBB82_2: -; CHECK-CORTEX-FIX-NEXT: vmov r1, r7, d0 -; CHECK-CORTEX-FIX-NEXT: uxth r0, r1 -; CHECK-CORTEX-FIX-NEXT: uxth r6, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r12, r7, #16 -; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16 -; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: mov r0, r3 -; CHECK-CORTEX-FIX-NEXT: vmov r7, r3, d1 -; CHECK-CORTEX-FIX-NEXT: uxth r10, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r5, r7, #16 -; CHECK-CORTEX-FIX-NEXT: uxth lr, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16 -; CHECK-CORTEX-FIX-NEXT: mov r3, r0 -; CHECK-CORTEX-FIX-NEXT: b .LBB82_5 -; CHECK-CORTEX-FIX-NEXT: .LBB82_3: -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2] -; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r2, #12] -; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #14] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #2] +; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32] +; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32] +; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32] +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #4] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2] +; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #6] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #8] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0] +; CHECK-CORTEX-FIX-NEXT: .LBB82_3: +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3] ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2 -; CHECK-CORTEX-FIX-NEXT: .LBB82_4: -; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0 -; CHECK-CORTEX-FIX-NEXT: vmov.32 r5, d0[1] -; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16] -; CHECK-CORTEX-FIX-NEXT: uxth r6, r5 -; CHECK-CORTEX-FIX-NEXT: lsr r12, r5, #16 -; CHECK-CORTEX-FIX-NEXT: vmov r5, r7, d1 -; CHECK-CORTEX-FIX-NEXT: vmov.32 r1, d16[0] -; CHECK-CORTEX-FIX-NEXT: uxth r10, r5 -; CHECK-CORTEX-FIX-NEXT: lsr r5, r5, #16 -; CHECK-CORTEX-FIX-NEXT: uxth lr, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r0, r1 -; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16 -; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: beq .LBB82_5 +; CHECK-CORTEX-FIX-NEXT: @ %bb.4: +; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1] +; CHECK-CORTEX-FIX-NEXT: b .LBB82_6 ; CHECK-CORTEX-FIX-NEXT: .LBB82_5: +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0] +; CHECK-CORTEX-FIX-NEXT: .LBB82_6: +; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r4, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r10, r5, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16 +; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3] ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r1, r3, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16 +; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11 -; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r9, lsl #16 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9 +; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6 @@ -3816,210 +3729,179 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1 ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FIX-NOSCHED-NEXT: .pad #24 -; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24 -; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0 +; CHECK-FIX-NOSCHED-NEXT: .pad #12 +; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_2 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1] -; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17 -; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0] -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7 -; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3 -; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6 -; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16 -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16 -; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5 -; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill ; CHECK-FIX-NOSCHED-NEXT: b .LBB83_3 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_2: -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8] -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6] -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2] +; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32] +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32] +; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #4 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r2:32] +; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #12 +; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r2:32] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1] ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #10] -; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #4] -; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3] +; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[0] +; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_3: +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d3[3] ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 -; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d3[2] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d3[1] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d3[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d2[3] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d2[2] ; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_5 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.4: -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r6, d2[1] -; CHECK-FIX-NOSCHED-NEXT: mov r3, r2 -; CHECK-FIX-NOSCHED-NEXT: mov r2, r7 -; CHECK-FIX-NOSCHED-NEXT: vmov r4, r7, d3 -; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r12 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d2[0] -; CHECK-FIX-NOSCHED-NEXT: uxth r5, r6 -; CHECK-FIX-NOSCHED-NEXT: lsr r12, r6, #16 -; CHECK-FIX-NOSCHED-NEXT: uxth r10, r4 -; CHECK-FIX-NOSCHED-NEXT: uxth r11, r7 -; CHECK-FIX-NOSCHED-NEXT: lsr r9, r7, #16 -; CHECK-FIX-NOSCHED-NEXT: mov r7, r2 -; CHECK-FIX-NOSCHED-NEXT: mov r2, r3 -; CHECK-FIX-NOSCHED-NEXT: lsr r4, r4, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1] ; CHECK-FIX-NOSCHED-NEXT: b .LBB83_6 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_5: -; CHECK-FIX-NOSCHED-NEXT: vmov r3, r6, d3 -; CHECK-FIX-NOSCHED-NEXT: vmov r0, r5, d2 -; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r9, r6, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr r12, r5, #16 -; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16 -; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6 -; CHECK-FIX-NOSCHED-NEXT: uxth r10, r3 -; CHECK-FIX-NOSCHED-NEXT: uxth r5, r5 +; CHECK-FIX-NOSCHED-NEXT: mov r0, lr +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d2[0] +; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1] +; CHECK-FIX-NOSCHED-NEXT: vmov s0, lr +; CHECK-FIX-NOSCHED-NEXT: mov lr, r0 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_6: -; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0 +; CHECK-FIX-NOSCHED-NEXT: vmov r6, s2 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r12, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r6, r6, r8, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r12, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r4, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r6 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 -; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r0, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r4, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r2, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r9, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r9, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0 -; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16 +; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, lr, r7, lsl #16 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] -; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24 +; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val: ; CHECK-CORTEX-FIX: @ %bb.0: ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-CORTEX-FIX-NEXT: .pad #28 -; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #28 -; CHECK-CORTEX-FIX-NEXT: vmov r2, s0 +; CHECK-CORTEX-FIX-NEXT: .pad #12 +; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #12 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2 +; CHECK-CORTEX-FIX-NEXT: beq .LBB83_3 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1: ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] -; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1] -; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2 -; CHECK-CORTEX-FIX-NEXT: vmov.32 r7, d16[0] -; CHECK-CORTEX-FIX-NEXT: uxth r6, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16 -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: uxth r7, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 -; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #24] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: vmov r3, r7, d17 -; CHECK-CORTEX-FIX-NEXT: uxth r6, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r11, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16 -; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #16] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: b .LBB83_3 +; CHECK-CORTEX-FIX-NEXT: vmov.f32 s2, s0 +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3] +; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[0] +; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[1] +; CHECK-CORTEX-FIX-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 +; CHECK-CORTEX-FIX-NEXT: bne .LBB83_4 ; CHECK-CORTEX-FIX-NEXT: .LBB83_2: -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1] -; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r1, #12] -; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #14] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #24] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #2] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #4] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d2[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3] +; CHECK-CORTEX-FIX-NEXT: vmov s0, lr +; CHECK-CORTEX-FIX-NEXT: b .LBB83_5 +; CHECK-CORTEX-FIX-NEXT: .LBB83_3: +; CHECK-CORTEX-FIX-NEXT: add r2, r1, #8 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r1:32] +; CHECK-CORTEX-FIX-NEXT: add r3, r1, #4 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r2:32] +; CHECK-CORTEX-FIX-NEXT: add r2, r1, #12 +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r3:32] +; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r2:32] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #6] +; CHECK-CORTEX-FIX-NEXT: vmov s2, r2 +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3] ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #8] -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #10] -; CHECK-CORTEX-FIX-NEXT: .LBB83_3: -; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1] +; CHECK-CORTEX-FIX-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 -; CHECK-CORTEX-FIX-NEXT: beq .LBB83_5 -; CHECK-CORTEX-FIX-NEXT: @ %bb.4: -; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d2[1] -; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r2 -; CHECK-CORTEX-FIX-NEXT: vmov r4, r6, d3 -; CHECK-CORTEX-FIX-NEXT: uxth r10, r4 -; CHECK-CORTEX-FIX-NEXT: lsr r4, r4, #16 -; CHECK-CORTEX-FIX-NEXT: uxth lr, r6 -; CHECK-CORTEX-FIX-NEXT: lsr r8, r6, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r5, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16 -; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[0] -; CHECK-CORTEX-FIX-NEXT: uxth r0, r2 -; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16 -; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: b .LBB83_6 +; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2 +; CHECK-CORTEX-FIX-NEXT: .LBB83_4: +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2] +; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3] ; CHECK-CORTEX-FIX-NEXT: .LBB83_5: -; CHECK-CORTEX-FIX-NEXT: vmov r2, r3, d2 -; CHECK-CORTEX-FIX-NEXT: uxth r0, r2 -; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16 -; CHECK-CORTEX-FIX-NEXT: uxth r5, r3 -; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16 -; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-CORTEX-FIX-NEXT: mov r0, r7 -; CHECK-CORTEX-FIX-NEXT: vmov r6, r7, d3 -; CHECK-CORTEX-FIX-NEXT: uxth r10, r6 -; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16 -; CHECK-CORTEX-FIX-NEXT: uxth lr, r7 -; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16 -; CHECK-CORTEX-FIX-NEXT: mov r7, r0 -; CHECK-CORTEX-FIX-NEXT: .LBB83_6: -; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r7, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r11, r6, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r7, r10, lsl #16 +; CHECK-CORTEX-FIX-NEXT: ldm sp, {r6, r7} @ 8-byte Folded Reload +; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r4, lsl #16 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r10, r4, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r2, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16 -; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r2, r3, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #24] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r6, lsl #16 -; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp] @ 4-byte Reload -; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r3 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r2 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11 -; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r9, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r9, r2, lsl #16 +; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r7, r6, lsl #16 +; CHECK-CORTEX-FIX-NEXT: vmov r7, s2 +; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16 +; CHECK-CORTEX-FIX-NEXT: vmov r6, s0 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r7 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r4 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r0 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], lr +; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r8, lsl #16 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r4 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r5 -; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r2 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r3 +; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r5 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] -; CHECK-CORTEX-FIX-NEXT: add sp, sp, #28 +; CHECK-CORTEX-FIX-NEXT: add sp, sp, #12 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} br i1 %0, label %5, label %11 diff --git a/llvm/test/CodeGen/ARM/arm-half-promote.ll b/llvm/test/CodeGen/ARM/arm-half-promote.ll index d6a8a9b9538f17..e1ab75b2ac7f16 100644 --- a/llvm/test/CodeGen/ARM/arm-half-promote.ll +++ b/llvm/test/CodeGen/ARM/arm-half-promote.ll @@ -2,78 +2,113 @@ define arm_aapcs_vfpcc { <8 x half>, <8 x half> } @f1() { ; CHECK-LABEL: _f1 -; CHECK: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 -; CHECK-NEXT: vmov.i32 d8, #0x0 -; CHECK-NEXT: vmov.i32 d0, #0x0 -; CHECK-NEXT: vmov.i32 d1, #0x0 -; CHECK-NEXT: vmov.i32 d2, #0x0 -; CHECK-NEXT: vmov.i32 d3, #0x0 -; CHECK-NEXT: vmov.i32 d4, #0x0 -; CHECK-NEXT: vmov.i32 d5, #0x0 -; CHECK-NEXT: vmov.i32 d6, #0x0 -; CHECK-NEXT: vmov.i32 d7, #0x0 -; CHECK-NEXT: vmov.f32 s1, s16 -; CHECK-NEXT: vmov.f32 s3, s16 -; CHECK-NEXT: vmov.f32 s5, s16 -; CHECK-NEXT: vmov.f32 s7, s16 -; CHECK-NEXT: vmov.f32 s9, s16 -; CHECK-NEXT: vmov.f32 s11, s16 -; CHECK-NEXT: vmov.f32 s13, s16 -; CHECK-NEXT: vmov.f32 s15, s16 -; CHECK-NEXT: vpop {d8} +; CHECK: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: vmov d4, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[1] +; CHECK-NEXT: vmov d8, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[2] +; CHECK-NEXT: vmov d5, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[3] +; CHECK-NEXT: vmov d9, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[0] +; CHECK-NEXT: vmov d6, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[1] +; CHECK-NEXT: vmov d10, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[2] +; CHECK-NEXT: vmov d7, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[3] +; CHECK-NEXT: vmov d11, r0, r0 +; CHECK: vmov.f32 s0, s8 +; CHECK: vmov.f32 s1, s16 +; CHECK: vmov.f32 s2, s10 +; CHECK: vmov.f32 s3, s18 +; CHECK: vmov.f32 s4, s12 +; CHECK: vmov.f32 s5, s20 +; CHECK: vmov.f32 s6, s14 +; CHECK: vmov.f32 s7, s22 +; CHECK: vmov.f32 s9, s16 +; CHECK: vmov.f32 s11, s18 +; CHECK: vmov.f32 s13, s20 +; CHECK: vmov.f32 s15, s22 +; CHECK: vpop {d8, d9, d10, d11} ; CHECK-NEXT: bx lr + ret { <8 x half>, <8 x half> } zeroinitializer } define swiftcc { <8 x half>, <8 x half> } @f2() { ; CHECK-LABEL: _f2 -; CHECK: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 -; CHECK-NEXT: vmov.i32 d8, #0x0 -; CHECK-NEXT: vmov.i32 d0, #0x0 -; CHECK-NEXT: vmov.i32 d1, #0x0 -; CHECK-NEXT: vmov.i32 d2, #0x0 -; CHECK-NEXT: vmov.i32 d3, #0x0 -; CHECK-NEXT: vmov.i32 d4, #0x0 -; CHECK-NEXT: vmov.i32 d5, #0x0 -; CHECK-NEXT: vmov.i32 d6, #0x0 -; CHECK-NEXT: vmov.i32 d7, #0x0 -; CHECK-NEXT: vmov.f32 s1, s16 -; CHECK-NEXT: vmov.f32 s3, s16 -; CHECK-NEXT: vmov.f32 s5, s16 -; CHECK-NEXT: vmov.f32 s7, s16 -; CHECK-NEXT: vmov.f32 s9, s16 -; CHECK-NEXT: vmov.f32 s11, s16 -; CHECK-NEXT: vmov.f32 s13, s16 -; CHECK-NEXT: vmov.f32 s15, s16 -; CHECK-NEXT: vpop {d8} +; CHECK: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: vmov d4, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[1] +; CHECK-NEXT: vmov d8, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[2] +; CHECK-NEXT: vmov d5, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[3] +; CHECK-NEXT: vmov d9, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[0] +; CHECK-NEXT: vmov d6, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[1] +; CHECK-NEXT: vmov d10, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[2] +; CHECK-NEXT: vmov d7, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[3] +; CHECK-NEXT: vmov d11, r0, r0 +; CHECK: vmov.f32 s0, s8 +; CHECK: vmov.f32 s1, s16 +; CHECK: vmov.f32 s2, s10 +; CHECK: vmov.f32 s3, s18 +; CHECK: vmov.f32 s4, s12 +; CHECK: vmov.f32 s5, s20 +; CHECK: vmov.f32 s6, s14 +; CHECK: vmov.f32 s7, s22 +; CHECK: vmov.f32 s9, s16 +; CHECK: vmov.f32 s11, s18 +; CHECK: vmov.f32 s13, s20 +; CHECK: vmov.f32 s15, s22 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: bx lr + ret { <8 x half>, <8 x half> } zeroinitializer } define fastcc { <8 x half>, <8 x half> } @f3() { ; CHECK-LABEL: _f3 -; CHECK: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, #5.000000e-01 -; CHECK-NEXT: vmov.i32 d8, #0x0 -; CHECK-NEXT: vmov.i32 d0, #0x0 -; CHECK-NEXT: vmov.i32 d1, #0x0 -; CHECK-NEXT: vmov.i32 d2, #0x0 -; CHECK-NEXT: vmov.i32 d3, #0x0 -; CHECK-NEXT: vmov.i32 d4, #0x0 -; CHECK-NEXT: vmov.i32 d5, #0x0 -; CHECK-NEXT: vmov.i32 d6, #0x0 -; CHECK-NEXT: vmov.i32 d7, #0x0 -; CHECK-NEXT: vmov.f32 s1, s16 -; CHECK-NEXT: vmov.f32 s3, s16 -; CHECK-NEXT: vmov.f32 s5, s16 -; CHECK-NEXT: vmov.f32 s7, s16 -; CHECK-NEXT: vmov.f32 s9, s16 -; CHECK-NEXT: vmov.f32 s11, s16 -; CHECK-NEXT: vmov.f32 s13, s16 -; CHECK-NEXT: vmov.f32 s15, s16 -; CHECK-NEXT: vpop {d8} +; CHECK: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: vmov d4, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[1] +; CHECK-NEXT: vmov d8, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[2] +; CHECK-NEXT: vmov d5, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d16[3] +; CHECK-NEXT: vmov d9, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[0] +; CHECK-NEXT: vmov d6, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[1] +; CHECK-NEXT: vmov d10, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[2] +; CHECK-NEXT: vmov d7, r0, r0 +; CHECK-NEXT: vmov.u16 r0, d17[3] +; CHECK-NEXT: vmov d11, r0, r0 +; CHECK: vmov.f32 s0, s8 +; CHECK: vmov.f32 s1, s16 +; CHECK: vmov.f32 s2, s10 +; CHECK: vmov.f32 s3, s18 +; CHECK: vmov.f32 s4, s12 +; CHECK: vmov.f32 s5, s20 +; CHECK: vmov.f32 s6, s14 +; CHECK: vmov.f32 s7, s22 +; CHECK: vmov.f32 s9, s16 +; CHECK: vmov.f32 s11, s18 +; CHECK: vmov.f32 s13, s20 +; CHECK: vmov.f32 s15, s22 +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: bx lr ret { <8 x half>, <8 x half> } zeroinitializer diff --git a/llvm/test/CodeGen/ARM/fp16-args.ll b/llvm/test/CodeGen/ARM/fp16-args.ll index 18bbcd12c768a3..cd039b87d4a3fb 100644 --- a/llvm/test/CodeGen/ARM/fp16-args.ll +++ b/llvm/test/CodeGen/ARM/fp16-args.ll @@ -46,46 +46,6 @@ entry: } define <4 x half> @foo_vec(<4 x half> %a) { -; SOFT-LABEL: foo_vec: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: vmov s0, r3 -; SOFT-NEXT: vmov s2, r1 -; SOFT-NEXT: vcvtb.f32.f16 s0, s0 -; SOFT-NEXT: vmov s4, r0 -; SOFT-NEXT: vcvtb.f32.f16 s2, s2 -; SOFT-NEXT: vmov s6, r2 -; SOFT-NEXT: vcvtb.f32.f16 s4, s4 -; SOFT-NEXT: vcvtb.f32.f16 s6, s6 -; SOFT-NEXT: vadd.f32 s0, s0, s0 -; SOFT-NEXT: vadd.f32 s2, s2, s2 -; SOFT-NEXT: vcvtb.f16.f32 s0, s0 -; SOFT-NEXT: vadd.f32 s4, s4, s4 -; SOFT-NEXT: vcvtb.f16.f32 s2, s2 -; SOFT-NEXT: vadd.f32 s6, s6, s6 -; SOFT-NEXT: vcvtb.f16.f32 s4, s4 -; SOFT-NEXT: vcvtb.f16.f32 s6, s6 -; SOFT-NEXT: vmov r0, s4 -; SOFT-NEXT: vmov r1, s2 -; SOFT-NEXT: vmov r2, s6 -; SOFT-NEXT: vmov r3, s0 -; SOFT-NEXT: bx lr -; -; HARD-LABEL: foo_vec: -; HARD: @ %bb.0: @ %entry -; HARD-NEXT: vcvtb.f32.f16 s4, s3 -; HARD-NEXT: vcvtb.f32.f16 s2, s2 -; HARD-NEXT: vcvtb.f32.f16 s6, s1 -; HARD-NEXT: vcvtb.f32.f16 s0, s0 -; HARD-NEXT: vadd.f32 s2, s2, s2 -; HARD-NEXT: vadd.f32 s0, s0, s0 -; HARD-NEXT: vcvtb.f16.f32 s2, s2 -; HARD-NEXT: vadd.f32 s4, s4, s4 -; HARD-NEXT: vcvtb.f16.f32 s0, s0 -; HARD-NEXT: vadd.f32 s6, s6, s6 -; HARD-NEXT: vcvtb.f16.f32 s3, s4 -; HARD-NEXT: vcvtb.f16.f32 s1, s6 -; HARD-NEXT: bx lr -; ; FULL-SOFT-LE-LABEL: foo_vec: ; FULL-SOFT-LE: @ %bb.0: @ %entry ; FULL-SOFT-LE-NEXT: vmov d16, r0, r1 diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll index 8477cb7e02b234..1988cb1d2f9039 100644 --- a/llvm/test/CodeGen/ARM/fp16-instructions.ll +++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll @@ -85,8 +85,8 @@ entry: ; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]] ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0 -; CHECK-HARDFP-VFP3: vmov r{{.}}, s0 -; CHECK-HARDFP-VFP3: vmov{{.*}}, s1 +; CHECK-HARDFP-VFP3: vmov r +; CHECK-HARDFP-VFP3: vmov.f32 s ; CHECK-HARDFP-VFP3: bl __aeabi_h2f ; CHECK-HARDFP-VFP3: bl __aeabi_h2f ; CHECK-HARDFP-VFP3: vadd.f32 @@ -368,8 +368,8 @@ entry: ; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]] ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0 -; CHECK-HARDFP-VFP3: vmov r{{.}}, s0 -; CHECK-HARDFP-VFP3: vmov{{.*}}, s1 +; CHECK-HARDFP-VFP3: vmov r +; CHECK-HARDFP-VFP3: vmov.f32 s ; CHECK-HARDFP-VFP3: bl __aeabi_h2f ; CHECK-HARDFP-VFP3: bl __aeabi_h2f ; CHECK-HARDFP-VFP3: vdiv.f32 @@ -590,8 +590,8 @@ entry: ; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]] ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0 -; CHECK-HARDFP-VFP3: vmov r{{.}}, s0 -; CHECK-HARDFP-VFP3: vmov{{.*}}, s1 +; CHECK-HARDFP-VFP3: vmov r +; CHECK-HARDFP-VFP3: vmov.f32 s ; CHECK-HARDFP-VFP3: bl __aeabi_h2f ; CHECK-HARDFP-VFP3: bl __aeabi_h2f ; CHECK-HARDFP-VFP3: vmul.f32 @@ -700,18 +700,19 @@ define half @select_cc1(ptr %a0) { ; CHECK-LABEL: select_cc1: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 s0, -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movne r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 -; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32: it eq -; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32: vcmp.f32 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: itt eq +; CHECK-SOFTFP-FP16-T32-NEXT: movweq r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movteq r0, } ; FIXME: more tests need to be added for VSELGE and VSELGT. @@ -727,18 +728,19 @@ define half @select_cc_ge1(ptr %a0) { ; CHECK-LABEL: select_cc_ge1: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movlt r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32-NEXT: it ge -; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32-NEXT: itt ge +; CHECK-SOFTFP-FP16-T32-NEXT: movwge r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movtge r0, } define half @select_cc_ge2(ptr %a0) { @@ -749,18 +751,19 @@ define half @select_cc_ge2(ptr %a0) { ; CHECK-LABEL: select_cc_ge2: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movhi r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32-NEXT: it ls -; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32-NEXT: itt ls +; CHECK-SOFTFP-FP16-T32-NEXT: movwls r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movtls r0, } define half @select_cc_ge3(ptr %a0) { @@ -771,18 +774,19 @@ define half @select_cc_ge3(ptr %a0) { ; CHECK-LABEL: select_cc_ge3: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movls r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32-NEXT: it hi -; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32-NEXT: itt hi +; CHECK-SOFTFP-FP16-T32-NEXT: movwhi r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movthi r0, } define half @select_cc_ge4(ptr %a0) { @@ -793,18 +797,19 @@ define half @select_cc_ge4(ptr %a0) { ; CHECK-LABEL: select_cc_ge4: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movge r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32-NEXT: it lt -; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32-NEXT: itt lt +; CHECK-SOFTFP-FP16-T32-NEXT: movwlt r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movtlt r0, } ; 37. VSELGT @@ -816,18 +821,19 @@ define half @select_cc_gt1(ptr %a0) { ; CHECK-LABEL: select_cc_gt1: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movle r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32-NEXT: it gt -; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32-NEXT: itt gt +; CHECK-SOFTFP-FP16-T32-NEXT: movwgt r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movtgt r0, } define half @select_cc_gt2(ptr %a0) { @@ -838,18 +844,19 @@ define half @select_cc_gt2(ptr %a0) { ; CHECK-LABEL: select_cc_gt2: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movmi r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32-NEXT: it pl -; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32-NEXT: itt pl +; CHECK-SOFTFP-FP16-T32-NEXT: movwpl r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movtpl r0, } define half @select_cc_gt3(ptr %a0) { @@ -860,18 +867,19 @@ define half @select_cc_gt3(ptr %a0) { ; CHECK-LABEL: select_cc_gt3: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movgt r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32-NEXT: it le -; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32-NEXT: itt le +; CHECK-SOFTFP-FP16-T32-NEXT: movwle r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movtle r0, } define half @select_cc_gt4(ptr %a0) { @@ -882,18 +890,19 @@ define half @select_cc_gt4(ptr %a0) { ; CHECK-LABEL: select_cc_gt4: -; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6 +; CHECK-HARDFP-FULLFP16: vcmp.f16 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-A32-NEXT: movpl r0, -; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32-NEXT: it mi -; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16-T32-NEXT: itt mi +; CHECK-SOFTFP-FP16-T32-NEXT: movwmi r0, +; CHECK-SOFTFP-FP16-T32-NEXT: movtmi r0, } ; 38. VSELVS @@ -924,26 +933,25 @@ entry: ; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0 ; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}} ; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]] -; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00 +; CHECK-SOFTFP-FP16-A32: ldr r1, .LCP{{.*}} ; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0 -; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}} ; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]] -; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]] -; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]] +; CHECK-SOFTFP-FP16-A32: mov r0, r1 +; CHECK-SOFTFP-FP16-A32-NEXT: movne r0, #2 +; CHECK-SOFTFP-FP16-A32-NEXT: movvs r0, r1 ; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0 ; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}} ; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]] -; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}} ; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0 -; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00 ; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FP16-T32: it eq -; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]] -; CHECK-SOFTFP-FP16-T32: it vs -; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]] -; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]] +; CHECK-SOFTFP-FP16-T32: itt eq +; CHECK-SOFTFP-FP16-T32-NEXT: movweq r1, +; CHECK-SOFTFP-FP16-T32-NEXT: movteq r1, +; CHECK-SOFTFP-FP16-T32-NEXT: itt vs +; CHECK-SOFTFP-FP16-T32-NEXT: movwvs r1, +; CHECK-SOFTFP-FP16-T32-NEXT: movtvs r1, +; CHECK-SOFTFP-FP16-T32-NEXT: uxth r0, r1 } ; 40. VSUB @@ -986,8 +994,8 @@ entry: ; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]] ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0 -; CHECK-HARDFP-VFP3: vmov r{{.}}, s0 -; CHECK-HARDFP-VFP3: vmov{{.*}}, s1 +; CHECK-HARDFP-VFP3: vmov r +; CHECK-HARDFP-VFP3: vmov.f32 s ; CHECK-HARDFP-VFP3: bl __aeabi_h2f ; CHECK-HARDFP-VFP3: bl __aeabi_h2f ; CHECK-HARDFP-VFP3: vsub.f32 diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index 69820850893efc..9c01129ff30d85 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -660,10 +660,8 @@ define void @test_maxnum(ptr %p, ptr %q) #0 { ; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216 ; CHECK-VFP: vcmp.f32 ; CHECK-VFP: vmrs -; CHECK-VFP: vmovlt.f32 +; CHECK-VFP: movge ; CHECK-NOVFP: bl __aeabi_fcmpge -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL: bl __aeabi_f2h define void @test_minimum(ptr %p) #0 { %a = load half, ptr %p, align 2 %c = fcmp ult half %a, 1.0 @@ -680,10 +678,8 @@ define void @test_minimum(ptr %p) #0 { ; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216 ; CHECK-VFP: vcmp.f32 ; CHECK-VFP: vmrs -; CHECK-VFP: vmovhi.f32 +; CHECK-VFP: movls ; CHECK-NOVFP: bl __aeabi_fcmple -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL: bl __aeabi_f2h define void @test_maximum(ptr %p) #0 { %a = load half, ptr %p, align 2 %c = fcmp ugt half %a, 1.0 @@ -692,45 +688,15 @@ define void @test_maximum(ptr %p) #0 { ret void } -; CHECK-FP16-LABEL: test_copysign: -; CHECK-FP16: ldrh r2, [r0] -; CHECK-FP16-NEXT: vmov.i32 d16, #0x80000000 -; CHECK-FP16-NEXT: ldrh r1, [r1] -; CHECK-FP16-NEXT: vmov s0, r2 -; CHECK-FP16-NEXT: vmov s2, r1 -; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-FP16-NEXT: vcvtb.f32.f16 s2, s2 -; CHECK-FP16-NEXT: vbit d0, d1, d16 -; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0 -; CHECK-FP16-NEXT: vmov r1, s0 -; CHECK-FP16-NEXT: strh r1, [r0] -; CHECK-FP16-NEXT: bx lr - -; CHECK-LIBCALL-LABEL: test_copysign: -; CHECK-LIBCALL-VFP: .fnstart -; CHECK-LIBCALL-VFP-NEXT: .save {r4, r5, r11, lr} -; CHECK-LIBCALL-VFP-NEXT: push {r4, r5, r11, lr} -; CHECK-LIBCALL-VFP-NEXT: .vsave {d8, d9} -; CHECK-LIBCALL-VFP-NEXT: vpush {d8, d9} -; CHECK-LIBCALL-VFP-NEXT: mov r5, r0 -; CHECK-LIBCALL-VFP-NEXT: ldrh r0, [r0] -; CHECK-LIBCALL-VFP-NEXT: mov r4, r1 -; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL-VFP: ldrh r1, [r4] -; CHECK-LIBCALL-VFP-NEXT: vmov s18, r0 -; CHECK-LIBCALL-VFP-NEXT: vmov.i32 d8, #0x80000000 -; CHECK-LIBCALL-VFP-NEXT: mov r0, r1 -; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL-VFP: vmov s0, r0 -; CHECK-LIBCALL-VFP-NEXT: vbif d0, d9, d8 -; CHECK-LIBCALL-VFP-NEXT: vmov r0, s0 -; CHECK-LIBCALL: bl __aeabi_f2h -; CHECK-LIBCALL-VFP: strh r0, [r5] -; CHECK-LIBCALL-VFP-NEXT: vpop {d8, d9} -; CHECK-LIBCALL-VFP-NEXT: pop {r4, r5, r11, pc} -; CHECK-NOVFP: and -; CHECK-NOVFP: bic -; CHECK-NOVFP: orr +; CHECK-ALL-LABEL: test_copysign: +; CHECK-ALL: ldrh r2, [r0] +; CHECK-ALL-NEXT: ldrh r1, [r1] +; CHECK-ALL-NEXT: and r1, r1, #32768 +; CHECK-ALL-NEXT: bfc r2, #15, #17 +; CHECK-ALL-NEXT: orr r1, r2, r1 +; CHECK-ALL-NEXT: strh r1, [r0] +; CHECK-ALL-NEXT: bx lr + define void @test_copysign(ptr %p, ptr %q) #0 { %a = load half, ptr %p, align 2 %b = load half, ptr %q, align 2 @@ -832,16 +798,23 @@ define void @test_round(ptr %p) { ; CHECK-FP16-LABEL: test_fmuladd: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 +; CHECK-FP16: vmul.f32 +; CHECK-FP16: vcvtb.f16.f32 +; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vmla.f32 +; CHECK-FP16: vadd.f32 ; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL-LABEL: test_fmuladd: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL-VFP: vmla.f32 +; CHECK-LIBCALL-VFP: vmul.f32 ; CHECK-NOVFP: bl __aeabi_fmul ; CHECK-LIBCALL: bl __aeabi_f2h +; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: vadd.f32 +; CHECK-NOVFP: bl __aeabi_fadd +; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fmuladd(ptr %p, ptr %q, ptr %r) #0 { %a = load half, ptr %p, align 2 %b = load half, ptr %q, align 2 @@ -858,41 +831,21 @@ define void @test_fmuladd(ptr %p, ptr %q, ptr %r) #0 { ; CHECK-ALL-LABEL: test_insertelement: ; CHECK-ALL: sub sp, sp, #8 -; CHECK-VFP: and -; CHECK-VFP: mov -; CHECK-VFP: ldrd -; CHECK-VFP: orr -; CHECK-VFP: ldrh -; CHECK-VFP: stm -; CHECK-VFP: strh -; CHECK-VFP: ldrh -; CHECK-VFP: ldrh -; CHECK-VFP: ldrh -; CHECK-VFP: ldrh -; CHECK-VFP: strh -; CHECK-VFP: strh -; CHECK-VFP: strh -; CHECK-VFP: strh - -; CHECK-NOVFP: ldrh -; CHECK-NOVFP: ldrh -; CHECK-NOVFP: ldrh -; CHECK-NOVFP: ldrh -; CHECK-NOVFP-DAG: strh -; CHECK-NOVFP-DAG: strh -; CHECK-NOVFP-DAG: mov -; CHECK-NOVFP-DAG: ldrh -; CHECK-NOVFP-DAG: orr -; CHECK-NOVFP-DAG: strh -; CHECK-NOVFP-DAG: strh -; CHECK-NOVFP-DAG: strh -; CHECK-NOVFP-DAG: ldrh -; CHECK-NOVFP-DAG: ldrh -; CHECK-NOVFP-DAG: ldrh -; CHECK-NOVFP-DAG: strh -; CHECK-NOVFP-DAG: strh -; CHECK-NOVFP-DAG: strh -; CHECK-NOVFP-DAG: strh +; CHECK-ALL-DAG: and +; CHECK-ALL-DAG: mov +; CHECK-ALL-DAG: ldrd +; CHECK-ALL-DAG: orr +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: stm +; CHECK-ALL: ldrh +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: strh +; CHECK-ALL: strh ; CHECK-ALL: add sp, sp, #8 define void @test_insertelement(ptr %p, ptr %q, i32 %i) #0 { @@ -904,24 +857,15 @@ define void @test_insertelement(ptr %p, ptr %q, i32 %i) #0 { } ; CHECK-ALL-LABEL: test_extractelement: -; CHECK-VFP: push {{{.*}}, lr} -; CHECK-VFP: sub sp, sp, #8 -; CHECK-VFP: ldrd -; CHECK-VFP: mov -; CHECK-VFP: orr -; CHECK-VFP: ldrh -; CHECK-VFP: strh -; CHECK-VFP: add sp, sp, #8 -; CHECK-VFP: pop {{{.*}}, pc} -; CHECK-NOVFP: ldrh -; CHECK-NOVFP: strh -; CHECK-NOVFP: ldrh -; CHECK-NOVFP: strh -; CHECK-NOVFP: ldrh -; CHECK-NOVFP: strh -; CHECK-NOVFP: ldrh -; CHECK-NOVFP: strh -; CHECK-NOVFP: ldrh +; CHECK-ALL: push {{{.*}}, lr} +; CHECK-ALL: sub sp, sp, #8 +; CHECK-ALL: ldrd +; CHECK-ALL: mov +; CHECK-ALL: orr +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: add sp, sp, #8 +; CHECK-ALL: pop {{{.*}}, pc} define void @test_extractelement(ptr %p, ptr %q, i32 %i) #0 { %a = load <4 x half>, ptr %q, align 8 %b = extractelement <4 x half> %a, i32 %i diff --git a/llvm/test/CodeGen/ARM/llvm.exp10.ll b/llvm/test/CodeGen/ARM/llvm.exp10.ll index 9e2688c988f765..eb72fe8c1e1b78 100644 --- a/llvm/test/CodeGen/ARM/llvm.exp10.ll +++ b/llvm/test/CodeGen/ARM/llvm.exp10.ll @@ -36,6 +36,8 @@ define <1 x half> @exp10_v1f16(<1 x half> %x) { ; CHECK-NEXT: bl __gnu_f2h_ieee ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: pop {r7, pc} %r = call <1 x half> @llvm.exp10.v1f16(<1 x half> %x) ret <1 x half> %r @@ -44,19 +46,26 @@ define <1 x half> @exp10_v1f16(<1 x half> %x) { define <2 x half> @exp10_v2f16(<2 x half> %x) { ; CHECK-LABEL: exp10_v2f16: ; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: strh.w r0, [sp, #6] ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: strh.w r0, [sp, #4] +; CHECK-NEXT: add r0, sp, #4 +; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: vmov.32 r1, d16[1] +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r4, pc} %r = call <2 x half> @llvm.exp10.v2f16(<2 x half> %x) ret <2 x half> %r } @@ -65,24 +74,27 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) { ; CHECK-LABEL: exp10_v3f16: ; CHECK: @ %bb.0: ; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: pkhbt r5, r0, r6, lsl #16 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: vmov d16, r5, r0 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: vmov.u16 r1, d16[1] +; CHECK-NEXT: vmov.u16 r2, d16[2] ; CHECK-NEXT: pop {r4, r5, r6, pc} %r = call <3 x half> @llvm.exp10.v3f16(<3 x half> %x) ret <3 x half> %r @@ -93,31 +105,34 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { ; CHECK: @ %bb.0: ; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r3 ; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: pkhbt r6, r0, r7, lsl #16 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: bl exp10f ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: pkhbt r0, r0, r5, lsl #16 +; CHECK-NEXT: vmov d16, r0, r6 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: vmov.u16 r1, d16[1] +; CHECK-NEXT: vmov.u16 r2, d16[2] +; CHECK-NEXT: vmov.u16 r3, d16[3] ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} %r = call <4 x half> @llvm.exp10.v4f16(<4 x half> %x) diff --git a/llvm/test/CodeGen/ARM/llvm.frexp.ll b/llvm/test/CodeGen/ARM/llvm.frexp.ll index 7dbaa639fa409b..e79ddbe93336ea 100644 --- a/llvm/test/CodeGen/ARM/llvm.frexp.ll +++ b/llvm/test/CodeGen/ARM/llvm.frexp.ll @@ -52,36 +52,36 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) { define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { ; CHECK-LABEL: test_frexp_v2f16_v2i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: bl __gnu_h2f_ieee -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: add r5, sp, #4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: vld1.32 {d8[0]}, [r5:32] +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __gnu_h2f_ieee -; CHECK-NEXT: add r4, sp, #4 +; CHECK-NEXT: add r4, sp, #8 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: mov r7, sp -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl frexpf -; CHECK-NEXT: vld1.32 {d8[0]}, [r7:32] -; CHECK-NEXT: vld1.32 {d8[1]}, [r4:32] ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: strh.w r0, [sp, #14] ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strh.w r0, [sp, #12] +; CHECK-NEXT: add r0, sp, #12 +; CHECK-NEXT: vld1.32 {d8[1]}, [r4:32] +; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] ; CHECK-NEXT: vmov r2, r3, d8 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: vmov.32 r1, d16[1] +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) ret { <2 x half>, <2 x i32> } %result } @@ -89,23 +89,28 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) { ; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_fract: ; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl __gnu_h2f_ieee -; CHECK-NEXT: mov r1, sp +; CHECK-NEXT: add r1, sp, #8 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: strh.w r0, [sp, #14] ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: add r1, sp, #4 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: strh.w r0, [sp, #12] +; CHECK-NEXT: add r0, sp, #12 +; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: vmov.32 r1, d16[1] +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: pop {r4, pc} %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) %result.0 = extractvalue { <2 x half>, <2 x i32> } %result, 0 ret <2 x half> %result.0 diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll index e52b78ca0ea2dd..0415c327d099f6 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll @@ -11,28 +11,36 @@ define half @test_v4f16_reassoc(<4 x half> %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r4, #255 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: orr r4, r4, #65280 +; CHECK-NEXT: mov r8, #255 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: orr r8, r8, #65280 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: and r0, r3, r4 +; CHECK-NEXT: and r0, r0, r8 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: and r0, r5, r4 -; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r7, r4 -; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r6, r4 +; CHECK-NEXT: and r0, r6, r8 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __aeabi_fadd +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_fadd -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r4, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_fadd ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} @@ -46,28 +54,36 @@ define half @test_v4f16_seq(<4 x half> %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r4, #255 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: orr r4, r4, #65280 +; CHECK-NEXT: mov r8, #255 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: orr r8, r8, #65280 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: and r0, r3, r4 +; CHECK-NEXT: and r0, r0, r8 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: and r0, r5, r4 -; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r7, r4 -; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r6, r4 +; CHECK-NEXT: and r0, r6, r8 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __aeabi_fadd +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_fadd -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r4, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_fadd ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll index 3541df933d0752..fe81324d6679bc 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll @@ -43,9 +43,6 @@ define half @test_v1f16_neutral(<1 x half> %a) nounwind { ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} ; CHECK-NEXT: bl __aeabi_f2h -; CHECK-NEXT: mov r1, #255 -; CHECK-NEXT: orr r1, r1, #65280 -; CHECK-NEXT: and r0, r0, r1 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a) diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll index 8cfcdbd3b4467d..d3518fe4686079 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll @@ -9,44 +9,41 @@ declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128>) define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-LABEL: test_v4f16: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: mov r9, #255 -; CHECK-NEXT: mov r8, r3 -; CHECK-NEXT: orr r9, r9, #65280 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: and r0, r0, r9 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r8, #255 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: orr r8, r8, #65280 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: and r0, r0, r8 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r5, r9 +; CHECK-NEXT: and r0, r6, r8 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: and r0, r6, r9 -; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: bl fmaxf +; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: movne r5, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: and r0, r8, r9 -; CHECK-NEXT: moveq r5, r6 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl fmaxf +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r4, r8 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_fcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r4 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: bl fmaxf ; CHECK-NEXT: bl __aeabi_f2h -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr %b = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %b diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll index 70c569e4f4781a..14644e00c94b09 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll @@ -9,44 +9,41 @@ declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128>) define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK-LABEL: test_v4f16: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: mov r9, #255 -; CHECK-NEXT: mov r8, r3 -; CHECK-NEXT: orr r9, r9, #65280 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: and r0, r0, r9 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r8, #255 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: orr r8, r8, #65280 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: and r0, r0, r8 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r5, r9 +; CHECK-NEXT: and r0, r6, r8 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: and r0, r6, r9 -; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: bl fminf +; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: movne r5, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: and r0, r8, r9 -; CHECK-NEXT: moveq r5, r6 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl fminf +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r4, r8 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_fcmplt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r4 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: bl fminf ; CHECK-NEXT: bl __aeabi_f2h -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr %b = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) ret half %b diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll index b3334c43ef58ff..1416fa9033f3b1 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll @@ -11,28 +11,36 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r4, #255 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: orr r4, r4, #65280 +; CHECK-NEXT: mov r8, #255 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: orr r8, r8, #65280 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: and r0, r3, r4 +; CHECK-NEXT: and r0, r0, r8 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: and r0, r5, r4 -; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r7, r4 -; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r6, r4 +; CHECK-NEXT: and r0, r6, r8 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __aeabi_fmul +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_fmul -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r4, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_fmul ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll index d2476ab3f528f9..bd6f234ad48eca 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll @@ -16,9 +16,6 @@ define half @test_v1f16(<1 x half> %a) nounwind { ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} ; CHECK-NEXT: bl __aeabi_f2h -; CHECK-NEXT: mov r1, #255 -; CHECK-NEXT: orr r1, r1, #65280 -; CHECK-NEXT: and r0, r0, r1 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr %b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 1.0, <1 x half> %a) diff --git a/llvm/test/CodeGen/Thumb2/mve-vabd.ll b/llvm/test/CodeGen/Thumb2/mve-vabd.ll index f209a76d82e804..8d52fe52d9360f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vabd.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vabd.ll @@ -63,27 +63,31 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-MVE-NEXT: mov r4, r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q1[1] +; CHECK-MVE-NEXT: vmov.u16 r0, q1[0] ; CHECK-MVE-NEXT: vmov q5, q1 ; CHECK-MVE-NEXT: vmov q4, q0 ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r5, r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q4[1] +; CHECK-MVE-NEXT: vmov.u16 r0, q4[0] ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub +; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bl __aeabi_h2f +; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 +; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: mov r5, r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q5[0] +; CHECK-MVE-NEXT: vmov.u16 r0, q5[1] ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r6, r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q4[0] +; CHECK-MVE-NEXT: vmov.u16 r0, q4[1] ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r6 ; CHECK-MVE-NEXT: bl __aeabi_fsub -; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 ; CHECK-MVE-NEXT: bl __aeabi_f2h -; CHECK-MVE-NEXT: vmov.16 q6[0], r0 -; CHECK-MVE-NEXT: bic r0, r5, #-2147483648 +; CHECK-MVE-NEXT: vmov.16 q6[0], r5 +; CHECK-MVE-NEXT: bl __aeabi_h2f +; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 ; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: vmov.16 q6[1], r0 ; CHECK-MVE-NEXT: vmov.u16 r0, q5[2] @@ -93,6 +97,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub +; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 ; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: vmov.16 q6[2], r0 @@ -103,6 +109,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub +; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 ; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: vmov.16 q6[3], r0 @@ -113,6 +121,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub +; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 ; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: vmov.16 q6[4], r0 @@ -123,6 +133,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub +; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 ; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: vmov.16 q6[5], r0 @@ -133,6 +145,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub +; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 ; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: vmov.16 q6[6], r0 @@ -143,6 +157,8 @@ define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) { ; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: mov r1, r5 ; CHECK-MVE-NEXT: bl __aeabi_fsub +; CHECK-MVE-NEXT: bl __aeabi_f2h +; CHECK-MVE-NEXT: bl __aeabi_h2f ; CHECK-MVE-NEXT: bic r0, r0, #-2147483648 ; CHECK-MVE-NEXT: bl __aeabi_f2h ; CHECK-MVE-NEXT: vmov.16 q6[7], r0