diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8c2f85657ff87e..5ac5b7f8a5ab18 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18023,6 +18023,23 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
     return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
   }
 
+  // We do not need to fold when this shift is used in a specific load case:
+  // (ldr x, (add x, (shl (srl x, c1) 2)))
+  if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
+    if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+      unsigned ShlAmt = C2->getZExtValue();
+      if (auto ShouldADD = *N->use_begin();
+          ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
+        if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->use_begin())) {
+          unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
+          if ((1ULL << ShlAmt) == ByteVT &&
+              isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
+            return false;
+        }
+      }
+    }
+  }
+
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
new file mode 100644
index 00000000000000..9dfc8df703ce64
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+;
+
+define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr63:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 63
+  %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+  %0 = load i16, ptr %arrayidx, align 2
+  ret i16 %0
+}
+
+define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 2
+  %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+  %0 = load i16, ptr %arrayidx, align 2
+  ret i16 %0
+}
+
+define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 1
+  %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+  %0 = load i16, ptr %arrayidx, align 2
+  ret i16 %0
+}
+
+define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr63:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 63
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 2
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 1
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr63:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 63
+  %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}
+
+define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 2
+  %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}
+
+define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 1
+  %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}