Skip to content

Commit

Permalink
[AArch64] Fold more load.x into load.i with large offset
Browse files Browse the repository at this point in the history
The list of load.x opcodes is taken from canFoldIntoAddrMode in D152828.
Also support LDRSroX, which was missed in canFoldIntoAddrMode.
  • Loading branch information
vfdff committed Aug 15, 2024
1 parent 3319049 commit 43ffe2e
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 53 deletions.
13 changes: 13 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4521,7 +4521,20 @@ AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
case AArch64::LDRBroX:
case AArch64::LDRBBroX:
case AArch64::LDRSBXroX:
case AArch64::LDRSBWroX:
case AArch64::LDRHroX:
case AArch64::LDRHHroX:
case AArch64::LDRSHXroX:
case AArch64::LDRSHWroX:
case AArch64::LDRWroX:
case AArch64::LDRSroX:
case AArch64::LDRSWroX:
case AArch64::LDRDroX:
case AArch64::LDRXroX:
case AArch64::LDRQroX:
return MI.getOperand(4);
}
}
Expand Down
51 changes: 49 additions & 2 deletions llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,12 +509,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
}

/// Returns the `ui` load opcode that is paired with the `roX` load opcode
/// \p Opc. Each supported `roX` opcode maps to the `ui` opcode sharing its
/// mnemonic prefix (e.g. LDRBroX -> LDRBui). Passing any opcode that is not
/// listed below is a programming error and hits llvm_unreachable.
static unsigned getBaseAddressOpcode(unsigned Opc) {
  // TODO: Add more index address stores.
  struct OpcodeMapping {
    unsigned IndexedOpc; // `roX` opcode supplied by the caller.
    unsigned BaseOpc;    // Matching `ui` opcode handed back.
  };
  static const OpcodeMapping Mappings[] = {
      {AArch64::LDRBroX, AArch64::LDRBui},
      {AArch64::LDRBBroX, AArch64::LDRBBui},
      {AArch64::LDRSBXroX, AArch64::LDRSBXui},
      {AArch64::LDRSBWroX, AArch64::LDRSBWui},
      {AArch64::LDRHroX, AArch64::LDRHui},
      {AArch64::LDRHHroX, AArch64::LDRHHui},
      {AArch64::LDRSHXroX, AArch64::LDRSHXui},
      {AArch64::LDRSHWroX, AArch64::LDRSHWui},
      {AArch64::LDRWroX, AArch64::LDRWui},
      {AArch64::LDRSroX, AArch64::LDRSui},
      {AArch64::LDRSWroX, AArch64::LDRSWui},
      {AArch64::LDRDroX, AArch64::LDRDui},
      {AArch64::LDRXroX, AArch64::LDRXui},
      {AArch64::LDRQroX, AArch64::LDRQui},
  };

  // Linear scan over the small fixed table; first (and only) match wins.
  for (const OpcodeMapping &Entry : Mappings)
    if (Entry.IndexedOpc == Opc)
      return Entry.BaseOpc;

  llvm_unreachable("Opcode has no base address equivalent!");
}

Expand Down Expand Up @@ -766,10 +792,31 @@ static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
default:
return false;
// Scaled instructions.
// TODO: Add more index address loads/stores.
// TODO: Add more index address stores.
case AArch64::LDRBroX:
case AArch64::LDRBBroX:
case AArch64::LDRSBXroX:
case AArch64::LDRSBWroX:
Scale = 1;
return true;
case AArch64::LDRHroX:
case AArch64::LDRHHroX:
case AArch64::LDRSHXroX:
case AArch64::LDRSHWroX:
Scale = 2;
return true;
case AArch64::LDRWroX:
case AArch64::LDRSroX:
case AArch64::LDRSWroX:
Scale = 4;
return true;
case AArch64::LDRDroX:
case AArch64::LDRXroX:
Scale = 8;
return true;
case AArch64::LDRQroX:
Scale = 16;
return true;
}
}

Expand Down
85 changes: 34 additions & 51 deletions llvm/test/CodeGen/AArch64/arm64-addrmode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a) {
define i32 @LdOffset_i8_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -266,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a) {
define i64 @LdOffset_i8_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb x0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -280,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a) {
define i16 @LdOffset_i16(ptr %a) {
; CHECK-LABEL: LdOffset_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -293,9 +290,8 @@ define i16 @LdOffset_i16(ptr %a) {
define i32 @LdOffset_i16_zext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -307,9 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a) {
define i32 @LdOffset_i16_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -321,9 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a) {
define i64 @LdOffset_i16_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -335,9 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a) {
define i64 @LdOffset_i16_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh x0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -349,9 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a) {
define i32 @LdOffset_i32(ptr %a) {
; CHECK-LABEL: LdOffset_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr w0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 4
Expand All @@ -362,9 +354,8 @@ define i32 @LdOffset_i32(ptr %a) {
define i64 @LdOffset_i32_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_zext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr w0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
Expand All @@ -376,9 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a) {
define i64 @LdOffset_i32_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldrsw x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldrsw x0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
Expand All @@ -390,9 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a) {
define i64 @LdOffset_i64(ptr %a) {
; CHECK-LABEL: LdOffset_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr x0, [x8, #29632]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
%val = load i64, ptr %arrayidx, align 4
Expand All @@ -403,9 +392,8 @@ define i64 @LdOffset_i64(ptr %a) {
define <2 x i32> @LdOffset_v2i32(ptr %a) {
; CHECK-LABEL: LdOffset_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr d0, [x8, #29632]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
%val = load <2 x i32>, ptr %arrayidx, align 4
Expand All @@ -416,9 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a) {
define <2 x i64> @LdOffset_v2i64(ptr %a) {
; CHECK-LABEL: LdOffset_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #59264 // =0xe780
; CHECK-NEXT: movk w8, #253, lsl #16
; CHECK-NEXT: ldr q0, [x0, x8]
; CHECK-NEXT: add x8, x0, #4048, lsl #12 // =16580608
; CHECK-NEXT: ldr q0, [x8, #59264]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
%val = load <2 x i64>, ptr %arrayidx, align 4
Expand All @@ -429,9 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a) {
define double @LdOffset_i8_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb w8, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb w8, [x8, #3704]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
Expand All @@ -444,9 +430,8 @@ define double @LdOffset_i8_f64(ptr %a) {
define double @LdOffset_i16_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh w8, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh w8, [x8, #7408]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
Expand All @@ -459,9 +444,8 @@ define double @LdOffset_i16_f64(ptr %a) {
define double @LdOffset_i32_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr s0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr s0, [x8, #14816]
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
Expand All @@ -474,9 +458,8 @@ define double @LdOffset_i32_f64(ptr %a) {
define double @LdOffset_i64_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i64_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr d0, [x8, #29632]
; CHECK-NEXT: scvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
Expand Down

0 comments on commit 43ffe2e

Please sign in to comment.