-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[CodeGen] Add 2 subtarget hooks canLowerToZeroCycleReg[Move|Zeroing] #148428
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -673,3 +673,83 @@ bool AArch64Subtarget::isX16X17Safer() const { | |
bool AArch64Subtarget::enableMachinePipeliner() const { | ||
return getSchedModel().hasInstrSchedModel(); | ||
} | ||
|
||
bool AArch64Subtarget::isRegInClass(const MachineInstr &MI, const Register &Reg, | ||
const TargetRegisterClass *TRC) const { | ||
if (Reg.isPhysical()) { | ||
return TRC->contains(Reg); | ||
} | ||
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); | ||
return TRC->hasSubClassEq(MRI.getRegClass(Reg)); | ||
} | ||
|
||
/// NOTE: must maintain consistency with `AArch64InstrInfo::copyPhysReg`. | ||
bool AArch64Subtarget::canLowerToZeroCycleRegMove( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Functional changes should come with a test. If that's in one of the follow-up commits, you should probably squash it into this one. |
||
const MachineInstr &CopyMI, const Register &DestReg, | ||
const Register &SrcReg) const { | ||
if (isRegInClass(CopyMI, DestReg, &AArch64::GPR32allRegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::GPR32allRegClass) && | ||
DestReg != AArch64::WZR) { | ||
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP || | ||
SrcReg != AArch64::WZR || !hasZeroCycleZeroingGP()) { | ||
return hasZeroCycleRegMoveGPR64() || hasZeroCycleRegMoveGPR32(); | ||
} | ||
return false; | ||
} | ||
|
||
if (isRegInClass(CopyMI, DestReg, &AArch64::GPR64allRegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::GPR64allRegClass) && | ||
DestReg != AArch64::XZR) { | ||
if (DestReg == AArch64::SP || SrcReg == AArch64::SP || | ||
SrcReg != AArch64::XZR || !hasZeroCycleZeroingGP()) { | ||
return hasZeroCycleRegMoveGPR64(); | ||
} | ||
return false; | ||
} | ||
|
||
if (isRegInClass(CopyMI, DestReg, &AArch64::FPR128RegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::FPR128RegClass)) { | ||
return isNeonAvailable() && hasZeroCycleRegMoveFPR128(); | ||
} | ||
|
||
if (isRegInClass(CopyMI, DestReg, &AArch64::FPR64RegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::FPR64RegClass)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't use |
||
return hasZeroCycleRegMoveFPR64(); | ||
} | ||
|
||
if (isRegInClass(CopyMI, DestReg, &AArch64::FPR32RegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::FPR32RegClass)) { | ||
return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64(); | ||
} | ||
|
||
if (isRegInClass(CopyMI, DestReg, &AArch64::FPR16RegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::FPR16RegClass)) { | ||
return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64(); | ||
} | ||
|
||
if (isRegInClass(CopyMI, DestReg, &AArch64::FPR8RegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::FPR8RegClass)) { | ||
return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64(); | ||
} | ||
|
||
return false; | ||
} | ||
|
||
/// NOTE: must maintain consistency with `AArch64InstrInfo::copyPhysReg`. | ||
bool AArch64Subtarget::canLowerToZeroCycleRegZeroing( | ||
const MachineInstr &CopyMI, const Register &DestReg, | ||
const Register &SrcReg) const { | ||
if (isRegInClass(CopyMI, DestReg, &AArch64::GPR32allRegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::GPR32allRegClass) && | ||
DestReg != AArch64::WZR) { | ||
return AArch64::WZR == SrcReg && hasZeroCycleZeroingGP(); | ||
} | ||
|
||
if (isRegInClass(CopyMI, DestReg, &AArch64::GPR64allRegClass) && | ||
isRegInClass(CopyMI, SrcReg, &AArch64::GPR64allRegClass) && | ||
DestReg != AArch64::XZR) { | ||
return AArch64::XZR == SrcReg && hasZeroCycleZeroingGP(); | ||
} | ||
|
||
return false; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you avoid adding new hooks for this? Isn't this inferable from the sched model? Plus plenty of places essentially treat copy as free anyway (e.g. isTransient)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The patch series creates a cooperation between the register coalescer and post RA
AArch64::copyPhysReg
.AArch64::copyPhysReg
contains the logic that lowers to zero cycle instructions depending on subtarget features. Here, we almost replicate this logic for use at the higher level of the register coalescer to carefully check when to prevent remat. The sched model would have to depend on subtarget features, similarly (unless we try to generalize each part of the logic and combine it in the sched model itself, which can be an unnecessary complication). Currently the sched model is old and doesnt have the needed logic.Also, this patch series targets specifically the register coalescer, and not other places where copies are considered free.