Skip to content

Commit

Permalink
[AIE2P] Improve RegbankSelect handling for load/store offset and po…
Browse files Browse the repository at this point in the history
…st-increment addressing modes
  • Loading branch information
Abnikant Singh committed Feb 14, 2025
1 parent 439696f commit 6e26d97
Show file tree
Hide file tree
Showing 8 changed files with 1,768 additions and 249 deletions.
295 changes: 223 additions & 72 deletions llvm/lib/Target/AIE/aie2p/AIE2PRegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,32 +255,123 @@ AIE2PRegisterBankInfo::getInstrAlternativeMappings(
const MachineRegisterInfo &MRI = MF.getRegInfo();
switch (MI.getOpcode()) {
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
case AIE2P::G_AIE_OFFSET_LOAD:
case AIE2P::G_AIE_POSTINC_LOAD:
case AIE2P::G_AIE_POSTINC_2D_LOAD:
case AIE2P::G_AIE_POSTINC_3D_LOAD: {
const unsigned NumOperands = MI.getNumOperands();
const unsigned FirstSrcIdx = MI.getNumExplicitDefs();
unsigned MappingID = 1;
unsigned const Cost = 1;

Register DstReg = MI.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
unsigned Size = getSizeInBits(DstReg, MRI, TRI);
// Base Alternative Mapping for size <= 256.
if (Size <= 256)
break;
const LLT DstRegTy = MRI.getType(DstReg);
const unsigned DstSize = getSizeInBits(DstReg, MRI, TRI);

// If the instruction has any implicit-defs or uses,
// do not mess with it.
if (MI.getNumOperands() != 2)
// Base alternative mapping for sizes up to 256.
// For 2048-bit sizes, mapping is restricted to the accumulator bank.
if (DstSize <= 256 || DstSize == 2048)
break;

// Prepare the three mapping indices.
std::vector<AIEBaseRegisterBankInfo::PartialMappingIdx> MappingIndices = {
getVecPartialMappingIdx(DstRegTy), getAccPartialMappingIdx(DstRegTy),
getFifoPartialMappingIdx(DstRegTy)};

// Initialize the operand vectors.
// Set all operand mapping indices to PMI_MOD by default.
// Set all operand sizes to 20 by default.
SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands, PMI_MOD);
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
SmallVector<unsigned, 4> OpSize(NumOperands, 20);

// Operand[0]- vector operand.
OpSize[0] = DstSize;
for (unsigned I = 1; I < FirstSrcIdx; ++I)
OpRegBankIdx[I] = (I == 1) ? PMI_PTR : PMI_MOD;

// Assign PMI_PTR at FirstSrcIdx
OpRegBankIdx[FirstSrcIdx] = PMI_PTR;

InstructionMappings AltMappings;
const InstructionMapping &VRegMapping = getInstructionMapping(
/*ID*/ 1, /*Cost*/ 1,
getOperandsMapping({getValueMapping(getPartialMappingIdx(Ty), Size),
getValueMapping(PMI_PTR, 20)}),
/*NumOperands*/ 2);
const InstructionMapping &AccRegMapping = getInstructionMapping(
/*ID*/ 2, /*Cost*/ 1,
getOperandsMapping({getValueMapping(getAccPartialMappingIdx(Ty), Size),
getValueMapping(PMI_PTR, 20)}),
/*NumOperands*/ 2);
// For each mapping index in MappingIndices, override operand 0
// and build an alternative instruction mapping.
for (const auto &VecIdx : MappingIndices) {
OpRegBankIdx[0] =
static_cast<AIEBaseRegisterBankInfo::PartialMappingIdx>(VecIdx);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
if (!Ty.isValid())
continue;
OpdsMapping[Idx] = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
}
}
const InstructionMapping &InsnMapping = getInstructionMapping(
MappingID++, Cost, getOperandsMapping(OpdsMapping), NumOperands);
AltMappings.push_back(&InsnMapping);
}
return AltMappings;
}
case TargetOpcode::G_STORE:
case AIE2P::G_AIE_OFFSET_STORE:
case AIE2P::G_AIE_POSTINC_STORE:
case AIE2P::G_AIE_POSTINC_2D_STORE:
case AIE2P::G_AIE_POSTINC_3D_STORE: {
const unsigned NumOperands = MI.getNumOperands();
// Select the operand index for VecReg.
const unsigned VecRegOpIdx = MI.getNumExplicitDefs();
unsigned MappingID = 1;
unsigned const Cost = 1;

AltMappings.push_back(&VRegMapping);
AltMappings.push_back(&AccRegMapping);
Register VecReg = MI.getOperand(VecRegOpIdx).getReg();
const LLT VecRegTy = MRI.getType(VecReg);
const unsigned VecRegSize = getSizeInBits(VecReg, MRI, TRI);

// Base alternative mapping for sizes up to 256.
// For 2048-bit sizes, mapping is restricted to the accumulator bank.
// Also, if the instruction has any implicit defs/uses, leave it alone.
if (VecRegSize <= 256 || VecRegSize == 2048 ||
MI.getNumOperands() != MI.getNumExplicitOperands())
break;

// Prepare the three mapping indices.
std::vector<AIEBaseRegisterBankInfo::PartialMappingIdx> MappingIndices = {
getVecPartialMappingIdx(VecRegTy), getAccPartialMappingIdx(VecRegTy),
getFifoPartialMappingIdx(VecRegTy)};

// Initialize the operand vectors.
// Set all operand mapping indices to PMI_MOD by default.
// Set all operand sizes to 20 by default.
SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands, PMI_MOD);
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
SmallVector<unsigned, 4> OpSize(NumOperands, 20);

// Initialize the operands that precede VecRegOpIdx.
for (unsigned I = 0; I < VecRegOpIdx; ++I) {
OpRegBankIdx[I] = (I == 0) ? PMI_PTR : PMI_MOD;
}

// Assign PMI_PTR to the next operand if within bounds
assert(((VecRegOpIdx + 1) < NumOperands) && "RegOpIdx + 1 out of bound");
OpRegBankIdx[VecRegOpIdx + 1] = PMI_PTR;

InstructionMappings AltMappings;
for (const auto &VecIdx : MappingIndices) {
OpRegBankIdx[VecRegOpIdx] =
static_cast<AIEBaseRegisterBankInfo::PartialMappingIdx>(VecIdx);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
if (!Ty.isValid())
continue;
OpdsMapping[Idx] = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
}
}
const InstructionMapping &InsnMapping = getInstructionMapping(
MappingID++, Cost, getOperandsMapping(OpdsMapping), NumOperands);
AltMappings.push_back(&InsnMapping);
}
return AltMappings;
}
case TargetOpcode::G_CONCAT_VECTORS: {
Expand Down Expand Up @@ -717,8 +808,13 @@ static bool isUsedAsAccRegInInstr(const MachineInstr &MI,
return true;
break;
}
case TargetOpcode::G_STORE: {
auto *RB = RBI->getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
case TargetOpcode::G_STORE:
case AIE2P::G_AIE_OFFSET_STORE:
case AIE2P::G_AIE_POSTINC_STORE:
case AIE2P::G_AIE_POSTINC_2D_STORE:
case AIE2P::G_AIE_POSTINC_3D_STORE: {
const unsigned OpIdx = MI.getNumExplicitDefs();
auto *RB = RBI->getRegBank(MI.getOperand(OpIdx).getReg(), MRI, TRI);
if (RB == &AIE2P::AccRegBank)
return true;
break;
Expand Down Expand Up @@ -750,8 +846,13 @@ static bool isUsedAsFifoRegInInstr(const MachineInstr &MI,
return true;
break;
}
case TargetOpcode::G_STORE: {
auto *RB = RBI->getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
case TargetOpcode::G_STORE:
case AIE2P::G_AIE_OFFSET_STORE:
case AIE2P::G_AIE_POSTINC_STORE:
case AIE2P::G_AIE_POSTINC_2D_STORE:
case AIE2P::G_AIE_POSTINC_3D_STORE: {
const unsigned OpIdx = MI.getNumExplicitDefs();
auto *RB = RBI->getRegBank(MI.getOperand(OpIdx).getReg(), MRI, TRI);
if (RB == &AIE2P::FifoRegBank)
return true;
break;
Expand Down Expand Up @@ -810,6 +911,22 @@ AIE2PRegisterBankInfo::getPreferredRegBankForVectorTy(
return std::nullopt;
}

// Selects the register of DefMI that should be traced when determining the
// register bank mapping.
//
// For the AIE offset and post-increment store opcodes (plain, 2D and 3D
// variants) the candidate is the operand at index getNumExplicitDefs() + 1,
// i.e. the operand right after the first explicit non-def operand. For every
// other opcode the candidate is operand 0.
const Register getUseCandidatePointer(const MachineInstr &DefMI) {
  // Operand 0 is the candidate unless the opcode is one of the stores below.
  unsigned CandidateOpIdx = 0;

  switch (DefMI.getOpcode()) {
  case AIE2P::G_AIE_OFFSET_STORE:
  case AIE2P::G_AIE_POSTINC_STORE:
  case AIE2P::G_AIE_POSTINC_2D_STORE:
  case AIE2P::G_AIE_POSTINC_3D_STORE:
    // Skip all explicit defs plus the first explicit use.
    CandidateOpIdx = DefMI.getNumExplicitDefs() + 1;
    break;
  default:
    break;
  }

  return DefMI.getOperand(CandidateOpIdx).getReg();
}

const RegisterBankInfo::InstructionMapping &
AIE2PRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
Expand Down Expand Up @@ -971,74 +1088,108 @@ AIE2PRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
return AIEBaseRegisterBankInfo::getInstrMapping(MI);
}
case TargetOpcode::G_LOAD: {
case TargetOpcode::G_LOAD:
case AIE2P::G_AIE_OFFSET_LOAD:
case AIE2P::G_AIE_POSTINC_LOAD:
case AIE2P::G_AIE_POSTINC_2D_LOAD:
case AIE2P::G_AIE_POSTINC_3D_LOAD: {
bool isAccRegMapping = false;
bool isFifoPhysRegMapping = false;

// Check if that load feeds acc or fifo instructions.
Register UseCandidate = MI.getOperand(0).getReg();
LLT Type = MRI.getType(UseCandidate);

MachineMemOperand *MMO = *MI.memoperands_begin();
const unsigned MemSize = 8 * MMO->getSize().getValue();
// Size of accumulator and fifo vectors on aie2p >= 512.
// for MemSize < 512, fallback to base instruction mapping.
if (MemSize < 512)
return AIEBaseRegisterBankInfo::getInstrMapping(MI);

// Check if we already know the register bank.
auto *RB = getRegBank(UseCandidate, MRI, TRI);
if (RB == &AIE2P::AccRegBank || RB == &AIE2P::FifoRegBank)
if (RB == &AIE2P::AccRegBank || RB == &AIE2P::FifoRegBank ||
RB == &AIE2P::VRegBank)
return AIEBaseRegisterBankInfo::getInstrMapping(MI);
auto PreferredRegBank =
getPreferredRegBankForVectorTy(MRI, TRI, UseCandidate);
if (PreferredRegBank && &AIE2P::AccRegBank == *PreferredRegBank)
isAccRegMapping = true;
else if (PreferredRegBank && &AIE2P::FifoRegBank == *PreferredRegBank)
isFifoPhysRegMapping = true;
// size of accu and fifo vector on aie2p >= 512.
MachineMemOperand *MMO = *MI.memoperands_begin();
const unsigned MemSize = 8 * MMO->getSize().getValue();
// Handle the example case as below
/* %2:_(p0) = G_FRAME_INDEX %stack.0
G_STORE %1(<32 x s32>), %2(p0)
%4:_(<32 x s32>) = G_LOAD %2(p0) */
if (MemSize >= 512) {
Register PtrReg = MI.getOperand(1).getReg();
MachineInstr *DefMI = MRI.getVRegDef(PtrReg);
// e.g. UseCandidate:_(p0) = G_FRAME_INDEX %stack.0
UseCandidate = DefMI->getOperand(0).getReg();
Type = MRI.getType(MI.getOperand(0).getReg());
}

PreferredRegBank = getPreferredRegBankForVectorTy(MRI, TRI, UseCandidate);
if (PreferredRegBank && &AIE2P::AccRegBank == *PreferredRegBank)
isAccRegMapping = true;
else if (PreferredRegBank && &AIE2P::FifoRegBank == *PreferredRegBank)
isFifoPhysRegMapping = true;
if (isAccRegMapping) {
OpRegBankIdx[0] = getAccPartialMappingIdx(Type);
OpRegBankIdx[1] = PMI_PTR;
return AIEBaseRegisterBankInfo::getInstrMappingFinal(MI, Cost, OpSize,
OpRegBankIdx);
}
if (isFifoPhysRegMapping) {
OpRegBankIdx[0] = getFifoPartialMappingIdx(Type);
OpRegBankIdx[1] = PMI_PTR;

// First source Idx (pointer reg)
const unsigned FirstSrcIdx = MI.getNumExplicitDefs();

if (isAccRegMapping || isFifoPhysRegMapping) {
// Determine the appropriate mapping index
const unsigned MappingIdx = isAccRegMapping
? getAccPartialMappingIdx(Type)
: getFifoPartialMappingIdx(Type);

// Set all operand mapping indices to PMI_MOD by default.
// Set all operand sizes to 20 by default.
OpSize.assign(NumOperands, 20);
OpRegBankIdx.assign(NumOperands, PMI_MOD);
// Set up vector operand indices
OpRegBankIdx[0] =
static_cast<AIEBaseRegisterBankInfo::PartialMappingIdx>(MappingIdx);
// Assign PMI_PTR and PMI_MOD based on FirstSrcIdx
for (unsigned I = 1; I < FirstSrcIdx; ++I) {
OpRegBankIdx[I] = (I == 1) ? PMI_PTR : PMI_MOD;
}
// Assign PMI_PTR at FirstSrcIdx
OpRegBankIdx[FirstSrcIdx] = PMI_PTR;
return AIEBaseRegisterBankInfo::getInstrMappingFinal(MI, Cost, OpSize,
OpRegBankIdx);
}
return AIEBaseRegisterBankInfo::getInstrMapping(MI);
}
case TargetOpcode::G_STORE: {
// Check if the store is fed by acc or fifo instructions.
Register VReg = MI.getOperand(0).getReg();
if (!VReg)
break;
auto DefRegBank = getRegBank(VReg, MRI, TRI);
if (DefRegBank == &AIE2P::AccRegBank) {
OpRegBankIdx[0] = getAccPartialMappingIdx(MRI.getType(VReg));
OpRegBankIdx[1] = PMI_PTR;
return AIEBaseRegisterBankInfo::getInstrMappingFinal(MI, Cost, OpSize,
OpRegBankIdx);
}
if (DefRegBank == &AIE2P::FifoRegBank) {
OpRegBankIdx[0] = getFifoPartialMappingIdx(MRI.getType(VReg));
OpRegBankIdx[1] = PMI_PTR;
return AIEBaseRegisterBankInfo::getInstrMappingFinal(MI, Cost, OpSize,
OpRegBankIdx);
case TargetOpcode::G_STORE:
case AIE2P::G_AIE_OFFSET_STORE:
case AIE2P::G_AIE_POSTINC_STORE:
case AIE2P::G_AIE_POSTINC_2D_STORE:
case AIE2P::G_AIE_POSTINC_3D_STORE: {
// Select the operand index for VecReg.
const unsigned VecRegOpIdx = MI.getNumExplicitDefs();
Register VecReg = MI.getOperand(VecRegOpIdx).getReg();
// Size of accumulator and fifo vectors on aie2p >= 512.
// for VecSize < 512, fallback to base instruction mapping.
if (MRI.getType(VecReg).getSizeInBits() < 512)
return AIEBaseRegisterBankInfo::getInstrMapping(MI);

auto *DefRegBank = getRegBank(VecReg, MRI, TRI);
// Only process if VecReg is defined in Accumulator or Fifo register banks.
if (DefRegBank != &AIE2P::AccRegBank && DefRegBank != &AIE2P::FifoRegBank)
return AIEBaseRegisterBankInfo::getInstrMapping(MI);

// Compute the partial mapping index based on the register bank.
const unsigned MappingIdx =
(DefRegBank == &AIE2P::AccRegBank)
? getAccPartialMappingIdx(MRI.getType(VecReg))
: getFifoPartialMappingIdx(MRI.getType(VecReg));

// Set all operand mapping indices to PMI_MOD by default.
// Set all operand sizes to 20 by default.
OpSize.assign(NumOperands, 20);
OpRegBankIdx.assign(NumOperands, PMI_MOD);

// Initialize the operands that precede VecRegOpIdx.
for (unsigned I = 0; I < VecRegOpIdx; ++I) {
OpRegBankIdx[I] = (I == 0) ? PMI_PTR : PMI_MOD;
}
return AIEBaseRegisterBankInfo::getInstrMapping(MI);
OpRegBankIdx[VecRegOpIdx] =
static_cast<AIEBaseRegisterBankInfo::PartialMappingIdx>(MappingIdx);

// Assign PMI_PTR to the next operand if within bounds
assert(((VecRegOpIdx + 1) < NumOperands) && "RegOpIdx + 1 out of bounds");
OpRegBankIdx[VecRegOpIdx + 1] = PMI_PTR;

return AIEBaseRegisterBankInfo::getInstrMappingFinal(MI, Cost, OpSize,
OpRegBankIdx);
}
default:
// Base class implementation for others
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2 -run-pass=regbankselect -regbankselect-fast %s -verify-machineinstrs -o - | FileCheck %s
# RUN: llc -mtriple aie2 -run-pass=regbankselect -regbankselect-greedy %s -verify-machineinstrs -o - | FileCheck %s
# RUN: llc -mtriple aie2p -run-pass=regbankselect -regbankselect-fast %s -verify-machineinstrs -o - | FileCheck %s
# RUN: llc -mtriple aie2p -run-pass=regbankselect -regbankselect-greedy %s -verify-machineinstrs -o - | FileCheck %s

---
name: offset-load-s32
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2 -run-pass=regbankselect -regbankselect-fast %s -verify-machineinstrs -o - | FileCheck %s
# RUN: llc -mtriple aie2 -run-pass=regbankselect -regbankselect-greedy %s -verify-machineinstrs -o - | FileCheck %s
# RUN: llc -mtriple aie2p -run-pass=regbankselect -regbankselect-fast %s -verify-machineinstrs -o - | FileCheck %s
# RUN: llc -mtriple aie2p -run-pass=regbankselect -regbankselect-greedy %s -verify-machineinstrs -o - | FileCheck %s

---
name: offset-store
Expand Down
Loading

0 comments on commit 6e26d97

Please sign in to comment.