From 1a0ccf8bef63d403736132ffc981dd8c135be5e3 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 3 Jun 2022 00:35:44 -0700 Subject: [PATCH 1/3] [mlgo] Add instruction features to register eviction In preparation to work on a register eviction model that takes advantage of instruction-based features, this commit adds a very basic instruction feature, just taking all of the def/use instructions for the live range under analysis and putting them into a fixed-length tensor (ModelMaxSupportedInstructionCount entries), cropping and padding as needed due to limitations in libtensorflow. --- llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 49 ++++++++++++++++++--- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index 7daf9025d30362..d6f61150c7fc90 100644 --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -16,7 +16,9 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/TensorSpec.h" -#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) #include "llvm/Analysis/ModelUnderTrainingRunner.h" #include "llvm/Analysis/NoInferenceModelRunner.h" #endif @@ -126,6 +128,22 @@ static const int64_t MaxInterferences = 32; static const int64_t CandidateVirtRegPos = MaxInterferences; static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; +// When the model gets trained, it won't understand new instructions unless +// trained explicitly on them. This is the current cutoff for x86 (current +// architecture focus for ML reg alloc work). Any instructions over this will be +// replaced with 0s so that the model will still function even with new opcodes. 
+// Comes from MCInstrInfo::getNumOpcodes() +static const int OpcodeCountCutoff = 17716; + +// The number of use/def instructions that a specific candidate virtual register +// might have is variable, but libtensorflow only supports models with a fixed +// number of inputs. Currently encoding the first ten encountered use/def +// instructions and just ignoring the rest. Padding with zeroes if less than +// ten. +static const int ModelMaxSupportedInstructionCount = 33; +static const std::vector InstructionShape{ + 1, ModelMaxSupportedInstructionCount}; + // Most features are as described above, so we'll reuse this vector in defining // them. static const std::vector PerLiveRangeShape{1, NumberOfInterferences}; @@ -192,6 +210,8 @@ static const std::vector PerLiveRangeShape{1, NumberOfInterferences}; "largest stage of an interval in this LR") \ M(int64_t, min_stage, PerLiveRangeShape, \ "lowest stage of an interval in this LR") \ + M(int64_t, lr_use_def_instructions, InstructionShape, \ + "use/def instructions for the candidate virtual register") \ M(float, progress, {1}, "ratio of current queue size to initial size") // The model learns to pick one of the mask == 1 interferences. This is the name @@ -292,6 +312,8 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor { size_t Pos, int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const; + void extractInstructionFeatures(const LiveInterval &VirtReg) const; + // Point-in-time: we didn't learn this, so we always delegate to the default. bool canEvictHintInterference( const LiveInterval &VirtReg, MCRegister PhysReg, @@ -629,12 +651,11 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( FeaturesListNormalizer Largest; Largest.fill(0.0); - // Same overal idea as in the default eviction policy - we visit the values of - // AllocationOrder one at a time. 
If it's not legally available, we mask off - // the corresponding feature column (==do nothing because we already reset all - // the features to 0) - // Use Pos to capture the column we load features at - in AllocationOrder - // order. + // Same overall idea as in the default eviction policy - we visit the values + // of AllocationOrder one at a time. If it's not legally available, we mask + // off the corresponding feature column (==do nothing because we already reset + // all the features to 0) Use Pos to capture the column we load features at - + // in AllocationOrder order. size_t Pos = 0; for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { @@ -665,6 +686,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( /*NrUrgent*/ 0.0); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); + extractInstructionFeatures(VirtReg); // Normalize the features. for (auto &V : Largest) V = V ? V : 1.0; @@ -838,6 +860,19 @@ void MLEvictAdvisor::extractFeatures( #undef SET } +void MLEvictAdvisor::extractInstructionFeatures( + const LiveInterval &VirtReg) const { + size_t InstructionCount = 0; + for (auto MII = MF.getRegInfo().reg_instr_begin(VirtReg.reg()); + MII != MF.getRegInfo().reg_instr_end() && + InstructionCount < ModelMaxSupportedInstructionCount; + ++MII, ++InstructionCount) { + Runner->getTensor( + FeatureIDs::lr_use_def_instructions)[InstructionCount] = + MII->getOpcode(); + } +} + // Development mode-specific implementations #ifdef LLVM_HAVE_TF_API RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() { From c9829cd11e7065cf1082f3bf35e1038edfd6a998 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 19 Jun 2022 12:18:12 -0700 Subject: [PATCH 2/3] Worked on regalloc all features --- llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 138 +++++++++++++++----- 1 file changed, 106 insertions(+), 32 deletions(-) diff --git 
a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index d6f61150c7fc90..6468b88c049241 100644 --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -130,19 +130,19 @@ static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1; // When the model gets trained, it won't understand new instructions unless // trained explicitly on them. This is the current cutoff for x86 (current -// architecture focus for ML reg alloc work). Any instructions over this will be +// architecture focus for ML regalloc work). Any instructions over this will be // replaced with 0s so that the model will still function even with new opcodes. // Comes from MCInstrInfo::getNumOpcodes() static const int OpcodeCountCutoff = 17716; -// The number of use/def instructions that a specific candidate virtual register +// The number of instructions that a specific candidate virtual register // might have is variable, but libtensorflow only supports models with a fixed -// number of inputs. Currently encoding the first ten encountered use/def -// instructions and just ignoring the rest. Padding with zeroes if less than -// ten. -static const int ModelMaxSupportedInstructionCount = 33; -static const std::vector InstructionShape{ - 1, ModelMaxSupportedInstructionCount}; +// number of inputs. Currently encoding the first 100 encountered +// instructions (across all interfering live ranges) and just ignoring the rest. +// Padding with zeroes if less than 100. +static const int ModelMaxSupportedInstructionCount = 300; +static const std::vector InstructionsAndMappingShape{ + 1, NumberOfInterferences + 1, ModelMaxSupportedInstructionCount}; // Most features are as described above, so we'll reuse this vector in defining // them. 
@@ -210,8 +210,8 @@ static const std::vector PerLiveRangeShape{1, NumberOfInterferences}; "largest stage of an interval in this LR") \ M(int64_t, min_stage, PerLiveRangeShape, \ "lowest stage of an interval in this LR") \ - M(int64_t, lr_use_def_instructions, InstructionShape, \ - "use/def instructions for the candidate virtual register") \ + M(int64_t, instructions_and_mapping, InstructionsAndMappingShape, \ + "instructions and binary map between instructions and live ranges") \ M(float, progress, {1}, "ratio of current queue size to initial size") // The model learns to pick one of the mask == 1 interferences. This is the name @@ -293,11 +293,12 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor { /// Load the features of the given VirtReg (allocated or not) at column Pos, /// but if that can't be evicted, return false instead. - bool - loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, - bool IsHint, const SmallVirtRegSet &FixedRegisters, - std::array &Largest, - size_t Pos) const; + bool loadInterferenceFeatures( + const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, + const SmallVirtRegSet &FixedRegisters, + std::array &Largest, size_t Pos, + SmallVectorImpl> + &StartEndSlotIndices) const; private: static float getInitialQueueSize(const MachineFunction &MF); @@ -310,9 +311,13 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor { void extractFeatures(const SmallVectorImpl &Intervals, std::array &Largest, size_t Pos, int64_t IsHint, int64_t LocalIntfsCount, - float NrUrgent) const; + float NrUrgent, + SmallVectorImpl> + &StartEndSlotIndices) const; - void extractInstructionFeatures(const LiveInterval &VirtReg) const; + void extractInstructionFeatures( + SmallVectorImpl> + &StartEndSlotIndices) const; // Point-in-time: we didn't learn this, so we always delegate to the default. 
bool canEvictHintInterference( @@ -553,7 +558,9 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition( bool MLEvictAdvisor::loadInterferenceFeatures( const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest, - size_t Pos) const { + size_t Pos, + SmallVectorImpl> + &StartEndSlotIndices) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { // leave unavailable @@ -612,7 +619,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures( // OK, so if we made it this far, this LR is an eviction candidate, load its // features. extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs, - NrUrgent); + NrUrgent, StartEndSlotIndices); return true; } @@ -656,6 +663,8 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( // off the corresponding feature column (==do nothing because we already reset // all the features to 0) Use Pos to capture the column we load features at - // in AllocationOrder order. + SmallVector, NumberOfInterferences> + StartEndSlotIndices; size_t Pos = 0; for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { @@ -666,7 +675,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( continue; } if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters, - Largest, Pos)) { + Largest, Pos, StartEndSlotIndices)) { ++Available; Regs[Pos] = std::make_pair(PhysReg, true); } @@ -683,11 +692,11 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate( if (!MustFindEviction) extractFeatures(SmallVector(1, &VirtReg), Largest, CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, - /*NrUrgent*/ 0.0); + /*NrUrgent*/ 0.0, StartEndSlotIndices); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); - extractInstructionFeatures(VirtReg); - // Normalize the features. 
+ extractInstructionFeatures(StartEndSlotIndices); + // Normalize the features. for (auto &V : Largest) V = V ? V : 1.0; for (size_t FeatureIndex = 0; FeatureIndex < FeatureIDs::FeatureCount; @@ -771,7 +780,9 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const { void MLEvictAdvisor::extractFeatures( const SmallVectorImpl &Intervals, std::array &Largest, size_t Pos, - int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const { + int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent, + SmallVectorImpl> + &StartEndSlotIndices) const { int64_t NrDefsAndUses = 0; int64_t NrBrokenHints = 0; double R = 0.0; @@ -818,6 +829,11 @@ void MLEvictAdvisor::extractFeatures( HintWeights += LIFC.HintWeights; NrRematerializable += LIFC.IsRemat; + + for (auto CurrentSegment : LI) { + StartEndSlotIndices.push_back( + std::make_tuple(CurrentSegment.start, CurrentSegment.end, Pos)); + } } size_t Size = 0; if (!Intervals.empty()) { @@ -861,15 +877,73 @@ void MLEvictAdvisor::extractFeatures( } void MLEvictAdvisor::extractInstructionFeatures( - const LiveInterval &VirtReg) const { + SmallVectorImpl> + &StartEndSlotIndices) const { + std::sort(StartEndSlotIndices.begin(), StartEndSlotIndices.end(), + [](std::tuple A, + std::tuple B) { + return std::get<0>(A) < std::get<0>(B); + }); size_t InstructionCount = 0; - for (auto MII = MF.getRegInfo().reg_instr_begin(VirtReg.reg()); - MII != MF.getRegInfo().reg_instr_end() && - InstructionCount < ModelMaxSupportedInstructionCount; - ++MII, ++InstructionCount) { - Runner->getTensor( - FeatureIDs::lr_use_def_instructions)[InstructionCount] = - MII->getOpcode(); + size_t CurrentSegment = 0; + SlotIndex CurrentIndex = std::get<0>(StartEndSlotIndices[0]); + while (true) { + while (CurrentIndex <= std::get<1>(StartEndSlotIndices[CurrentSegment]) && + InstructionCount < ModelMaxSupportedInstructionCount) { + // set instruction + auto *CurrentMachineInstruction = + LIS->getInstructionFromIndex(CurrentIndex); + if 
(CurrentMachineInstruction == nullptr) { + CurrentIndex = CurrentIndex.getNextIndex(); + continue; + } + auto CurrentOpcode = CurrentMachineInstruction->getOpcode(); + Runner->getTensor( + FeatureIDs::instructions_and_mapping)[InstructionCount] = + CurrentOpcode < OpcodeCountCutoff ? CurrentOpcode : 0; + // set mask for instruction + // add 1 to the resulting position as all of the segment indices are + // offset 1 as the first row is instruction opcodes + auto CurrentSegmentPosition = + std::get<2>(StartEndSlotIndices[CurrentSegment]) + 1; + Runner->getTensor(FeatureIDs::instructions_and_mapping) + [CurrentSegmentPosition * ModelMaxSupportedInstructionCount + + InstructionCount] = 1; + // handle the overlapping LR case + size_t OverlapCheckCurrentSegment = CurrentSegment + 1; + while (OverlapCheckCurrentSegment < StartEndSlotIndices.size()) { + if (std::get<0>(StartEndSlotIndices[OverlapCheckCurrentSegment]) <= + CurrentIndex) { + auto OverlapCurrentSegmentPosition = + std::get<2>(StartEndSlotIndices[OverlapCheckCurrentSegment]) + 1; + Runner->getTensor(FeatureIDs::instructions_and_mapping) + [OverlapCurrentSegmentPosition * + ModelMaxSupportedInstructionCount + + InstructionCount] = 1; + } else { + break; + } + ++OverlapCheckCurrentSegment; + } + ++InstructionCount; + CurrentIndex = CurrentIndex.getNextIndex(); + } + // if we've just finished processing through the last segment or if we've + // hit the maximum number of instructions, break out of the loop. + if (CurrentSegment == StartEndSlotIndices.size() - 1 || + InstructionCount >= ModelMaxSupportedInstructionCount) { + break; + } + // just finished processing the previous segment, transition to the next one + if (std::get<0>(StartEndSlotIndices[CurrentSegment + 1]) <= + std::get<1>(StartEndSlotIndices[CurrentSegment])) { + // segments are overlapping. + ++CurrentSegment; + } else { + // segments are not overlapping. 
+ CurrentIndex = std::get<0>(StartEndSlotIndices[CurrentSegment + 1]); + ++CurrentSegment; + } + } } From 25574eae55cf33859ece2d7ccc63d02f330ca588 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 24 Jun 2022 17:33:40 +0000 Subject: [PATCH 3/3] misc style changes --- llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 32 +++++++++------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index 6468b88c049241..3043be0f71aab0 100644 --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -137,12 +137,12 @@ static const int OpcodeCountCutoff = 17716; // The number of instructions that a specific candidate virtual register // might have is variable, but libtensorflow only supports models with a fixed -// number of inputs. Currently encoding the first 100 encountered -// instructions (across all interfering live ranges) and just ignoring the rest. -// Padding with zeroes if less than 100. +// number of inputs. Encodes up to ModelMaxSupportedInstructionCount +// instructions (across all interfering live ranges) and ignores the rest, +// padding with zeroes if there are fewer. static const int ModelMaxSupportedInstructionCount = 300; static const std::vector InstructionsAndMappingShape{ - 1, NumberOfInterferences + 1, ModelMaxSupportedInstructionCount}; + NumberOfInterferences + 1, ModelMaxSupportedInstructionCount}; // Most features are as described above, so we'll reuse this vector in defining // them. 
@@ -893,7 +893,7 @@ void MLEvictAdvisor::extractInstructionFeatures( // set instruction auto *CurrentMachineInstruction = LIS->getInstructionFromIndex(CurrentIndex); - if (CurrentMachineInstruction == nullptr) { + if (!CurrentMachineInstruction) { CurrentIndex = CurrentIndex.getNextIndex(); continue; } @@ -912,17 +912,15 @@ void MLEvictAdvisor::extractInstructionFeatures( // handle the overlapping LR case size_t OverlapCheckCurrentSegment = CurrentSegment + 1; while (OverlapCheckCurrentSegment < StartEndSlotIndices.size()) { - if (std::get<0>(StartEndSlotIndices[OverlapCheckCurrentSegment]) <= + if (std::get<0>(StartEndSlotIndices[OverlapCheckCurrentSegment]) > CurrentIndex) { - auto OverlapCurrentSegmentPosition = - std::get<2>(StartEndSlotIndices[OverlapCheckCurrentSegment]) + 1; - Runner->getTensor(FeatureIDs::instructions_and_mapping) - [OverlapCurrentSegmentPosition * - ModelMaxSupportedInstructionCount + - InstructionCount] = 1; - } else { break; } + auto OverlapCurrentSegmentPosition = + std::get<2>(StartEndSlotIndices[OverlapCheckCurrentSegment]) + 1; + Runner->getTensor(FeatureIDs::instructions_and_mapping) + [OverlapCurrentSegmentPosition * ModelMaxSupportedInstructionCount + + InstructionCount] = 1; ++OverlapCheckCurrentSegment; } ++InstructionCount; @@ -935,15 +933,13 @@ void MLEvictAdvisor::extractInstructionFeatures( break; } // just finished processing the previous segment, transition to the next one - if (std::get<0>(StartEndSlotIndices[CurrentSegment + 1]) <= + if (std::get<0>(StartEndSlotIndices[CurrentSegment + 1]) > std::get<1>(StartEndSlotIndices[CurrentSegment])) { - // segments are overlapping. - ++CurrentSegment; - } else { - // segments are not overlapping. + // segments aren't overlapping, skip to the beginning of the next segment CurrentIndex = std::get<0>(StartEndSlotIndices[CurrentSegment + 1]); ++CurrentSegment; } + ++CurrentSegment; } }