Regalloc all instructions #2

Closed
wants to merge 3 commits
143 changes: 124 additions & 19 deletions llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -16,7 +16,9 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#endif
@@ -126,6 +128,22 @@ static const int64_t MaxInterferences = 32;
static const int64_t CandidateVirtRegPos = MaxInterferences;
static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;

// A trained model only understands the opcodes it was trained on; it will not
// understand instructions added later unless it is retrained on them. This is
// the current opcode count for x86 (the current architecture focus of the ML
// regalloc work). Any opcode at or above this cutoff is replaced with 0 so
// that the model keeps functioning when new opcodes appear. The value comes
// from MCInstrInfo::getNumOpcodes().
static const int OpcodeCountCutoff = 17716;

// The number of instructions that a candidate virtual register spans is
// variable, but libtensorflow only supports models with a fixed number of
// inputs, so we encode at most this many instructions (across all interfering
// live ranges), ignore the rest, and pad with zeroes when there are fewer.
static const int ModelMaxSupportedInstructionCount = 300;
static const std::vector<int64_t> InstructionsAndMappingShape{
NumberOfInterferences + 1, ModelMaxSupportedInstructionCount};
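
As a reading aid (not part of the patch), here is a minimal sketch of how a flattened buffer with this shape is addressed; the function and parameter names are made up, but the row-0-opcodes / one-mask-row-per-candidate layout and the OpcodeCountCutoff clamping match the extraction code further down:

// Illustrative only: row 0 of the flattened tensor holds (clamped) opcodes;
// row Pos + 1 holds the 0/1 occupancy mask for the candidate at column Pos.
static void sketchSetInstructionEntry(int64_t *Tensor, size_t InstrIdx,
                                      size_t Pos, int64_t Opcode) {
  Tensor[InstrIdx] = Opcode < OpcodeCountCutoff ? Opcode : 0;
  Tensor[(Pos + 1) * ModelMaxSupportedInstructionCount + InstrIdx] = 1;
}

Entries past the last encoded instruction are simply left at 0, matching the padding described above.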

// Most features are as described above, so we'll reuse this vector in defining
// them.
static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
@@ -192,6 +210,8 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
"largest stage of an interval in this LR") \
M(int64_t, min_stage, PerLiveRangeShape, \
"lowest stage of an interval in this LR") \
M(int64_t, instructions_and_mapping, InstructionsAndMappingShape, \

may be helpful to show in a comment how this is laid out. IIUC, each line corresponds to an instruction. The first column is the instruction opcode, the rest correspond to each candidate LR. We tick (set to 1) each [line][ LR_corresponding_column] where that LR spans over that instruction.
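
For concreteness (an editorial illustration, not from the original thread), a small made-up example of the layout the patch emits: row 0 holds the opcodes and row Pos + 1 holds the mask for the candidate at column Pos, i.e. the transpose of the per-instruction view described above. The mnemonics and spans are invented:

// columns = the first four encoded instructions; remaining columns stay 0
// row 0 (opcodes):      MOV32rr  ADD32rr  CMP32rr  JCC_1
// row 1 (candidate 0):     1        1        0        0
// row 2 (candidate 1):     0        1        1        1
// i.e. candidate 0's live range covers the first two instructions and
// candidate 1's covers the last three.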

2 questions:

  • (open question) would it be ML-preferable to place the opcodes instead of '1' tickmarks? i.e. repeat the opcode where it is used, and drop the leftmost column?
  • in the algo, it may be easier to not worry about overlaps. Instead, for each LR, set the opcode (idempotent if already set) and tick its position in the LR. (unless I'm missing smth)

Owner Author

  1. I'll talk with Yundi about that one. Thinking about it now, just passing the opcodes should be more efficient on the ML side because it avoids a matrix multiplication (at least I would think so).
  2. We'd need frequency data too, but that would be pretty easy to pass along to the model as well by structuring the extraction like that. It wouldn't necessarily make things much more difficult to implement on the ML side either, as it could still be implemented as a matrix multiply, and the position data that the current approach encodes isn't actually used. I'll let Yundi get back to me, but that is probably a better approach since the position data is currently irrelevant.


ack, not sure Yundi saw this, pasting it in slack

"instructions and binary map between instructions and live ranges") \
M(float, progress, {1}, "ratio of current queue size to initial size")

// The model learns to pick one of the mask == 1 interferences. This is the name
@@ -273,11 +293,12 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor {

/// Load the features of the given VirtReg (allocated or not) at column Pos,
/// but if that can't be evicted, return false instead.
bool
loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
bool IsHint, const SmallVirtRegSet &FixedRegisters,
std::array<float, FeatureIDs::FeatureCount> &Largest,
size_t Pos) const;
bool loadInterferenceFeatures(
const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
const SmallVirtRegSet &FixedRegisters,
std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos,
SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>>
&StartEndSlotIndices) const;

private:
static float getInitialQueueSize(const MachineFunction &MF);
@@ -290,7 +311,13 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor {
void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
std::array<float, FeatureIDs::FeatureCount> &Largest,
size_t Pos, int64_t IsHint, int64_t LocalIntfsCount,
float NrUrgent) const;
float NrUrgent,
SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>>
&StartEndSlotIndices) const;

void extractInstructionFeatures(
SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>>
&StartEndSlotIndices) const;

// Point-in-time: we didn't learn this, so we always delegate to the default.
bool canEvictHintInterference(
@@ -531,7 +558,9 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
bool MLEvictAdvisor::loadInterferenceFeatures(
const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest,
size_t Pos) const {
size_t Pos,
SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>>
&StartEndSlotIndices) const {
// It is only possible to evict virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
// leave unavailable
@@ -590,7 +619,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
// OK, so if we made it this far, this LR is an eviction candidate, load its
// features.
extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
NrUrgent);
NrUrgent, StartEndSlotIndices);
return true;
}

@@ -629,12 +658,13 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
FeaturesListNormalizer Largest;
Largest.fill(0.0);

// Same overal idea as in the default eviction policy - we visit the values of
// AllocationOrder one at a time. If it's not legally available, we mask off
// the corresponding feature column (==do nothing because we already reset all
// the features to 0)
// Use Pos to capture the column we load features at - in AllocationOrder
// order.
// Same overall idea as in the default eviction policy - we visit the values
// of AllocationOrder one at a time. If it's not legally available, we mask
// off the corresponding feature column (==do nothing because we already reset
// all the features to 0) Use Pos to capture the column we load features at -
// in AllocationOrder order.
SmallVector<std::tuple<SlotIndex, SlotIndex, size_t>, NumberOfInterferences>
StartEndSlotIndices;
size_t Pos = 0;
for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
++I, ++Pos) {
@@ -645,7 +675,7 @@
continue;
}
if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
Largest, Pos)) {
Largest, Pos, StartEndSlotIndices)) {
++Available;
Regs[Pos] = std::make_pair(PhysReg, true);
}
@@ -662,10 +692,11 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
if (!MustFindEviction)
extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0,
/*NrUrgent*/ 0.0);
/*NrUrgent*/ 0.0, StartEndSlotIndices);
assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
"nothing to allocate initially.");
// Normalize the features.
extractInstructionFeatures(StartEndSlotIndices);
// Normalize the features.
for (auto &V : Largest)
V = V ? V : 1.0;
for (size_t FeatureIndex = 0; FeatureIndex < FeatureIDs::FeatureCount;
@@ -749,7 +780,9 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
void MLEvictAdvisor::extractFeatures(
const SmallVectorImpl<const LiveInterval *> &Intervals,
std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos,
int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const {
int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent,
SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>>
&StartEndSlotIndices) const {
int64_t NrDefsAndUses = 0;
int64_t NrBrokenHints = 0;
double R = 0.0;
@@ -796,6 +829,11 @@ void MLEvictAdvisor::extractFeatures(

HintWeights += LIFC.HintWeights;
NrRematerializable += LIFC.IsRemat;

for (auto CurrentSegment : LI) {
StartEndSlotIndices.push_back(
std::make_tuple(CurrentSegment.start, CurrentSegment.end, Pos));

It may be more readable if the tuple were replaced by a struct with explicitly named fields. Then the StartEndSlotIndices could be modeled as a vector of const such values - because they only need to be emplaced, never mutated, iiuc.
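
A minimal sketch of the kind of struct being suggested (the name and fields below are illustrative, not part of the patch):

// Illustrative replacement for the (start, end, position) tuple.
struct LRStartEndInfo {
  SlotIndex Begin;
  SlotIndex End;
  size_t Pos; // column this candidate occupies in the feature tensors
};
// StartEndSlotIndices would then be a SmallVector of such values whose
// elements are emplaced once and never mutated afterwards.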

}
}
size_t Size = 0;
if (!Intervals.empty()) {
@@ -838,6 +876,73 @@ void MLEvictAdvisor::extractFeatures(
#undef SET
}

void MLEvictAdvisor::extractInstructionFeatures(
SmallVectorImpl<std::tuple<SlotIndex, SlotIndex, size_t>>
&StartEndSlotIndices) const {
std::sort(StartEndSlotIndices.begin(), StartEndSlotIndices.end(),
[](std::tuple<SlotIndex, SlotIndex, size_t> A,
std::tuple<SlotIndex, SlotIndex, size_t> B) {
return std::get<0>(A) < std::get<0>(B);
});
size_t InstructionCount = 0;
size_t CurrentSegment = 0;
// nothing to encode if no segments were collected
if (StartEndSlotIndices.empty())
  return;
SlotIndex CurrentIndex = std::get<0>(StartEndSlotIndices[0]);
while (true) {
while (CurrentIndex <= std::get<1>(StartEndSlotIndices[CurrentSegment]) &&
InstructionCount < ModelMaxSupportedInstructionCount) {
// set instruction
auto *CurrentMachineInstruction =
LIS->getInstructionFromIndex(CurrentIndex);
if (!CurrentMachineInstruction) {
CurrentIndex = CurrentIndex.getNextIndex();
continue;
}
auto CurrentOpcode = CurrentMachineInstruction->getOpcode();
Runner->getTensor<int64_t>(
FeatureIDs::instructions_and_mapping)[InstructionCount] =
CurrentOpcode < OpcodeCountCutoff ? CurrentOpcode : 0;
// set the mask bit for this instruction
// add 1 to the segment's position because the mask rows are offset by 1:
// the first row holds the instruction opcodes
auto CurrentSegmentPosition =
std::get<2>(StartEndSlotIndices[CurrentSegment]) + 1;
Runner->getTensor<int64_t>(FeatureIDs::instructions_and_mapping)
[CurrentSegmentPosition * ModelMaxSupportedInstructionCount +
InstructionCount] = 1;
// handle the overlapping LR case
size_t OverlapCheckCurrentSegment = CurrentSegment + 1;
while (OverlapCheckCurrentSegment < StartEndSlotIndices.size()) {
if (std::get<0>(StartEndSlotIndices[OverlapCheckCurrentSegment]) >
CurrentIndex) {
break;
}
auto OverlapCurrentSegmentPosition =
std::get<2>(StartEndSlotIndices[OverlapCheckCurrentSegment]) + 1;
Runner->getTensor<int64_t>(FeatureIDs::instructions_and_mapping)
[OverlapCurrentSegmentPosition * ModelMaxSupportedInstructionCount +
InstructionCount] = 1;
++OverlapCheckCurrentSegment;
}
++InstructionCount;
CurrentIndex = CurrentIndex.getNextIndex();
}
// if we've just finished processing the last segment, or we've hit the
// maximum number of instructions, break out of the loop.
if (CurrentSegment == StartEndSlotIndices.size() - 1 ||
InstructionCount >= ModelMaxSupportedInstructionCount) {
break;
}
// we just finished processing a segment; move on to the next one
if (std::get<0>(StartEndSlotIndices[CurrentSegment + 1]) >
std::get<1>(StartEndSlotIndices[CurrentSegment])) {
// the segments aren't overlapping, so skip ahead to the start of the next one
CurrentIndex = std::get<0>(StartEndSlotIndices[CurrentSegment + 1]);
}
// in either case, advance to the next segment exactly once
++CurrentSegment;
}
}
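
To make the control flow above easier to follow, here is a simplified, standalone model of the same mapping (an editorial sketch, not part of the patch) written against plain integers and a flat std::vector instead of SlotIndex and the model runner's tensor. Every name and the three-segment input are invented for illustration, and slots with no instruction are not modeled:

// Illustrative only: mimics extractInstructionFeatures with toy data.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Segment { int Begin; int End; size_t Pos; };

int main() {
  const int MaxInstr = 10; // stands in for ModelMaxSupportedInstructionCount
  const int NumRows = 4;   // one opcode row plus three candidate rows
  // Two overlapping segments and one disjoint segment, one per candidate.
  std::vector<Segment> Segs = {{0, 3, 0}, {2, 5, 1}, {7, 9, 2}};
  std::sort(Segs.begin(), Segs.end(),
            [](const Segment &A, const Segment &B) { return A.Begin < B.Begin; });
  std::vector<int> Tensor(NumRows * MaxInstr, 0);
  size_t Seg = 0;
  int Count = 0;
  int Idx = Segs[0].Begin;
  while (true) {
    while (Idx <= Segs[Seg].End && Count < MaxInstr) {
      Tensor[Count] = 100 + Idx; // pretend opcode of the instruction at Idx
      Tensor[(Segs[Seg].Pos + 1) * MaxInstr + Count] = 1;
      // also tick later segments that cover this index (the overlap case)
      for (size_t O = Seg + 1; O < Segs.size() && Segs[O].Begin <= Idx; ++O)
        Tensor[(Segs[O].Pos + 1) * MaxInstr + Count] = 1;
      ++Count;
      ++Idx;
    }
    if (Seg == Segs.size() - 1 || Count >= MaxInstr)
      break;
    if (Segs[Seg + 1].Begin > Segs[Seg].End)
      Idx = Segs[Seg + 1].Begin; // jump over the gap between segments
    ++Seg;
  }
  for (int R = 0; R < NumRows; ++R) {
    for (int C = 0; C < MaxInstr; ++C)
      std::printf("%4d", Tensor[R * MaxInstr + C]);
    std::printf("\n");
  }
  return 0;
}

Running this prints the opcode row packed left to right (the gap between the second and third segments is skipped entirely) and one 0/1 mask row per candidate, with a 1 exactly where that candidate's segments cover an encoded instruction and zero padding everywhere else.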

// Development mode-specific implementations
#ifdef LLVM_HAVE_TF_API
RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {