Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mlgo] inline for size: add bypass mechanism for perserving performance #95616

Merged
merged 3 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/MLInlineAdvisor.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/PassManager.h"

#include <deque>
Expand Down Expand Up @@ -89,6 +90,7 @@ class MLInlineAdvisor : public InlineAdvisor {
llvm::SmallPtrSet<const LazyCallGraph::Node *, 1> NodesInLastSCC;
DenseSet<const LazyCallGraph::Node *> AllNodes;
bool ForceStop = false;
ProfileSummaryInfo &PSI;
};

/// InlineAdvice that tracks changes post inlining. For that reason, it only
Expand Down
18 changes: 17 additions & 1 deletion llvm/lib/Analysis/MLInlineAdvisor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
Expand All @@ -23,6 +24,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
Expand All @@ -46,6 +48,14 @@ static cl::opt<bool>
InteractiveIncludeDefault("inliner-interactive-include-default", cl::Hidden,
cl::desc(InclDefaultMsg));

enum class SkipMLPolicyCriteria { Never, IfCallerIsNotCold };

static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
"ml-inliner-skip-policy", cl::Hidden, cl::init(SkipMLPolicyCriteria::Never),
cl::values(clEnumValN(SkipMLPolicyCriteria::Never, "never", "never"),
clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold,
"if-caller-not-cold", "if the caller is not cold")));

#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
Expand Down Expand Up @@ -129,7 +139,8 @@ MLInlineAdvisor::MLInlineAdvisor(
M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
ModelRunner(std::move(Runner)), GetDefaultAdvice(GetDefaultAdvice),
CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize),
PSI(MAM.getResult<ProfileSummaryAnalysis>(M)) {
assert(ModelRunner);
ModelRunner->switchContext("");
// Extract the 'call site height' feature - the position of a call site
Expand Down Expand Up @@ -334,6 +345,11 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) {
if (!PSI.isFunctionEntryCold(&Caller))
return std::make_unique<InlineAdvice>(this, CB, ORE,
GetDefaultAdvice(CB));
}
auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
// If this is a "never inline" case, there won't be any changes to internal
// state we need to track, so we can just return the base InlineAdvice, which
Expand Down
14 changes: 10 additions & 4 deletions llvm/lib/Analysis/models/gen-inline-oz-test-model.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,12 @@ def get_output_spec_path(path):
return os.path.join(path, "output_spec.json")


def build_mock_model(path, signature):
def build_mock_model(path, signature, advice):
"""Build and save the mock model with the given signature"""
module = tf.Module()

def action(*inputs):
return {signature["output"]: tf.constant(value=1, dtype=tf.int64)}
return {signature["output"]: tf.constant(value=advice, dtype=tf.int64)}

module.action = tf.function()(action)
action = {"action": module.action.get_concrete_function(signature["inputs"])}
Expand All @@ -128,12 +128,18 @@ def get_signature():


def main(argv):
assert len(argv) == 2
assert len(argv) == 2 or (len(argv) == 3 and argv[2] == "never")
model_path = argv[1]

print(f"Output model to: [{argv[1]}]")

constant_advice = 1
if len(argv) == 3:
constant_advice = 0
print(f"The model will always return: {constant_advice}")

signature = get_signature()
build_mock_model(model_path, signature)
build_mock_model(model_path, signature, constant_advice)


if __name__ == "__main__":
Expand Down
78 changes: 78 additions & 0 deletions llvm/test/Transforms/Inline/ML/bypass.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
; REQUIRES: have_tflite
; RUN: rm -rf %t.runfiles %t.tflite %t.model_out
; RUN: mkdir %t.runfiles
; RUN: cp %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t.runfiles
; RUN: cp %S/../../../../lib/Analysis/models/saved-model-to-tflite.py %t.runfiles
; RUN: %python %t.runfiles/gen-inline-oz-test-model.py %t.model_out never
; RUN: %python %t.runfiles/saved-model-to-tflite.py %t.model_out %t.tflite

; When running O2, we expect both callers to inline callee.
; RUN: opt < %s -passes='default<O2>' -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s --check-prefixes=O2-HOT,O2-COLD

; The ML model we use always blocks inlining (by construction)
; RUN: opt < %s -passes='default<O2>' -inline-threshold=0 -hot-callsite-threshold=100 \
; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \
; RUN: -S | FileCheck %s --check-prefixes=ML-HOT,ML-COLD

; When bypassing ML for non-cold callers, the hot caller will have its callee inlined, but the cold one won't
; RUN: opt < %s -passes='default<O2>' -inline-threshold=0 -hot-callsite-threshold=100 \
; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \
; RUN: -ml-inliner-skip-policy=if-caller-not-cold -S | FileCheck %s --check-prefixes=O2-HOT,ML-COLD

declare void @extern()

define i32 @callee(i32 %x) {
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
%x3 = add i32 %x2, 1
call void @extern()
call void @extern()
ret i32 %x3
}

define i32 @hot_caller(i32 %y1) !prof !15 {
%y = call i32 @callee(i32 %y1), !prof !16
ret i32 %y
}

define i32 @cold_caller(i32 %y1) !prof !17 {
%y = call i32 @callee(i32 %y1), !prof !16
ret i32 %y
}


!llvm.module.flags = !{!1}
!15 = !{!"function_entry_count", i64 300}
!16 = !{!"branch_weights", i64 300}
!17 = !{!"function_entry_count", i64 1}

!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}

; O2-HOT-LABEL: @hot_caller
; O2-HOT-NOT: call i32 @callee
; O2-HOT: call void @extern
; O2-HOT-NEXT: call void @extern
; O2-HOT-NEXT: ret
; O2-COLD-LABEL: @cold_caller
; O2-COLD-NOT: call i32 @callee
; O2-COLD: call void @extern
; O2-COLD-NEXT: call void @extern
; O2-COLD-NEXT: ret

; ML-HOT-LABEL: @hot_caller
; ML-HOT-NEXT: call i32 @callee
; ML-COLD-LABEL: @cold_caller
; ML-COLD-NEXT: call i32 @callee
Loading