From 4b7241adf0a8d39cad600308b146ba987594e9af Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 14 Jun 2024 15:23:48 -0700 Subject: [PATCH 1/2] [mlgo] inline for size: add bypass mechanism for preserving performance This allows shrinking the cold part of the code for size without sacrificing performance. --- llvm/include/llvm/Analysis/MLInlineAdvisor.h | 2 + llvm/lib/Analysis/MLInlineAdvisor.cpp | 18 ++++- .../models/gen-inline-oz-test-model.py | 16 ++-- llvm/test/Transforms/Inline/ML/bypass.ll | 78 +++++++++++++++++++ 4 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/Inline/ML/bypass.ll diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index f58862e533529c..2aa077fe0e035d 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -13,6 +13,7 @@ #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/PassManager.h" #include @@ -89,6 +90,7 @@ class MLInlineAdvisor : public InlineAdvisor { llvm::SmallPtrSet NodesInLastSCC; DenseSet AllNodes; bool ForceStop = false; + ProfileSummaryInfo &PSI; }; /// InlineAdvice that tracks changes post inlining. 
For that reason, it only diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index 75eb8ece2e447e..21946572339b9a 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineCost.h" @@ -23,6 +24,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReleaseModeModelRunner.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" @@ -46,6 +48,14 @@ static cl::opt InteractiveIncludeDefault("inliner-interactive-include-default", cl::Hidden, cl::desc(InclDefaultMsg)); +enum class SkipMLPolicyCriteria { Never, IfCallerIsNotCold }; + +static cl::opt SkipPolicy( + "ml-inliner-skip-policy", cl::Hidden, cl::init(SkipMLPolicyCriteria::Never), + cl::values(clEnumValN(SkipMLPolicyCriteria::Never, "never", "never"), + clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold, + "if-caller-not-cold", "if the caller is not cold"))); + #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL) // codegen-ed file #include "InlinerSizeModel.h" // NOLINT @@ -129,7 +139,8 @@ MLInlineAdvisor::MLInlineAdvisor( M, MAM.getResult(M).getManager()), ModelRunner(std::move(Runner)), GetDefaultAdvice(GetDefaultAdvice), CG(MAM.getResult(M)), - InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { + InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize), + PSI(MAM.getResult(M)) { assert(ModelRunner); ModelRunner->switchContext(""); // Extract the 'call site height' feature - the position of a call site @@ -334,6 +345,11 @@ std::unique_ptr 
MLInlineAdvisor::getAdviceImpl(CallBase &CB) { auto &TIR = FAM.getResult(Callee); auto &ORE = FAM.getResult(Caller); + if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) { + if (!PSI.isFunctionEntryCold(&Caller)) + return std::make_unique(this, CB, ORE, + GetDefaultAdvice(CB)); + } auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); // If this is a "never inline" case, there won't be any changes to internal // state we need to track, so we can just return the base InlineAdvice, which diff --git a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py index 4898509ea544f5..d6b5e1747a7b68 100644 --- a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py +++ b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py @@ -102,12 +102,12 @@ def get_output_spec_path(path): return os.path.join(path, "output_spec.json") -def build_mock_model(path, signature): +def build_mock_model(path, signature, advice): """Build and save the mock model with the given signature""" module = tf.Module() def action(*inputs): - return {signature["output"]: tf.constant(value=1, dtype=tf.int64)} + return {signature["output"]: tf.constant(value=advice, dtype=tf.int64)} module.action = tf.function()(action) action = {"action": module.action.get_concrete_function(signature["inputs"])} @@ -128,12 +128,18 @@ def get_signature(): def main(argv): - assert len(argv) == 2 + assert len(argv) == 2 or (len(argv) == 3 and argv[2] == "never") model_path = argv[1] - + print(f"Output model to: [{argv[1]}]") + + constant_advice = 1 + if len(argv) == 3: + constant_advice = 0 + print(f"The model will always return: {constant_advice}") + signature = get_signature() - build_mock_model(model_path, signature) + build_mock_model(model_path, signature, constant_advice) if __name__ == "__main__": diff --git a/llvm/test/Transforms/Inline/ML/bypass.ll b/llvm/test/Transforms/Inline/ML/bypass.ll new file mode 100644 index 00000000000000..ccdefdcc93bfe8 
--- /dev/null +++ b/llvm/test/Transforms/Inline/ML/bypass.ll @@ -0,0 +1,78 @@ +; REQUIRES: have_tflite +; RUN: rm -rf %t.runfiles %t.tflite %t.model_out +; RUN: mkdir %t.runfiles +; RUN: cp %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t.runfiles +; RUN: cp %S/../../../../lib/Analysis/models/saved-model-to-tflite.py %t.runfiles +; RUN: %python %t.runfiles/gen-inline-oz-test-model.py %t.model_out never +; RUN: %python %t.runfiles/saved-model-to-tflite.py %t.model_out %t.tflite + +; When running O2, we expect both callers to inline callee. +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s --check-prefixes=O2-HOT,O2-COLD + +; The ML model we use always blocks inlining (by construction) +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 \ +; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \ +; RUN: -S | FileCheck %s --check-prefixes=ML-HOT,ML-COLD + +; When bypassing ML for non-cold callers, the hot caller will have its callee inlined, but the cold one won't +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 \ +; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \ +; RUN: -ml-inliner-skip-policy=if-caller-not-cold -S | FileCheck %s --check-prefixes=O2-HOT,ML-COLD + +declare void @extern() + +define i32 @callee(i32 %x) { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + call void @extern() + ret i32 %x3 +} + +define i32 @hot_caller(i32 %y1) !prof !15 { + %y = call i32 @callee(i32 %y1), !prof !16 + ret i32 %y +} + +define i32 @cold_caller(i32 %y1) !prof !17 { + %y = call i32 @callee(i32 %y1), !prof !16 + ret i32 %y +} + + +!llvm.module.flags = !{!1} +!15 = !{!"function_entry_count", i64 300} +!16 = !{!"branch_weights", i64 300} +!17 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, 
!9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} + +; O2-HOT-LABEL: @hot_caller +; O2-HOT-NOT: call i32 @callee +; O2-HOT: call void @extern +; O2-HOT-NEXT: call void @extern +; O2-HOT-NEXT: ret +; O2-COLD-LABEL: @cold_caller +; O2-COLD-NOT: call i32 @callee +; O2-COLD: call void @extern +; O2-COLD-NEXT: call void @extern +; O2-COLD-NEXT: ret + +; ML-HOT-LABEL: @hot_caller +; ML-HOT-NEXT: call i32 @callee +; ML-COLD-LABEL: @cold_caller +; ML-COLD-NEXT: call i32 @callee \ No newline at end of file From e701245f034720f651d3373da4bc16c4411abfe6 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 14 Jun 2024 15:30:03 -0700 Subject: [PATCH 2/2] fix python formatting --- llvm/lib/Analysis/models/gen-inline-oz-test-model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py index d6b5e1747a7b68..83055890283e88 100644 --- a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py +++ b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py @@ -130,9 +130,9 @@ def get_signature(): def main(argv): assert len(argv) == 2 or (len(argv) == 3 and argv[2] == "never") model_path = argv[1] - + print(f"Output model to: [{argv[1]}]") - + constant_advice = 1 if len(argv) == 3: constant_advice = 0