From ebde38a2356b405951ef1f0c4684fae221803887 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 13 Oct 2023 18:15:24 -0700 Subject: [PATCH 1/2] [CodeLayout] cache-directed sort: limit max chain size When linking a slightly larger executable, ld.lld --call-graph-profile-sort=cdsort can be very slow (see #68638). ``` 4.6% 20.7Mi .text.hot 3.5% 15.9Mi .text 3.4% 15.2Mi .text.unknown ``` Add cl option `cdsort-max-chain-size`, which is similar to `ext-tsp-max-chain-size`, and set it to 128, to improve performance. In `ld.lld @response.txt --threads=4 --call-graph-profile-sort=cdsort --time-trace` builds, the "Total Sort sections" time is measured as follows: * -mllvm -cdsort-max-chain-size=64: 1.321813 * -mllvm -cdsort-max-chain-size=128: 2.030425 * -mllvm -cdsort-max-chain-size=256: 2.927684 * -mllvm -cdsort-max-chain-size=512: 5.493106 * unlimited: 9 minutes The rest of the link takes 6.8s. --- llvm/lib/Transforms/Utils/CodeLayout.cpp | 9 +++++++++ llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index 6252c429205ab..f7f080af183ba 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -62,6 +62,12 @@ cl::opt ApplyExtTspWithoutProfile( "ext-tsp-apply-without-profile", cl::desc("Whether to apply ext-tsp placement for instances w/o profile"), cl::init(true), cl::Hidden); + +namespace codelayout { +cl::opt + CDMaxChainSize("cdsort-max-chain-size", cl::Hidden, cl::init(128), + cl::desc("The maximum size of a chain to create")); +} } // namespace llvm // Algorithm-specific params for Ext-TSP. The values are tuned for the best @@ -1156,6 +1162,9 @@ class CDSortImpl { // Ignore loop edges. if (Edge->isSelfEdge()) continue; + if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() > + CDMaxChainSize) + continue; // Compute the gain of merging the two chains. 
MergeGainT Gain = getBestMergeGain(Edge); diff --git a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp index ce42f703229bd..b6dcd03565bcc 100644 --- a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp @@ -1,4 +1,5 @@ #include "llvm/Transforms/Utils/CodeLayout.h" +#include "llvm/Support/CommandLine.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -7,6 +8,10 @@ using namespace llvm; using namespace llvm::codelayout; using testing::ElementsAreArray; +namespace llvm::codelayout { +extern cl::opt CDMaxChainSize; +} + namespace { TEST(CodeLayout, ThreeFunctions) { // Place the most likely successor (2) first. @@ -40,6 +45,14 @@ TEST(CodeLayout, HotChain) { const std::vector CallOffsets(std::size(Edges), 5); auto Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets); EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 2, 1})); + + // -cdsort-max-chain-size disables forming a larger chain and therefore may + // change the result. 
+ unsigned Saved = CDMaxChainSize; + CDMaxChainSize.setValue(3); + Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets); + EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 1, 2})); + CDMaxChainSize.setValue(Saved); } } From 8a47e7cd3a79a6c56b8fb64701287ce8405e8b9f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 19 Oct 2023 13:20:01 -0700 Subject: [PATCH 2/2] Add MaxChainSize to CDSortConfig --- llvm/include/llvm/Transforms/Utils/CodeLayout.h | 2 ++ llvm/lib/Transforms/Utils/CodeLayout.cpp | 14 +++++++------- llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp | 13 ++++--------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeLayout.h b/llvm/include/llvm/Transforms/Utils/CodeLayout.h index f5127cff24af0..9d550fae6dd06 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeLayout.h +++ b/llvm/include/llvm/Transforms/Utils/CodeLayout.h @@ -65,6 +65,8 @@ struct CDSortConfig { unsigned CacheEntries = 16; /// The size of a line in the cache. unsigned CacheSize = 2048; + /// The maximum size of a chain to create. + unsigned MaxChainSize = 128; /// The power exponent for the distance-based locality. double DistancePower = 0.25; /// The scale factor for the frequency-based locality. diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index f7f080af183ba..a6c9d2ac6cf2f 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -62,12 +62,6 @@ cl::opt ApplyExtTspWithoutProfile( "ext-tsp-apply-without-profile", cl::desc("Whether to apply ext-tsp placement for instances w/o profile"), cl::init(true), cl::Hidden); - -namespace codelayout { -cl::opt - CDMaxChainSize("cdsort-max-chain-size", cl::Hidden, cl::init(128), - cl::desc("The maximum size of a chain to create")); -} } // namespace llvm // Algorithm-specific params for Ext-TSP. 
The values are tuned for the best @@ -129,6 +123,10 @@ static cl::opt CacheEntries("cds-cache-entries", cl::ReallyHidden, static cl::opt CacheSize("cds-cache-size", cl::ReallyHidden, cl::desc("The size of a line in the cache")); +static cl::opt + CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden, + cl::desc("The maximum size of a chain to create")); + static cl::opt DistancePower( "cds-distance-power", cl::ReallyHidden, cl::desc("The power exponent for the distance-based locality")); @@ -1163,7 +1161,7 @@ class CDSortImpl { if (Edge->isSelfEdge()) continue; if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() > - CDMaxChainSize) + Config.MaxChainSize) continue; // Compute the gain of merging the two chains. @@ -1461,6 +1459,8 @@ std::vector codelayout::computeCacheDirectedLayout( Config.CacheEntries = CacheEntries; if (CacheSize.getNumOccurrences() > 0) Config.CacheSize = CacheSize; + if (CDMaxChainSize.getNumOccurrences() > 0) + Config.MaxChainSize = CDMaxChainSize; if (DistancePower.getNumOccurrences() > 0) Config.DistancePower = DistancePower; if (FrequencyScale.getNumOccurrences() > 0) diff --git a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp index b6dcd03565bcc..ef9aa9a76342f 100644 --- a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp @@ -1,5 +1,4 @@ #include "llvm/Transforms/Utils/CodeLayout.h" -#include "llvm/Support/CommandLine.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -8,10 +7,6 @@ using namespace llvm; using namespace llvm::codelayout; using testing::ElementsAreArray; -namespace llvm::codelayout { -extern cl::opt CDMaxChainSize; -} - namespace { TEST(CodeLayout, ThreeFunctions) { // Place the most likely successor (2) first. @@ -48,11 +43,11 @@ TEST(CodeLayout, HotChain) { // -cdsort-max-chain-size disables forming a larger chain and therefore may // change the result. 
- unsigned Saved = CDMaxChainSize; - CDMaxChainSize.setValue(3); - Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets); + CDSortConfig Config; + Config.MaxChainSize = 3; + Order = + computeCacheDirectedLayout(Config, Sizes, Counts, Edges, CallOffsets); EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 1, 2})); - CDMaxChainSize.setValue(Saved); } }