From e834d4256af8370357ca0e98b064e8bac5484560 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 19 Jun 2023 22:30:19 +0200 Subject: [PATCH] JIT: Cache significant segments computations for layouts in physical promotion (#87745) Before: JitEnablePhysicalPromotion=1 benchmarks.run_pgo: Total num SignificantSegments calls: 2543 (Per context) SignificantSegments calls <= 0 ===> 0 count ( 0% of total) 1 .. 1 ===> 312 count ( 37% of total) 2 .. 2 ===> 58 count ( 44% of total) 3 .. 3 ===> 181 count ( 66% of total) 4 .. 5 ===> 191 count ( 90% of total) 6 .. 10 ===> 66 count ( 98% of total) 11 .. 20 ===> 12 count ( 99% of total) 21 .. 35 ===> 4 count (100% of total) 36 .. 50 ===> 0 count (100% of total) 51 .. 75 ===> 0 count (100% of total) 76 .. 100 ===> 0 count (100% of total) 101 .. 150 ===> 0 count (100% of total) 151 .. 250 ===> 0 count (100% of total) 251 .. 500 ===> 0 count (100% of total) 501 .. 1000 ===> 0 count (100% of total) libraries.pmi: Total num SignificantSegments calls: 28525 (Per context) SignificantSegments calls <= 0 ===> 0 count ( 0% of total) 1 .. 1 ===> 1154 count ( 19% of total) 2 .. 2 ===> 1878 count ( 50% of total) 3 .. 3 ===> 809 count ( 64% of total) 4 .. 5 ===> 782 count ( 77% of total) 6 .. 10 ===> 831 count ( 91% of total) 11 .. 20 ===> 357 count ( 97% of total) 21 .. 35 ===> 101 count ( 98% of total) 36 .. 50 ===> 29 count ( 99% of total) 51 .. 75 ===> 29 count ( 99% of total) 76 .. 100 ===> 8 count (100% of total) 101 .. 150 ===> 0 count (100% of total) 151 .. 250 ===> 0 count (100% of total) 251 .. 500 ===> 0 count (100% of total) 501 .. 1000 ===> 0 count (100% of total) JitEnablePhysicalPromotion=1;JitStressModeNames=STRESS_NO_OLD_PROMOTION benchmarks.run_pgo: Total num SignificantSegments calls: 90839 (Per context) SignificantSegments calls <= 0 ===> 0 count ( 0% of total) 1 .. 1 ===> 681 count ( 7% of total) 2 .. 2 ===> 1035 count ( 17% of total) 3 .. 3 ===> 1635 count ( 34% of total) 4 .. 5 ===> 1053 count ( 45% of total) 6 .. 10 ===> 3162 count ( 78% of total) 11 .. 20 ===> 814 count ( 87% of total) 21 .. 35 ===> 931 count ( 96% of total) 36 .. 50 ===> 215 count ( 99% of total) 51 .. 75 ===> 69 count ( 99% of total) 76 .. 100 ===> 4 count ( 99% of total) 101 .. 150 ===> 4 count ( 99% of total) 151 .. 250 ===> 0 count ( 99% of total) 251 .. 500 ===> 11 count (100% of total) 501 .. 1000 ===> 0 count (100% of total) libraries.pmi: Total num SignificantSegments calls: 277708 (Per context) SignificantSegments calls <= 0 ===> 0 count ( 0% of total) 1 .. 1 ===> 6993 count ( 17% of total) 2 .. 2 ===> 8197 count ( 38% of total) 3 .. 3 ===> 5225 count ( 51% of total) 4 .. 5 ===> 5380 count ( 65% of total) 6 .. 10 ===> 7141 count ( 83% of total) 11 .. 20 ===> 4094 count ( 93% of total) 21 .. 35 ===> 1627 count ( 97% of total) 36 .. 50 ===> 519 count ( 98% of total) 51 .. 75 ===> 292 count ( 99% of total) 76 .. 100 ===> 98 count ( 99% of total) 101 .. 150 ===> 59 count ( 99% of total) 151 .. 250 ===> 11 count ( 99% of total) 251 .. 500 ===> 4 count (100% of total) 501 .. 1000 ===> 0 count (100% of total) After: benchmarks.run_pgo: Total num SignificantSegments calls: 915 (Per context) SignificantSegments calls <= 0 ===> 0 count ( 0% of total) 1 .. 1 ===> 741 count ( 89% of total) 2 .. 2 ===> 75 count ( 99% of total) 3 .. 3 ===> 8 count (100% of total) 4 .. 5 ===> 0 count (100% of total) 6 .. 10 ===> 0 count (100% of total) 11 .. 20 ===> 0 count (100% of total) 21 .. 35 ===> 0 count (100% of total) 36 .. 50 ===> 0 count (100% of total) 51 .. 75 ===> 0 count (100% of total) 76 .. 100 ===> 0 count (100% of total) 101 .. 150 ===> 0 count (100% of total) 151 .. 250 ===> 0 count (100% of total) 251 .. 500 ===> 0 count (100% of total) 501 .. 1000 ===> 0 count (100% of total) libraries.pmi: Total num SignificantSegments calls: 9061 (Per context) SignificantSegments calls <= 0 ===> 0 count ( 0% of total) 1 .. 1 ===> 4172 count ( 69% of total) 2 .. 2 ===> 1105 count ( 88% of total) 3 .. 3 ===> 416 count ( 95% of total) 4 .. 5 ===> 214 count ( 98% of total) 6 .. 10 ===> 67 count ( 99% of total) 11 .. 20 ===> 3 count ( 99% of total) 21 .. 35 ===> 1 count (100% of total) 36 .. 50 ===> 0 count (100% of total) 51 .. 75 ===> 0 count (100% of total) 76 .. 100 ===> 0 count (100% of total) 101 .. 150 ===> 0 count (100% of total) 151 .. 250 ===> 0 count (100% of total) 251 .. 500 ===> 0 count (100% of total) 501 .. 1000 ===> 0 count (100% of total) JitEnablePhysicalPromotion=1;JitStressModeNames=STRESS_NO_OLD_PROMOTION benchmarks.run_pgo: Total num SignificantSegments calls: 15082 (Per context) SignificantSegments calls <= 0 ===> 0 count ( 0% of total) 1 .. 1 ===> 6709 count ( 69% of total) 2 .. 2 ===> 1735 count ( 87% of total) 3 .. 3 ===> 462 count ( 92% of total) 4 .. 5 ===> 471 count ( 97% of total) 6 .. 10 ===> 237 count (100% of total) 11 .. 20 ===> 0 count (100% of total) 21 .. 35 ===> 0 count (100% of total) 36 .. 50 ===> 0 count (100% of total) 51 .. 75 ===> 0 count (100% of total) 76 .. 100 ===> 0 count (100% of total) 101 .. 150 ===> 0 count (100% of total) 151 .. 250 ===> 0 count (100% of total) 251 .. 500 ===> 0 count (100% of total) 501 .. 1000 ===> 0 count (100% of total) libraries.pmi: Total uncached SignificantSegments calls: 68938 (Per context) SignificantSegments calls <= 0 ===> 0 count ( 0% of total) 1 .. 1 ===> 24323 count ( 61% of total) 2 .. 2 ===> 8687 count ( 83% of total) 3 .. 3 ===> 3357 count ( 91% of total) 4 .. 5 ===> 2378 count ( 97% of total) 6 .. 10 ===> 789 count ( 99% of total) 11 .. 20 ===> 105 count ( 99% of total) 21 .. 35 ===> 1 count (100% of total) 36 .. 50 ===> 0 count (100% of total) 51 .. 75 ===> 0 count (100% of total) 76 .. 100 ===> 0 count (100% of total) 101 .. 150 ===> 0 count (100% of total) 151 .. 250 ===> 0 count (100% of total) 251 .. 500 ===> 0 count (100% of total) 501 .. 1000 ===> 0 count (100% of total) --- src/coreclr/jit/jitstd/vector.h | 27 +++----- src/coreclr/jit/promotion.cpp | 75 +++++----------------- src/coreclr/jit/promotion.h | 12 ++-- src/coreclr/jit/promotiondecomposition.cpp | 21 ++---- 4 files changed, 39 insertions(+), 96 deletions(-) diff --git a/src/coreclr/jit/jitstd/vector.h b/src/coreclr/jit/jitstd/vector.h index cc0afdc20d27ff..69f67817ecb6e9 100644 --- a/src/coreclr/jit/jitstd/vector.h +++ b/src/coreclr/jit/jitstd/vector.h @@ -305,36 +305,29 @@ template template vector::vector(const vector& vec) : m_allocator(vec.m_allocator) - , m_pArray(NULL) - , m_nSize(0) - , m_nCapacity(0) + , m_pArray(m_allocator.allocate(vec.m_nSize)) + , m_nSize(vec.m_nSize) + , m_nCapacity(vec.m_nSize) { - ensure_capacity(vec.m_nSize); - for (size_type i = 0, j = 0; i < vec.m_nSize; ++i, ++j) + for (size_type i = 0; i < vec.m_nSize; ++i) { - new (m_pArray + i, placement_t()) T((T) vec.m_pArray[j]); + new (m_pArray + i, placement_t()) T((T) vec.m_pArray[i]); } - - m_nSize = vec.m_nSize; } template vector::vector(const vector& vec) : m_allocator(vec.m_allocator) - , m_pArray(NULL) - , m_nSize(0) - , m_nCapacity(0) + , m_pArray(m_allocator.allocate(vec.m_nSize)) + , m_nSize(vec.m_nSize) + , m_nCapacity(vec.m_nSize) { - ensure_capacity(vec.m_nSize); - for (size_type i = 0, j = 0; i < vec.m_nSize; ++i, ++j) + for (size_type i = 0; i < vec.m_nSize; ++i) { - new (m_pArray + i, placement_t()) T(vec.m_pArray[j]); + new (m_pArray + i, placement_t()) T(vec.m_pArray[i]); } - - m_nSize = vec.m_nSize; } - template vector::~vector() { diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 98f4a7949f5c4b..7bc514d240b75c 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1103,7 +1103,7 @@ class LocalsUseVisitor : public GenTreeVisitor JITDUMP("Computing unpromoted remainder for V%02u\n", agg->LclNum); StructSegments unpromotedParts = - Promotion::SignificantSegments(m_compiler, m_compiler->lvaGetDesc(agg->LclNum)->GetLayout()); + m_prom->SignificantSegments(m_compiler->lvaGetDesc(agg->LclNum)->GetLayout()); for (Replacement& rep : reps) { unpromotedParts.Subtract(StructSegments::Segment(rep.Offset, rep.Offset + genTypeSize(rep.AccessType))); @@ -1576,42 +1576,6 @@ bool StructSegments::CoveringSegment(Segment* result) } #ifdef DEBUG -//------------------------------------------------------------------------ -// Check: -// Validate that the data structure is normalized and that it equals a -// specific fixed bit vector. -// -// Parameters: -// vect - The bit vector -// -// Remarks: -// This validates that the internal representation is normalized (i.e. -// all adjacent intervals are merged) and that it contains an index iff -// the specified vector contains that index. -// -void StructSegments::Check(FixedBitVect* vect) -{ - bool first = true; - unsigned last = 0; - for (const Segment& segment : m_segments) - { - assert(first || (last < segment.Start)); - assert(segment.End <= vect->bitVectGetSize()); - - for (unsigned i = last; i < segment.Start; i++) - assert(!vect->bitVectTest(i)); - - for (unsigned i = segment.Start; i < segment.End; i++) - assert(vect->bitVectTest(i)); - - first = false; - last = segment.End; - } - - for (unsigned i = last, size = vect->bitVectGetSize(); i < size; i++) - assert(!vect->bitVectTest(i)); -} - //------------------------------------------------------------------------ // Dump: // Dump a string representation of the segment tree to stdout. @@ -1640,18 +1604,20 @@ void StructSegments::Dump() // for the specified class layout. // // Parameters: -// compiler - Compiler instance // layout - The layout -// bitVectRept - In debug, a bit vector that represents the same segments as the returned segment tree. -// Used for verification purposes. // // Returns: // Segment tree containing all significant parts of the layout. // -StructSegments Promotion::SignificantSegments(Compiler* compiler, - ClassLayout* layout DEBUGARG(FixedBitVect** bitVectRepr)) +StructSegments Promotion::SignificantSegments(ClassLayout* layout) { - COMP_HANDLE compHnd = compiler->info.compCompHnd; + StructSegments* cached; + if ((m_significantSegmentsCache != nullptr) && m_significantSegmentsCache->Lookup(layout, &cached)) + { + return StructSegments(*cached); + } + + COMP_HANDLE compHnd = m_compiler->info.compCompHnd; bool significantPadding; if (layout->IsBlockLayout()) @@ -1683,19 +1649,11 @@ StructSegments Promotion::SignificantSegments(Compiler* compiler, } } - StructSegments segments(compiler->getAllocator(CMK_Promotion)); - - // Validate with "obviously correct" but less scalable fixed bit vector implementation. - INDEBUG(FixedBitVect* segmentBitVect = FixedBitVect::bitVectInit(layout->GetSize(), compiler)); + StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); if (significantPadding) { segments.Add(StructSegments::Segment(0, layout->GetSize())); - -#ifdef DEBUG - for (unsigned i = 0; i < layout->GetSize(); i++) - segmentBitVect->bitVectSet(i); -#endif } else { @@ -1720,19 +1678,16 @@ StructSegments Promotion::SignificantSegments(Compiler* compiler, } segments.Add(StructSegments::Segment(fldOffset, fldOffset + size)); -#ifdef DEBUG - for (unsigned i = 0; i < size; i++) - segmentBitVect->bitVectSet(fldOffset + i); -#endif } } -#ifdef DEBUG - if (bitVectRepr != nullptr) + if (m_significantSegmentsCache == nullptr) { - *bitVectRepr = segmentBitVect; + m_significantSegmentsCache = + new (m_compiler, CMK_Promotion) ClassLayoutStructSegmentsMap(m_compiler->getAllocator(CMK_Promotion)); } -#endif + + m_significantSegmentsCache->Set(layout, new (m_compiler, CMK_Promotion) StructSegments(segments)); return segments; } diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index 2e93b9f6774ef9..dc69eec0b5e3b7 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -79,7 +79,6 @@ class StructSegments bool CoveringSegment(Segment* result); #ifdef DEBUG - void Check(FixedBitVect* vect); void Dump(); #endif }; @@ -104,9 +103,12 @@ struct AggregateInfo Replacement** endReplacement); }; +typedef JitHashTable, class StructSegments*> ClassLayoutStructSegmentsMap; + class Promotion { - Compiler* m_compiler; + Compiler* m_compiler; + ClassLayoutStructSegmentsMap* m_significantSegmentsCache = nullptr; friend class LocalUses; friend class LocalsUseVisitor; @@ -116,8 +118,7 @@ class Promotion friend class DecompositionPlan; friend class StructSegments; - static StructSegments SignificantSegments(Compiler* compiler, - ClassLayout* layout DEBUGARG(FixedBitVect** bitVectRepr = nullptr)); + StructSegments SignificantSegments(ClassLayout* layout); void ExplicitlyZeroInitReplacementLocals(unsigned lclNum, const jitstd::vector& replacements, @@ -252,6 +253,7 @@ class ReplaceVisitor : public GenTreeVisitor { friend class DecompositionPlan; + Promotion* m_promotion; jitstd::vector& m_aggregates; PromotionLiveness* m_liveness; bool m_madeChanges = false; @@ -269,7 +271,7 @@ class ReplaceVisitor : public GenTreeVisitor }; ReplaceVisitor(Promotion* prom, jitstd::vector& aggregates, PromotionLiveness* liveness) - : GenTreeVisitor(prom->m_compiler), m_aggregates(aggregates), m_liveness(liveness) + : GenTreeVisitor(prom->m_compiler), m_promotion(prom), m_aggregates(aggregates), m_liveness(liveness) { } diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index ca7d907b89d9f2..381de6327f5564 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -49,6 +49,7 @@ class DecompositionPlan var_types Type; }; + Promotion* m_promotion; Compiler* m_compiler; ReplaceVisitor* m_replacer; jitstd::vector& m_aggregates; @@ -61,7 +62,7 @@ class DecompositionPlan bool m_hasNonRemainderUseOfStructLocal = false; public: - DecompositionPlan(Compiler* comp, + DecompositionPlan(Promotion* prom, ReplaceVisitor* replacer, jitstd::vector& aggregates, PromotionLiveness* liveness, @@ -69,7 +70,8 @@ class DecompositionPlan GenTree* src, bool dstInvolvesReplacements, bool srcInvolvesReplacements) - : m_compiler(comp) + : m_promotion(prom) + , m_compiler(prom->m_compiler) , m_replacer(replacer) , m_aggregates(aggregates) , m_liveness(liveness) @@ -77,7 +79,7 @@ class DecompositionPlan , m_src(src) , m_dstInvolvesReplacements(dstInvolvesReplacements) , m_srcInvolvesReplacements(srcInvolvesReplacements) - , m_entries(comp->getAllocator(CMK_Promotion)) + , m_entries(prom->m_compiler->getAllocator(CMK_Promotion)) { } @@ -274,25 +276,16 @@ class DecompositionPlan { ClassLayout* dstLayout = m_store->GetLayout(m_compiler); - // Validate with "obviously correct" but less scalable fixed bit vector implementation. - INDEBUG(FixedBitVect * segmentBitVect); - StructSegments segments = Promotion::SignificantSegments(m_compiler, dstLayout DEBUGARG(&segmentBitVect)); + StructSegments segments = m_promotion->SignificantSegments(dstLayout); for (int i = 0; i < m_entries.Height(); i++) { const Entry& entry = m_entries.BottomRef(i); segments.Subtract(StructSegments::Segment(entry.Offset, entry.Offset + genTypeSize(entry.Type))); - -#ifdef DEBUG - for (unsigned i = 0; i < genTypeSize(entry.Type); i++) - segmentBitVect->bitVectClear(entry.Offset + i); -#endif } #ifdef DEBUG - segments.Check(segmentBitVect); - if (m_compiler->verbose) { printf(" Remainder: "); @@ -1084,7 +1077,7 @@ void ReplaceVisitor::HandleStructStore(GenTree** use, GenTree* user) DecompositionStatementList result; EliminateCommasInBlockOp(store, &result); - DecompositionPlan plan(m_compiler, this, m_aggregates, m_liveness, store, src, dstInvolvesReplacements, + DecompositionPlan plan(m_promotion, this, m_aggregates, m_liveness, store, src, dstInvolvesReplacements, srcInvolvesReplacements); if (dstInvolvesReplacements)