diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.cpp b/src/hotspot/share/gc/parallel/psCompactionManager.cpp index 117817caacc..cc35c6a86f1 100644 --- a/src/hotspot/share/gc/parallel/psCompactionManager.cpp +++ b/src/hotspot/share/gc/parallel/psCompactionManager.cpp @@ -63,6 +63,8 @@ ParCompactionManager::ParCompactionManager() { _region_stack.initialize(); reset_bitmap_query_cache(); + + _marking_stats_cache = nullptr; } void ParCompactionManager::initialize(ParMarkBitMap* mbm) { diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.hpp index a73e898f0b5..2d41fe91b8d 100644 --- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp +++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp @@ -105,7 +105,35 @@ class ParCompactionManager : public CHeapObj { // Do not implement an equivalent stack_pop. Deal with the // marking stack and overflow stack directly. - public: + // To collect per-region live-words in a worker-local cache in order to + // reduce thread contention. 
+ class MarkingStatsCache : public CHeapObj { + constexpr static size_t num_entries = 1024; + static_assert(is_power_of_2(num_entries), "inv"); + static_assert(num_entries > 0, "inv"); + + constexpr static size_t entry_mask = num_entries - 1; + + struct CacheEntry { + size_t region_id; + size_t live_words; + }; + + CacheEntry entries[num_entries] = {}; + + inline void push(size_t region_id, size_t live_words); + + public: + inline void push(oop obj, size_t live_words); + + inline void evict(size_t index); + + inline void evict_all(); + }; + + MarkingStatsCache* _marking_stats_cache; + +public: static const size_t InvalidShadow = ~0; static size_t pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr); static void push_shadow_region_mt_safe(size_t shadow_region); @@ -195,6 +223,10 @@ class ParCompactionManager : public CHeapObj { virtual void do_void(); }; + inline void create_marking_stats_cache(); + + inline void flush_and_destroy_marking_stats_cache(); + // Called after marking. 
static void verify_all_marking_stack_empty() NOT_DEBUG_RETURN; diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp index e40e3689da2..1b199ef0291 100644 --- a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp +++ b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp @@ -107,6 +107,8 @@ inline void ParCompactionManager::mark_and_push(T* p) { assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap"); if (mark_bitmap()->is_unmarked(obj) && PSParallelCompact::mark_obj(obj)) { + assert(_marking_stats_cache != nullptr, "inv"); + _marking_stats_cache->push(obj, obj->size()); push(obj); } } @@ -176,4 +178,73 @@ inline void ParCompactionManager::follow_contents(oop obj) { } } +inline void ParCompactionManager::MarkingStatsCache::push(size_t region_id, size_t live_words) { + size_t index = (region_id & entry_mask); + if (entries[index].region_id == region_id) { + // Hit + entries[index].live_words += live_words; + return; + } + // Miss + if (entries[index].live_words != 0) { + evict(index); + } + entries[index].region_id = region_id; + entries[index].live_words = live_words; +} + +inline void ParCompactionManager::MarkingStatsCache::push(oop obj, size_t live_words) { + ParallelCompactData& data = PSParallelCompact::summary_data(); + const size_t region_size = ParallelCompactData::RegionSize; + + HeapWord* addr = cast_from_oop(obj); + const size_t start_region_id = data.addr_to_region_idx(addr); + const size_t end_region_id = data.addr_to_region_idx(addr + live_words - 1); + if (start_region_id == end_region_id) { + // Completely inside this region + push(start_region_id, live_words); + return; + } + + // First region + push(start_region_id, region_size - data.region_offset(addr)); + + // Middle regions; bypass cache + for (size_t i = start_region_id + 1; i < end_region_id; ++i) { + data.region(i)->set_partial_obj_size(region_size); + 
data.region(i)->set_partial_obj_addr(addr); + } + + // Last region; bypass cache + const size_t end_offset = data.region_offset(addr + live_words - 1); + data.region(end_region_id)->set_partial_obj_size(end_offset + 1); + data.region(end_region_id)->set_partial_obj_addr(addr); +} + +inline void ParCompactionManager::MarkingStatsCache::evict(size_t index) { + ParallelCompactData& data = PSParallelCompact::summary_data(); + // flush to global data + data.region(entries[index].region_id)->add_live_obj(entries[index].live_words); +} + +inline void ParCompactionManager::MarkingStatsCache::evict_all() { + for (size_t i = 0; i < num_entries; ++i) { + if (entries[i].live_words != 0) { + evict(i); + entries[i].live_words = 0; + } + } +} + +inline void ParCompactionManager::create_marking_stats_cache() { + assert(_marking_stats_cache == nullptr, "precondition"); + _marking_stats_cache = new MarkingStatsCache(); +} + +inline void ParCompactionManager::flush_and_destroy_marking_stats_cache() { + _marking_stats_cache->evict_all(); + delete _marking_stats_cache; + _marking_stats_cache = nullptr; +} + #endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_INLINE_HPP diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp index ce96640c2e5..d7be83c9f17 100644 --- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp @@ -1792,7 +1792,7 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { bool marked_for_unloading = false; marking_start.update(); - marking_phase(vmthread_cm, maximum_heap_compaction, &_gc_tracer); + marking_phase(&_gc_tracer); bool max_on_system_gc = UseMaximumCompactionOnSystemGC && GCCause::is_user_requested_gc(gc_cause); @@ -1966,36 +1966,6 @@ class PCAddThreadRootsMarkingTaskClosure : public ThreadClosure { } }; -static void mark_from_roots_work(ParallelRootType::Value root_type, uint worker_id) { - 
assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc"); - - ParCompactionManager* cm = - ParCompactionManager::gc_thread_compaction_manager(worker_id); - PCMarkAndPushClosure mark_and_push_closure(cm); - - switch (root_type) { - case ParallelRootType::class_loader_data: - { - CLDToOopClosure cld_closure(&mark_and_push_closure, ClassLoaderData::_claim_strong); - ClassLoaderDataGraph::always_strong_cld_do(&cld_closure); - } - break; - - case ParallelRootType::code_cache: - // Do not treat nmethods as strong roots for mark/sweep, since we can unload them. - //ScavengableNMethods::scavengable_nmethods_do(CodeBlobToOopClosure(&mark_and_push_closure)); - break; - - case ParallelRootType::sentinel: - DEBUG_ONLY(default:) // DEBUG_ONLY hack will create compile error on release builds (-Wswitch) and runtime check on debug builds - fatal("Bad enumeration value: %u", root_type); - break; - } - - // Do the real work - cm->follow_marking_stacks(); -} - void steal_marking_work(TaskTerminator& terminator, uint worker_id) { assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc"); @@ -2019,7 +1989,6 @@ void steal_marking_work(TaskTerminator& terminator, uint worker_id) { class MarkFromRootsTask : public AbstractGangTask { StrongRootsScope _strong_roots_scope; // needed for Threads::possibly_parallel_threads_do OopStorageSetStrongParState _oop_storage_set_par_state; - SequentialSubTasksDone _subtasks; TaskTerminator _terminator; uint _active_workers; @@ -2027,14 +1996,20 @@ class MarkFromRootsTask : public AbstractGangTask { MarkFromRootsTask(uint active_workers) : AbstractGangTask("MarkFromRootsTask"), _strong_roots_scope(active_workers), - _subtasks(ParallelRootType::sentinel), _terminator(active_workers, ParCompactionManager::oop_task_queues()), - _active_workers(active_workers) { - } + _active_workers(active_workers) {} virtual void work(uint worker_id) { - for (uint task = 0; _subtasks.try_claim_task(task); /*empty*/ ) { - 
mark_from_roots_work(static_cast(task), worker_id); + ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id); + cm->create_marking_stats_cache(); + PCMarkAndPushClosure mark_and_push_closure(cm); + + { + CLDToOopClosure cld_closure(&mark_and_push_closure, ClassLoaderData::_claim_strong); + ClassLoaderDataGraph::always_strong_cld_do(&cld_closure); + + // Do the real work + cm->follow_marking_stacks(); } PCAddThreadRootsMarkingTaskClosure closure(worker_id); @@ -2042,9 +2017,7 @@ class MarkFromRootsTask : public AbstractGangTask { // Mark from OopStorages { - ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id); - PCMarkAndPushClosure closure(cm); - _oop_storage_set_par_state.oops_do(&closure); + _oop_storage_set_par_state.oops_do(&mark_and_push_closure); // Do the real work cm->follow_marking_stacks(); } @@ -2077,9 +2050,14 @@ class ParallelCompactRefProcProxyTask : public RefProcProxyTask { } }; -void PSParallelCompact::marking_phase(ParCompactionManager* cm, - bool maximum_heap_compaction, - ParallelOldTracer *gc_tracer) { +static void flush_marking_stats_cache(const uint num_workers) { + for (uint i = 0; i < num_workers; ++i) { + ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(i); + cm->flush_and_destroy_marking_stats_cache(); + } +} + +void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) { // Recursively traverse all live objects and mark them GCTraceTime(Info, gc, phases) tm("Marking Phase", &_gc_timer); @@ -2110,6 +2088,12 @@ void PSParallelCompact::marking_phase(ParCompactionManager* cm, pt.print_all_references(); } + { + GCTraceTime(Debug, gc, phases) tm("Flush Marking Stats", &_gc_timer); + + flush_marking_stats_cache(active_gc_threads); + } + // This is the point where the entire marking should have completed. 
ParCompactionManager::verify_all_marking_stack_empty(); diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.hpp index c4319a0806c..11e387ae2d5 100644 --- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp @@ -1044,9 +1044,7 @@ class PSParallelCompact : AllStatic { static void post_compact(); // Mark live objects - static void marking_phase(ParCompactionManager* cm, - bool maximum_heap_compaction, - ParallelOldTracer *gc_tracer); + static void marking_phase(ParallelOldTracer *gc_tracer); // Compute the dense prefix for the designated space. This is an experimental // implementation currently not used in production. diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp index f353b44d627..8db1d32b41a 100644 --- a/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp @@ -99,7 +99,6 @@ inline void PSParallelCompact::check_new_location(HeapWord* old_addr, HeapWord* inline bool PSParallelCompact::mark_obj(oop obj) { const int obj_size = obj->size(); if (mark_bitmap()->mark_obj(obj, obj_size)) { - _summary_data.add_obj(obj, obj_size); return true; } else { return false;