Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8325553: Parallel: Use per-marker cache for marking stats during Full GC #2707

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/hotspot/share/gc/parallel/psCompactionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ ParCompactionManager::ParCompactionManager() {
_region_stack.initialize();

reset_bitmap_query_cache();

_marking_stats_cache = NULL;
}

void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
Expand Down
34 changes: 33 additions & 1 deletion src/hotspot/share/gc/parallel/psCompactionManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,35 @@ class ParCompactionManager : public CHeapObj<mtGC> {
// Do not implement an equivalent stack_pop. Deal with the
// marking stack and overflow stack directly.

public:
// Per-worker (marker-local) cache of per-region live-word counts, used to
// reduce thread contention on the shared summary data during marking.
class MarkingStatsCache : public CHeapObj<mtGC> {
// Number of direct-mapped slots; a power of two so a region id can be
// mapped to a slot with a cheap mask instead of a modulo.
constexpr static size_t num_entries = 1024;
static_assert(is_power_of_2(num_entries), "inv");
static_assert(num_entries > 0, "inv");

// Mask turning a region id into a slot index (region_id & entry_mask).
constexpr static size_t entry_mask = num_entries - 1;

// One slot: the region currently tracked and the live words accumulated
// for it since the slot was last evicted.
struct CacheEntry {
size_t region_id;
size_t live_words;
};

// Zero-initialized; a slot with live_words == 0 is treated as empty.
CacheEntry entries[num_entries] = {};

// Add live_words to the slot for region_id, first evicting a conflicting
// occupant to the global summary data.
inline void push(size_t region_id, size_t live_words);

public:
// Record a newly marked object of live_words words, attributing its size
// to every region the object spans.
inline void push(oop obj, size_t live_words);

// Flush one slot's accumulated count to the global summary data.
inline void evict(size_t index);

// Flush every non-empty slot to the global summary data.
inline void evict_all();
};

MarkingStatsCache* _marking_stats_cache;

public:
static const size_t InvalidShadow = ~0;
static size_t pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr);
static void push_shadow_region_mt_safe(size_t shadow_region);
Expand Down Expand Up @@ -195,6 +223,10 @@ class ParCompactionManager : public CHeapObj<mtGC> {
virtual void do_void();
};

inline void create_marking_stats_cache();

inline void flush_and_destroy_marking_stats_cache();

// Called after marking.
static void verify_all_marking_stack_empty() NOT_DEBUG_RETURN;

Expand Down
71 changes: 71 additions & 0 deletions src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ inline void ParCompactionManager::mark_and_push(T* p) {
assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap");

if (mark_bitmap()->is_unmarked(obj) && PSParallelCompact::mark_obj(obj)) {
assert(_marking_stats_cache != nullptr, "inv");
_marking_stats_cache->push(obj, obj->size());
push(obj);
}
}
Expand Down Expand Up @@ -176,4 +178,73 @@ inline void ParCompactionManager::follow_contents(oop obj) {
}
}

// Accumulate live_words into the direct-mapped slot for region_id. On a
// conflict, the previous occupant's count is first flushed to the global
// summary data so no live words are ever lost.
inline void ParCompactionManager::MarkingStatsCache::push(size_t region_id, size_t live_words) {
  const size_t index = (region_id & entry_mask);
  CacheEntry& entry = entries[index];
  if (entry.region_id != region_id) {
    // Miss: the slot tracks another region. Flush its count (if any) to
    // the global data, then repurpose the slot for this region.
    if (entry.live_words != 0) {
      evict(index);
    }
    entry.region_id = region_id;
    entry.live_words = 0;
  }
  // Hit, or freshly installed entry: accumulate locally.
  entry.live_words += live_words;
}

// Record a newly marked object of live_words words. If the object spans
// several regions, its size is split: the first region's share goes through
// the cache, while fully-covered middle regions and the tail region are
// written straight into the global summary data as partial-object info.
inline void ParCompactionManager::MarkingStatsCache::push(oop obj, size_t live_words) {
ParallelCompactData& data = PSParallelCompact::summary_data();
const size_t region_size = ParallelCompactData::RegionSize;

HeapWord* addr = cast_from_oop<HeapWord*>(obj);
const size_t start_region_id = data.addr_to_region_idx(addr);
// Region containing the last word of the object.
const size_t end_region_id = data.addr_to_region_idx(addr + live_words - 1);
if (start_region_id == end_region_id) {
// Completely inside this region
push(start_region_id, live_words);
return;
}

// The object straddles at least two regions.
// First region: only the words from the object's start to the region end.
push(start_region_id, region_size - data.region_offset(addr));

// Middle regions; bypass cache. Each is entirely covered by this object,
// so its partial-object size is the full region.
for (size_t i = start_region_id + 1; i < end_region_id; ++i) {
data.region(i)->set_partial_obj_size(region_size);
data.region(i)->set_partial_obj_addr(addr);
}

// Last region; bypass cache. end_offset is the offset of the object's
// final word, so end_offset + 1 of this region's words are live.
const size_t end_offset = data.region_offset(addr + live_words - 1);
data.region(end_region_id)->set_partial_obj_size(end_offset + 1);
data.region(end_region_id)->set_partial_obj_addr(addr);
}

// Publish one slot's accumulated live-word count to the global summary data.
inline void ParCompactionManager::MarkingStatsCache::evict(size_t index) {
  const CacheEntry& entry = entries[index];
  ParallelCompactData& data = PSParallelCompact::summary_data();
  data.region(entry.region_id)->add_live_obj(entry.live_words);
}

inline void ParCompactionManager::MarkingStatsCache::evict_all() {
for (size_t i = 0; i < num_entries; ++i) {
if (entries[i].live_words != 0) {
evict(i);
entries[i].live_words = 0;
}
}
}

// Allocate this worker's private marking-stats cache. The caller must
// guarantee that no cache exists yet for this compaction manager.
inline void ParCompactionManager::create_marking_stats_cache() {
  assert(_marking_stats_cache == nullptr, "precondition");

  _marking_stats_cache = new MarkingStatsCache();
}

inline void ParCompactionManager::flush_and_destroy_marking_stats_cache() {
_marking_stats_cache->evict_all();
delete _marking_stats_cache;
_marking_stats_cache = nullptr;
}

#endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_INLINE_HPP
70 changes: 27 additions & 43 deletions src/hotspot/share/gc/parallel/psParallelCompact.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1792,7 +1792,7 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
bool marked_for_unloading = false;

marking_start.update();
marking_phase(vmthread_cm, maximum_heap_compaction, &_gc_tracer);
marking_phase(&_gc_tracer);

bool max_on_system_gc = UseMaximumCompactionOnSystemGC
&& GCCause::is_user_requested_gc(gc_cause);
Expand Down Expand Up @@ -1966,36 +1966,6 @@ class PCAddThreadRootsMarkingTaskClosure : public ThreadClosure {
}
};

static void mark_from_roots_work(ParallelRootType::Value root_type, uint worker_id) {
assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");

ParCompactionManager* cm =
ParCompactionManager::gc_thread_compaction_manager(worker_id);
PCMarkAndPushClosure mark_and_push_closure(cm);

switch (root_type) {
case ParallelRootType::class_loader_data:
{
CLDToOopClosure cld_closure(&mark_and_push_closure, ClassLoaderData::_claim_strong);
ClassLoaderDataGraph::always_strong_cld_do(&cld_closure);
}
break;

case ParallelRootType::code_cache:
// Do not treat nmethods as strong roots for mark/sweep, since we can unload them.
//ScavengableNMethods::scavengable_nmethods_do(CodeBlobToOopClosure(&mark_and_push_closure));
break;

case ParallelRootType::sentinel:
DEBUG_ONLY(default:) // DEBUG_ONLY hack will create compile error on release builds (-Wswitch) and runtime check on debug builds
fatal("Bad enumeration value: %u", root_type);
break;
}

// Do the real work
cm->follow_marking_stacks();
}

void steal_marking_work(TaskTerminator& terminator, uint worker_id) {
assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");

Expand All @@ -2019,32 +1989,35 @@ void steal_marking_work(TaskTerminator& terminator, uint worker_id) {
class MarkFromRootsTask : public AbstractGangTask {
StrongRootsScope _strong_roots_scope; // needed for Threads::possibly_parallel_threads_do
OopStorageSetStrongParState<false /* concurrent */, false /* is_const */> _oop_storage_set_par_state;
SequentialSubTasksDone _subtasks;
TaskTerminator _terminator;
uint _active_workers;

public:
MarkFromRootsTask(uint active_workers) :
AbstractGangTask("MarkFromRootsTask"),
_strong_roots_scope(active_workers),
_subtasks(ParallelRootType::sentinel),
_terminator(active_workers, ParCompactionManager::oop_task_queues()),
_active_workers(active_workers) {
}
_active_workers(active_workers) {}

virtual void work(uint worker_id) {
for (uint task = 0; _subtasks.try_claim_task(task); /*empty*/ ) {
mark_from_roots_work(static_cast<ParallelRootType::Value>(task), worker_id);
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);
cm->create_marking_stats_cache();
PCMarkAndPushClosure mark_and_push_closure(cm);

{
CLDToOopClosure cld_closure(&mark_and_push_closure, ClassLoaderData::_claim_strong);
ClassLoaderDataGraph::always_strong_cld_do(&cld_closure);

// Do the real work
cm->follow_marking_stacks();
}

PCAddThreadRootsMarkingTaskClosure closure(worker_id);
Threads::possibly_parallel_threads_do(true /*parallel */, &closure);

// Mark from OopStorages
{
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);
PCMarkAndPushClosure closure(cm);
_oop_storage_set_par_state.oops_do(&closure);
_oop_storage_set_par_state.oops_do(&mark_and_push_closure);
// Do the real work
cm->follow_marking_stacks();
}
Expand Down Expand Up @@ -2077,9 +2050,14 @@ class ParallelCompactRefProcProxyTask : public RefProcProxyTask {
}
};

void PSParallelCompact::marking_phase(ParCompactionManager* cm,
bool maximum_heap_compaction,
ParallelOldTracer *gc_tracer) {
static void flush_marking_stats_cache(const uint num_workers) {
for (uint i = 0; i < num_workers; ++i) {
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(i);
cm->flush_and_destroy_marking_stats_cache();
}
}

void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) {
// Recursively traverse all live objects and mark them
GCTraceTime(Info, gc, phases) tm("Marking Phase", &_gc_timer);

Expand Down Expand Up @@ -2110,6 +2088,12 @@ void PSParallelCompact::marking_phase(ParCompactionManager* cm,
pt.print_all_references();
}

{
GCTraceTime(Debug, gc, phases) tm("Flush Marking Stats", &_gc_timer);

flush_marking_stats_cache(active_gc_threads);
}

// This is the point where the entire marking should have completed.
ParCompactionManager::verify_all_marking_stack_empty();

Expand Down
4 changes: 1 addition & 3 deletions src/hotspot/share/gc/parallel/psParallelCompact.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1044,9 +1044,7 @@ class PSParallelCompact : AllStatic {
static void post_compact();

// Mark live objects
static void marking_phase(ParCompactionManager* cm,
bool maximum_heap_compaction,
ParallelOldTracer *gc_tracer);
static void marking_phase(ParallelOldTracer *gc_tracer);

// Compute the dense prefix for the designated space. This is an experimental
// implementation currently not used in production.
Expand Down
1 change: 0 additions & 1 deletion src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ inline void PSParallelCompact::check_new_location(HeapWord* old_addr, HeapWord*
inline bool PSParallelCompact::mark_obj(oop obj) {
const int obj_size = obj->size();
if (mark_bitmap()->mark_obj(obj, obj_size)) {
_summary_data.add_obj(obj, obj_size);
return true;
} else {
return false;
Expand Down