Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Garbage profiler #42658

Closed
wants to merge 13 commits into from
Closed
18 changes: 18 additions & 0 deletions base/gcutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,24 @@ Control whether garbage collection is enabled using a boolean argument (`true` f
"""
enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0

# TODO: maybe merge profiling & logging funcions?

function start_garbage_profile(io)
ccall(:jl_start_garbage_profile, Cvoid, (Ptr{Cvoid},), io.handle)
end

function stop_garbage_profile()
ccall(:jl_stop_garbage_profile, Cvoid, ())
end

function start_logging(io)
ccall(:jl_start_logging, Cvoid, (Ptr{Cvoid},), io.handle)
end

function stop_logging()
ccall(:jl_stop_logging, Cvoid, ())
end

"""
GC.enable_finalizers(on::Bool)

Expand Down
4 changes: 2 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ RUNTIME_SRCS := \
dlload sys init task array dump staticdata toplevel jl_uv datatype \
simplevector runtime_intrinsics precompile \
threading partr stackwalk gc gc-debug gc-pages gc-stacks method \
jlapi signal-handling safepoint timing subtype \
gc-garbage-profiler jlapi signal-handling safepoint timing subtype \
crc32c APInt-C processor ircode opaque_closure codegen-stubs
SRCS := jloptions runtime_ccall rtutils
ifeq ($(OS),WINNT)
Expand Down Expand Up @@ -284,7 +284,7 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)
$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h)
$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-garbage-profiler.h
$(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
Expand Down
190 changes: 190 additions & 0 deletions src/gc-garbage-profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "gc-garbage-profiler.h"

#include "julia_internal.h"
#include "gc.h"
#include "timefuncs.h"

#include <string>
#include <algorithm>
#include <vector>
#include <unordered_map>

using std::string;
using std::unordered_map;
using std::vector;

// == utility functions ==

void print_str_escape_csv(ios_t *stream, const std::string &s) {
ios_printf(stream, "\"");
for (auto c = s.cbegin(); c != s.cend(); c++) {
switch (*c) {
case '"': ios_printf(stream, "\"\""); break;
default:
ios_printf(stream, "%c", *c);
}
}
ios_printf(stream, "\"");
}

string _type_as_string(jl_datatype_t *type) {
if ((uintptr_t)type < 4096U) {
return "<corrupt>";
} else if (type == (jl_datatype_t*)jl_buff_tag) {
return "<buffer>";
} else if (type == (jl_datatype_t*)jl_malloc_tag) {
return "<malloc>";
} else if (type == jl_string_type) {
return "<string>";
} else if (type == jl_symbol_type) {
return "<symbol>";
} else if (jl_is_datatype(type)) {
ios_t str_;
ios_mem(&str_, 10024);
JL_STREAM* str = (JL_STREAM*)&str_;

jl_static_show(str, (jl_value_t*)type);

string type_str = string((const char*)str_.buf, str_.size);
ios_close(&str_);

return type_str;
} else {
return "<missing>";
}
}

// == global variables manipulated by callbacks ==
// TODO: wrap these up into a struct

ios_t *g_gc_log_stream = nullptr;

ios_t *g_garbage_profile_stream = nullptr;
int gc_epoch = 0;
// for each type, the index in mem_event where the type
// event appears.
unordered_map<size_t, string> g_type_name_by_address;
unordered_map<size_t, size_t> g_type_address_by_value_address;
unordered_map<size_t, size_t> g_frees_by_type_address;

// == exported interface ==

JL_DLLEXPORT void jl_start_logging(ios_t *stream) {
g_gc_log_stream = stream;
ios_printf(g_gc_log_stream, "gc_epoch,collection_type,start_timestamp_ms,duration_ms,bytes_freed\n");
}

JL_DLLEXPORT void jl_stop_logging() {
g_gc_log_stream = nullptr;
}

JL_DLLEXPORT void jl_start_garbage_profile(ios_t *stream) {
g_garbage_profile_stream = stream;
ios_printf(g_garbage_profile_stream, "gc_epoch,type,num_freed\n");
}

JL_DLLEXPORT void jl_stop_garbage_profile() {
// TODO: flush file?
g_garbage_profile_stream = nullptr;
g_type_name_by_address.clear();
g_type_address_by_value_address.clear();
g_frees_by_type_address.clear();
}

// == callbacks called into by the outside ==

void _report_gc_started() {
g_frees_by_type_address.clear();
}

bool pair_cmp(std::pair<size_t, size_t> a, std::pair<size_t, size_t> b) {
return a.second > b.second;
}

uint64_t get_timestamp_ms() {
jl_timeval jtv;
jl_gettimeofday(&jtv);
return jtv.sec*1e6 + jtv.usec/1e3;
}

// TODO: figure out how to pass all of these in as a struct
void _report_gc_finished(
jl_gc_collection_t collection, uint64_t pause, uint64_t freed, uint64_t allocd
) {
if (g_gc_log_stream != nullptr) {
auto end_timestamp_ms = get_timestamp_ms();
auto duration_ms = (uint64_t)(pause/1e6);

auto mode = collection == JL_GC_INCREMENTAL
? "INCR"
: (collection == JL_GC_FULL
? "FULL"
: "AUTO");

ios_printf(
g_gc_log_stream,
"%d,%s,%lld,%d,%d\n",
gc_epoch, mode, end_timestamp_ms - duration_ms, duration_ms, freed
);
ios_flush(g_gc_log_stream);
}

// sort frees
vector<std::pair<size_t, size_t>> pairs;
for (auto const &pair : g_frees_by_type_address) {
pairs.push_back(pair);
}
std::sort(pairs.begin(), pairs.end(), pair_cmp);

// print frees
if (g_garbage_profile_stream != nullptr) {
for (auto const &pair : pairs) {
auto type_str = g_type_name_by_address.find(pair.first);
if (type_str != g_type_name_by_address.end()) {
ios_printf(g_garbage_profile_stream, "%d,", gc_epoch);
print_str_escape_csv(g_garbage_profile_stream, type_str->second);
ios_printf(g_garbage_profile_stream, ",%d\n", pair.second);
} else {
jl_printf(JL_STDERR, "couldn't find type %lu\n", pair.first);
}
}
ios_flush(g_garbage_profile_stream);
}
gc_epoch++;
}

void register_type_string(jl_datatype_t *type) {
auto id = g_type_name_by_address.find((size_t)type);
if (id != g_type_name_by_address.end()) {
return;
}

string type_str = _type_as_string(type);
g_type_name_by_address[(size_t)type] = type_str;
}

void _record_allocated_value(jl_value_t *val) {
auto type = (jl_datatype_t*)jl_typeof(val);
register_type_string(type);

g_type_address_by_value_address[(size_t)val] = (size_t)type;
}

void _record_freed_value(jl_taggedvalue_t *tagged_val) {
jl_value_t *val = jl_valueof(tagged_val);

auto value_address = (size_t)val;
auto type_address = g_type_address_by_value_address.find(value_address);
if (type_address == g_type_address_by_value_address.end()) {
return; // TODO: warn
}
auto frees = g_frees_by_type_address.find(type_address->second);

if (frees == g_frees_by_type_address.end()) {
g_frees_by_type_address[type_address->second] = 1;
} else {
g_frees_by_type_address[type_address->second] = frees->second + 1;
}
}
48 changes: 48 additions & 0 deletions src/gc-garbage-profiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#ifndef JL_GC_GARBAGE_PROFILER_H
#define JL_GC_GARBAGE_PROFILER_H

#include "julia.h"
#include "ios.h"

#ifdef __cplusplus
extern "C" {
#endif

JL_DLLEXPORT void jl_start_logging(ios_t *stream);
JL_DLLEXPORT void jl_stop_logging(void);

JL_DLLEXPORT void jl_start_garbage_profile(ios_t *stream);
JL_DLLEXPORT void jl_stop_garbage_profile(void);

void _report_gc_started(void);
void _report_gc_finished(jl_gc_collection_t collection, uint64_t pause, uint64_t freed, uint64_t allocd);
void _record_allocated_value(jl_value_t *val);
void _record_freed_value(jl_taggedvalue_t *tagged_val);

// ---------------------------------------------------------------------
// functions to call from GC when garbage profiling is enabled
// ---------------------------------------------------------------------

extern ios_t *g_garbage_profile_stream; // TODO: replace w/ bool?
extern ios_t *g_gc_log_stream;

static inline void record_allocated_value(jl_value_t *val) {
if (__unlikely(g_garbage_profile_stream != 0)) {
_record_allocated_value(val);
}
}

static inline void record_freed_value(jl_taggedvalue_t *tagged_val) {
if (__unlikely(g_garbage_profile_stream != 0)) {
_record_freed_value(tagged_val);
}
}

#ifdef __cplusplus
}
#endif


#endif // JL_GC_GARBAGE_PROFILER_H
6 changes: 6 additions & 0 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
while ((char*)v <= lim) {
int bits = v->bits.gc;
if (!gc_marked(bits)) {
record_freed_value(v);
*pfl = v;
pfl = &v->next;
pfl_begin = pfl_begin ? pfl_begin : pfl;
Expand Down Expand Up @@ -3009,6 +3010,8 @@ size_t jl_maxrss(void);
// Only one thread should be running in this function
static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
{
_report_gc_started();

combine_thread_gc_counts(&gc_num);

jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache;
Expand Down Expand Up @@ -3195,6 +3198,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)

uint64_t gc_end_t = jl_hrtime();
uint64_t pause = gc_end_t - t0;

_report_gc_finished(collection, pause, gc_num.freed, gc_num.allocd);

gc_final_pause_end(t0, gc_end_t);
gc_time_sweep_pause(gc_end_t, actual_allocd, live_bytes,
estimate_freed, sweep_full);
Expand Down
1 change: 1 addition & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#endif
#endif
#include "julia_assert.h"
#include "gc-garbage-profiler.h"

#ifdef __cplusplus
extern "C" {
Expand Down
4 changes: 4 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "options.h"
#include "julia_locks.h"
#include "gc-garbage-profiler.h"
#include <uv.h>
#if !defined(_WIN32)
#include <unistd.h>
Expand Down Expand Up @@ -364,6 +365,9 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
v = jl_gc_big_alloc(ptls, allocsz);
}
jl_set_typeof(v, ty);

record_allocated_value(v);

return v;
}

Expand Down