Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Count invalidations in profiling reports #49535

Merged
merged 9 commits into from
May 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/cgmemmgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,8 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
code_allocated = true;
#endif
total_allocated += Size;
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size);
if (exe_alloc)
return (uint8_t*)exe_alloc->alloc(Size, Alignment);
return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
Expand All @@ -879,6 +881,8 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
bool isReadOnly)
{
total_allocated += Size;
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size);
if (!isReadOnly)
return (uint8_t*)rw_alloc.alloc(Size, Alignment);
if (ro_alloc)
Expand Down
26 changes: 14 additions & 12 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1189,10 +1189,16 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
}
}

static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT
{
jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc);
return live_bytes += inc;
}

void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
{
combine_thread_gc_counts(&gc_num);
live_bytes += (gc_num.deferred_alloc + gc_num.allocd);
inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
gc_num.allocd = 0;
gc_num.deferred_alloc = 0;
reset_thread_gc_counts();
Expand Down Expand Up @@ -3094,9 +3100,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
{
combine_thread_gc_counts(&gc_num);

#ifdef USE_TRACY
TracyCPlot("Heap size", live_bytes + gc_num.allocd);
#endif
// We separate the update of the graph from the update of live_bytes here
// so that the sweep shows a downward trend in memory usage.
jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, gc_num.allocd);

jl_gc_markqueue_t *mq = &ptls->mark_queue;

Expand Down Expand Up @@ -3342,7 +3348,10 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
}

last_live_bytes = live_bytes;
// Can't call inc_live_bytes here because we already added allocd
// to the graph earlier
live_bytes += -gc_num.freed + gc_num.allocd;
jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed);

if (collection == JL_GC_AUTO) {
//If we aren't freeing enough or are seeing lots and lots of pointers let it increase faster
Expand Down Expand Up @@ -3486,10 +3495,6 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
SetLastError(last_error);
#endif
errno = last_errno;

#ifdef USE_TRACY
TracyCPlot("Heap size", jl_gc_live_bytes());
#endif
}

void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
Expand Down Expand Up @@ -3593,9 +3598,6 @@ void jl_gc_init(void)
if (jl_options.heap_size_hint)
jl_gc_set_max_memory(jl_options.heap_size_hint);

#ifdef USE_TRACY
TracyCPlotConfig("Heap size", TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
#endif
t_start = jl_hrtime();
}

Expand Down Expand Up @@ -3781,7 +3783,7 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds

if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) {
ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
live_bytes += allocsz - oldsz;
inc_live_bytes(allocsz - oldsz);
}
else if (allocsz < oldsz)
jl_atomic_store_relaxed(&ptls->gc_num.freed,
Expand Down
1 change: 1 addition & 0 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1642,6 +1642,7 @@ static void do_nothing_with_codeinst(jl_code_instance_t *ci) {}
// recursively invalidate cached methods that had an edge to a replaced method
static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced, size_t max_world, int depth)
{
jl_timing_counter_inc(JL_TIMING_COUNTER_Invalidations, 1);
if (_jl_debug_method_invalidation) {
jl_value_t *boxeddepth = NULL;
JL_GC_PUSH1(&boxeddepth);
Expand Down
17 changes: 17 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -851,10 +851,27 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
jitlink::PassConfiguration &Config) override {
Config.PostAllocationPasses.push_back([this](jitlink::LinkGraph &G) {
size_t graph_size = 0;
size_t code_size = 0;
size_t data_size = 0;
Comment on lines +854 to +855
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On aarch64 darwin I'm getting

/Users/mose/repo/julia/src/jitlayers.cpp:858:20: warning: variable 'data_size' set but not used [-Wunused-but-set-variable]
            size_t data_size = 0;
                   ^
/Users/mose/repo/julia/src/jitlayers.cpp:857:20: warning: variable 'code_size' set but not used [-Wunused-but-set-variable]
            size_t code_size = 0;
                   ^

(and currently we aren't using -Werror on CI on macOS)

for (auto block : G.blocks()) {
graph_size += block->getSize();
}
for (auto &section : G.sections()) {
size_t secsize = 0;
for (auto block : section.blocks()) {
secsize += block->getSize();
}
if ((section.getMemProt() & jitlink::MemProt::Exec) == jitlink::MemProt::None) {
data_size += secsize;
} else {
code_size += secsize;
}
graph_size += secsize;
}
this->total_size.fetch_add(graph_size, std::memory_order_relaxed);
jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size);
jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size);
jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, data_size);
return Error::success();
});
}
Expand Down
1 change: 1 addition & 0 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -3277,6 +3277,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
arraylist_push(&jl_linkage_blobs, (void*)image_base);
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg + sizeof(uintptr_t)));
arraylist_push(&jl_image_relocs, (void*)relocs_base);
jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t));
rebuild_image_blob_tree();

// jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1);
Expand Down
33 changes: 33 additions & 0 deletions src/timing.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,38 @@ const char *jl_timing_names[(int)JL_TIMING_LAST] =
#undef X
};

JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST];

#ifdef USE_ITTAPI
JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST];
#endif

void jl_print_timings(void)
{
#ifdef USE_TIMING_COUNTS
uint64_t total_time = cycleclock() - t0;
uint64_t root_time = total_time;
for (int i = 0; i < JL_TIMING_LAST; i++) {
root_time -= jl_timing_counts[i];
}
jl_timing_counts[0] = root_time;
fprintf(stderr, "\nJULIA TIMINGS\n");
for (int i = 0; i < JL_TIMING_LAST; i++) {
if (jl_timing_counts[i] != 0)
fprintf(stderr, "%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i],
100 * (((double)jl_timing_counts[i]) / total_time), jl_timing_counts[i]);
}

fprintf(stderr, "\nJULIA COUNTERS\n");
#define X(name) do { \
int64_t val = (int64_t) jl_atomic_load_relaxed(&jl_timing_counters[(int)JL_TIMING_COUNTER_##name].basic_counter); \
if (val != 0) \
fprintf(stderr, "%-25s : %" PRIi64 "\n", #name, val); \
} while (0);

JL_TIMING_COUNTERS
#undef X
#endif
}

void jl_init_timing(void)
Expand All @@ -79,6 +94,24 @@ void jl_init_timing(void)
#define X(name) jl_timing_ittapi_events[i++] = __itt_event_create(#name, strlen(#name));
JL_TIMING_EVENTS
#undef X
i = 0;
#define X(name) jl_timing_counters[i++].ittapi_counter = __itt_counter_create(#name, "julia.runtime");
JL_TIMING_COUNTERS
#undef X
#endif
#ifdef USE_TRACY
i = 0;
#define X(counter_name) jl_timing_counters[i].tracy_counter = (jl_tracy_counter_t){0, #counter_name}; \
TracyCPlotConfig(jl_timing_counters[i++].tracy_counter.name, TracyPlotFormatNumber, /* rectilinear */ 1, /* fill */ 1, /* color */ 0);
JL_TIMING_COUNTERS
#undef X
// We reference these by enum indexing and then asking for the name, since that allows the compiler
// to catch name mismatches.
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_HeapSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITCodeSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITDataSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_ImageSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
#endif
}

Expand Down
81 changes: 81 additions & 0 deletions src/timing.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ static inline const char *gnu_basename(const char *path)
return base ? base+1 : path;
}

#ifdef USE_TRACY
typedef struct {
_Atomic(int64_t) val;
char* name;
} jl_tracy_counter_t;
#endif

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -78,6 +85,9 @@ extern uint32_t jl_timing_print_limit;
#define jl_timing_block_exit_task(ct, ptls) ((jl_timing_block_t *)NULL)
#define jl_pop_timing_block(blk)

#define jl_timing_counter_inc(counter, value)
#define jl_timing_counter_dec(counter, value)

#define jl_profile_lock_init(lock, name)
#define jl_profile_lock_start_wait(lock)
#define jl_profile_lock_acquired(lock)
Expand Down Expand Up @@ -181,6 +191,15 @@ void jl_timing_puts(jl_timing_block_t *cur_block, const char *str);
X(NATIVE_Create) \


#define JL_TIMING_COUNTERS \
X(Invalidations) \
X(HeapSize) \
X(JITSize) \
X(JITCodeSize) \
X(JITDataSize) \
X(ImageSize) \


enum jl_timing_owners {
#define X(name) JL_TIMING_ ## name,
JL_TIMING_OWNERS
Expand All @@ -195,6 +214,13 @@ enum jl_timing_events {
JL_TIMING_EVENT_LAST
};

enum jl_timing_counter_types {
#define X(name) JL_TIMING_COUNTER_ ## name,
JL_TIMING_COUNTERS
#undef X
JL_TIMING_COUNTER_LAST
};

/**
* Timing back-ends differ in terms of whether they support nested
* and asynchronous events.
Expand Down Expand Up @@ -404,6 +430,61 @@ struct jl_timing_suspend_cpp_t {
_jl_timing_suspend_ctor(&__timing_suspend, #subsystem, ct)
#endif

// Counting
#ifdef USE_ITTAPI
#define _ITTAPI_COUNTER_MEMBER __itt_counter ittapi_counter;
#else
#define _ITTAPI_COUNTER_MEMBER
#endif

#ifdef USE_TRACY
# define _TRACY_COUNTER_MEMBER jl_tracy_counter_t tracy_counter;
# else
# define _TRACY_COUNTER_MEMBER
#endif

#ifdef USE_TIMING_COUNTS
#define _COUNTS_MEMBER _Atomic(uint64_t) basic_counter;
#else
#define _COUNTS_MEMBER
#endif

typedef struct {
_ITTAPI_COUNTER_MEMBER
_TRACY_COUNTER_MEMBER
_COUNTS_MEMBER
} jl_timing_counter_t;

JL_DLLEXPORT extern jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST];

static inline void jl_timing_counter_inc(int counter, uint64_t val) JL_NOTSAFEPOINT {
#ifdef USE_ITTAPI
__itt_counter_inc_delta(jl_timing_counters[counter].ittapi_counter, val);
#endif
#ifdef USE_TRACY
jl_tracy_counter_t *tracy_counter = &jl_timing_counters[counter].tracy_counter;
uint64_t oldval = jl_atomic_fetch_add_relaxed(&tracy_counter->val, val);
TracyCPlotI(tracy_counter->name, oldval + val);
#endif
#ifdef USE_TIMING_COUNTS
jl_atomic_fetch_add_relaxed(&jl_timing_counters[counter].basic_counter, val);
#endif
}

static inline void jl_timing_counter_dec(int counter, uint64_t val) JL_NOTSAFEPOINT {
#ifdef USE_ITTAPI
__itt_counter_dec_delta(jl_timing_counters[counter].ittapi_counter, val);
#endif
#ifdef USE_TRACY
jl_tracy_counter_t *tracy_counter = &jl_timing_counters[counter].tracy_counter;
uint64_t oldval = jl_atomic_fetch_add_relaxed(&tracy_counter->val, -val);
TracyCPlotI(tracy_counter->name, oldval - val);
#endif
#ifdef USE_TIMING_COUNTS
jl_atomic_fetch_add_relaxed(&jl_timing_counters[counter].basic_counter, -(int64_t)val);
#endif
}

// Locking profiling
static inline void jl_profile_lock_init(jl_mutex_t *lock, const char *name) {
#ifdef USE_ITTAPI
Expand Down