Skip to content

Commit 5c5f898

Browse files
committed
Sync llama: print memory breakdown on exit
1 parent 89ea42f commit 5c5f898

File tree

1 file changed

+23
-12
lines changed

1 file changed

+23
-12
lines changed

llama_cpp/llama_cpp.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4164,19 +4164,20 @@ def llama_log_set(
41644164
# //
41654165
# // Performance utils
41664166
# //
4167-
# // NOTE: Used by llama.cpp examples, avoid using in third-party apps. Instead, do your own performance measurements.
4167+
# // NOTE: Used by llama.cpp examples/tools, avoid using in third-party apps. Instead, do your own performance measurements.
41684168
# //
41694169

41704170

41714171
# struct llama_perf_context_data {
4172-
# double t_start_ms;
4173-
# double t_load_ms;
4174-
# double t_p_eval_ms;
4175-
# double t_eval_ms;
4176-
#
4177-
# int32_t n_p_eval;
4178-
# int32_t n_eval;
4179-
# int32_t n_reused; // number of times a ggml compute graph had been reused
4172+
# // ms == milliseconds
4173+
# double t_start_ms; // absolute start time
4174+
# double t_load_ms; // time needed for loading the model
4175+
# double t_p_eval_ms; // time needed for processing the prompt
4176+
# double t_eval_ms; // time needed for generating tokens
4177+
4178+
# int32_t n_p_eval; // number of prompt tokens
4179+
# int32_t n_eval; // number of generated tokens
4180+
# int32_t n_reused; // number of times a ggml compute graph had been reused
41804181
# };
41814182
class llama_perf_context_data(ctypes.Structure):
41824183
_fields_ = [
@@ -4191,9 +4192,8 @@ class llama_perf_context_data(ctypes.Structure):
41914192

41924193

41934194
# struct llama_perf_sampler_data {
4194-
# double t_sample_ms;
4195-
#
4196-
# int32_t n_sample;
4195+
# double t_sample_ms; // time needed for sampling in ms
4196+
# int32_t n_sample; // number of sampled tokens
41974197
# };
41984198
class llama_perf_sampler_data(ctypes.Structure):
41994199
_fields_ = [
@@ -4263,6 +4263,17 @@ def llama_perf_sampler_reset(chain: llama_sampler_p, /):
42634263
...
42644264

42654265

# // print a breakdown of per-device memory use via LLAMA_LOG:
# LLAMA_API void llama_memory_breakdown_print(const struct llama_context * ctx);
@ctypes_function(
    "llama_memory_breakdown_print",
    [llama_context_p_ctypes],
    None,
)
def llama_memory_breakdown_print(ctx: llama_context_p, /):
    """Log a per-device breakdown of memory use for *ctx*.

    Thin binding over the C function ``llama_memory_breakdown_print``;
    the report is emitted through llama.cpp's LLAMA_LOG facility and
    nothing is returned.
    """
    ...
42664277
# //
42674278
# // training
42684279
# //

0 commit comments

Comments (0)