@@ -4164,19 +4164,20 @@ def llama_log_set(
 # //
 # // Performance utils
 # //
-# // NOTE: Used by llama.cpp examples, avoid using in third-party apps. Instead, do your own performance measurements.
+# // NOTE: Used by llama.cpp examples/tools, avoid using in third-party apps. Instead, do your own performance measurements.
 # //


 # struct llama_perf_context_data {
-#     double t_start_ms;
-#     double t_load_ms;
-#     double t_p_eval_ms;
-#     double t_eval_ms;
-#
-#     int32_t n_p_eval;
-#     int32_t n_eval;
-#     int32_t n_reused; // number of times a ggml compute graph had been reused
+#     // ms == milliseconds
+#     double t_start_ms;  // absolute start time
+#     double t_load_ms;   // time needed for loading the model
+#     double t_p_eval_ms; // time needed for processing the prompt
+#     double t_eval_ms;   // time needed for generating tokens
+
+#     int32_t n_p_eval; // number of prompt tokens
+#     int32_t n_eval;   // number of generated tokens
+#     int32_t n_reused; // number of times a ggml compute graph had been reused
 # };
 class llama_perf_context_data(ctypes.Structure):
     _fields_ = [
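
Once a context has run, these counters can be read back through the `llama_perf_context` binding defined further down in this module (mirroring llama.cpp's `llama_perf_context()`, which returns the struct above by value). A minimal sketch, assuming `ctx` is an already-initialized `llama_context` pointer; the helper name `report_perf` is ours:

import llama_cpp

def report_perf(ctx: llama_cpp.llama_context_p) -> None:
    # llama_perf_context() returns a llama_perf_context_data by value
    data = llama_cpp.llama_perf_context(ctx)
    print(f"load: {data.t_load_ms:.2f} ms")
    if data.n_p_eval > 0:
        print(f"prompt: {data.n_p_eval} tokens, {data.t_p_eval_ms / data.n_p_eval:.2f} ms/token")
    if data.n_eval > 0:
        print(f"gen:    {data.n_eval} tokens, {data.t_eval_ms / data.n_eval:.2f} ms/token")
    print(f"graph reuse: {data.n_reused}")
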
@@ -4191,9 +4192,8 @@ class llama_perf_context_data(ctypes.Structure):


 # struct llama_perf_sampler_data {
-#     double t_sample_ms;
-#
-#     int32_t n_sample;
+#     double t_sample_ms; // time needed for sampling in ms
+#     int32_t n_sample;   // number of sampled tokens
 # };
 class llama_perf_sampler_data(ctypes.Structure):
     _fields_ = [
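
The sampler counters work the same way through `llama_perf_sampler` (also bound later in this file), except that it takes the sampler chain rather than the context. A sketch under the same assumptions:

import llama_cpp

def sampling_ms_per_token(chain: llama_cpp.llama_sampler_p) -> float:
    # llama_perf_sampler() returns a llama_perf_sampler_data by value
    data = llama_cpp.llama_perf_sampler(chain)
    return data.t_sample_ms / max(data.n_sample, 1)  # guard: no samples yet
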
@@ -4263,6 +4263,17 @@ def llama_perf_sampler_reset(chain: llama_sampler_p, /):
     ...


+# // print a breakdown of per-device memory use via LLAMA_LOG:
+# LLAMA_API void llama_memory_breakdown_print(const struct llama_context * ctx);
+@ctypes_function(
+    "llama_memory_breakdown_print",
+    [llama_context_p_ctypes],
+    None,
+)
+def llama_memory_breakdown_print(ctx: llama_context_p, /):
+    ...
+
+
 # //
 # // training
 # //
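
The new `llama_memory_breakdown_print` takes only the context and returns nothing: the per-device breakdown is emitted through llama.cpp's logging, so it lands wherever `llama_log_set` points (stderr by default). A usage sketch, with the wrapper name `dump_memory_breakdown` being ours:

import llama_cpp

def dump_memory_breakdown(ctx: llama_cpp.llama_context_p) -> None:
    # Emits the per-device breakdown via LLAMA_LOG; there is no return
    # value, so redirect llama_log_set() if you need to capture the text.
    llama_cpp.llama_memory_breakdown_print(ctx)

Since the numbers are only logged, not returned, applications that need them programmatically should do their own measurements, consistent with the NOTE at the top of this section.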