Skip to content

Commit 4269619

Browse files
committed
Update some llama model parameters (check_tensors, use_extra_bufts, no_host)
1 parent ada1975 commit 4269619

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

llama_cpp/llama.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ def __init__(
6969
vocab_only: bool = False,
7070
use_mmap: bool = True,
7171
use_mlock: bool = False,
72+
check_tensors: bool = False,
73+
use_extra_bufts: bool = False,
74+
no_host: bool = False,
7275
kv_overrides: Optional[Dict[str, Union[bool, int, float, str]]] = None,
7376
# Context Params
7477
seed: int = llama_cpp.LLAMA_DEFAULT_SEED,
@@ -156,6 +159,9 @@ def __init__(
156159
vocab_only: Only load the vocabulary no weights.
157160
use_mmap: Use mmap if possible.
158161
use_mlock: Force the system to keep the model in RAM.
162+
check_tensors: validate model tensor data
163+
use_extra_bufts: use extra buffer types (used for weight repacking)
164+
no_host: bypass host buffer allowing extra buffers to be used
159165
kv_overrides: Key-value overrides for the model.
160166
seed: RNG seed, -1 for random
161167
n_ctx: Text context, 0 = from model
@@ -248,6 +254,9 @@ def __init__(
248254
self.model_params.vocab_only = vocab_only
249255
self.model_params.use_mmap = use_mmap if lora_path is None else False
250256
self.model_params.use_mlock = use_mlock
257+
self.model_params.check_tensors = check_tensors
258+
self.model_params.use_extra_bufts = use_extra_bufts
259+
self.model_params.no_host = no_host
251260

252261
# kv_overrides is the original python dict
253262
self.kv_overrides = kv_overrides
@@ -2205,6 +2214,9 @@ def __getstate__(self):
22052214
vocab_only=self.model_params.vocab_only,
22062215
use_mmap=self.model_params.use_mmap,
22072216
use_mlock=self.model_params.use_mlock,
2217+
check_tensors=self.model_params.check_tensors,
2218+
use_extra_bufts=self.model_params.use_extra_bufts,
2219+
no_host=self.model_params.no_host,
22082220
kv_overrides=self.kv_overrides,
22092221
# Context Params
22102222
seed=self._seed,

llama_cpp/llama_cpp.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,7 @@ class llama_model_tensor_buft_override(ctypes.Structure):
757757
# bool use_mlock; // force system to keep model in RAM
758758
# bool check_tensors; // validate model tensor data
759759
# bool use_extra_bufts; // use extra buffer types (used for weight repacking)
760+
# bool no_host; // bypass host buffer allowing extra buffers to be used
760761
# };
761762
class llama_model_params(ctypes.Structure):
762763
"""Parameters for llama_model
@@ -775,7 +776,8 @@ class llama_model_params(ctypes.Structure):
775776
use_mmap (bool): use mmap if possible
776777
use_mlock (bool): force system to keep model in RAM
777778
check_tensors (bool): validate model tensor data
778-
use_extra_bufts (bool): use extra buffer types (used for weight repacking)"""
779+
use_extra_bufts (bool): use extra buffer types (used for weight repacking)
780+
no_host (bool): bypass host buffer allowing extra buffers to be used"""
779781

780782
if TYPE_CHECKING:
781783
devices: CtypesArray[ctypes.c_void_p] # NOTE: unused
@@ -792,6 +794,7 @@ class llama_model_params(ctypes.Structure):
792794
use_mlock: bool
793795
check_tensors: bool
794796
use_extra_bufts: bool
797+
no_host: bool
795798

796799
_fields_ = [
797800
("devices", ctypes.c_void_p), # NOTE: unnused
@@ -808,6 +811,7 @@ class llama_model_params(ctypes.Structure):
808811
("use_mlock", ctypes.c_bool),
809812
("check_tensors", ctypes.c_bool),
810813
("use_extra_bufts", ctypes.c_bool),
814+
("no_host", ctypes.c_bool),
811815
]
812816

813817

0 commit comments

Comments
 (0)