Skip to content

Commit 4269619

Browse files
committed
Update some llama model parameters (check_tensors, use_extra_bufts, no_host)
1 parent ada1975 commit 4269619

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

llama_cpp/llama.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ def __init__(
6969
vocab_only: bool = False,
7070
use_mmap: bool = True,
7171
use_mlock: bool = False,
72+
check_tensors: bool = False,
73+
use_extra_bufts: bool = False,
74+
no_host: bool = False,
7275
kv_overrides: Optional[Dict[str, Union[bool, int, float, str]]] = None,
7376
# Context Params
7477
seed: int = llama_cpp.LLAMA_DEFAULT_SEED,
@@ -156,6 +159,9 @@ def __init__(
156159
vocab_only: Only load the vocabulary no weights.
157160
use_mmap: Use mmap if possible.
158161
use_mlock: Force the system to keep the model in RAM.
162+
check_tensors: validate model tensor data
163+
use_extra_bufts: use extra buffer types (used for weight repacking)
164+
no_host: bypass host buffer allowing extra buffers to be used
159165
kv_overrides: Key-value overrides for the model.
160166
seed: RNG seed, -1 for random
161167
n_ctx: Text context, 0 = from model
@@ -248,6 +254,9 @@ def __init__(
248254
self.model_params.vocab_only = vocab_only
249255
self.model_params.use_mmap = use_mmap if lora_path is None else False
250256
self.model_params.use_mlock = use_mlock
257+
self.model_params.check_tensors = check_tensors
258+
self.model_params.use_extra_bufts = use_extra_bufts
259+
self.model_params.no_host = no_host
251260

252261
# kv_overrides is the original python dict
253262
self.kv_overrides = kv_overrides
@@ -2205,6 +2214,9 @@ def __getstate__(self):
22052214
vocab_only=self.model_params.vocab_only,
22062215
use_mmap=self.model_params.use_mmap,
22072216
use_mlock=self.model_params.use_mlock,
2217+
check_tensors=self.model_params.check_tensors,
2218+
use_extra_bufts=self.model_params.use_extra_bufts,
2219+
no_host=self.model_params.no_host,
22082220
kv_overrides=self.kv_overrides,
22092221
# Context Params
22102222
seed=self._seed,

llama_cpp/llama_cpp.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,7 @@ class llama_model_tensor_buft_override(ctypes.Structure):
757757
# bool use_mlock; // force system to keep model in RAM
758758
# bool check_tensors; // validate model tensor data
759759
# bool use_extra_bufts; // use extra buffer types (used for weight repacking)
760+
# bool no_host; // bypass host buffer allowing extra buffers to be used
760761
# };
761762
class llama_model_params(ctypes.Structure):
762763
"""Parameters for llama_model
@@ -775,7 +776,8 @@ class llama_model_params(ctypes.Structure):
775776
use_mmap (bool): use mmap if possible
776777
use_mlock (bool): force system to keep model in RAM
777778
check_tensors (bool): validate model tensor data
778-
use_extra_bufts (bool): use extra buffer types (used for weight repacking)"""
779+
use_extra_bufts (bool): use extra buffer types (used for weight repacking)
780+
no_host (bool): bypass host buffer allowing extra buffers to be used"""
779781

780782
if TYPE_CHECKING:
781783
devices: CtypesArray[ctypes.c_void_p] # NOTE: unused
@@ -792,6 +794,7 @@ class llama_model_params(ctypes.Structure):
792794
use_mlock: bool
793795
check_tensors: bool
794796
use_extra_bufts: bool
797+
no_host: bool
795798

796799
_fields_ = [
797800
("devices", ctypes.c_void_p), # NOTE: unnused
@@ -808,6 +811,7 @@ class llama_model_params(ctypes.Structure):
808811
("use_mlock", ctypes.c_bool),
809812
("check_tensors", ctypes.c_bool),
810813
("use_extra_bufts", ctypes.c_bool),
814+
("no_host", ctypes.c_bool),
811815
]
812816

813817

0 commit comments

Comments
 (0)