From 7d99b4d2d2b96acce84324885c3318cc3744e8f3 Mon Sep 17 00:00:00 2001
From: horenbergerb
Date: Sat, 22 Apr 2023 17:01:49 -0400
Subject: [PATCH 1/2] add LoRA loading for the llamacpp LLM

---
 langchain/llms/llamacpp.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/langchain/llms/llamacpp.py b/langchain/llms/llamacpp.py
index 0c83c7635da1f..a00f9a07b4b07 100644
--- a/langchain/llms/llamacpp.py
+++ b/langchain/llms/llamacpp.py
@@ -27,6 +27,12 @@ class LlamaCpp(LLM):
     model_path: str
     """The path to the Llama model file."""

+    lora_base: Optional[str] = None
+    """The path to the Llama LoRA base model."""
+
+    lora_path: Optional[str] = None
+    """The path to the Llama LoRA. If None, no LoRa is loaded."""
+
     n_ctx: int = Field(512, alias="n_ctx")
     """Token context window."""

@@ -87,10 +93,15 @@ class LlamaCpp(LLM):
     last_n_tokens_size: Optional[int] = 64
     """The number of tokens to look back when applying the repeat_penalty."""

+    use_mmap: Optional[bool] = True
+    """Whether to keep the model loaded in RAM"""
+
     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that llama-cpp-python library is installed."""
         model_path = values["model_path"]
+        lora_path = values["lora_path"]
+        lora_base = values["lora_base"]
         n_ctx = values["n_ctx"]
         n_parts = values["n_parts"]
         seed = values["seed"]
@@ -100,13 +111,19 @@ def validate_environment(cls, values: Dict) -> Dict:
         use_mlock = values["use_mlock"]
         n_threads = values["n_threads"]
         n_batch = values["n_batch"]
+        use_mmap = values["use_mmap"]
         last_n_tokens_size = values["last_n_tokens_size"]

         try:
             from llama_cpp import Llama

+            if lora_path is not None:
+                use_mmap = False
+
             values["client"] = Llama(
                 model_path=model_path,
+                lora_base=lora_base,
+                lora_path=lora_path,
                 n_ctx=n_ctx,
                 n_parts=n_parts,
                 seed=seed,
@@ -116,6 +133,7 @@ def validate_environment(cls, values: Dict) -> Dict:
                 use_mlock=use_mlock,
                 n_threads=n_threads,
                 n_batch=n_batch,
+                use_mmap=use_mmap,
                 last_n_tokens_size=last_n_tokens_size,
             )
         except ImportError:

From 428f8dc11b5fba16dad54414c9041e11071b0e16 Mon Sep 17 00:00:00 2001
From: horenbergerb
Date: Sun, 23 Apr 2023 11:51:38 -0400
Subject: [PATCH 2/2] remove logic to disable mmap when lora is used

---
 langchain/llms/llamacpp.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/langchain/llms/llamacpp.py b/langchain/llms/llamacpp.py
index a00f9a07b4b07..e3c8b1464350c 100644
--- a/langchain/llms/llamacpp.py
+++ b/langchain/llms/llamacpp.py
@@ -117,9 +117,6 @@ def validate_environment(cls, values: Dict) -> Dict:
         try:
             from llama_cpp import Llama

-            if lora_path is not None:
-                use_mmap = False
-
             values["client"] = Llama(
                 model_path=model_path,
                 lora_base=lora_base,
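
For context, a minimal usage sketch of the fields this patch introduces, assuming the patched LlamaCpp class from langchain.llms and llama-cpp-python installed. The file paths below are placeholders for illustration, not files shipped with either project, and the comments describe llama-cpp-python's usual semantics for these parameters rather than anything guaranteed by this patch.

from langchain.llms import LlamaCpp

# Hypothetical paths; substitute your own GGML base model and LoRA adapter files.
llm = LlamaCpp(
    model_path="./models/ggml-model-q4_0.bin",   # base model weights, same as before this patch
    lora_base="./models/ggml-model-f16.bin",     # optional unquantized base the LoRA applies against
    lora_path="./loras/my-adapter.bin",          # LoRA adapter to load; None skips LoRA entirely
    use_mmap=False,                              # now a pass-through flag; mmap handling is left to llama.cpp
)

print(llm("Q: What does a LoRA adapter modify? A:"))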