Commit
update ollama performance mode (#2874)
timothycarambat authored Dec 18, 2024
1 parent af70342 commit a51de73
Showing 2 changed files with 19 additions and 8 deletions.
12 changes: 8 additions & 4 deletions frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@@ -169,18 +169,22 @@ export default function OllamaLLMOptions({ settings }) {
              className="tooltip !text-xs max-w-xs"
            >
              <p className="text-red-500">
-               <strong>Note:</strong> Only change this setting if you
-               understand its implications on performance and resource usage.
+               <strong>Note:</strong> Be careful with the Maximum mode. It may
+               increase resource usage significantly.
              </p>
              <br />
              <p>
                <strong>Base:</strong> Ollama automatically limits the context
-               to 2048 tokens, reducing VRAM usage. Suitable for most users.
+               to 2048 tokens, keeping resources usage low while maintaining
+               good performance. Suitable for most users and models.
              </p>
              <br />
              <p>
                <strong>Maximum:</strong> Uses the full context window (up to
-               Max Tokens). May increase VRAM usage significantly.
+               Max Tokens). Will result in increased resource usage but allows
+               for larger context conversations. <br />
+               <br />
+               This is not recommended for most users.
              </p>
            </Tooltip>
          </div>
15 changes: 11 additions & 4 deletions server/utils/AiProviders/ollama/index.js
@@ -29,6 +29,13 @@ class OllamaAILLM {
    this.client = new Ollama({ host: this.basePath });
    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
+   this.#log(
+     `OllamaAILLM initialized with\nmodel: ${this.model}\nperf: ${this.performanceMode}\nn_ctx: ${this.promptWindowLimit()}`
+   );
  }

+ #log(text, ...args) {
+   console.log(`\x1b[32m[Ollama]\x1b[0m ${text}`, ...args);
+ }
+
  #appendContext(contextTexts = []) {
@@ -131,11 +138,11 @@ class OllamaAILLM {
      keep_alive: this.keepAlive,
      options: {
        temperature,
-       useMLock: true,
+       use_mlock: true,
        // There are currently only two performance settings so if its not "base" - its max context.
        ...(this.performanceMode === "base"
          ? {}
-         : { numCtx: this.promptWindowLimit() }),
+         : { num_ctx: this.promptWindowLimit() }),
      },
    })
    .then((res) => {
@@ -179,11 +186,11 @@ class OllamaAILLM {
      keep_alive: this.keepAlive,
      options: {
        temperature,
-       useMLock: true,
+       use_mlock: false,
        // There are currently only two performance settings so if its not "base" - its max context.
        ...(this.performanceMode === "base"
          ? {}
-         : { numCtx: this.promptWindowLimit() }),
+         : { num_ctx: this.promptWindowLimit() }),
      },
    }),
    messages,
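
For reference, a minimal sketch of how these options reach Ollama through the official ollama JS client (not AnythingLLM's actual wiring; the host, model name, keep_alive value, and num_ctx figure below are placeholders). The point of the rename is that Ollama expects snake_case option keys such as use_mlock and num_ctx, so the earlier camelCase keys never applied the intended settings:

// Minimal sketch, assuming the official `ollama` npm package and a local Ollama server.
const { Ollama } = require("ollama");

const client = new Ollama({ host: "http://127.0.0.1:11434" }); // placeholder host

async function chatWithPerformanceMode(performanceMode, messages) {
  return client.chat({
    model: "llama3.1", // placeholder model name
    messages,
    stream: false,
    keep_alive: "5m", // placeholder keep-alive
    options: {
      temperature: 0.7,
      use_mlock: true,
      // "base" leaves num_ctx unset, so Ollama keeps its 2048-token default;
      // any other mode raises num_ctx toward the model's full window.
      ...(performanceMode === "base" ? {} : { num_ctx: 8192 }), // placeholder window size
    },
  });
}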

2 comments on commit a51de73

@lewismacnow (Contributor)

@timothycarambat just wondering why mlock should be false?

One impact of this: if the same model is used for both Agent and regular chat on Ollama, the model will unload and re-load because this parameter differs between the two calls.

My vote would be for mlock to be consistent, either off everywhere or on everywhere, to prevent unloading.
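
A rough sketch of the reload concern described above (assumed Ollama behavior, not project code; host and model name are placeholders): use_mlock is applied when the model is loaded, so two requests that disagree on it cannot reuse the same loaded instance.

// Sketch of the reload concern (assumed behavior; placeholders throughout).
const { Ollama } = require("ollama");
const client = new Ollama({ host: "http://127.0.0.1:11434" });

async function demo() {
  const messages = [{ role: "user", content: "hello" }];
  // Regular chat loads the model with use_mlock: true...
  await client.chat({ model: "llama3.1", messages, options: { use_mlock: true } });
  // ...so an agent call with use_mlock: false cannot reuse that instance and
  // the model is unloaded and reloaded between the two requests.
  await client.chat({ model: "llama3.1", messages, options: { use_mlock: false } });
}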

@timothycarambat (Member Author) commented on a51de73 Dec 29, 2024

Correct, this was a leftover mistake during testing - will patch now
4b2bb52
