feat: add support for some llama.cpp params on LlamaModel (#5)
Showing 7 changed files with 163 additions and 43 deletions.
@@ -1,24 +1,62 @@
-import {LlamaContext} from "./LlamaContext.js";
-import {LLAMAContext, llamaCppNode, LLAMAModel} from "./LlamaBins.js";
+import {llamaCppNode, LLAMAModel} from "./LlamaBins.js";
 
 
 export class LlamaModel {
-    private readonly _model: LLAMAModel;
-    private readonly _prependBos: boolean;
+    /** @internal */
+    public readonly _model: LLAMAModel;
 
-    public constructor({modelPath, prependBos = true}: { modelPath: string, prependBos?: boolean }) {
-        this._model = new LLAMAModel(modelPath);
-        this._prependBos = prependBos;
-    }
-
-    public createContext() {
-        return new LlamaContext({
-            ctx: new LLAMAContext(this._model),
-            prependBos: this._prependBos
-        });
+    /**
+     * options source:
+     * https://github.com/ggerganov/llama.cpp/blob/b5ffb2849d23afe73647f68eec7b68187af09be6/llama.h#L102 (struct llama_context_params)
+     * @param {object} options
+     * @param {string} options.modelPath - path to the model on the filesystem
+     * @param {number | null} [options.seed] - If null, a random seed will be used
+     * @param {number} [options.contextSize] - text context size
+     * @param {number} [options.batchSize] - prompt processing batch size
+     * @param {number} [options.gpuCores] - number of layers to store in VRAM
+     * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
+     * @param {boolean} [options.f16Kv] - use fp16 for KV cache
+     * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
+     * @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
+     * @param {boolean} [options.useMmap] - use mmap if possible
+     * @param {boolean} [options.useMlock] - force system to keep model in RAM
+     * @param {boolean} [options.embedding] - embedding mode only
+     */
+    public constructor({
+        modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuCores,
+        lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding
+    }: {
+        modelPath: string, seed?: number | null, contextSize?: number, batchSize?: number, gpuCores?: number,
+        lowVram?: boolean, f16Kv?: boolean, logitsAll?: boolean, vocabOnly?: boolean, useMmap?: boolean, useMlock?: boolean,
+        embedding?: boolean
+    }) {
+        this._model = new LLAMAModel(modelPath, removeNullFields({
+            seed: seed != null ? Math.max(-1, seed) : undefined,
+            contextSize,
+            batchSize,
+            gpuCores,
+            lowVram,
+            f16Kv,
+            logitsAll,
+            vocabOnly,
+            useMmap,
+            useMlock,
+            embedding
+        }));
     }
 
     public static get systemInfo() {
         return llamaCppNode.systemInfo();
     }
 }
+
+function removeNullFields<T extends object>(obj: T): T {
+    const newObj: T = Object.assign({}, obj);
+
+    for (const key in obj) {
+        if (newObj[key] == null)
+            delete newObj[key];
+    }
+
+    return newObj;
+}
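For reference, a minimal usage sketch of the new constructor options. It assumes the class is exported from a LlamaModel.js module (matching the import style in the diff) and uses a placeholder model path; all option values are illustrative, not recommendations:

import {LlamaModel} from "./LlamaModel.js";

// Only modelPath is required. Any option left undefined is stripped by
// removeNullFields before reaching the native binding, so llama.cpp falls
// back to its own default for it.
const model = new LlamaModel({
    modelPath: "/path/to/model.bin", // placeholder path
    seed: 42,          // fixed seed for reproducibility; null (the default) means random
    contextSize: 2048, // overrides the 4096-token (1024 * 4) default above
    gpuCores: 32,      // number of layers to offload to VRAM
    useMlock: true     // keep the model resident in RAM
});

// Static getter (no parentheses) that surfaces llama.cpp's system info.
console.log(LlamaModel.systemInfo);

Two details in the diff are worth noting: the seed is clamped with Math.max(-1, seed) because llama.cpp of this era treats -1 as a request for a random seed, and removeNullFields deletes null/undefined entries so that unset options inherit the native defaults rather than overwriting them with undefined.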