January 2025 Update #1036

Merged 25 commits on Jan 21, 2025.
Commits
ac29c34 - code changes for december update (not working yet) (martindevans, Dec 3, 2024)
e4f4fed - Changes to support up to https://github.com/ggerganov/llama.cpp/commi… (martindevans, Dec 20, 2024)
c90ddd9 - Updated to latest llama.cpp binaries, this works on Windows CPU but n… (martindevans, Dec 27, 2024)
c27cfde - Updated to latest deps, fixed kernel memory failing to load (martindevans, Jan 4, 2025)
a5c9759 - Copy missing Mac libraries libggml-base and libggml-cpu (SignalRT, Jan 4, 2025)
34198f9 - Removed any mention of AVX in MacOS loading (martindevans, Jan 4, 2025)
3d93174 - Added file copying for some more targets (still missing macos) (martindevans, Jan 11, 2025)
0647df9 - Updated to latest set of binaries (martindevans, Jan 11, 2025)
756a88f - Fixed copy path for CUDA12 DLLs (martindevans, Jan 11, 2025)
4950e0d - Compatibility with llama.cpp backend split (PR #10256) on all platforms (m0nsky, Jan 17, 2025)
40a8c6c - Restore original comment (m0nsky, Jan 17, 2025)
3521b27 - Merge pull request #5 from m0nsky/wip_december_update_fixes (martindevans, Jan 17, 2025)
dc3dff1 - Update the dependency loader for ggml-metal and ggml-blas (m0nsky, Jan 18, 2025)
7b558ce - Update the runtime targets for ggml-metal and ggml-blas (m0nsky, Jan 18, 2025)
6d0b421 - Add CPU backend (fallback) dependency for the GPU backends (m0nsky, Jan 18, 2025)
4dbdc82 - Fix icons for the nuget backends (m0nsky, Jan 18, 2025)
556a7c1 - Update nuspec files for the GPU backends (m0nsky, Jan 19, 2025)
f526cbe - Update BinaryReleaseId (m0nsky, Jan 19, 2025)
91effe9 - Update nuspec for CPU & OSX (m0nsky, Jan 19, 2025)
695a4da - Merge pull request #6 from m0nsky/wip_december_update_fixes_v2 (martindevans, Jan 19, 2025)
3be20b1 - Update CPU nuspec to use noavx folder (m0nsky, Jan 19, 2025)
686627c - Update Runtime.targets to use noavx folder (m0nsky, Jan 19, 2025)
1913966 - Update BinaryReleaseId (m0nsky, Jan 19, 2025)
014ef78 - CUDA & Vulkan native libraries now correctly store the detected or us… (m0nsky, Jan 20, 2025)
830a078 - Merge pull request #7 from m0nsky/wip_december_update_fixes_v3 (martindevans, Jan 20, 2025)
8 changes: 3 additions & 5 deletions LLama.Examples/Program.cs

```diff
@@ -1,6 +1,5 @@
-using LLama.Native;
+using LLama.Native;
 using Spectre.Console;
-using System.Runtime.InteropServices;
 
 AnsiConsole.MarkupLineInterpolated(
     $"""
@@ -18,7 +17,7 @@ __ __ ____ __
     """);
 
 // Configure logging. Change this to `true` to see log messages from llama.cpp
-var showLLamaCppLogs = false;
+var showLLamaCppLogs = true;
 NativeLibraryConfig
     .All
     .WithLogCallback((level, message) =>
@@ -31,8 +30,7 @@ __ __ ____ __
 NativeLibraryConfig
     .All
     .WithCuda()
-    //.WithAutoDownload() // An experimental feature
-    .DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary);
+    .WithVulkan();
 
 // Calling this method forces loading to occur now.
 NativeApi.llama_empty_call();
```
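The example app now enables both GPU backends instead of dry-running a CUDA-only load. A minimal sketch of this loading pattern, using only the fluent calls visible in the diff (the chaining order and the log-callback body are illustrative, not the exact example code):

```csharp
using System;
using LLama.Native;

// Configure native backend preferences before any llama.cpp call is made.
// The loader tries CUDA first, then Vulkan, and can fall back to the CPU
// backend (commit 6d0b421 adds the CPU fallback dependency to GPU packages).
NativeLibraryConfig
    .All
    .WithCuda()
    .WithVulkan()
    .WithLogCallback((level, message) => Console.Write($"[{level}]: {message}"));

// Force the native library to load now instead of on first use.
NativeApi.llama_empty_call();
```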
4 changes: 1 addition & 3 deletions LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs

```diff
@@ -31,11 +31,9 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
 
     var @params = new ModelParams(config.ModelPath)
     {
-        ContextSize = config.ContextSize ?? 2048,
+        ContextSize = config.ContextSize,
         GpuLayerCount = config.GpuLayerCount ?? 20,
         Embeddings = true,
-        MainGpu = config.MainGpu,
-        SplitMode = config.SplitMode,
         PoolingType = LLamaPoolingType.Mean,
     };
     _weights = LLamaWeights.LoadFromFile(@params);
```
2 changes: 0 additions & 2 deletions LLama.KernelMemory/LlamaSharpTextGenerator.cs

```diff
@@ -33,8 +33,6 @@ public LlamaSharpTextGenerator(LLamaSharpConfig config)
     {
         ContextSize = config.ContextSize ?? 2048,
         GpuLayerCount = config.GpuLayerCount ?? 20,
-        MainGpu = config.MainGpu,
-        SplitMode = config.SplitMode
     };
     _weights = LLamaWeights.LoadFromFile(parameters);
     _context = _weights.CreateContext(parameters);
```
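Both KernelMemory adapters now pass `MainGpu`/`SplitMode` through to the defaults, and the embedding generator no longer substitutes 2048 when `config.ContextSize` is null. A sketch of the equivalent caller-side setup (the model path is hypothetical):

```csharp
using LLama;
using LLama.Common;

// Hypothetical model path. Leaving ContextSize null means the context
// length is taken from the model's own metadata, not a hard-coded 2048.
var parameters = new ModelParams("models/example.gguf")
{
    ContextSize = null,
    GpuLayerCount = 20,
    Embeddings = true,
};

using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);
```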
LLama.Unittest/KernelMemory/LLamaSharpTextEmbeddingGeneratorTests.cs

```diff
@@ -1,21 +1,15 @@
 using LLama.Common;
 using LLamaSharp.KernelMemory;
-using Microsoft.KernelMemory.AI;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Text.RegularExpressions;
-using System.Threading.Tasks;
 using Xunit.Abstractions;
 
 namespace LLama.Unittest.KernelMemory
 {
-    public class LLamaSharpTextEmbeddingGeneratorTests : ITextTokenizerTests, IDisposable
+    public class LLamaSharpTextEmbeddingGeneratorTests
+        : ITextTokenizerTests, IDisposable
     {
         private readonly LLamaSharpTextEmbeddingGenerator _embeddingGenerator;
 
-        public LLamaSharpTextEmbeddingGeneratorTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper)
+        public LLamaSharpTextEmbeddingGeneratorTests(ITestOutputHelper testOutputHelper)
+            : base(testOutputHelper)
         {
             _embeddingGenerator = new LLamaSharpTextEmbeddingGenerator(_lsConfig);
```
18 changes: 4 additions & 14 deletions LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs

```diff
@@ -1,25 +1,15 @@
 using LLama.Common;
 using LLamaSharp.KernelMemory;
-using Microsoft.KernelMemory.AI;
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-using System.Reflection.Emit;
-using System.Text;
-using System.Text.RegularExpressions;
-using System.Threading.Tasks;
 using Xunit.Abstractions;
-using Xunit.Sdk;
-using static System.Net.Mime.MediaTypeNames;
 
 namespace LLama.Unittest.KernelMemory
 {
-    public class LlamaSharpTextGeneratorTests : ITextTokenizerTests, IDisposable
+    public class LlamaSharpTextGeneratorTests
+        : ITextTokenizerTests, IDisposable
     {
         private readonly LlamaSharpTextGenerator _textGenerator;
 
-        public LlamaSharpTextGeneratorTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper)
+        public LlamaSharpTextGeneratorTests(ITestOutputHelper testOutputHelper)
+            : base(testOutputHelper)
         {
             _textGenerator = new LlamaSharpTextGenerator(_lsConfig);
```
6 changes: 1 addition & 5 deletions LLama.Unittest/SamplingTests.cs

```diff
@@ -167,11 +167,7 @@ private static SafeLLamaSamplerChainHandle CreateChain(SafeLLamaContextHandle co
     var chain = SafeLLamaSamplerChainHandle.Create(LLamaSamplerChainParams.Default());
 
     chain.AddPenalties(
-        vocabSize: context.VocabCount,
-        eos: context.ModelHandle.Tokens.EOS,
-        newline: context.ModelHandle.Tokens.Newline ?? 0,
-        penaltyCount: 60, repeat: 1, freq: 0, presence: 0,
-        penalizeNewline: false, ignoreEOS: false
+        penaltyCount: 60, repeat: 1, freq: 0, presence: 0
     );
 
     if (logit_bias != null) { chain.AddLogitBias(context.VocabCount, logit_bias); }
```
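`AddPenalties` now takes only the four penalty parameters; the vocabulary size, EOS/newline token ids, and the `penalizeNewline`/`ignoreEOS` switches are gone, matching llama.cpp's reworked penalties sampler. A sketch of the new call shape (the values are illustrative, not recommendations):

```csharp
using LLama.Native;

// Build a sampler chain with the simplified penalties API.
var chain = SafeLLamaSamplerChainHandle.Create(LLamaSamplerChainParams.Default());
chain.AddPenalties(
    penaltyCount: 64,  // how many recent tokens the penalties consider
    repeat: 1.1f,      // repetition penalty (1 = disabled)
    freq: 0.0f,        // frequency penalty
    presence: 0.0f);   // presence penalty
```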
5 changes: 4 additions & 1 deletion LLama.Web/Common/ModelOptions.cs

```diff
@@ -24,7 +24,7 @@ public class ModelOptions
     public int MainGpu { get; set; } = 0;
 
     /// <inheritdoc />
-    public GPUSplitMode SplitMode { get; set; } = GPUSplitMode.None;
+    public GPUSplitMode? SplitMode { get; set; }
 
     /// <inheritdoc />
     public int GpuLayerCount { get; set; } = 20;
@@ -59,6 +59,9 @@ public class ModelOptions
     /// <inheritdoc />
     public TensorSplitsCollection TensorSplits { get; set; } = new();
 
+    /// <inheritdoc />
+    public bool CheckTensors { get; }
+
     /// <inheritdoc />
     public List<MetadataOverride> MetadataOverrides { get; } = new();
```
7 changes: 6 additions & 1 deletion LLama/Abstractions/IModelParams.cs

```diff
@@ -36,7 +36,7 @@ public interface IModelParams
     /// <summary>
     /// How to split the model across multiple GPUs
    /// </summary>
-    GPUSplitMode SplitMode { get; }
+    GPUSplitMode? SplitMode { get; }
 
     /// <summary>
     /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
@@ -68,6 +68,11 @@ public interface IModelParams
     /// </summary>
     bool VocabOnly { get; }
 
+    /// <summary>
+    /// Validate model tensor data before loading
+    /// </summary>
+    bool CheckTensors { get; }
+
     /// <summary>
     /// Override specific metadata items in the model
     /// </summary>
```
5 changes: 4 additions & 1 deletion LLama/Common/ModelParams.cs

```diff
@@ -19,7 +19,7 @@ public record ModelParams
     public int MainGpu { get; set; } = 0;
 
     /// <inheritdoc />
-    public GPUSplitMode SplitMode { get; set; } = GPUSplitMode.None;
+    public GPUSplitMode? SplitMode { get; set; }
 
     /// <inheritdoc />
     public int GpuLayerCount { get; set; } = 20;
@@ -54,6 +54,9 @@ public record ModelParams
     /// <inheritdoc />
     public TensorSplitsCollection TensorSplits { get; set; } = new();
 
+    /// <inheritdoc />
+    public bool CheckTensors { get; }
+
     /// <inheritdoc />
     public List<MetadataOverride> MetadataOverrides { get; set; } = new();
```
7 changes: 5 additions & 2 deletions LLama/Extensions/IModelParamsExtensions.cs

```diff
@@ -1,4 +1,4 @@
-using System.IO;
+using System.IO;
 using System;
 using System.Text;
 using LLama.Abstractions;
@@ -31,11 +31,14 @@ public static IDisposable ToLlamaModelParams(this IModelParams @params, out LLam
     result = LLamaModelParams.Default();
 
     result.main_gpu = @params.MainGpu;
-    result.split_mode = @params.SplitMode;
     result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount;
+    if (@params.SplitMode.HasValue)
+        result.split_mode = @params.SplitMode.Value;
+
     result.use_mlock = @params.UseMemoryLock;
     result.use_mmap = @params.UseMemorymap;
     result.vocab_only = @params.VocabOnly;
+    result.check_tensors = @params.CheckTensors;
 
     unsafe
     {
```
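With `SplitMode` now nullable, leaving it unset preserves llama.cpp's own default instead of forcing `GPUSplitMode.None` onto the native `split_mode` field. A caller-side sketch (the model path is hypothetical, and `GPUSplitMode.Layer` is an assumed enum member for layer-wise splitting):

```csharp
using LLama.Common;
using LLama.Native;

// Unset SplitMode: llama.cpp's default split behaviour is preserved.
var defaults = new ModelParams("models/example.gguf");

// Explicit SplitMode: forwarded to the native split_mode field.
var layerSplit = new ModelParams("models/example.gguf")
{
    SplitMode = GPUSplitMode.Layer, // assumed member name
};
```

Note that `CheckTensors` is declared get-only on both `ModelParams` and `ModelOptions`, so it always reads false there; only an `IModelParams` implementation that actually exposes a value can turn on `result.check_tensors`.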
2 changes: 1 addition & 1 deletion LLama/Extensions/LLamaExecutorExtensions.cs

```diff
@@ -147,7 +147,7 @@ private string CreatePrompt(IList<ChatMessage> messages)
     PreventEOS = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PreventEOS), out bool eos) is true ? eos : s_defaultPipeline.PreventEOS,
     PenalizeNewline = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PenalizeNewline), out bool pnl) is true ? pnl : s_defaultPipeline.PenalizeNewline,
     RepeatPenalty = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.RepeatPenalty), out float rp) is true ? rp : s_defaultPipeline.RepeatPenalty,
-    RepeatPenaltyCount = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.RepeatPenaltyCount), out int rpc) is true ? rpc : s_defaultPipeline.RepeatPenaltyCount,
+    PenaltyCount = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PenaltyCount), out int rpc) is true ? rpc : s_defaultPipeline.PenaltyCount,
     Grammar = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.Grammar), out Grammar? g) is true ? g : s_defaultPipeline.Grammar,
     MinKeep = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinKeep), out int mk) is true ? mk : s_defaultPipeline.MinKeep,
     MinP = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinP), out float mp) is true ? mp : s_defaultPipeline.MinP,
```
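The `DefaultSamplingPipeline` property formerly named `RepeatPenaltyCount` is now `PenaltyCount`, mirroring the native sampler rename above. A minimal sketch of setting it directly (assuming `DefaultSamplingPipeline` lives in the `LLama.Sampling` namespace; values are illustrative):

```csharp
using LLama.Sampling;

// Configure repetition penalties on the default pipeline.
var pipeline = new DefaultSamplingPipeline
{
    PenaltyCount = 64,    // renamed from RepeatPenaltyCount
    RepeatPenalty = 1.1f, // values above 1 discourage repeating recent tokens
};
```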
3 changes: 0 additions & 3 deletions LLama/LLamaQuantizer.cs

```diff
@@ -106,9 +106,6 @@ private static bool ValidateFtype(LLamaFtype ftype)
     case LLamaFtype.MOSTLY_IQ3_S:
     case LLamaFtype.MOSTLY_IQ3_M:
 
-    case LLamaFtype.MOSTLY_Q4_0_4_4:
-    case LLamaFtype.MOSTLY_Q4_0_4_8:
-    case LLamaFtype.MOSTLY_Q4_0_8_8:
         return true;
 
     case LLamaFtype.GUESSED:
```