April 2024 Binary Update #662

Merged: 7 commits, Apr 16, 2024
2 changes: 1 addition & 1 deletion LLama.Examples/Examples/BatchedExecutorGuidance.cs
@@ -79,7 +79,7 @@ await AnsiConsole
guidance.Prompt(g);

// Early exit if we reach the natural end of the guided sentence
-if (g == model.EndOfSentenceToken)
+if (g == model.Tokens.EOS)
break;

// Update progress bar
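Note: the end-of-sequence check now goes through the LLamaWeights.Tokens accessor instead of a property on the model itself. A minimal sketch of the new call, assuming an already-sampled token ("model.gguf" and the helper name are placeholders, not part of this PR):

using LLama;
using LLama.Common;
using LLama.Native;

// Sketch only: "model.gguf" stands in for a real model path.
var parameters = new ModelParams("model.gguf");
using var model = LLamaWeights.LoadFromFile(parameters);

// Tokens.EOS replaces the old model.EndOfSentenceToken property.
bool IsEndOfGeneration(LLamaToken token) => token == model.Tokens.EOS;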
2 changes: 1 addition & 1 deletion LLama.Examples/Examples/GetEmbeddings.cs
@@ -9,7 +9,7 @@ public static void Run()
string modelPath = UserSettings.GetModelPath();

Console.ForegroundColor = ConsoleColor.DarkGray;
-var @params = new ModelParams(modelPath) { EmbeddingMode = true };
+var @params = new ModelParams(modelPath) { Embeddings = true };
using var weights = LLamaWeights.LoadFromFile(@params);
var embedder = new LLamaEmbedder(weights, @params);

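Note: EmbeddingMode is renamed to Embeddings across the public parameter surface, matching the embeddings field in llama.cpp's context params. A minimal end-to-end sketch with the renamed flag ("model.gguf" is a placeholder for any embedding-capable GGUF file):

using LLama;
using LLama.Common;

var @params = new ModelParams("model.gguf") { Embeddings = true };
using var weights = LLamaWeights.LoadFromFile(@params);
var embedder = new LLamaEmbedder(weights, @params);

// GetEmbeddings is awaitable, as in the updated example above.
float[] vector = await embedder.GetEmbeddings("The quick brown fox");
Console.WriteLine($"dimensions: {vector.Length}");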
2 changes: 1 addition & 1 deletion LLama.Examples/Examples/SemanticKernelMemory.cs
@@ -20,7 +20,7 @@ public static async Task Run()
var parameters = new ModelParams(modelPath)
{
Seed = seed,
-EmbeddingMode = true
+Embeddings = true
};

using var model = LLamaWeights.LoadFromFile(parameters);
2 changes: 1 addition & 1 deletion LLama.KernelMemory/BuilderExtensions.cs
@@ -84,7 +84,7 @@ public static IKernelMemoryBuilder WithLLamaSharpDefaults(this IKernelMemoryBuil
ContextSize = config?.ContextSize ?? 2048,
Seed = config?.Seed ?? 0,
GpuLayerCount = config?.GpuLayerCount ?? 20,
-EmbeddingMode = true,
+Embeddings = true,
MainGpu = config?.MainGpu ?? 0,
SplitMode = config?.SplitMode ?? GPUSplitMode.None,
};
4 changes: 2 additions & 2 deletions LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
@@ -29,7 +29,7 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
this._config = config;
var @params = new ModelParams(_config.ModelPath)
{
-EmbeddingMode = true,
+Embeddings = true,
MainGpu = _config.MainGpu,
SplitMode = _config.SplitMode
};
@@ -49,7 +49,7 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights we
this._config = config;
var @params = new ModelParams(_config.ModelPath)
{
-EmbeddingMode = true,
+Embeddings = true,
MainGpu = _config.MainGpu,
SplitMode = _config.SplitMode
};
2 changes: 1 addition & 1 deletion LLama.Unittest/BasicTest.cs
@@ -15,7 +15,7 @@ public sealed class BasicTest
public BasicTest(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
-_params = new ModelParams(Constants.ModelPath)
+_params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
};
2 changes: 1 addition & 1 deletion LLama.Unittest/BeamTests.cs
@@ -15,7 +15,7 @@ public sealed class BeamTests
public BeamTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
-_params = new ModelParams(Constants.ModelPath)
+_params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
};
10 changes: 6 additions & 4 deletions LLama.Unittest/Constants.cs
@@ -2,9 +2,11 @@
{
internal static class Constants
{
-public static string ModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
-public static string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
-public static string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
-public static string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";
+public static readonly string GenerativeModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
+public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
+
+public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
+public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
+public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";
}
}
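Note: splitting ModelPath into GenerativeModelPath and EmbeddingModelPath lets the embedding tests run against the much smaller all-MiniLM-L12-v2 model rather than the 7B chat model. A sketch of how the two constants are consumed inside the test project (illustrative, not part of the diff):

using LLama.Common;

// Generation tests load the chat model; embedding tests load the MiniLM model.
var genParams = new ModelParams(Constants.GenerativeModelPath) { ContextSize = 2048 };
var embParams = new ModelParams(Constants.EmbeddingModelPath) { Embeddings = true };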
2 changes: 1 addition & 1 deletion LLama.Unittest/GrammarTest.cs
@@ -12,7 +12,7 @@ public sealed class GrammarTest

public GrammarTest()
{
-_params = new ModelParams(Constants.ModelPath)
+_params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048,
Seed = 92,
6 changes: 6 additions & 0 deletions LLama.Unittest/LLama.Unittest.csproj
@@ -31,6 +31,9 @@
<DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf" DestinationFolder="Models" DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf" SkipUnchangedFiles="true"></DownloadFile>


</Target>

<ItemGroup>
@@ -43,6 +46,9 @@
</ItemGroup>

<ItemGroup>
+<None Update="Models\all-MiniLM-L12-v2.Q8_0.gguf">
+<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+</None>
<None Update="Models\llama-2-7b-chat.Q3_K_S.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
2 changes: 1 addition & 1 deletion LLama.Unittest/LLamaContextTests.cs
@@ -11,7 +11,7 @@ public sealed class LLamaContextTests

public LLamaContextTests()
{
-var @params = new ModelParams(Constants.ModelPath)
+var @params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 768,
};
16 changes: 14 additions & 2 deletions LLama.Unittest/LLamaEmbedderTests.cs
@@ -1,5 +1,7 @@
using LLama.Common;
using LLama.Native;
using Xunit.Abstractions;
using Xunit.Sdk;

namespace LLama.Unittest;

@@ -12,11 +14,11 @@ public sealed class LLamaEmbedderTests
public LLamaEmbedderTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
-var @params = new ModelParams(Constants.ModelPath)
+var @params = new ModelParams(Constants.EmbeddingModelPath)
{
ContextSize = 4096,
Threads = 5,
-EmbeddingMode = true,
+Embeddings = true,
};
using var weights = LLamaWeights.LoadFromFile(@params);
_embedder = new(weights, @params);
@@ -38,15 +40,25 @@ private static float Dot(float[] a, float[] b)
public async Task EmbedCompare()
{
var cat = await _embedder.GetEmbeddings("The cat is cute");
Assert.DoesNotContain(float.NaN, cat);

var kitten = await _embedder.GetEmbeddings("The kitten is kawaii");
Assert.DoesNotContain(float.NaN, kitten);

var spoon = await _embedder.GetEmbeddings("The spoon is not real");
Assert.DoesNotContain(float.NaN, spoon);

_testOutputHelper.WriteLine($"Cat = [{string.Join(",", cat.AsMemory().Slice(0, 7).ToArray())}...]");
_testOutputHelper.WriteLine($"Kitten = [{string.Join(",", kitten.AsMemory().Slice(0, 7).ToArray())}...]");
_testOutputHelper.WriteLine($"Spoon = [{string.Join(",", spoon.AsMemory().Slice(0, 7).ToArray())}...]");

var close = 1 - Dot(cat, kitten);
var far = 1 - Dot(cat, spoon);

_testOutputHelper.WriteLine("");
_testOutputHelper.WriteLine($"Cat.Kitten (Close): {close:F4}");
_testOutputHelper.WriteLine($"Cat.Spoon (Far): {far:F4}");

Assert.True(close < far);
}
}
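Note: the new assertions treat 1 - Dot(a, b) as a distance, which equals cosine distance only if the embedder returns L2-normalised vectors. A sketch of an explicitly normalised variant, in case that assumption does not hold for a given model (helper name is illustrative):

// Cosine distance with explicit normalisation: 0 when directions match,
// up to 2 when they are opposite.
static float CosineDistance(float[] a, float[] b)
{
    float dot = 0, normA = 0, normB = 0;
    for (var i = 0; i < a.Length; i++)
    {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return 1 - dot / MathF.Sqrt(normA * normB);
}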
2 changes: 1 addition & 1 deletion LLama.Unittest/LLavaWeightsTests.cs
@@ -14,7 +14,7 @@ public sealed class LLavaWeightTests

public LLavaWeightTests()
{
-var @params = new ModelParams(Constants.ModelPath)
+var @params = new ModelParams(Constants.GenerativeModelPath)
{
// Llava models require a big context
ContextSize = 4096
4 changes: 2 additions & 2 deletions LLama.Unittest/MemoryDisposalTests.cs
@@ -7,7 +7,7 @@ public class MemoryDisposalTests
[Fact]
public void ModelDisposal()
{
-var @params = new ModelParams(Constants.ModelPath)
+var @params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
};
@@ -21,7 +21,7 @@ public void ModelDisposal()
[Fact]
public void ContextDisposal()
{
-var @params = new ModelParams(Constants.ModelPath)
+var @params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
};
2 changes: 1 addition & 1 deletion LLama.Unittest/StatelessExecutorTest.cs
@@ -15,7 +15,7 @@ public class StatelessExecutorTest
public StatelessExecutorTest(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
-_params = new ModelParams(Constants.ModelPath)
+_params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 60,
Seed = 1754,
2 changes: 1 addition & 1 deletion LLama.Unittest/StreamingTextDecoderTests.cs
@@ -14,7 +14,7 @@ public class StreamingTextDecoderTests
public StreamingTextDecoderTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
-_params = new ModelParams(Constants.ModelPath);
+_params = new ModelParams(Constants.GenerativeModelPath);
_model = LLamaWeights.LoadFromFile(_params);
}

2 changes: 1 addition & 1 deletion LLama.Unittest/TokenTests.cs
@@ -12,7 +12,7 @@ public sealed class TokenTests

public TokenTests()
{
-_params = new ModelParams(Constants.ModelPath)
+_params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
};
8 changes: 6 additions & 2 deletions LLama.Web/Common/ModelOptions.cs
@@ -29,9 +29,13 @@ public class ModelOptions
/// <inheritdoc />
public int GpuLayerCount { get; set; } = 20;

+public uint SeqMax { get; }

/// <inheritdoc />
public uint Seed { get; set; } = 1686349486;

+public bool Embeddings { get; }

/// <inheritdoc />
public bool UseMemorymap { get; set; } = true;

Expand All @@ -57,7 +61,7 @@ public class ModelOptions
public uint BatchSize { get; set; } = 512;

/// <inheritdoc />
-public bool EmbeddingMode { get; set; } = false;
+public uint UBatchSize { get; set; } = 512;

/// <inheritdoc />
public TensorSplitsCollection TensorSplits { get; set; } = new();
@@ -108,6 +112,6 @@ public class ModelOptions
public float DefragThreshold { get; set; }

/// <inheritdoc />
-public bool DoPooling { get; set; }
+public LLamaPoolingType PoolingType { get; set; }
}
}
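Note: ModelOptions now mirrors the updated IContextParams surface: EmbeddingMode gives way to UBatchSize, and the boolean DoPooling becomes an LLamaPoolingType enum (the new SeqMax and Embeddings properties are declared get-only here, so they can only take their defaults). A hedged configuration sketch using ModelParams, where the renamed members are settable ("model.gguf" is a placeholder, values are illustrative):

using LLama.Common;
using LLama.Native;

var options = new ModelParams("model.gguf")
{
    Embeddings = true,                    // replaces EmbeddingMode
    PoolingType = LLamaPoolingType.Mean,  // replaces bool DoPooling
};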
21 changes: 15 additions & 6 deletions LLama/Abstractions/IContextParams.cs
@@ -14,20 +14,29 @@ public interface IContextParams
uint? ContextSize { get; }

/// <summary>
-/// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
+/// maximum batch size that can be submitted at once (must be >=32 to use BLAS) (n_batch)
/// </summary>
uint BatchSize { get; }

+/// <summary>
+/// Physical batch size
+/// </summary>
+uint UBatchSize { get; }
+
+/// <summary>
+/// max number of sequences (i.e. distinct states for recurrent models)
+/// </summary>
+uint SeqMax { get; }

/// <summary>
/// Seed for the random number generator (seed)
/// </summary>
uint Seed { get; }

/// <summary>
-/// Whether to use embedding mode. (embedding) Note that if this is set to true,
-/// The LLamaModel won't produce text response anymore.
+/// If true, extract embeddings (together with logits).
/// </summary>
-bool EmbeddingMode { get; }
+bool Embeddings { get; }

/// <summary>
/// RoPE base frequency (null to fetch from the model)
@@ -105,7 +114,7 @@ public interface IContextParams
float DefragThreshold { get; }

/// <summary>
-/// Whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
+/// How to pool (sum) embedding results by sequence id (ignored if no pooling layer)
/// </summary>
-bool DoPooling { get; }
+LLamaPoolingType PoolingType { get; }
}
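Note: the distinction introduced here is that BatchSize (n_batch) is the logical ceiling on tokens submitted in one decode call, while UBatchSize (n_ubatch) is the physical chunk actually run through the model per forward pass, so UBatchSize is typically no larger than BatchSize. A sketch setting all three new knobs (path and values are illustrative):

using LLama.Common;

var p = new ModelParams("model.gguf")
{
    BatchSize = 2048,  // logical: max tokens queued per decode call (n_batch)
    UBatchSize = 512,  // physical: tokens per forward pass (n_ubatch)
    SeqMax = 4,        // distinct sequence ids usable at once (n_seq_max)
};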
10 changes: 8 additions & 2 deletions LLama/Common/ModelParams.cs
@@ -24,6 +24,9 @@ public record ModelParams
/// <inheritdoc />
public int GpuLayerCount { get; set; } = 20;

+/// <inheritdoc />
+public uint SeqMax { get; set; } = 1;

/// <inheritdoc />
public uint Seed { get; set; } = 0xFFFFFFFF;

@@ -52,7 +55,10 @@ public record ModelParams
public uint BatchSize { get; set; } = 512;

/// <inheritdoc />
-public bool EmbeddingMode { get; set; }
+public uint UBatchSize { get; set; } = 512;
+
+/// <inheritdoc />
+public bool Embeddings { get; set; }

/// <inheritdoc />
public TensorSplitsCollection TensorSplits { get; set; } = new();
@@ -97,7 +103,7 @@ public record ModelParams
public float DefragThreshold { get; set; }

/// <inheritdoc />
-public bool DoPooling { get; set; }
+public LLamaPoolingType PoolingType { get; set; } = LLamaPoolingType.Unspecified;

/// <inheritdoc />
public bool VocabOnly { get; set; }
12 changes: 9 additions & 3 deletions LLama/Extensions/IContextParamsExtensions.cs
@@ -20,11 +20,14 @@ public static class IContextParamsExtensions
/// <exception cref="ArgumentException"></exception>
public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
{
-result = NativeApi.llama_context_default_params();
+result = LLamaContextParams.Default();

result.n_ctx = @params.ContextSize ?? 0;
result.n_batch = @params.BatchSize;
+result.n_ubatch = @params.UBatchSize;
+result.n_seq_max = @params.SeqMax;
result.seed = @params.Seed;
-result.embedding = @params.EmbeddingMode;
+result.embeddings = @params.Embeddings;
result.rope_freq_base = @params.RopeFrequencyBase ?? 0;
result.rope_freq_scale = @params.RopeFrequencyScale ?? 0;

@@ -41,10 +44,13 @@ public static void ToLlamaContextParams(this IContextParams @params, out LLamaCo
result.cb_eval = IntPtr.Zero;
result.cb_eval_user_data = IntPtr.Zero;

+result.abort_callback = IntPtr.Zero;
+result.abort_callback_user_data = IntPtr.Zero;

result.type_k = @params.TypeK ?? GGMLType.GGML_TYPE_F16;
result.type_v = @params.TypeV ?? GGMLType.GGML_TYPE_F16;
result.offload_kqv = !@params.NoKqvOffload;
-result.do_pooling = @params.DoPooling;
+result.llama_pooling_type = @params.PoolingType;

result.n_threads = Threads(@params.Threads);
result.n_threads_batch = Threads(@params.BatchThreads);
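Note: the conversion now seeds the struct from LLamaContextParams.Default() and fills the new native fields (n_ubatch, n_seq_max, embeddings, llama_pooling_type, abort_callback). A round-trip sketch, assuming the extension class is reachable from the calling assembly (path and values are placeholders):

using LLama.Common;
using LLama.Extensions;
using LLama.Native;

var managed = new ModelParams("model.gguf")
{
    UBatchSize = 256,
    SeqMax = 2,
    Embeddings = true,
};

managed.ToLlamaContextParams(out LLamaContextParams native);
Console.WriteLine($"n_ubatch={native.n_ubatch}, n_seq_max={native.n_seq_max}, embeddings={native.embeddings}");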
3 changes: 2 additions & 1 deletion LLama/Extensions/IModelParamsExtensions.cs
@@ -28,7 +28,8 @@ public static IDisposable ToLlamaModelParams(this IModelParams @params, out LLam

var disposer = new GroupDisposable();

-result = NativeApi.llama_model_default_params();
+result = LLamaModelParams.Default();

result.main_gpu = @params.MainGpu;
result.split_mode = @params.SplitMode;
result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount;