From 42934351848a15387083c4fe23a5d56e512802e8 Mon Sep 17 00:00:00 2001
From: Zoli Somogyi
Date: Sat, 6 Apr 2024 17:22:29 +0200
Subject: [PATCH 1/7] Extend LLava with in-memory images

---
 LLama/LLamaExecutorBase.cs     | 8 ++++++--
 LLama/LLamaInteractExecutor.cs | 7 ++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index 52b38e18f..51c9656c8 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -79,8 +79,11 @@ public bool IsMultiModal
         public LLavaWeights? ClipModel { get; }
 
         /// <inheritdoc />
-        public List<string> ImagePaths { get; set; }
-
+        public List<string> ImagePaths { get; set; }
+
+        /// <inheritdoc />
+        public List<byte[]> ImageBytes { get; set; }
+
         /// <summary>
         /// Current "mu" value for mirostat sampling
         /// </summary>
@@ -96,6 +99,7 @@ public bool IsMultiModal
         protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
         {
             ImagePaths = new List<string>();
+            ImageBytes = new List<byte[]>();
             _logger = logger;
             Context = context;
             _pastTokensCount = 0;
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 21bb8dcc9..de3eab715 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -154,7 +154,12 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
                 {
                     _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName( ClipModel.NativeHandle, Context, image ) );
                 }
-
+
+                foreach (var image in ImageBytes)
+                {
+                    _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, image));
+                }
+
                 int imageIndex = text.IndexOf("<image>");
                 // Tokenize segment 1 (before <image> tag)
                 string preImagePrompt = text.Substring(0, imageIndex);

From 7378e990790f12c627703a5824072ecdc7852045 Mon Sep 17 00:00:00 2001
From: Zoli Somogyi
Date: Sun, 7 Apr 2024 19:47:39 +0200
Subject: [PATCH 2/7] Standardizing Image Data implementation

---
 .../Examples/LlavaInteractiveModeExecute.cs |  6 ++-
 LLama/Abstractions/ILLamaExecutor.cs        | 53 ++++++++++++++++---
 LLama/LLamaExecutorBase.cs                  | 16 +++---
 LLama/LLamaInteractExecutor.cs              | 22 +++++---
 LLama/LLamaStatelessExecutor.cs             | 14 +++--
 5 files changed, 85 insertions(+), 26 deletions(-)

diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
index f7bf24945..8cfa7376b 100644
--- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
+++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
@@ -2,6 +2,7 @@
 using LLama.Batched;
 using LLama.Common;
 using Spectre.Console;
+using LLama.Abstractions;
 
 namespace LLama.Examples.Examples
 {
@@ -99,7 +100,10 @@ public static async Task Run()
                     // Initialize Images in executor
                     //
-                    ex.ImagePaths = imagePaths.ToList();
+                    foreach (var image in imagePaths)
+                    {
+                        ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image));
+                    }
                 }
 
                 Console.ForegroundColor = Color.White;
diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs
index ee4cf5121..977cbc5e4 100644
--- a/LLama/Abstractions/ILLamaExecutor.cs
+++ b/LLama/Abstractions/ILLamaExecutor.cs
@@ -22,14 +22,13 @@ public interface ILLamaExecutor
         /// <summary>
         /// Multi-Modal Projections / Clip Model weights
         /// </summary>
-        public LLavaWeights? ClipModel { get; }
-
+        public LLavaWeights? ClipModel { get; }
+
         /// <summary>
-        /// List of images: Image filename and path (jpeg images).
+        /// List of images: Image file path, uri or image byte array. See ImageData.
         /// </summary>
-        public List<string> ImagePaths { get; set; }
-
-
+        public List<ImageData> Images { get; }
+
         /// <summary>
         /// Asynchronously infers a response from the model.
         /// </summary>
         /// <param name="text">Your prompt</param>
         /// <param name="inferenceParams">Any additional parameters</param>
         /// <param name="token">A cancellation token.</param>
         /// <returns></returns>
         IAsyncEnumerable<string> InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default);
     }
+
+    /// <summary>
+    /// Holds image data
+    /// </summary>
+    public class ImageData
+    {
+        /// <summary>
+        /// constructor
+        /// </summary>
+        /// <param name="type"></param>
+        /// <param name="data"></param>
+        public ImageData(DataType type, object data) { Type = type; Data = data; }
+
+        /// <summary>
+        /// the possible types of image data
+        /// </summary>
+        public enum DataType
+        {
+            /// <summary>
+            /// file path
+            /// </summary>
+            ImagePath,
+            /// <summary>
+            /// byte array
+            /// </summary>
+            ImageBytes,
+            /// <summary>
+            /// uri
+            /// </summary>
+            ImageURL
+        }
+
+        /// <summary>
+        /// the type of this image data
+        /// </summary>
+        public DataType Type { get; set; }
+
+        /// <summary>
+        /// the image data (string, byte array or uri)
+        /// </summary>
+        public object? Data { get; set; }
+    }
 }
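To make the intent of the new interface concrete before the remaining diffs, here is a minimal caller-side sketch of the ImageData API added above. The executor instance, file names and URL are assumed context rather than part of the patch, and note that the ImageURL branch only becomes functional in patch 3:

    // Assumed context: "ex" is an InteractiveExecutor created with LLava weights,
    // and inferenceParams is an existing InferenceParams instance.
    ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, @"C:\images\cat.jpg"));
    ex.Images.Add(new ImageData(ImageData.DataType.ImageBytes, File.ReadAllBytes(@"C:\images\dog.jpg")));
    ex.Images.Add(new ImageData(ImageData.DataType.ImageURL, "https://example.com/bird.jpg"));

    // The <image> tag marks where the image embeddings are spliced into the prompt.
    await foreach (var token in ex.InferAsync("<image>\nDescribe the pictures.", inferenceParams))
        Console.Write(token);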
diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index 51c9656c8..9914f0ecc 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -76,13 +76,10 @@ public bool IsMultiModal
         }
 
         /// <inheritdoc />
-        public LLavaWeights? ClipModel { get; }
-
-        /// <inheritdoc />
-        public List<string> ImagePaths { get; set; }
+        public LLavaWeights? ClipModel { get; }
 
         /// <inheritdoc />
-        public List<byte[]> ImageBytes { get; set; }
+        public List<ImageData> Images { get; set; }
 
         /// <summary>
         /// Current "mu" value for mirostat sampling
@@ -98,8 +95,7 @@ public bool IsMultiModal
         /// </summary>
         protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
         {
-            ImagePaths = new List<string>();
-            ImageBytes = new List<byte[]>();
+            Images = new List<ImageData>();
             _logger = logger;
             Context = context;
             _pastTokensCount = 0;
@@ -109,6 +105,12 @@ protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
             _decoder = new StreamingTokenDecoder(context);
         }
 
+        /// <summary>
+        /// 
+        /// </summary>
+        /// <param name="context"></param>
+        /// <param name="lLavaWeights"></param>
+        /// <param name="logger"></param>
         public StatefulExecutorBase(LLamaContext context, LLavaWeights lLavaWeights, ILogger? logger = null): this( context, logger )
         {
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index de3eab715..35104165a 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -148,16 +148,22 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
             int usedTokens = 0;
             // If the prompt contains the tag <image> extract this.
             _imageInPrompt = text.Contains("<image>");
-            if (_imageInPrompt)
+            if (_imageInPrompt && ClipModel != null)
             {
-                foreach (var image in ImagePaths)
+                foreach (var image in Images)
                 {
-                    _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName( ClipModel.NativeHandle, Context, image ) );
-                }
-
-                foreach (var image in ImageBytes)
-                {
-                    _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, image));
+                    if (image.Type == ImageData.DataType.ImagePath && image.Data != null)
+                    {
+                        _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName(ClipModel.NativeHandle, Context, image.Data.ToString()));
+                    }
+                    else if (image.Type == ImageData.DataType.ImageBytes && image.Data != null)
+                    {
+                        _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, (byte[])image.Data));
+                    }
+                    else if (image.Type == ImageData.DataType.ImageURL && image.Data != null)
+                    {
+                        throw new NotImplementedException();
+                    }
                 }
 
                 int imageIndex = text.IndexOf("<image>");
diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs
index 9d705af1b..9d2f8c783 100644
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -26,10 +26,16 @@ public class StatelessExecutor
         // LLava Section
         public bool IsMultiModal => false;
+
+        /// <inheritdoc />
         public bool MultiModalProject { get; }
-        public LLavaWeights? ClipModel { get; }
-        public List<string> ImagePaths { get; set; }
-
+
+        /// <inheritdoc />
+        public LLavaWeights? ClipModel { get; }
+
+        /// <inheritdoc />
+        public List<ImageData> Images { get; set; }
+
         /// <summary>
         /// The context used by the executor when running the inference.
         /// </summary>
@@ -43,7 +49,7 @@ public class StatelessExecutor
         /// <param name="logger"></param>
         public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
         {
-            ImagePaths = new List<string>();
+            Images = new List<ImageData>();
             _weights = weights;
             _params = @params;
             _logger = logger;

From 51ba8aa64f9cc953627912d1a1a62829ef4bd509 Mon Sep 17 00:00:00 2001
From: Zoli Somogyi
Date: Mon, 8 Apr 2024 10:06:04 +0200
Subject: [PATCH 3/7] Download image implementation

---
 LLama/LLamaInteractExecutor.cs | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 35104165a..0944bb4e6 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -11,6 +11,7 @@
 using LLama.Exceptions;
 using LLama.Extensions;
 using Microsoft.Extensions.Logging;
+using System.Net.Http;
 
 namespace LLama
 {
@@ -154,7 +155,7 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
                     {
                         if (image.Type == ImageData.DataType.ImagePath && image.Data != null)
                         {
-                            _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName(ClipModel.NativeHandle, Context, image.Data.ToString()));
+                            _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName(ClipModel.NativeHandle, Context, (string)image.Data));
                         }
                         else if (image.Type == ImageData.DataType.ImageBytes && image.Data != null)
                         {
                             _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, (byte[])image.Data));
                         }
                         else if (image.Type == ImageData.DataType.ImageURL && image.Data != null)
                         {
-                            throw new NotImplementedException();
+                            using var httpClient = new HttpClient();
+                            var uri = new Uri((string)image.Data);
+                            var imageBytes = httpClient.GetByteArrayAsync(uri).Result;
+                            if (imageBytes != null && imageBytes.Length > 0)
+                            {
+                                _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, imageBytes));
+                            }
                         }
                     }
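A side note on the new ImageURL branch: it blocks on `.Result` inside PreprocessLlava. Since that method already returns a Task, a non-blocking variant is possible; the sketch below is a hypothetical rewrite under that assumption, not part of the patch series:

    // Hypothetical async form of the ImageURL branch (assumes PreprocessLlava
    // becomes async Task and is awaited by its caller).
    else if (image.Type == ImageData.DataType.ImageURL && image.Data != null)
    {
        using var httpClient = new HttpClient();
        var imageBytes = await httpClient.GetByteArrayAsync(new Uri((string)image.Data));
        if (imageBytes.Length > 0)
        {
            _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, imageBytes));
        }
    }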
From c2f0d9c23347f85a4e601ac0e70e147bebd45252 Mon Sep 17 00:00:00 2001
From: Zoli Somogyi
Date: Mon, 8 Apr 2024 16:10:54 +0200
Subject: [PATCH 4/7] Simplifying image handling

---
 .../Examples/LlavaInteractiveModeExecute.cs |  2 +-
 LLama/Abstractions/ILLamaExecutor.cs        | 44 +------------------
 LLama/LLamaExecutorBase.cs                  |  4 +-
 LLama/LLamaInteractExecutor.cs              | 19 +-------
 LLama/LLamaStatelessExecutor.cs             |  4 +-
 5 files changed, 7 insertions(+), 66 deletions(-)

diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
index 8cfa7376b..507f041b1 100644
--- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
+++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
@@ -102,7 +102,7 @@ public static async Task Run()
                     //
                     foreach (var image in imagePaths)
                     {
-                        ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image));
+                        ex.Images.Add(File.ReadAllBytes(image));
                     }
                 }
 
diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs
index 977cbc5e4..d6c8d2ce2 100644
--- a/LLama/Abstractions/ILLamaExecutor.cs
+++ b/LLama/Abstractions/ILLamaExecutor.cs
@@ -27,7 +27,7 @@ public interface ILLamaExecutor
         /// <summary>
         /// List of images: Image file path, uri or image byte array. See ImageData.
         /// </summary>
-        public List<ImageData> Images { get; }
+        public List<byte[]> Images { get; }
 
         /// <summary>
         /// Asynchronously infers a response from the model.
@@ -38,46 +38,4 @@ public interface ILLamaExecutor
         /// <returns></returns>
         IAsyncEnumerable<string> InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default);
     }
-
-    /// <summary>
-    /// Holds image data
-    /// </summary>
-    public class ImageData
-    {
-        /// <summary>
-        /// constructor
-        /// </summary>
-        /// <param name="type"></param>
-        /// <param name="data"></param>
-        public ImageData(DataType type, object data) { Type = type; Data = data; }
-
-        /// <summary>
-        /// the possible types of image data
-        /// </summary>
-        public enum DataType
-        {
-            /// <summary>
-            /// file path
-            /// </summary>
-            ImagePath,
-            /// <summary>
-            /// byte array
-            /// </summary>
-            ImageBytes,
-            /// <summary>
-            /// uri
-            /// </summary>
-            ImageURL
-        }
-
-        /// <summary>
-        /// the type of this image data
-        /// </summary>
-        public DataType Type { get; set; }
-
-        /// <summary>
-        /// the image data (string, byte array or uri)
-        /// </summary>
-        public object? Data { get; set; }
-    }
 }
diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index 9914f0ecc..65c0dcb4b 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -79,7 +79,7 @@ public bool IsMultiModal
         public LLavaWeights? ClipModel { get; }
 
         /// <inheritdoc />
-        public List<ImageData> Images { get; set; }
+        public List<byte[]> Images { get; set; }
 
         /// <summary>
         /// Current "mu" value for mirostat sampling
@@ -95,7 +95,7 @@ public bool IsMultiModal
         /// </summary>
         protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
         {
-            Images = new List<ImageData>();
+            Images = new List<byte[]>();
             _logger = logger;
             Context = context;
             _pastTokensCount = 0;
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 0944bb4e6..a87a0f37c 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -153,24 +153,7 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
             {
                 foreach (var image in Images)
                 {
-                    if (image.Type == ImageData.DataType.ImagePath && image.Data != null)
-                    {
-                        _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName(ClipModel.NativeHandle, Context, (string)image.Data));
-                    }
-                    else if (image.Type == ImageData.DataType.ImageBytes && image.Data != null)
-                    {
-                        _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, (byte[])image.Data));
-                    }
-                    else if (image.Type == ImageData.DataType.ImageURL && image.Data != null)
-                    {
-                        using var httpClient = new HttpClient();
-                        var uri = new Uri((string)image.Data);
-                        var imageBytes = httpClient.GetByteArrayAsync(uri).Result;
-                        if (imageBytes != null && imageBytes.Length > 0)
-                        {
-                            _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, imageBytes));
-                        }
-                    }
+                    _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, image));
                 }
 
                 int imageIndex = text.IndexOf("<image>");
diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs
index 9d2f8c783..f9d6ca5b2 100644
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -34,7 +34,7 @@ public class StatelessExecutor
         public LLavaWeights? ClipModel { get; }
 
         /// <inheritdoc />
-        public List<ImageData> Images { get; set; }
+        public List<byte[]> Images { get; set; }
 
         /// <summary>
         /// The context used by the executor when running the inference.
@@ -49,7 +49,7 @@ public class StatelessExecutor
         public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
         {
-            Images = new List<ImageData>();
+            Images = new List<byte[]>();
             _weights = weights;
             _params = @params;
             _logger = logger;
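With this simplification every executor consumes plain byte arrays, so callers reduce whatever source they have to byte[] up front. A minimal sketch (file name and URL are placeholders; `ex` is an executor as in the example project):

    // Local file: read the bytes up front.
    ex.Images.Add(File.ReadAllBytes(@"C:\images\cat.jpg"));

    // Remote image: download the bytes, then hand them over.
    using (var http = new HttpClient())
        ex.Images.Add(await http.GetByteArrayAsync("https://example.com/bird.jpg"));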
From 5d442fa2f8fe37a65ac05d69fbcded62b962878a Mon Sep 17 00:00:00 2001
From: Zoli Somogyi
Date: Fri, 19 Apr 2024 08:40:43 +0200
Subject: [PATCH 5/7] Embeddings correction

---
 LLama/LLamaEmbedder.cs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index 13a3e1c27..c29b6b256 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -97,9 +97,13 @@ public async Task<float[]> GetEmbeddings(string text, bool addBos, CancellationT
 
         private float[] GetEmbeddingsArray()
         {
-            var embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero);
-            if (embeddings.Length == 0)
-                return Array.Empty<float>();
+            var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle);
+            if (embeddings == null || embeddings.Length == 0)
+            {
+                embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero);
+                if (embeddings == null || embeddings.Length == 0)
+                    return Array.Empty<float>();
+            }
 
             return embeddings.ToArray();
         }

From 9c91fac20f3ebde5d1f1bc6a9feacaaa61c4d087 Mon Sep 17 00:00:00 2001
From: Zoli Somogyi
Date: Fri, 19 Apr 2024 10:55:36 +0200
Subject: [PATCH 6/7] Automatic Solution Generator - Work in progress

---
 CMakeLists.txt                              | 126 ++++++++++++++++
 LLama.GenerateSolution/CMakeLists.txt.in    | 126 ++++++++++++++++
 .../GenerateSolution.csproj                 |  14 ++
 LLama.GenerateSolution/GenerateSolution.sln |  25 ++++
 LLama.GenerateSolution/Program.cs           | 137 ++++++++++++++++++
 5 files changed, 428 insertions(+)
 create mode 100644 CMakeLists.txt
 create mode 100644 LLama.GenerateSolution/CMakeLists.txt.in
 create mode 100644 LLama.GenerateSolution/GenerateSolution.csproj
 create mode 100644 LLama.GenerateSolution/GenerateSolution.sln
 create mode 100644 LLama.GenerateSolution/Program.cs

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..b84dc1de8
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,126 @@
+#----------------------------------------------------------------------------------------
+# INFO:
+#  - How to use: change the flags in the 'Set one of these ON and all others OFF' section
+#  - CUDA: it will use automatically the CUDA SDK version installed
+#
+#----------------------------------------------------------------------------------------
+cmake_minimum_required(VERSION 3.8)
+project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp)
+if(NOT MSVC)
+  message(FATAL_ERROR "This CMake file only works with MSVC.")
+endif(NOT MSVC)
+
+#--------- Set one of these ON and all others OFF ------------------->
+option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON)
+option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF)
+option(LLAMA_CUDA "CUDA (no AVX)" OFF)
+#etc... add other setups
+#<--------- Set one of these ON and all others OFF -------------------
+
+# --------------- Don't change below this line -----------------------
+
+# Variable Settings
+if(LLAMA_CUDA_AVX2)
+  option(LLAMA_AVX "llama: enable AVX" ON)
+  option(LLAMA_AVX2 "llama: enable AVX2" ON)
+  option(LLAMA_CUBLAS "llama: use CUDA" ON)
+elseif(LLAMA_AVX2)
+  option(LLAMA_AVX "llama: enable AVX" ON)
+  option(LLAMA_AVX2 "llama: enable AVX2" ON)
+  option(LLAMA_CUBLAS "llama: use CUDA" OFF)
+elseif(LLAMA_CUDA)
+  option(LLAMA_AVX "llama: enable AVX" OFF)
+  option(LLAMA_AVX2 "llama: enable AVX2" OFF)
+  option(LLAMA_CUBLAS "llama: use CUDA" ON)
+elseif(OTHER_SETUPS)
+  #etc...
+endif()
+
+# Fixed Settings
+# general
+option(BUILD_SHARED_LIBS "build shared libraries" ON)
+option(LLAMA_STATIC "llama: static link libraries" OFF)
+option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
+option(LLAMA_LTO "llama: enable link time optimization" OFF)
+option(LLAMA_CCACHE "llama: use ccache if available" ON)
+
+# debug
+option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
+option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
+option(LLAMA_GPROF "llama: enable gprof" OFF)
+
+# build
+option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
+
+# sanitizers
+option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
+option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
+option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
+
+option(LLAMA_AVX512 "llama: enable AVX512" OFF)
+option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
+option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
+option(LLAMA_FMA "llama: enable FMA" OFF)
+# in MSVC F16C is implied with AVX2/AVX512
+if (NOT MSVC)
+  option(LLAMA_F16C "llama: enable F16C" OFF)
+endif()
+
+if (WIN32)
+  set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version")
+endif()
+
+# 3rd party libs
+option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
+option(LLAMA_BLAS "llama: use BLAS" OFF)
+set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
+#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
+option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
+option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
+set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
+set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
+option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
+set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
+set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
+    "llama: max. batch size for using peer access")
+option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
+option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
+option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
+option(LLAMA_VULKAN "llama: use Vulkan" OFF)
+option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
+option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
+option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF)
+option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF)
+option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
+option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
+option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
+option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF)
+option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
+option(LLAMA_MPI "llama: use MPI" OFF)
+option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
+option(LLAMA_SYCL "llama: use SYCL" OFF)
+option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
+option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
+
+option(LLAMA_BUILD_TESTS "llama: build tests" OFF)
+option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON)
+option(LLAMA_BUILD_SERVER "llama: build server example" OFF)
+
+# add perf arguments
+option(LLAMA_PERF "llama: enable perf" OFF)
+
+include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E)
+
+include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE)
+
+include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D)
+
+include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF)
+
+include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1)
+
+include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587)
+
+include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52)
+
+add_subdirectory(./llama.cpp)
\ No newline at end of file
diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in
new file mode 100644
index 000000000..b84dc1de8
--- /dev/null
+++ b/LLama.GenerateSolution/CMakeLists.txt.in
@@ -0,0 +1,126 @@
+#----------------------------------------------------------------------------------------
+# INFO:
+#  - How to use: change the flags in the 'Set one of these ON and all others OFF' section
+#  - CUDA: it will use automatically the CUDA SDK version installed
+#
+#----------------------------------------------------------------------------------------
+cmake_minimum_required(VERSION 3.8)
+project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp)
+if(NOT MSVC)
+  message(FATAL_ERROR "This CMake file only works with MSVC.")
+endif(NOT MSVC)
+
+#--------- Set one of these ON and all others OFF ------------------->
+option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON)
+option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF)
+option(LLAMA_CUDA "CUDA (no AVX)" OFF)
+#etc... add other setups
+#<--------- Set one of these ON and all others OFF -------------------
+
+# --------------- Don't change below this line -----------------------
+
+# Variable Settings
+if(LLAMA_CUDA_AVX2)
+  option(LLAMA_AVX "llama: enable AVX" ON)
+  option(LLAMA_AVX2 "llama: enable AVX2" ON)
+  option(LLAMA_CUBLAS "llama: use CUDA" ON)
+elseif(LLAMA_AVX2)
+  option(LLAMA_AVX "llama: enable AVX" ON)
+  option(LLAMA_AVX2 "llama: enable AVX2" ON)
+  option(LLAMA_CUBLAS "llama: use CUDA" OFF)
+elseif(LLAMA_CUDA)
+  option(LLAMA_AVX "llama: enable AVX" OFF)
+  option(LLAMA_AVX2 "llama: enable AVX2" OFF)
+  option(LLAMA_CUBLAS "llama: use CUDA" ON)
+elseif(OTHER_SETUPS)
+  #etc...
+endif()
+
+# Fixed Settings
+# general
+option(BUILD_SHARED_LIBS "build shared libraries" ON)
+option(LLAMA_STATIC "llama: static link libraries" OFF)
+option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
+option(LLAMA_LTO "llama: enable link time optimization" OFF)
+option(LLAMA_CCACHE "llama: use ccache if available" ON)
+
+# debug
+option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
+option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
+option(LLAMA_GPROF "llama: enable gprof" OFF)
+
+# build
+option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
+
+# sanitizers
+option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
+option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
+option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
+
+option(LLAMA_AVX512 "llama: enable AVX512" OFF)
+option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
+option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
+option(LLAMA_FMA "llama: enable FMA" OFF)
+# in MSVC F16C is implied with AVX2/AVX512
+if (NOT MSVC)
+  option(LLAMA_F16C "llama: enable F16C" OFF)
+endif()
+
+if (WIN32)
+  set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version")
+endif()
+
+# 3rd party libs
+option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
+option(LLAMA_BLAS "llama: use BLAS" OFF)
+set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
+#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
+option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
+option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
+set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
+set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
+option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
+set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
+set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
+    "llama: max. batch size for using peer access")
+option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
+option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
+option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
+option(LLAMA_VULKAN "llama: use Vulkan" OFF)
+option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
+option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
+option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF)
+option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF)
+option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
+option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
+option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
+option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF)
+option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
+option(LLAMA_MPI "llama: use MPI" OFF)
+option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
+option(LLAMA_SYCL "llama: use SYCL" OFF)
+option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
+option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
+
+option(LLAMA_BUILD_TESTS "llama: build tests" OFF)
+option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON)
+option(LLAMA_BUILD_SERVER "llama: build server example" OFF)
+
+# add perf arguments
+option(LLAMA_PERF "llama: enable perf" OFF)
+
+include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E)
+
+include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE)
+
+include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D)
+
+include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF)
+
+include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1)
+
+include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587)
+
+include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52)
+
+add_subdirectory(./llama.cpp)
\ No newline at end of file
diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj
new file mode 100644
index 000000000..f28f91ba6
--- /dev/null
+++ b/LLama.GenerateSolution/GenerateSolution.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net7.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Spectre.Console" />
+  </ItemGroup>
+
+</Project>
diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln
new file mode 100644
index 000000000..74c9e8e10
--- /dev/null
+++ b/LLama.GenerateSolution/GenerateSolution.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.8.34525.116
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}"
+EndProject
+Global
+  GlobalSection(SolutionConfigurationPlatforms) = preSolution
+    Debug|Any CPU = Debug|Any CPU
+    Release|Any CPU = Release|Any CPU
+  EndGlobalSection
+  GlobalSection(ProjectConfigurationPlatforms) = postSolution
+    {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+    {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+    {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+    {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU
+  EndGlobalSection
+  GlobalSection(SolutionProperties) = preSolution
+    HideSolutionNode = FALSE
+  EndGlobalSection
+  GlobalSection(ExtensibilityGlobals) = postSolution
+    SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564}
+  EndGlobalSection
+EndGlobal
diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs
new file mode 100644
index 000000000..ebf082b14
--- /dev/null
+++ b/LLama.GenerateSolution/Program.cs
@@ -0,0 +1,137 @@
+using Spectre.Console;
+using System;
+using System.Diagnostics;
+using System.Text;
+using static System.Runtime.InteropServices.JavaScript.JSType;
+
+namespace GenerateSolution
+{
+    internal class Program
+    {
+        static void Main(string[] args)
+        {
+            System.Console.InputEncoding = Encoding.Unicode;
+            System.Console.OutputEncoding = Encoding.Unicode;
+
+            // Check if we can accept key strokes
+            if (!AnsiConsole.Profile.Capabilities.Interactive)
+            {
+                AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]");
+                return;
+            }
+
+            var options = AskOptions();
+            var cmakePath = AskCMakePath();
+            if(string.IsNullOrEmpty(cmakePath) == true)
+            {
+                cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe";
+            }
+            AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath);
+
+            string cmakeListsPath = @"..\..\..\..\CMakeLists.txt";
+
+            //cmake [<options>] -B <path-to-build> [-S <path-to-source>]
+            //TODO: get the chosen arguments from above (hardcoded values below)
+            //TODO: edit the CMakeLists.txt.in template and create the CMakeLists.txt with the chosen options
+            cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\";
+
+            ProcessStartInfo startInfo = new ProcessStartInfo
+            {
+                FileName = cmakePath,
+                Arguments = cmakeListsPath,
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+                UseShellExecute = false,
+                CreateNoWindow = true,
+            };
+
+            try
+            {
+                bool bSuccess = false;
+                string lastError = "";
+                AnsiConsole.Progress()
+                    .AutoClear(false)
+                    .Columns(new ProgressColumn[]
+                    {
+                        new TaskDescriptionColumn(),
+                        new SpinnerColumn(Spinner.Known.Ascii),
+                    })
+                    .Start(ctx =>
+                    {
+                        var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate();
+                        cmakeTask.StartTask();
+                        using (Process process = new Process())
+                        {
+                            process.StartInfo = startInfo;
+                            process.Start();
+                            string output = process.StandardOutput.ReadToEnd();
+                            lastError = process.StandardError.ReadToEnd();
+                            process.WaitForExit();
+                            cmakeTask.StopTask();
+                            if (process.ExitCode == 0)
+                            {
+                                bSuccess = true;
+                            }
+                        }
+                    });
+
+                if (bSuccess == true)
+                {
+                    AnsiConsole.WriteLine("VS solution generated successfully.");
+                }
+                else
+                {
+                    AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}");
+                }
+            }
+            catch (Exception ex)
+            {
+                AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message);
+            }
+
+            Console.ReadLine();
+        }
+
+        public static string AskCMakePath()
+        {
+            return AnsiConsole.Prompt(
+                new TextPrompt<string>("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?")
+                    .AllowEmpty());
+        }
+
+        public static List<string> AskOptions()
+        {
+            var options = AnsiConsole.Prompt(
+                new MultiSelectionPrompt<string>()
+                    .PageSize(10)
+                    .Title("Select the preferred [green]options[/]?")
+                    .MoreChoicesText("[grey](Move up and down to reveal more options)[/]")
+                    .InstructionsText("[grey](Press [blue]<space>[/] to toggle an option, [green]<enter>[/] to accept)[/]")
+                    .AddChoiceGroup("Avx", new[]
+                    {
+                        "Avx2", "Avx512"
+                    })
+                    .AddChoiceGroup("Cuda", new[]
+                    {
+                        "Cuda"
+                    })
+                    .AddChoices(new[]
+                    {
+                        "x64",
+                    })
+                    .AddChoiceGroup("Visual Studio", new[]
+                    {
+                        "Visual Studio 16 2019",
+                        "Visual Studio 17 2022"
+                    })
+                );
+
+            if (options.Count > 0)
+            {
+                AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options));
+            }
+
+            return options;
+        }
+    }
+}

From 43786b0d9ae67e4ac48cc7a4d83ae83811cf87a8 Mon Sep 17 00:00:00 2001
From: Zoli Somogyi
Date: Fri, 19 Apr 2024 17:19:13 +0200
Subject: [PATCH 7/7] Revert "Embeddings correction"

This reverts commit 5d442fa2f8fe37a65ac05d69fbcded62b962878a.
---
 LLama/LLamaEmbedder.cs | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index c29b6b256..13a3e1c27 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -97,13 +97,9 @@ public async Task<float[]> GetEmbeddings(string text, bool addBos, CancellationT
 
         private float[] GetEmbeddingsArray()
         {
-            var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle);
-            if (embeddings == null || embeddings.Length == 0)
-            {
-                embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero);
-                if (embeddings == null || embeddings.Length == 0)
-                    return Array.Empty<float>();
-            }
+            var embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero);
+            if (embeddings.Length == 0)
+                return Array.Empty<float>();
 
             return embeddings.ToArray();
         }
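With the correction reverted, GetEmbeddingsArray once again relies solely on llama_get_embeddings_seq. For context, a minimal caller-side sketch of the embedder this code sits in; the model path is a placeholder and the property names are assumed for the LLamaSharp version this series targets:

    // Hypothetical usage of LLamaEmbedder around the code patched above.
    var @params = new ModelParams(@"C:\models\llama-2-7b.gguf") { EmbeddingMode = true };
    using var weights = LLamaWeights.LoadFromFile(@params);
    using var embedder = new LLamaEmbedder(weights, @params);

    // GetEmbeddings ultimately calls GetEmbeddingsArray shown in the diff.
    float[] embedding = await embedder.GetEmbeddings("Hello, world!");
    Console.WriteLine($"dimensions: {embedding.Length}");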