Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Llava api #563

Merged
merged 20 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions LLama/LLavaWeights.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ namespace LLama;

public sealed class LLavaWeights : IDisposable
{
public SafeLlavaModelHandle NativeClipHandle { get; }
public SafeLlavaModelHandle NativeHandle { get; }

internal LLavaWeights(SafeLlavaModelHandle weights)
{
NativeClipHandle = weights;
NativeHandle = weights;
}

public static LLavaWeights LoadFromFile(string mmProject)
Expand All @@ -28,7 +28,7 @@ public static LLavaWeights LoadFromFile(string mmProject)
/// <returns></returns>
public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past )
{
return NativeClipHandle.EmbedImage(ctxLlama, Image, ref n_past );
return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past );
}

/// <summary>
Expand All @@ -40,12 +40,12 @@ public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past )
/// <returns></returns>
public bool EmbedImage(LLamaContext ctxLlama, Byte[] Image, ref int n_past )
{
return NativeClipHandle.EmbedImage(ctxLlama, Image, ref n_past );
return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past );
}

public void Dispose()
{
NativeClipHandle.Dispose();
NativeHandle.Dispose();
}

}
13 changes: 13 additions & 0 deletions LLama/Native/LLavaImageEmbed.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using System.Runtime.InteropServices;

namespace LLama.Native;

/// <summary>
/// Managed mirror of the LLaVa image embedding structure.
/// </summary>
/// <remarks>
/// Declared with sequential layout, so the fields keep their declared order
/// in memory. NOTE(review): the field order presumably has to match the
/// native <c>llava_image_embed</c> struct; confirm against llava.h before
/// reordering or adding fields.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public unsafe struct LLavaImageEmbed
{
    /// <summary>
    /// Raw pointer to float data. Presumably the embedding values, owned by
    /// the native library; confirm against llava.h.
    /// </summary>
    public float* embed;

    /// <summary>
    /// Integer stored alongside <see cref="embed"/>. Presumably the number of
    /// image positions covered by the embedding; confirm against llava.h.
    /// </summary>
    public int n_image_pos;
}
36 changes: 5 additions & 31 deletions LLama/Native/NativeApi.LLava.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,6 @@ namespace LLama.Native;
public static unsafe partial class NativeApi
{

/*/// <summary>
/// Clip Vision Parameters
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct clip_vision_hparams
{
public Int32 image_size;
public Int32 patch_size;
public Int32 hidden_size;
public Int32 n_intermediate;
public Int32 projection_dim;
public Int32 n_head;
public Int32 n_layer;
public float eps;
};*/

/// <summary>
/// LLaVa Image embeddings
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct llava_image_embed
{
public float* embed;
public int n_image_pos;
}

/// <summary>
/// Load MULTI MODAL PROJECTIONS model / Clip Model
/// </summary>
Expand All @@ -51,7 +25,7 @@ public struct llava_image_embed


/// <summary>
/// Sanity check for clip <-> llava embed size match
/// Sanity check for clip &lt;-&gt; llava embed size match
/// </summary>
/// <returns></returns>
[DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)]
Expand All @@ -67,7 +41,7 @@ public struct llava_image_embed
/// <returns></returns>
[DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes",
CallingConvention = CallingConvention.Cdecl)]
public static extern llava_image_embed* llava_image_embed_make_with_bytes(clip_ctx ctx_clip, int n_threads,
public static extern LLavaImageEmbed* llava_image_embed_make_with_bytes(clip_ctx ctx_clip, int n_threads,
byte[] image_bytes, int image_bytes_length);

/// <summary>
Expand All @@ -79,7 +53,7 @@ public struct llava_image_embed
/// <returns></returns>
[DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename",
CallingConvention = CallingConvention.Cdecl)]
public static extern llava_image_embed* llava_image_embed_make_with_filename(clip_ctx ctx_clip, int n_threads,
public static extern LLavaImageEmbed* llava_image_embed_make_with_filename(clip_ctx ctx_clip, int n_threads,
[MarshalAs(UnmanagedType.LPStr)] string image_path);

/// <summary>
Expand All @@ -88,7 +62,7 @@ public struct llava_image_embed
/// <param name="embed"></param>
/// <returns></returns>
[DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)]
public static extern llava_image_embed* llava_image_embed_free(llava_image_embed* embed);
public static extern LLavaImageEmbed* llava_image_embed_free(LLavaImageEmbed* embed);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a LLavaImageEmbed is allocated in some methods and freed in another, it should be handled with a SafeHandle to absolutely ensure it is disposed properly.

Sorry I didn't spot this in my last review!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless it's extremely short lived, in which case it can be handled with try/finally everywhere it's used. But a SafeHandle is probably easier and safer.


/// <summary>
/// Write the image represented by embed into the llama context with batch size n_batch, starting at context
Expand All @@ -97,7 +71,7 @@ public struct llava_image_embed
/// <param name="embed">ctx_llama</param>
/// <returns></returns>
[DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)]
public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, llava_image_embed* embed,
public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, LLavaImageEmbed* embed,
int n_batch, ref int n_past);

}
23 changes: 20 additions & 3 deletions LLama/Native/SafeLlavaModelHandle.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using LLama;
Expand All @@ -15,7 +16,7 @@ public sealed class SafeLlavaModelHandle
: SafeLLamaHandleBase
{

internal protected SafeLlavaModelHandle(IntPtr handle)
public SafeLlavaModelHandle(IntPtr handle)
: base(handle, true)
{
}
Expand All @@ -38,13 +39,29 @@ protected override bool ReleaseHandle()
/// <exception cref="RuntimeError"></exception>
public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity )
{
SignalRT marked this conversation as resolved.
Show resolved Hide resolved
var ctxContext = NativeApi.clip_model_load(modelPath, verbosity );

// Try to open the model file, this will check:
// - File exists (automatically throws FileNotFoundException)
// - File is readable (explicit check)
// This provides better error messages than llama.cpp, which would throw an access violation exception in both cases.
using (var fs = new FileStream(modelPath, FileMode.Open))
if (!fs.CanRead)
throw new InvalidOperationException($"Llava MMP Model file '{modelPath}' is not readable");

var ctxContext = NativeApi.clip_model_load(modelPath, verbosity );

if (ctxContext == IntPtr.Zero)
throw new RuntimeError($"Failed to load LLaVa model {modelPath}.");

return new SafeLlavaModelHandle(ctxContext);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can modify clip_model_load to directly return SafeLlavaModelHandle. That way you never have to directly handle the pointer. See here for example.


}

/// <summary>
/// Load and embed image
/// </summary>
/// <param name="imagePath">Image path on jpeg format</param>
/// <param name="threads"></param>
public void LoadImage( string imagePath, int threads )
SignalRT marked this conversation as resolved.
Show resolved Hide resolved
{
unsafe
Expand Down Expand Up @@ -75,7 +92,7 @@ public bool EmbedImage(LLamaContext ctxLlama, string image, ref int n_past)
/// Embed the image from binary in llama context
/// </summary>
/// <param name="ctxLlama"></param>
/// <param name="image"></param>
/// <param name="image">jpeg image</param>
/// <param name="n_past"></param>
/// <returns></returns>
public bool EmbedImage(LLamaContext ctxLlama, Byte[] image, ref int n_past )
SignalRT marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
Binary file removed LLama/runtimes/deps/avx/libllama.dll
SignalRT marked this conversation as resolved.
Outdated
Show resolved Hide resolved
Binary file not shown.
Binary file removed LLama/runtimes/deps/avx2/libllama.dll
SignalRT marked this conversation as resolved.
Outdated
Show resolved Hide resolved
Binary file not shown.
Binary file removed LLama/runtimes/deps/avx512/libllama.dll
SignalRT marked this conversation as resolved.
Outdated
Show resolved Hide resolved
Binary file not shown.
Binary file removed LLama/runtimes/deps/libllama.dll
SignalRT marked this conversation as resolved.
Outdated
Show resolved Hide resolved
Binary file not shown.
Loading