SciSharp · martindevans · Mar 13, 2024 · Mar 1, 2024 · Mar 2, 2024 · Mar 3, 2024
diff --git a/LLama/LLavaWeights.cs b/LLama/LLavaWeights.cs
@@ -6,11 +6,11 @@ namespace LLama;
 
 public sealed class LLavaWeights : IDisposable
 {
-    public SafeLlavaModelHandle NativeClipHandle { get; }   
+    public SafeLlavaModelHandle NativeHandle { get; }   
 
     internal LLavaWeights(SafeLlavaModelHandle weights)
     {
-        NativeClipHandle = weights;
+        NativeHandle = weights;
     }
 
     public static LLavaWeights LoadFromFile(string mmProject)
@@ -28,7 +28,7 @@ public static LLavaWeights LoadFromFile(string mmProject)
     /// <returns></returns>
     public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past )
     {
-        return NativeClipHandle.EmbedImage(ctxLlama, Image, ref n_past );
+        return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past );
     }
 
     /// <summary>
@@ -40,12 +40,12 @@ public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past )
     /// <returns></returns>
     public bool EmbedImage(LLamaContext ctxLlama, Byte[] Image, ref int n_past )
     {
-        return NativeClipHandle.EmbedImage(ctxLlama, Image, ref n_past );
+        return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past );
     }
 
     public void Dispose()
     {
-        NativeClipHandle.Dispose();
+        NativeHandle.Dispose();
     }    
 
 }
diff --git a/LLama/Native/LLavaImageEmbed.cs b/LLama/Native/LLavaImageEmbed.cs
@@ -0,0 +1,13 @@
+using System.Runtime.InteropServices;
+
+namespace LLama.Native;
+
+/// <summary>
+/// LLaVa Image embeddings 
+/// </summary>
+[StructLayout(LayoutKind.Sequential)]
+unsafe public struct LLavaImageEmbed
+{
+    public float* embed;
+    public int n_image_pos;
+}
diff --git a/LLama/Native/NativeApi.LLava.cs b/LLama/Native/NativeApi.LLava.cs
@@ -7,32 +7,6 @@ namespace LLama.Native;
 public static unsafe partial class NativeApi
 {
 
-    /*/// <summary>
-    /// Clip Vision Parameters
-    /// </summary>
-    [StructLayout(LayoutKind.Sequential)]
-    public struct clip_vision_hparams
-    {
-        public Int32 image_size;
-        public Int32 patch_size;
-        public Int32 hidden_size;
-        public Int32 n_intermediate;
-        public Int32 projection_dim;
-        public Int32 n_head;
-        public Int32 n_layer;
-        public float eps;
-    };*/
-
-    /// <summary>
-    /// LLaVa Image embeddings 
-    /// </summary>
-    [StructLayout(LayoutKind.Sequential)]
-    public struct llava_image_embed
-    {
-        public float* embed;
-        public int n_image_pos;
-    }
-
     /// <summary>
     /// Load MULTI MODAL PROJECTIONS model / Clip Model
     /// </summary>
@@ -51,7 +25,7 @@ public struct llava_image_embed
 
 
     /// <summary>
-    /// Sanity check for clip <-> llava embed size match
+    /// Sanity check for clip &lt;-&gt; llava embed size match
     /// </summary>
     /// <returns></returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)]
@@ -67,7 +41,7 @@ public struct llava_image_embed
     /// <returns></returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes",
         CallingConvention = CallingConvention.Cdecl)]
-    public static extern llava_image_embed* llava_image_embed_make_with_bytes(clip_ctx ctx_clip, int n_threads,
+    public static extern LLavaImageEmbed* llava_image_embed_make_with_bytes(clip_ctx ctx_clip, int n_threads,
         byte[] image_bytes, int image_bytes_length);
 
     /// <summary>
@@ -79,7 +53,7 @@ public struct llava_image_embed
     /// <returns></returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename",
         CallingConvention = CallingConvention.Cdecl)]
-    public static extern llava_image_embed* llava_image_embed_make_with_filename(clip_ctx ctx_clip, int n_threads,
+    public static extern LLavaImageEmbed* llava_image_embed_make_with_filename(clip_ctx ctx_clip, int n_threads,
         [MarshalAs(UnmanagedType.LPStr)] string image_path);
 
     /// <summary>
@@ -88,7 +62,7 @@ public struct llava_image_embed
     /// <param name="embed"></param>
     /// <returns></returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)]
-    public static extern llava_image_embed* llava_image_embed_free(llava_image_embed* embed);
+    public static extern LLavaImageEmbed* llava_image_embed_free(LLavaImageEmbed* embed);
 
     /// <summary>
     /// Write the image represented by embed into the llama context with batch size n_batch, starting at context
@@ -97,7 +71,7 @@ public struct llava_image_embed
     /// <param name="embed">ctx_llama</param>
     /// <returns></returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)]
-    public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, llava_image_embed* embed,
+    public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, LLavaImageEmbed* embed,
         int n_batch, ref int n_past);
 
 }
diff --git a/LLama/Native/SafeLlavaModelHandle.cs b/LLama/Native/SafeLlavaModelHandle.cs
@@ -1,5 +1,6 @@
 using System;
 using System.Collections.Generic;
+using System.IO;
 using System.Linq;
 using System.Text;
 using LLama;
@@ -15,7 +16,7 @@ public sealed class SafeLlavaModelHandle
         : SafeLLamaHandleBase
     {
 
-        internal protected SafeLlavaModelHandle(IntPtr handle)
+        public SafeLlavaModelHandle(IntPtr handle)
             : base(handle, true)
         {
         }
@@ -38,13 +39,29 @@ protected override bool ReleaseHandle()
         /// <exception cref="RuntimeError"></exception>
         public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity )
         {
-            var ctxContext =  NativeApi.clip_model_load(modelPath, verbosity );            
+
+            // Open the model file read-only before handing the path to native code. This checks:
+            // - File exists (FileStream throws FileNotFoundException otherwise)
+            // - File is readable (explicit CanRead check; FileAccess.Read avoids demanding write access to the model)
+            // This provides better error messages than llama.cpp, which would throw an access violation exception in both cases.
+            using (var fs = new FileStream(modelPath, FileMode.Open, FileAccess.Read))
+                if (!fs.CanRead)
+                    throw new InvalidOperationException($"Llava MMP Model file '{modelPath}' is not readable");
+
+            var ctxContext =  NativeApi.clip_model_load(modelPath, verbosity ); 
+
             if (ctxContext == IntPtr.Zero)
                 throw new RuntimeError($"Failed to load LLaVa model {modelPath}.");
 
             return new SafeLlavaModelHandle(ctxContext);
+
         }
 
+        /// <summary>
+        /// Load and embed image
+        /// </summary>
+        /// <param name="imagePath">Path to the image file, in JPEG format</param>
+        /// <param name="threads"></param>
         public void LoadImage( string imagePath, int threads )
         {
             unsafe
@@ -75,7 +92,7 @@ public bool EmbedImage(LLamaContext ctxLlama, string image, ref int n_past)
         /// Embed the image from binary in llama context
         /// </summary>
         /// <param name="ctxLlama"></param>
-        /// <param name="image"></param>
+        /// <param name="image">jpeg image</param>
         /// <param name="n_past"></param>
         /// <returns></returns>
         public bool EmbedImage(LLamaContext ctxLlama, Byte[] image, ref int n_past )

diff --git a/LLama/runtimes/deps/avx/libllama.dll b/LLama/runtimes/deps/avx/libllama.dll
diff --git a/LLama/runtimes/deps/avx2/libllama.dll b/LLama/runtimes/deps/avx2/libllama.dll
diff --git a/LLama/runtimes/deps/avx512/libllama.dll b/LLama/runtimes/deps/avx512/libllama.dll
diff --git a/LLama/runtimes/deps/libllama.dll b/LLama/runtimes/deps/libllama.dll