Implement data URI parser

To make LLaVA simpler to use I've created an eval_string() function that accepts an arbitrary string input, which is tokenized and decoded. If it has any legal data:foo URI substrings inside it then they'll be replaced with image embeddings and evaluated separately. This means you can embed images into your JSON chat.completion.messages using <img>, ![](data:..) or simply putting the naked data:.... URI in your message. If it decodes as a valid image file format that STB supports, then LLaVA shall see it.

This change additionally sneaks in the following improvements:

- Introduce --nologo flag for chatbot
- Support --no-display-prompt in chatbot
- Upgrade STB libraries to latest versions
- Overhaul --help and man page documentation
- Be smarter about printing \n in chatbot output
- Make --gpu flag imply -ngl 999 for ease of use
- Use srgb rather than linear scaling in stable diffusion
- Support the --verbose flag in the chatbot to display logs
- Fix the /stats command which wasn't working correctly earlier
Mozilla-Ocho · Nov 11, 2024 · 4c7b7d5 · 4c7b7d5
1 parent d25c077
commit 4c7b7d5
Show file tree

Hide file tree

Showing 28 changed files with 18,286 additions and 9,756 deletions.
diff --git a/llama.cpp/base64.h b/llama.cpp/base64.h
@@ -61,21 +61,23 @@ class base64
     };
 
     /**
-     Encodes all the elements from `in_begin` to `in_end` to `out`.
-
-     @warning The source and destination cannot overlap. The destination must be able to hold at least
-     `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator.
-
-     @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than
-     8 bits
-     @tparam Output_iterator the destination; the elements written to it are from the type `char`
-     @param in_begin the beginning of the source
-     @param in_end the ending of the source
-     @param out the destination iterator
-     @param alphabet which alphabet should be used
-     @returns the iterator to the next element past the last element copied
-     @throws see `Input_iterator` and `Output_iterator`
-    */
+     * Encodes all the elements from `in_begin` to `in_end` to `out`.
+     *
+     * @warning The source and destination cannot overlap. The
+     *     destination must be able to hold at least
+     *     `required_encode_size(std::distance(in_begin, in_end))`,
+     *     otherwise the behavior depends on the output iterator.
+     *
+     * @tparam Input_iterator the source; the returned elements are cast
+     *     to `std::uint8_t` and should not be greater than 8 bits
+     * @tparam Output_iterator the destination; the elements written to it are from the type `char`
+     * @param in_begin the beginning of the source
+     * @param in_end the ending of the source
+     * @param out the destination iterator
+     * @param alphabet which alphabet should be used
+     * @returns the iterator to the next element past the last element copied
+     * @throws see `Input_iterator` and `Output_iterator`
+     */
     template<typename Input_iterator, typename Output_iterator>
     static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                   alphabet alphabet = alphabet::standard)
@@ -142,60 +144,59 @@ class base64
 
         return out;
     }
+
     /**
-     Encodes a string.
-
-     @param str the string that should be encoded
-     @param alphabet which alphabet should be used
-     @returns the encoded base64 string
-     @throws see base64::encode()
-    */
-    static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard)
+     * Encodes a string.
+     *
+     * @param str the string that should be encoded
+     * @param alphabet which alphabet should be used
+     * @returns the encoded base64 string
+     * @throws see base64::encode()
+     */
+    static std::string encode(const std::string_view& str, alphabet alphabet = alphabet::standard)
     {
         std::string result;
-
         result.reserve(required_encode_size(str.length()) + 1);
-
         encode(str.begin(), str.end(), std::back_inserter(result), alphabet);
-
         return result;
     }
-    /**
-     Encodes a char array.
 
-     @param buffer the char array
-     @param size the size of the array
-     @param alphabet which alphabet should be used
-     @returns the encoded string
-    */
+    /**
+     * Encodes a char array.
+     *
+     * @param buffer the char array
+     * @param size the size of the array
+     * @param alphabet which alphabet should be used
+     * @returns the encoded string
+     */
     static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard)
     {
         std::string result;
-
         result.reserve(required_encode_size(size) + 1);
-
         encode(buffer, buffer + size, std::back_inserter(result), alphabet);
-
         return result;
     }
+
     /**
-     Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`,
-     in other words: inplace decoding is possible.
-
-     @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`,
-     otherwise the behavior depends on the output iterator.
-
-     @tparam Input_iterator the source; the returned elements are cast to `char`
-     @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
-     @param in_begin the beginning of the source
-     @param in_end the ending of the source
-     @param out the destination iterator
-     @param alphabet which alphabet should be used
-     @param behavior the behavior when an error was detected
-     @returns the iterator to the next element past the last element copied
-     @throws base64_error depending on the set behavior
-     @throws see `Input_iterator` and `Output_iterator`
-    */
+     * Decodes all the elements from `in_begin` to `in_end` to `out`.
+     * `in_begin` may point to the same location as `out`, in other
+     * words: inplace decoding is possible.
+     *
+     * @warning The destination must be able to hold at least
+     *     `required_decode_size(std::distance(in_begin, in_end))`,
+     *     otherwise the behavior depends on the output iterator.
+     *
+     * @tparam Input_iterator the source; the returned elements are cast to `char`
+     * @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
+     * @param in_begin the beginning of the source
+     * @param in_end the ending of the source
+     * @param out the destination iterator
+     * @param alphabet which alphabet should be used
+     * @param behavior the behavior when an error was detected
+     * @returns the iterator to the next element past the last element copied
+     * @throws base64_error depending on the set behavior
+     * @throws see `Input_iterator` and `Output_iterator`
+     */
     template<typename Input_iterator, typename Output_iterator>
     static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                   alphabet alphabet          = alphabet::auto_,
@@ -242,99 +243,99 @@ class base64
 
         return out;
     }
+
     /**
-     Decodes a string.
-
-     @param str the base64 encoded string
-     @param alphabet which alphabet should be used
-     @param behavior the behavior when an error was detected
-     @returns the decoded string
-     @throws see base64::decode()
-    */
-    static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_,
+     * Decodes a string.
+     *
+     * @param str the base64 encoded string
+     * @param alphabet which alphabet should be used
+     * @param behavior the behavior when an error was detected
+     * @returns the decoded string
+     * @throws see base64::decode()
+     */
+    static std::string decode(const std::string_view& str, alphabet alphabet = alphabet::auto_,
                               decoding_behavior behavior = decoding_behavior::moderate)
     {
         std::string result;
-
         result.reserve(max_decode_size(str.length()));
-
         decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior);
-
         return result;
     }
+
     /**
-     Decodes a string.
-
-     @param buffer the base64 encoded buffer
-     @param size the size of the buffer
-     @param alphabet which alphabet should be used
-     @param behavior the behavior when an error was detected
-     @returns the decoded string
-     @throws see base64::decode()
-    */
+     * Decodes a string.
+     *
+     * @param buffer the base64 encoded buffer
+     * @param size the size of the buffer
+     * @param alphabet which alphabet should be used
+     * @param behavior the behavior when an error was detected
+     * @returns the decoded string
+     * @throws see base64::decode()
+     */
     static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_,
                               decoding_behavior behavior = decoding_behavior::moderate)
     {
         std::string result;
-
         result.reserve(max_decode_size(size));
-
         decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior);
-
         return result;
     }
-    /**
-     Decodes a string inplace.
 
-     @param[in,out] str the base64 encoded string
-     @param alphabet which alphabet should be used
-     @param behavior the behavior when an error was detected
-     @throws base64::decode_inplace()
-    */
+    /**
+     * Decodes a string inplace.
+     *
+     * @param[in,out] str the base64 encoded string
+     * @param alphabet which alphabet should be used
+     * @param behavior the behavior when an error was detected
+     * @throws base64::decode_inplace()
+     */
     static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_,
                                decoding_behavior behavior = decoding_behavior::moderate)
     {
         str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin());
     }
+
     /**
-     Decodes a char array inplace.
-
-     @param[in,out] str the string array
-     @param size the length of the array
-     @param alphabet which alphabet should be used
-     @param behavior the behavior when an error was detected
-     @returns the pointer to the next element past the last element decoded
-     @throws base64::decode_inplace()
-    */
+     * Decodes a char array inplace.
+     *
+     * @param[in,out] str the string array
+     * @param size the length of the array
+     * @param alphabet which alphabet should be used
+     * @param behavior the behavior when an error was detected
+     * @returns the pointer to the next element past the last element decoded
+     * @throws base64::decode_inplace()
+     */
     static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
                                 decoding_behavior behavior = decoding_behavior::moderate)
     {
         return decode(str, str + size, str, alphabet, behavior);
     }
-    /**
-     Returns the required decoding size for a given size. The value is calculated with the following formula:
-
-     $$
-     \lceil \frac{size}{4} \rceil \cdot 3
-     $$
 
-     @param size the size of the encoded input
-     @returns the size of the resulting decoded buffer; this the absolute maximum
-    */
+    /**
+     * Returns the required decoding size for a given size. The value is calculated with the following formula:
+     *
+     * $$
+     * \lceil \frac{size}{4} \rceil \cdot 3
+     * $$
+     *
+     * @param size the size of the encoded input
+     * @returns the size of the resulting decoded buffer; this the absolute maximum
+     */
     static std::size_t max_decode_size(std::size_t size) noexcept
     {
         return (size / 4 + (size % 4 ? 1 : 0)) * 3;
     }
-    /**
-     Returns the required encoding size for a given size. The value is calculated with the following formula:
-
-     $$
-     \lceil \frac{size}{3} \rceil \cdot 4
-     $$
 
-     @param size the size of the decoded input
-     @returns the size of the resulting encoded buffer
-    */
+    /**
+     * Returns the required encoding size for a given size. The value is calculated with the following formula:
+     *
+     * $$
+     * \lceil \frac{size}{3} \rceil \cdot 4
+     * $$
+     *
+     * @param size the size of the decoded input
+     * @returns the size of the resulting encoded buffer
+     */
     static std::size_t required_encode_size(std::size_t size) noexcept
     {
         return (size / 3 + (size % 3 ? 1 : 0)) * 4;

diff --git a/llama.cpp/common.cpp b/llama.cpp/common.cpp
@@ -225,6 +225,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         FLAG_ascii = true;
         return true;
     }
+    if (arg == "--nologo") {
+        FLAG_nologo = true;
+        return true;
+    }
     if (arg == "--precise") {
         FLAG_precise = true;
         return true;
@@ -261,6 +265,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             fprintf(stderr, "error: invalid --gpu flag value: %s\n", argv[i]);
             exit(1);
         }
+        if (FLAG_gpu >= 0 && params.n_gpu_layers == -1) {
+            params.n_gpu_layers = 999;
+        }
         return true;
     }
 

diff --git a/llama.cpp/imatrix/imatrix.1 b/llama.cpp/imatrix/imatrix.1
@@ -46,6 +46,15 @@ tensor. Experience indicates that it is better to not utilize the
 importance matrix when quantizing
 .Pa output.weight ,
 so this is set to false by default.
+.It Fl Fl chunks Ar N
+Max number of chunks to process.
+.Pp
+.Bl -dash -compact
+.It
+-1 = all
+.El
+.Pp
+Default: -1
 .El
 .Sh PROTIPS
 For faster computation, pass the

diff --git a/llama.cpp/llama-bench/main.1 b/llama.cpp/llama-bench/main.1
@@ -489,8 +489,6 @@ How to split tensors across multiple GPUs, comma-separated list of
 proportions, e.g. 3,1
 .It Fl mg Ar i , Fl Fl main-gpu Ar i
 The GPU to use for scratch and small tensors.
-.It Fl nommq , Fl Fl no-mul-mat-q
-Use cuBLAS instead of custom mul_mat_q CUDA kernels. Not recommended since this is both slower and uses more VRAM.
 .It Fl Fl verbose-prompt
 Print prompt before generation.
 .It Fl Fl simple-io