From 0dac7c26b6f6d502eb9c023dca8d260d62be7fd2 Mon Sep 17 00:00:00 2001 From: ThinkThinkSyn Date: Thu, 5 Jun 2025 02:22:07 +0800 Subject: [PATCH 1/8] add mm support for /completion --- .gitignore | 1 + tools/server/server.cpp | 64 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index f8ceb1560a1df..88bbdec4463af 100644 --- a/.gitignore +++ b/.gitignore @@ -146,3 +146,4 @@ poetry.toml # Local scripts /run-vim.sh /run-chat.sh +.history \ No newline at end of file diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 9038df4c3830e..6bcc37815811d 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4332,9 +4332,69 @@ int main(int argc, char ** argv) { } }; - const auto handle_completions = [&handle_completions_impl](const httplib::Request & req, httplib::Response & res) { + const auto handle_completions = [&ctx_server, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) { json data = json::parse(req.body); - std::vector files; // dummy + json & medias = json_value(data, "medias", json::array()); + auto & opt = ctx_server.oai_parser_opt; + std::vector files; + + if (medias.is_array()) { + for (auto & m : medias) { + std::string type = json_value(m, "type", std::string()); + std::string data = json_value(m, "data", std::string()); + if (type.empty() || data.empty()) { + continue; + } + if (type == "image_url" || type == "image" || type == "img") { + if (!opt.allow_image) { + throw std::runtime_error("image input is not supported - hint: if this is unexpected, you may need to provide the mmproj"); + } + if (string_starts_with(data, "http")) { + // download remote image + common_remote_params params; + params.headers.push_back("User-Agent: llama.cpp/" + build_info); + params.max_size = 1024 * 1024 * 10; // 10MB + params.timeout = 10; // seconds + SRV_INF("downloading image from '%s'\n", url.c_str()); + auto res = common_remote_get_content(url, params); + if (200 <= res.first && res.first < 300) { + SRV_INF("downloaded %ld bytes\n", res.second.size()); + raw_buffer buf; + buf.insert(buf.end(), res.second.begin(), res.second.end()); + files.push_back(buf); + } else { + throw std::runtime_error("Failed to download image"); + } + } else { + // try to decode base64 image + std::vector parts = string_split(url, /*separator*/ ','); + if (parts.size() != 2) { + throw std::runtime_error("Invalid image_url.url value"); + } else if (!string_starts_with(parts[0], "data:image/")) { + throw std::runtime_error("Invalid image_url.url format: " + parts[0]); + } else if (!string_ends_with(parts[0], "base64")) { + throw std::runtime_error("image_url.url must be base64 encoded"); + } else { + auto base64_data = parts[1]; + auto decoded_data = base64_decode(base64_data); + files.push_back(decoded_data); + } + } + } else if (type == "input_audio" || type == "audio") { + if (!opt.allow_audio) { + throw std::runtime_error("audio input is not supported - hint: if this is unexpected, you may need to provide the mmproj"); + } + std::string format = json_value(m, "format", std::string()); + // while we also support flac, we don't allow it here so we matches the OAI spec + if (format != "wav" && format != "mp3") { + throw std::runtime_error("input_audio.format must be either 'wav' or 'mp3'"); + } + auto decoded_data = base64_decode(data); // expected to be base64 encoded + files.push_back(decoded_data); + } + } + } + handle_completions_impl( SERVER_TASK_TYPE_COMPLETION, data, From 260051867a66ec6e79d1efcf1069648766e77875 Mon Sep 17 00:00:00 2001 From: ThinkThinkSyn Date: Thu, 5 Jun 2025 02:33:01 +0800 Subject: [PATCH 2/8] fix url bug --- tools/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 6bcc37815811d..572e81540b0b2 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4356,7 +4356,7 @@ int main(int argc, char ** argv) { params.max_size = 1024 * 1024 * 10; // 10MB params.timeout = 10; // seconds SRV_INF("downloading image from '%s'\n", url.c_str()); - auto res = common_remote_get_content(url, params); + auto res = common_remote_get_content(data, params); if (200 <= res.first && res.first < 300) { SRV_INF("downloaded %ld bytes\n", res.second.size()); raw_buffer buf; From c1262004b6ca4d459503792f65e161a4c975257f Mon Sep 17 00:00:00 2001 From: ThinkThinkSyn Date: Thu, 5 Jun 2025 02:34:31 +0800 Subject: [PATCH 3/8] fix url bug --- tools/server/server.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 572e81540b0b2..ee2c588ff71ca 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4355,7 +4355,7 @@ int main(int argc, char ** argv) { params.headers.push_back("User-Agent: llama.cpp/" + build_info); params.max_size = 1024 * 1024 * 10; // 10MB params.timeout = 10; // seconds - SRV_INF("downloading image from '%s'\n", url.c_str()); + SRV_INF("downloading image from '%s'\n", data.c_str()); auto res = common_remote_get_content(data, params); if (200 <= res.first && res.first < 300) { SRV_INF("downloaded %ld bytes\n", res.second.size()); @@ -4367,7 +4367,7 @@ int main(int argc, char ** argv) { } } else { // try to decode base64 image - std::vector parts = string_split(url, /*separator*/ ','); + std::vector parts = string_split(data, /*separator*/ ','); if (parts.size() != 2) { throw std::runtime_error("Invalid image_url.url value"); } else if (!string_starts_with(parts[0], "data:image/")) { From 9dc3dc78da37b5aa7ed8a493eecd9b228750a5f0 Mon Sep 17 00:00:00 2001 From: ThinkThinkSyn Date: Thu, 5 Jun 2025 02:35:54 +0800 Subject: [PATCH 4/8] fix data bug --- tools/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index ee2c588ff71ca..8734aa463ddd3 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4334,7 +4334,7 @@ int main(int argc, char ** argv) { const auto handle_completions = [&ctx_server, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) { json data = json::parse(req.body); - json & medias = json_value(data, "medias", json::array()); + json & medias = json_value(&data, "medias", json::array()); auto & opt = ctx_server.oai_parser_opt; std::vector files; From 490af6ee42b5871e599fe9e989100ee0403514e0 Mon Sep 17 00:00:00 2001 From: ThinkThinkSyn Date: Thu, 5 Jun 2025 02:40:22 +0800 Subject: [PATCH 5/8] fix data bug --- tools/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 8734aa463ddd3..522483b090425 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4334,7 +4334,7 @@ int main(int argc, char ** argv) { const auto handle_completions = [&ctx_server, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) { json data = json::parse(req.body); - json & medias = json_value(&data, "medias", json::array()); + json medias = json_value(data, "medias", json::array()); auto & opt = ctx_server.oai_parser_opt; std::vector files; From 38b20c5a0b34c72177ed263775a988dba2fd1f1a Mon Sep 17 00:00:00 2001 From: ThinkThinkSyn Date: Thu, 5 Jun 2025 03:15:40 +0800 Subject: [PATCH 6/8] enable mtmd in completion --- tools/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 522483b090425..3d91ca22dd17e 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4215,7 +4215,7 @@ int main(int argc, char ** argv) { throw std::runtime_error("prompt must be a string"); } - if (oaicompat && has_mtmd) { + if (has_mtmd) { // multimodal std::string prompt_str = prompt.get(); mtmd_input_text inp_txt = { From e02bf185a6aac45c586eeeb9ccbe936bc02f8372 Mon Sep 17 00:00:00 2001 From: 92MING Date: Fri, 27 Jun 2025 23:51:25 +0800 Subject: [PATCH 7/8] Update .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 88bbdec4463af..7d16c0567884f 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ .swiftpm .vs/ .vscode/ +.history/ nppBackup @@ -146,4 +147,3 @@ poetry.toml # Local scripts /run-vim.sh /run-chat.sh -.history \ No newline at end of file From 9d3da863f74993b28567431986c9279a261b9c51 Mon Sep 17 00:00:00 2001 From: 92MING Date: Fri, 27 Jun 2025 23:53:02 +0800 Subject: [PATCH 8/8] Update server.cpp --- tools/server/server.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index ad6ed5816e7e3..f2b67544602b0 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4392,7 +4392,6 @@ int main(int argc, char ** argv) { } } } - handle_completions_impl( SERVER_TASK_TYPE_COMPLETION, data,