Skip to content

Commit

Permalink
replace cinatra with cpp-httplib
Browse files: browse the repository at this point in the history
  • Loading branch information
okdshin committed Dec 9, 2023
1 parent 34d0291 commit c4bd681
Showing 2 changed files with 54 additions and 59 deletions.
32 changes: 11 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -15,12 +15,6 @@ FetchContent_Declare(
GIT_TAG add_pfnet_plamo_13b
)

FetchContent_Declare(
asio
GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git
GIT_TAG asio-1-28-1
)

set(JSONCPP_WITH_TESTS OFF)
set(JSONCPP_WITH_POST_BUILD_UNITTEST OFF)
set(JSONCPP_WITH_TESTS OFF)
@@ -38,33 +32,29 @@ set(SPDLOG_BUILD_SHARED OFF)
FetchContent_Declare(
spdlog URL "https://github.com/gabime/spdlog/archive/refs/tags/v1.12.0.zip")

FetchContent_MakeAvailable(llama_cpp asio jsoncpp structopt spdlog)

add_library(asio INTERFACE)
target_include_directories(asio INTERFACE "${asio_SOURCE_DIR}/asio/include")
FetchContent_MakeAvailable(llama_cpp jsoncpp structopt spdlog)

FetchContent_Declare(
cinatra
GIT_REPOSITORY https://github.com/okdshin/cinatra.git
GIT_TAG v0.8.0_g++10
httplib
GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git
GIT_TAG v0.14.2
)
FetchContent_Populate(cinatra)
add_library(cinatra INTERFACE)
target_include_directories(cinatra INTERFACE "${cinatra_SOURCE_DIR}/include")

FetchContent_Populate(httplib)
add_library(httplib INTERFACE)
target_include_directories(httplib INTERFACE "${httplib_SOURCE_DIR}")
find_package(Threads REQUIRED)
target_link_libraries(cinatra INTERFACE Threads::Threads asio)
target_compile_definitions(cinatra INTERFACE -DASIO_STANDALONE)
target_link_libraries(httplib INTERFACE Threads::Threads)

add_executable(${PROJECT_NAME}-server
${CMAKE_CURRENT_SOURCE_DIR}/flatline_server.cpp)
set_target_properties(${PROJECT_NAME}-server PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
target_link_libraries(${PROJECT_NAME}-server PRIVATE llama ggml cinatra jsoncpp_static structopt spdlog::spdlog "stdc++fs")
target_link_libraries(${PROJECT_NAME}-server PRIVATE llama ggml httplib jsoncpp_static structopt spdlog::spdlog "stdc++fs")
add_custom_command(
TARGET ${PROJECT_NAME}-server
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/flatline.LICENSE.txt
COMMAND ${CMAKE_COMMAND} -E copy ${asio_SOURCE_DIR}/asio/LICENSE_1_0.txt $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/asio.LICENSE.txt
COMMAND ${CMAKE_COMMAND} -E copy ${cinatra_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/cinatra.LICENSE.txt
COMMAND ${CMAKE_COMMAND} -E copy ${httplib_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/httplib.LICENSE.txt
COMMAND ${CMAKE_COMMAND} -E copy ${jsoncpp_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/jsoncpp.LICENSE.txt
COMMAND ${CMAKE_COMMAND} -E copy ${llama_cpp_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/llama_cpp.LICENSE.txt
COMMAND ${CMAKE_COMMAND} -E copy ${spdlog_SOURCE_DIR}/LICENSE $<TARGET_FILE_DIR:${PROJECT_NAME}-server>/spdlog.LICENSE.txt
81 changes: 43 additions & 38 deletions flatline_server.cpp
Original file line number Diff line number Diff line change
@@ -7,12 +7,15 @@
#include <tuple>

#include <array>
#include <cinatra.hpp>
#include <iostream>
#include <memory>
#include <optional>
#include <stdexcept>

#define CPPHTTPLIB_THREAD_POOL_COUNT 1
#include <httplib.h>
#undef CPPHTTPLIB_THREAD_POOL_COUNT

namespace {
std::shared_ptr<spdlog::logger> logger() {
static auto logger_ = spdlog::stdout_color_mt("flatline");
@@ -115,12 +118,12 @@ class llama_cpp_model {
};
} // namespace

std::optional<Json::Value> try_to_parse_json(cinatra::request const &req) {
std::optional<Json::Value> try_to_parse_json(httplib::Request const &req) {
Json::CharReaderBuilder builder;
const std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
Json::Value root;
JSONCPP_STRING err;
std::string_view body = req.body();
std::string_view body = req.body;
logger()->info("request {}", body);
if (!reader->parse(body.data(), body.data() + body.size(), &root, &err)) {
return std::nullopt;
@@ -150,7 +153,7 @@ std::string make_response_json(std::vector<float> const &next_token_logits) {

#include <structopt/app.hpp>
struct app_options {
std::optional<std::string> port = "57045";
std::optional<int> port = 57045;
std::optional<std::string> model_path;
std::optional<bool> numa = true;
std::optional<int> n_threads = -1;
@@ -174,60 +177,62 @@ int main(int argc, char **argv) {
*options.model_path, infer_thread_num, *options.n_gpu_layers);
logger()->info("model loading finished");

cinatra::http_server server(server_thread_num);
server.listen("0.0.0.0", *options.port);
server.set_http_handler<cinatra::GET, cinatra::POST>(
"/", [](cinatra::request &req, cinatra::response &res) {
res.set_status_and_content(cinatra::status_type::ok,
"Flatline backend server is available");
});
server.set_http_handler<cinatra::GET>(
"/config", [&options](cinatra::request &req, cinatra::response &res) {
Json::Value config;
config["port"] = *options.port;
config["model_path"] = *options.model_path;
config["numa"] = *options.numa;
config["n_threads"] = *options.n_threads;
config["n_gpu_layers"] = *options.n_gpu_layers;
Json::FastWriter json_fast_writer;
res.set_status_and_content(cinatra::status_type::ok,
json_fast_writer.write(config));
});
auto calc_next_token_logits_func = [&model](cinatra::request &req,
cinatra::response &res) {
httplib::Server server;
server.Get("/", [](httplib::Request const &req, httplib::Response &res) {
res.set_content("Flatline backend server is available", "text/plain");
});
server.Get("/config", [&options](httplib::Request const &req,
httplib::Response &res) {
Json::Value config;
config["port"] = *options.port;
config["model_path"] = *options.model_path;
config["numa"] = *options.numa;
config["n_threads"] = *options.n_threads;
config["n_gpu_layers"] = *options.n_gpu_layers;
Json::FastWriter json_fast_writer;
res.set_content(json_fast_writer.write(config), "application/json");
});
constexpr int status_bad_request = 400;
std::mutex computing_resource_mutex;
auto calc_next_token_logits_func = [&model, &computing_resource_mutex](
httplib::Request const &req,
httplib::Response &res) {
// Header check
if (req.get_header_value("Content-type") != "application/json") {
res.set_status_and_content(
cinatra::status_type::bad_request,
"\"Content-type\" must be \"application/json\"");
res.status = status_bad_request;
res.set_content("\"Content-type\" must be \"application/json\"",
"text/plain");
logger()->info("Content-type is not application/json");
return;
}
// Data check & parse
std::optional<Json::Value> root_opt = try_to_parse_json(req);
if (!root_opt) {
res.set_status_and_content(cinatra::status_type::bad_request,
"JSON data is broken");
res.status = status_bad_request;
res.set_content("JSON data is broken", "text/plain");
logger()->info("JSON data is broken");
return;
}
Json::Value const &root = *root_opt;
std::vector<int> input_tokens = get_request_data(root);

// Calc next token logits
std::vector<float> next_token_logits =
model.calc_next_token_logits(input_tokens);
std::vector<float> next_token_logits;
{
// lock
std::unique_lock lock(computing_resource_mutex);

// Calc next token logits
next_token_logits = model.calc_next_token_logits(input_tokens);
}

// Send response
res.add_header("Content-type", "application/json");
std::string response_json = make_response_json(next_token_logits);
res.set_status_and_content(cinatra::status_type::ok, response_json.c_str());
res.set_content(response_json.c_str(), "application/json");
logger()->info("sent response {}",
std::string(response_json.c_str()).substr(0, 128) + "...");
};
server.set_http_handler<cinatra::POST>("/v1/calc_next_token_logits",
calc_next_token_logits_func);
server.run();
server.Post("/v1/calc_next_token_logits", calc_next_token_logits_func);
server.listen("0.0.0.0", *options.port);

llama_backend_free();

0 comments on commit c4bd681

Please sign in to comment.