diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..4fa6c320 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +envoyfilter/target diff --git a/envoyfilter/Cargo.lock b/envoyfilter/Cargo.lock index eb09703b..e30b2d51 100644 --- a/envoyfilter/Cargo.lock +++ b/envoyfilter/Cargo.lock @@ -1,3 +1,121 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + [[package]] name = "intelligent-prompt-gateway" version = "0.1.0" +dependencies = [ + "log", + "proxy-wasm", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proxy-wasm" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "823b744520cd4a54ba7ebacbffe4562e839d6dcd8f89209f96a1ace4f5229cd4" +dependencies = [ + "hashbrown", + "log", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/envoyfilter/Cargo.toml b/envoyfilter/Cargo.toml index df895f41..4fbe3de5 100644 --- a/envoyfilter/Cargo.toml +++ b/envoyfilter/Cargo.toml @@ -2,6 +2,11 @@ name = "intelligent-prompt-gateway" version = "0.1.0" authors = ["Katanemo Inc "] -edition = "2018" +edition = "2021" + +[lib] +crate-type = ["cdylib"] [dependencies] +proxy-wasm = "0.2.1" +log = "0.4" diff --git a/envoyfilter/README.md b/envoyfilter/README.md index e83a6211..022e8b1f 100644 --- 
a/envoyfilter/README.md +++ b/envoyfilter/README.md @@ -1 +1,66 @@ -Envoy filter code for gateway +# Envoy filter code for gateway + +## Add toolchain + +```sh +$ rustup target add wasm32-wasi +``` + +## Building + +```sh +$ cargo build --target wasm32-wasi --release +``` + +## Using in Envoy + +This example can be run with [`docker compose`](https://docs.docker.com/compose/install/) +and has a matching Envoy configuration. + +```sh +$ docker compose up +``` + +## Examples + +### Direct response. + +Send HTTP request to `localhost:10000/hello`: + +```sh +$ curl localhost:10000/hello +``` + +Expected response: + +```console +HTTP/1.1 200 OK +content-length: 40 +content-type: text/plain +custom-header: katanemo filter +date: Wed, 10 Jul 2024 16:59:43 GMT +server: envoy +``` + +### Inline call. + +Send HTTP request to `localhost:10000/inline`: + +```sh +$ curl localhost:10000/inline +{ + "headers": { + "Accept": "*/*", + "Host": "localhost", + "User-Agent": "curl/7.81.0", + "X-Amzn-Trace-Id": "Root=1-637c4767-6e31776a0b407a0219b5b570", + "X-Envoy-Expected-Rq-Timeout-Ms": "15000" + } +} +``` + +Expected Envoy logs: + +```console +[...] wasm log http_auth_random: Access granted. 
+``` diff --git a/envoyfilter/docker-compose.yaml b/envoyfilter/docker-compose.yaml new file mode 100644 index 00000000..4c1b0309 --- /dev/null +++ b/envoyfilter/docker-compose.yaml @@ -0,0 +1,13 @@ +services: + envoy: + image: envoyproxy/envoy:v1.30-latest + hostname: envoy + ports: + - "10000:10000" + volumes: + - ./envoy.yaml:/etc/envoy/envoy.yaml + - ./target/wasm32-wasi/release:/etc/envoy/proxy-wasm-plugins + networks: + - envoymesh +networks: + envoymesh: {} diff --git a/envoyfilter/envoy.yaml b/envoyfilter/envoy.yaml new file mode 100644 index 00000000..54ee4dea --- /dev/null +++ b/envoyfilter/envoy.yaml @@ -0,0 +1,65 @@ +static_resources: + listeners: + address: + socket_address: + address: 0.0.0.0 + port_value: 10000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: ingress_http + codec_type: AUTO + route_config: + name: local_routes + virtual_hosts: + - name: local_service + domains: + - "*" + routes: + - match: + prefix: "/inline" + route: + cluster: httpbin + - match: + prefix: "/" + direct_response: + status: 200 + body: + inline_string: "Inspect the HTTP header: custom-header.\n" + http_filters: + - name: envoy.filters.http.wasm + typed_config: + "@type": type.googleapis.com/udpa.type.v1.TypedStruct + type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm + value: + config: + name: "http_config" + configuration: + "@type": "type.googleapis.com/google.protobuf.StringValue" + value: katanemo filter + vm_config: + runtime: "envoy.wasm.runtime.v8" + code: + local: + filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm" + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + + clusters: + - name: httpbin + connect_timeout: 5s + type: STRICT_DNS + lb_policy: 
ROUND_ROBIN + load_assignment: + cluster_name: httpbin + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: httpbin.org + port_value: 80 + hostname: "httpbin.org" diff --git a/envoyfilter/src/lib.rs b/envoyfilter/src/lib.rs new file mode 100644 index 00000000..69131654 --- /dev/null +++ b/envoyfilter/src/lib.rs @@ -0,0 +1,114 @@ +use log::info; +use std::time::Duration; + +use proxy_wasm::traits::*; +use proxy_wasm::types::*; + +proxy_wasm::main! {{ + proxy_wasm::set_log_level(LogLevel::Trace); + proxy_wasm::set_root_context(|_| -> Box { + Box::new(HttpHeaderRoot { + header_content: String::new(), + }) + }); +}} + +struct HttpHeader { + context_id: u32, + header_content: String, +} + +// HttpContext is the trait that allows the Rust code to interact with HTTP objects. +impl HttpContext for HttpHeader { + // Envoy's HTTP model is event driven. The WASM ABI has given implementors events to hook onto + // the lifecycle of the http request and response. + fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action { + // Example of reading the HTTP headers on the incoming request + for (name, value) in &self.get_http_request_headers() { + info!("#{} -> {}: {}", self.context_id, name, value); + } + + // Example logic of branching based on a request header. + match self.get_http_request_header(":path") { + // If the path header is present and the path is /inline + Some(path) if path == "/inline" => { + // Dispatch an HTTP call inline. This is the model that we will use for the LLM routing host. + self.dispatch_http_call( + "httpbin", + vec![ + (":method", "GET"), + (":path", "/bytes/1"), + (":authority", "httpbin.org"), + ], + None, + vec![], + Duration::from_secs(5), + ) + .unwrap(); + // Pause the filter until the out of band HTTP response arrives. + Action::Pause + } + + // Otherwise let the HTTP request continue. 
+ _ => Action::Continue, + } + } + + fn on_http_response_headers(&mut self, _: usize, _: bool) -> Action { + // Note that the filter can add custom headers. In this case the header is coming from a config value. + self.add_http_response_header("custom-header", self.header_content.as_str()); + Action::Continue + } +} + +impl Context for HttpHeader { + // Note that the event driven model continues here from the return of the on_http_request_headers above. + fn on_http_call_response(&mut self, _: u32, _: usize, body_size: usize, _: usize) { + if let Some(body) = self.get_http_call_response_body(0, body_size) { + if !body.is_empty() && body[0] % 2 == 0 { + info!("Access granted."); + // This call allows the filter to continue operating on the HTTP request sent by the user. + // In Katanemo's use case the call would continue after the LLM host has responded with routing + // decisions. + self.resume_http_request(); + return; + } + } + info!("Access forbidden."); + // This is an example of short-circuiting the http request and sending back a response to the client. + // i.e there was never an external HTTP request made. This could be used for example if the user prompt requires + // more information before it can be sent out to a third party API. 
+ self.send_http_response( + 403, + vec![("Powered-By", "Katanemo")], + Some(b"Access forbidden.\n"), + ); + } +} + +struct HttpHeaderRoot { + header_content: String, +} + +impl Context for HttpHeaderRoot {} + +// RootContext allows the Rust code to reach into the Envoy Config +impl RootContext for HttpHeaderRoot { + fn on_configure(&mut self, _: usize) -> bool { + if let Some(config_bytes) = self.get_plugin_configuration() { + self.header_content = String::from_utf8(config_bytes).unwrap() + } + true + } + + fn create_http_context(&self, context_id: u32) -> Option> { + Some(Box::new(HttpHeader { + context_id, + header_content: self.header_content.clone(), + })) + } + + fn get_type(&self) -> Option { + Some(ContextType::HttpContext) + } +} diff --git a/envoyfilter/src/main.rs b/envoyfilter/src/main.rs deleted file mode 100644 index 98aa8079..00000000 --- a/envoyfilter/src/main.rs +++ /dev/null @@ -1,17 +0,0 @@ -fn main() { - println!("Hello, world!\nMy favourite number is {}", some_fn()); -} - -fn some_fn() -> i32 { - 42 -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn some_fn_is_42() { - assert_eq!(some_fn(), 42); - } -}