From 798c765c70469618e0afe4bb0d0a7a033cd2c2bc Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 15 Jan 2025 16:04:53 +0100 Subject: [PATCH 1/3] Upgrading our rustc version. --- Dockerfile | 2 +- Dockerfile_amd | 2 +- Dockerfile_intel | 2 +- Dockerfile_trtllm | 2 +- rust-toolchain.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0c08d48f6e4..0f2ae6cc811 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse diff --git a/Dockerfile_amd b/Dockerfile_amd index 1f34ffa30f9..92acff5aac2 100644 --- a/Dockerfile_amd +++ b/Dockerfile_amd @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse diff --git a/Dockerfile_intel b/Dockerfile_intel index 3b5e4a131b2..2b41fd8bc2e 100644 --- a/Dockerfile_intel +++ b/Dockerfile_intel @@ -1,6 +1,6 @@ ARG PLATFORM=xpu -FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm index b4523ea59bd..e6a16eccdc6 100644 --- a/Dockerfile_trtllm +++ b/Dockerfile_trtllm @@ -2,7 +2,7 @@ ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real" ARG OMPI_VERSION="4.1.7rc1" # Build dependencies resolver stage -FROM lukemathwalker/cargo-chef:latest AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef WORKDIR /usr/src/text-generation-inference/backends/trtllm FROM chef AS planner diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 12d58532c4c..25959e0e0f8 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] # Released on: June 13, 2024 # https://releases.rs/docs/1.79.0/ -channel = "1.80.1" +channel = "1.84.0" components = ["rustfmt", "clippy"] From b9ab5037b089a29f915831e842441710800fb1cb Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 15 Jan 2025 16:12:13 +0100 Subject: [PATCH 2/3] Fixing the rust tests to proper version. --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 4eeca3348ae..6bcf7d96239 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -31,7 +31,7 @@ jobs: with: # Released on: 02 May, 2024 # https://releases.rs/docs/1.78.0/ - toolchain: 1.80.0 + toolchain: 1.84.0 override: true components: rustfmt, clippy - name: Install Protoc From 55fabaae01c30bd630937f37a61781e36765447a Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 15 Jan 2025 16:51:28 +0100 Subject: [PATCH 3/3] Clippy everything. --- backends/grpc-metadata/src/lib.rs | 2 +- backends/v2/src/queue.rs | 11 ++++----- backends/v3/src/block_allocator.rs | 4 ++-- backends/v3/src/queue.rs | 3 +-- backends/v3/src/radix.rs | 2 +- flake.lock | 12 +++++----- launcher/src/main.rs | 36 +++++++++++++----------------- router/src/lib.rs | 2 +- router/src/validation.rs | 9 +++----- 9 files changed, 34 insertions(+), 47 deletions(-) diff --git a/backends/grpc-metadata/src/lib.rs b/backends/grpc-metadata/src/lib.rs index 3068a61c3d2..822b03072da 100644 --- a/backends/grpc-metadata/src/lib.rs +++ b/backends/grpc-metadata/src/lib.rs @@ -8,7 +8,7 @@ use tracing_opentelemetry::OpenTelemetrySpanExt; /// Inject context in the metadata of a gRPC request. struct MetadataInjector<'a>(pub &'a mut tonic::metadata::MetadataMap); -impl<'a> Injector for MetadataInjector<'a> { +impl Injector for MetadataInjector<'_> { /// Set a key and value in the MetadataMap. Does nothing if the key or value are not valid inputs fn set(&mut self, key: &str, value: String) { if let Ok(key) = tonic::metadata::MetadataKey::from_bytes(key.as_bytes()) { diff --git a/backends/v2/src/queue.rs b/backends/v2/src/queue.rs index 61a3eebc927..c9a9335dd9d 100644 --- a/backends/v2/src/queue.rs +++ b/backends/v2/src/queue.rs @@ -213,8 +213,7 @@ impl State { } // Pad prefill_token_budget to be a multiple of block size - let prefill_token_budget = - ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size; + let prefill_token_budget = prefill_token_budget.div_ceil(self.block_size) * self.block_size; // Create span for this batch to add context to inference calls let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty); @@ -245,9 +244,8 @@ impl State { prefill_tokens = (batch_requests.len() + 1) as u32 * max_input_length } else { // pad to block size - prefill_tokens += ((entry.request.input_length + self.block_size - 1) - / self.block_size) - * self.block_size; + prefill_tokens += + entry.request.input_length.div_ceil(self.block_size) * self.block_size; } if self.requires_padding { @@ -262,8 +260,7 @@ impl State { }; // pad to block size - decode_tokens += - ((max_new_tokens + self.block_size - 1) / self.block_size) * self.block_size; + decode_tokens += max_new_tokens.div_ceil(self.block_size) * self.block_size; } if prefill_tokens > prefill_token_budget diff --git a/backends/v3/src/block_allocator.rs b/backends/v3/src/block_allocator.rs index 4fea172b65a..e7f3d85a9e1 100644 --- a/backends/v3/src/block_allocator.rs +++ b/backends/v3/src/block_allocator.rs @@ -165,13 +165,13 @@ impl Allocator for SimpleAllocator { let (tokens, repeats) = match self.window_size { None => (tokens, 1), Some(window_size) => { - let repeats = (tokens + window_size - 1) / window_size; + let repeats = tokens.div_ceil(window_size); let tokens = core::cmp::min(tokens, window_size); (tokens, repeats as usize) } }; // Pad to a multiple of block size - let required_blocks = (tokens + self.block_size - 1) / self.block_size; + let required_blocks = tokens.div_ceil(self.block_size); (required_blocks, repeats) }; diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs index dd27806f97a..249eebf7615 100644 --- a/backends/v3/src/queue.rs +++ b/backends/v3/src/queue.rs @@ -257,8 +257,7 @@ impl State { } // Pad prefill_token_budget to be a multiple of block size - let prefill_token_budget = - ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size; + let prefill_token_budget = prefill_token_budget.div_ceil(self.block_size) * self.block_size; // Create span for this batch to add context to inference calls let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty); diff --git a/backends/v3/src/radix.rs b/backends/v3/src/radix.rs index 8a544891199..532ec6ddcc8 100644 --- a/backends/v3/src/radix.rs +++ b/backends/v3/src/radix.rs @@ -103,7 +103,7 @@ impl Allocator for RadixAllocator { let prefix_len = blocks.len() * self.block_size as usize; let suffix_len = tokens - prefix_len as u32; - let suffix_blocks = (suffix_len + self.block_size - 1) / self.block_size; + let suffix_blocks = suffix_len.div_ceil(self.block_size); tracing::info!("Prefix {prefix_len} - Suffix {suffix_len}"); diff --git a/flake.lock b/flake.lock index d304e3ccc7a..23e76b8be53 100644 --- a/flake.lock +++ b/flake.lock @@ -108,11 +108,11 @@ "pre-commit-hooks": "pre-commit-hooks_3" }, "locked": { - "lastModified": 1732039290, - "narHash": "sha256-LQKY7bShf2H9kJouxa9ZspfdrulnZF9o4kLTqGqCDYM=", + "lastModified": 1734429562, + "narHash": "sha256-V2XNs3Ir8WXNHdocfzkR/fu0FzkZ9uTDJkVecxJrGmQ=", "owner": "nix-community", "repo": "crate2nix", - "rev": "9ff208ce7f5a482272b1bcefbe363c772d7ff914", + "rev": "8537c2d7cb623679aaeff62c4c4c43a91566ab09", "type": "github" }, "original": { @@ -853,11 +853,11 @@ ] }, "locked": { - "lastModified": 1732242723, - "narHash": "sha256-NWI8csIK0ujFlFuEXKnoc+7hWoCiEtINK9r48LUUMeU=", + "lastModified": 1736907983, + "narHash": "sha256-fw55wVwpJW36Md2HZBKuxX3YHGeqsGsspPLtCMVr1Y8=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "a229311fcb45b88a95fdfa5cecd8349c809a272a", + "rev": "eaa365c911441e07e387ff6acc596619fc50b156", "type": "github" }, "original": { diff --git a/launcher/src/main.rs b/launcher/src/main.rs index c092d74510e..7df9abda8c7 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -5,7 +5,6 @@ use hf_hub::{ }; use nix::sys::signal::{self, Signal}; use nix::unistd::Pid; -use regex::Regex; use serde::Deserialize; use std::env; use std::ffi::OsString; @@ -2176,26 +2175,21 @@ fn main() -> Result<(), LauncherError> { } // capture adapter_id, path, revision in format of adapter_id=path@revision - let re = Regex::new(r"^([^=@]+)(?:=([^@]+))?(?:@(.+))?$").unwrap(); - if let Some(caps) = re.captures(adapter) { - let adapter_id = caps.get(1).map_or("", |m| m.as_str()); - let revision = caps.get(3).map(|m| m.as_str()); - - download_convert_model( - adapter_id, - revision, - args.trust_remote_code, - args.huggingface_hub_cache.as_deref(), - args.weights_cache_override.as_deref(), - running.clone(), - false, // avoid merging lora adapters if using multi-lora - )?; - } else { - return Err(LauncherError::ArgumentValidation(format!( - "Invalid LoRA adapter format: {}", - adapter - ))); - } + // path is disabled beforehand. + let mut splits = adapter.split("@"); + let adapter_id = splits.next().ok_or_else(|| { + LauncherError::ArgumentValidation("Missing adapter id".to_string()) + })?; + let revision = splits.next(); + download_convert_model( + adapter_id, + revision, + args.trust_remote_code, + args.huggingface_hub_cache.as_deref(), + args.weights_cache_override.as_deref(), + running.clone(), + false, // avoid merging lora adapters if using multi-lora + )?; } } diff --git a/router/src/lib.rs b/router/src/lib.rs index 21c45241308..dbd36827f46 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -79,7 +79,7 @@ impl TokenizerTrait for tokenizers::Tokenizer { } } -impl<'a> TokenizerTrait for PyTokenizer<'a> { +impl TokenizerTrait for PyTokenizer<'_> { fn encode_trait( &self, query: String, diff --git a/router/src/validation.rs b/router/src/validation.rs index cdf954de2cd..7ac05b21def 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -1229,12 +1229,11 @@ mod tests { assert!( chunks == vec![ - Chunk::Text("test".to_string()).into(), + Chunk::Text("test".to_string()), Chunk::Image(Image { data: pixel_data.clone(), mimetype: "image/gif".to_string() }) - .into() ], "Failed to process images", ); @@ -1289,17 +1288,15 @@ mod tests { assert!( chunks == vec![ - Chunk::Text("test".to_string()).into(), + Chunk::Text("test".to_string()), Chunk::Image(Image { data: pixel_data.clone(), mimetype: "image/gif".to_string() - }) - .into(), + }), Chunk::Image(Image { data: pixel_data.clone(), mimetype: "image/gif".to_string() }) - .into() ], "Failed to process images", );