Upgrading our rustc version. #2908

Merged · 3 commits · Jan 15, 2025
Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
@@ -31,7 +31,7 @@ jobs:
         with:
           # Released on: 02 May, 2024
           # https://releases.rs/docs/1.78.0/
-          toolchain: 1.80.0
+          toolchain: 1.84.0
           override: true
           components: rustfmt, clippy
       - name: Install Protoc
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
2 changes: 1 addition & 1 deletion Dockerfile_amd
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
2 changes: 1 addition & 1 deletion Dockerfile_intel
@@ -1,6 +1,6 @@
 ARG PLATFORM=xpu

-FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
2 changes: 1 addition & 1 deletion Dockerfile_trtllm
@@ -2,7 +2,7 @@ ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real"
 ARG OMPI_VERSION="4.1.7rc1"

 # Build dependencies resolver stage
-FROM lukemathwalker/cargo-chef:latest AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src/text-generation-inference/backends/trtllm

 FROM chef AS planner
2 changes: 1 addition & 1 deletion backends/grpc-metadata/src/lib.rs
@@ -8,7 +8,7 @@ use tracing_opentelemetry::OpenTelemetrySpanExt;
 /// Inject context in the metadata of a gRPC request.
 struct MetadataInjector<'a>(pub &'a mut tonic::metadata::MetadataMap);

-impl<'a> Injector for MetadataInjector<'a> {
+impl Injector for MetadataInjector<'_> {
     /// Set a key and value in the MetadataMap. Does nothing if the key or value are not valid inputs
     fn set(&mut self, key: &str, value: String) {
         if let Ok(key) = tonic::metadata::MetadataKey::from_bytes(key.as_bytes()) {
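Note on the lifetime changes in this PR: `impl<'a> Injector for MetadataInjector<'a>` becomes `impl Injector for MetadataInjector<'_>` because the named lifetime is only declared to be passed straight through, a pattern newer clippy releases (including the one shipped with 1.84) flag via `needless_lifetimes`. A minimal standalone sketch of the same pattern (the `MapInjector` type and `HashMap` target below are illustrative, not code from this repository):

```rust
use std::collections::HashMap;

/// A wrapper holding a mutable borrow, mirroring the MetadataInjector shape.
struct MapInjector<'a>(pub &'a mut HashMap<String, String>);

// Before: `impl<'a> MapInjector<'a> { ... }` declares `'a` only to repeat it.
// After: `'_` lets the compiler infer it, which satisfies `needless_lifetimes`.
impl MapInjector<'_> {
    fn set(&mut self, key: &str, value: String) {
        self.0.insert(key.to_string(), value);
    }
}

fn main() {
    let mut metadata = HashMap::new();
    MapInjector(&mut metadata).set("traceparent", "00-example".to_string());
    println!("{metadata:?}");
}
```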
11 changes: 4 additions & 7 deletions backends/v2/src/queue.rs
@@ -213,8 +213,7 @@ impl State {
         }

         // Pad prefill_token_budget to be a multiple of block size
-        let prefill_token_budget =
-            ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;
+        let prefill_token_budget = prefill_token_budget.div_ceil(self.block_size) * self.block_size;

         // Create span for this batch to add context to inference calls
         let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty);
@@ -245,9 +244,8 @@ impl State {
                 prefill_tokens = (batch_requests.len() + 1) as u32 * max_input_length
             } else {
                 // pad to block size
-                prefill_tokens += ((entry.request.input_length + self.block_size - 1)
-                    / self.block_size)
-                    * self.block_size;
+                prefill_tokens +=
+                    entry.request.input_length.div_ceil(self.block_size) * self.block_size;
             }

             if self.requires_padding {
@@ -262,8 +260,7 @@
             };

             // pad to block size
-            decode_tokens +=
-                ((max_new_tokens + self.block_size - 1) / self.block_size) * self.block_size;
+            decode_tokens += max_new_tokens.div_ceil(self.block_size) * self.block_size;
         }

         if prefill_tokens > prefill_token_budget
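The padding changes above swap the manual ceiling-division idiom `((x + b - 1) / b) * b` for `u32::div_ceil`, stable since Rust 1.73 and what clippy's `manual_div_ceil` lint nudges toward on newer toolchains. A small standalone check that the two forms agree (not repo code; `pad_to_block_size` is a made-up helper):

```rust
/// Round `tokens` up to the next multiple of `block_size` using `div_ceil`.
fn pad_to_block_size(tokens: u32, block_size: u32) -> u32 {
    tokens.div_ceil(block_size) * block_size
}

fn main() {
    let block_size = 16u32;
    for tokens in [1u32, 15, 16, 17, 250] {
        // The manual idiom the diff removes.
        let manual = ((tokens + block_size - 1) / block_size) * block_size;
        assert_eq!(pad_to_block_size(tokens, block_size), manual);
    }
    println!("div_ceil padding matches the manual formula");
}
```

As a side benefit, `div_ceil` avoids the overflow the `+ block_size - 1` step can hit when `tokens` is close to `u32::MAX`.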
4 changes: 2 additions & 2 deletions backends/v3/src/block_allocator.rs
@@ -165,13 +165,13 @@ impl Allocator for SimpleAllocator {
             let (tokens, repeats) = match self.window_size {
                 None => (tokens, 1),
                 Some(window_size) => {
-                    let repeats = (tokens + window_size - 1) / window_size;
+                    let repeats = tokens.div_ceil(window_size);
                     let tokens = core::cmp::min(tokens, window_size);
                     (tokens, repeats as usize)
                 }
             };
             // Pad to a multiple of block size
-            let required_blocks = (tokens + self.block_size - 1) / self.block_size;
+            let required_blocks = tokens.div_ceil(self.block_size);
             (required_blocks, repeats)
         };

3 changes: 1 addition & 2 deletions backends/v3/src/queue.rs
@@ -257,8 +257,7 @@ impl State {
         }

         // Pad prefill_token_budget to be a multiple of block size
-        let prefill_token_budget =
-            ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;
+        let prefill_token_budget = prefill_token_budget.div_ceil(self.block_size) * self.block_size;

         // Create span for this batch to add context to inference calls
         let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty);
2 changes: 1 addition & 1 deletion backends/v3/src/radix.rs
@@ -103,7 +103,7 @@ impl Allocator for RadixAllocator {
         let prefix_len = blocks.len() * self.block_size as usize;
         let suffix_len = tokens - prefix_len as u32;

-        let suffix_blocks = (suffix_len + self.block_size - 1) / self.block_size;
+        let suffix_blocks = suffix_len.div_ceil(self.block_size);

         tracing::info!("Prefix {prefix_len} - Suffix {suffix_len}");

12 changes: 6 additions & 6 deletions flake.lock

Some generated files are not rendered by default.

36 changes: 15 additions & 21 deletions launcher/src/main.rs
@@ -5,7 +5,6 @@ use hf_hub::{
 };
 use nix::sys::signal::{self, Signal};
 use nix::unistd::Pid;
-use regex::Regex;
 use serde::Deserialize;
 use std::env;
 use std::ffi::OsString;
@@ -2176,26 +2175,21 @@ fn main() -> Result<(), LauncherError> {
            }

            // capture adapter_id, path, revision in format of adapter_id=path@revision
-           let re = Regex::new(r"^([^=@]+)(?:=([^@]+))?(?:@(.+))?$").unwrap();
-           if let Some(caps) = re.captures(adapter) {
-               let adapter_id = caps.get(1).map_or("", |m| m.as_str());
-               let revision = caps.get(3).map(|m| m.as_str());
-
-               download_convert_model(
-                   adapter_id,
-                   revision,
-                   args.trust_remote_code,
-                   args.huggingface_hub_cache.as_deref(),
-                   args.weights_cache_override.as_deref(),
-                   running.clone(),
-                   false, // avoid merging lora adapters if using multi-lora
-               )?;
-           } else {
-               return Err(LauncherError::ArgumentValidation(format!(
-                   "Invalid LoRA adapter format: {}",
-                   adapter
-               )));
-           }
+           // path is disabled beforehand.
+           let mut splits = adapter.split("@");
+           let adapter_id = splits.next().ok_or_else(|| {
+               LauncherError::ArgumentValidation("Missing adapter id".to_string())
+           })?;
+           let revision = splits.next();
+           download_convert_model(
+               adapter_id,
+               revision,
+               args.trust_remote_code,
+               args.huggingface_hub_cache.as_deref(),
+               args.weights_cache_override.as_deref(),
+               running.clone(),
+               false, // avoid merging lora adapters if using multi-lora
+           )?;
        }
    }

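For reference, the new launcher code drops the `regex` dependency and reads the LoRA adapter spec with a plain split on `@`, keeping only the `adapter_id` and an optional `revision` (the `=path` form is handled before this point, per the 'path is disabled beforehand' comment). A minimal standalone sketch of that parsing, using a hypothetical `parse_adapter` helper and a `String` error in place of `LauncherError`:

```rust
/// Split "adapter_id@revision" into the id and an optional revision:
/// "org/adapter@main" -> ("org/adapter", Some("main"))
/// "org/adapter"      -> ("org/adapter", None)
fn parse_adapter(adapter: &str) -> Result<(&str, Option<&str>), String> {
    let mut splits = adapter.split('@');
    let adapter_id = splits
        .next()
        .ok_or_else(|| "Missing adapter id".to_string())?;
    let revision = splits.next();
    Ok((adapter_id, revision))
}

fn main() {
    assert_eq!(
        parse_adapter("org/lora-adapter@main").unwrap(),
        ("org/lora-adapter", Some("main"))
    );
    assert_eq!(
        parse_adapter("org/lora-adapter").unwrap(),
        ("org/lora-adapter", None)
    );
    println!("adapter parsing behaves as expected");
}
```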
2 changes: 1 addition & 1 deletion router/src/lib.rs
@@ -79,7 +79,7 @@ impl TokenizerTrait for tokenizers::Tokenizer {
     }
 }

-impl<'a> TokenizerTrait for PyTokenizer<'a> {
+impl TokenizerTrait for PyTokenizer<'_> {
     fn encode_trait(
         &self,
         query: String,
9 changes: 3 additions & 6 deletions router/src/validation.rs
@@ -1229,12 +1229,11 @@ mod tests {
         assert!(
             chunks
                 == vec![
-                    Chunk::Text("test".to_string()).into(),
+                    Chunk::Text("test".to_string()),
                     Chunk::Image(Image {
                         data: pixel_data.clone(),
                         mimetype: "image/gif".to_string()
                     })
-                    .into()
                 ],
             "Failed to process images",
         );
@@ -1289,17 +1288,15 @@
         assert!(
             chunks
                 == vec![
-                    Chunk::Text("test".to_string()).into(),
+                    Chunk::Text("test".to_string()),
                     Chunk::Image(Image {
                         data: pixel_data.clone(),
                         mimetype: "image/gif".to_string()
-                    })
-                    .into(),
+                    }),
                     Chunk::Image(Image {
                         data: pixel_data.clone(),
                         mimetype: "image/gif".to_string()
                     })
-                    .into()
                 ],
             "Failed to process images",
         );
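The `.into()` calls removed in these tests appear to convert a value into the same type the surrounding `vec!` already holds, which is a no-op and the kind of pattern clippy's `useless_conversion` lint rejects on recent toolchains. A tiny illustration with a stand-in `Chunk` enum (not the repo's real type):

```rust
#[derive(Debug, PartialEq)]
enum Chunk {
    Text(String),
}

fn main() {
    // Same-type conversion: `Chunk::Text(..).into()` just yields the value back.
    let with_into: Vec<Chunk> = vec![Chunk::Text("test".to_string()).into()];
    // Equivalent, and what the diff settles on.
    let without: Vec<Chunk> = vec![Chunk::Text("test".to_string())];
    assert_eq!(with_into, without);
}
```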
2 changes: 1 addition & 1 deletion rust-toolchain.toml
@@ -1,5 +1,5 @@
 [toolchain]
 # Released on: June 13, 2024
 # https://releases.rs/docs/1.79.0/
-channel = "1.80.1"
+channel = "1.84.0"
 components = ["rustfmt", "clippy"]