Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(lex): Use new-ish OsStr API #5344

Merged
merged 1 commit into from
Feb 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 21 additions & 52 deletions clap_lex/src/ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ pub trait OsStrExt: private::Sealed {

impl OsStrExt for OsStr {
fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
let bytes = to_bytes(self);
let bytes = self.as_encoded_bytes();
std::str::from_utf8(bytes)
}

Expand All @@ -192,22 +192,22 @@ impl OsStrExt for OsStr {
}

fn find(&self, needle: &str) -> Option<usize> {
let bytes = to_bytes(self);
let bytes = self.as_encoded_bytes();
(0..=self.len().checked_sub(needle.len())?)
.find(|&x| bytes[x..].starts_with(needle.as_bytes()))
}

fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
let bytes = to_bytes(self);
let bytes = self.as_encoded_bytes();
bytes.strip_prefix(prefix.as_bytes()).map(|s| {
// SAFETY:
// - This came from `to_bytes`
// - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
unsafe { to_os_str_unchecked(s) }
// - This came from `as_encoded_bytes`
// - Since `prefix` is `&str`, any split will be along UTF-8 boundary
unsafe { OsStr::from_encoded_bytes_unchecked(s) }
})
}
fn starts_with(&self, prefix: &str) -> bool {
let bytes = to_bytes(self);
let bytes = self.as_encoded_bytes();
bytes.starts_with(prefix.as_bytes())
}

Expand All @@ -222,13 +222,18 @@ impl OsStrExt for OsStr {
fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
let start = self.find(needle)?;
let end = start + needle.len();
let haystack = to_bytes(self);
let haystack = self.as_encoded_bytes();
let first = &haystack[0..start];
let second = &haystack[end..];
// SAFETY:
// - This came from `to_bytes`
// - Since `needle` is `&str`, any split will be along UTF-8 boundarie
unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) }
// - This came from `as_encoded_bytes`
// - Since `needle` is `&str`, any split will be along UTF-8 boundary
unsafe {
Some((
OsStr::from_encoded_bytes_unchecked(first),
OsStr::from_encoded_bytes_unchecked(second),
))
}
}
}

Expand All @@ -238,45 +243,6 @@ mod private {
impl Sealed for std::ffi::OsStr {}
}

/// Allow access to raw bytes
///
/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
/// 7-bit ASCII or `&str`
///
/// # Compatibility
///
/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
/// (since its dependent on rustc)
fn to_bytes(s: &OsStr) -> &[u8] {
// SAFETY:
// - Lifetimes are the same
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
// - The primary contract is that the encoding for invalid surrogate code points is not
// guaranteed which isn't a problem here
//
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
// but its in limbo
unsafe { std::mem::transmute(s) }
}

/// Restore raw bytes as `OsStr`
///
/// # Safety
///
/// - `&[u8]` must either by a `&str` or originated with `to_bytes` within the same binary
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
// SAFETY:
// - Lifetimes are the same
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
// - The primary contract is that the encoding for invalid surrogate code points is not
// guaranteed which isn't a problem here
//
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
// but its in limbo
std::mem::transmute(s)
}

pub struct Split<'s, 'n> {
haystack: Option<&'s OsStr>,
needle: &'n str,
Expand Down Expand Up @@ -309,7 +275,10 @@ impl<'s, 'n> Iterator for Split<'s, 'n> {
///
/// `index` must be at a valid UTF-8 boundary
pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
let bytes = to_bytes(os);
let bytes = os.as_encoded_bytes();
let (first, second) = bytes.split_at(index);
(to_os_str_unchecked(first), to_os_str_unchecked(second))
(
OsStr::from_encoded_bytes_unchecked(first),
OsStr::from_encoded_bytes_unchecked(second),
)
}
Loading