From df4d490038c37e441065890fa27ed2ce0bdf83e6 Mon Sep 17 00:00:00 2001
From: Mark Rousskov
Date: Mon, 23 Dec 2019 15:40:20 -0500
Subject: [PATCH] Minimize unsafety in encode_utf8

Use slice patterns to avoid having to skip bounds checking
---
 src/libcore/char/methods.rs | 59 ++++++++++++++++++-------------------
 src/libcore/lib.rs          |  1 +
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index bb6d6db57d214..fe5d16862a6a6 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -434,36 +434,35 @@ impl char {
     #[inline]
     pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
         let code = self as u32;
-        // SAFETY: each arm checks the size of the slice and only uses `get_unchecked` unsafe ops
-        unsafe {
-            let len = if code < MAX_ONE_B && !dst.is_empty() {
-                *dst.get_unchecked_mut(0) = code as u8;
-                1
-            } else if code < MAX_TWO_B && dst.len() >= 2 {
-                *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
-                *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
-                2
-            } else if code < MAX_THREE_B && dst.len() >= 3 {
-                *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
-                *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
-                3
-            } else if dst.len() >= 4 {
-                *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
-                *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
-                4
-            } else {
-                panic!(
-                    "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
-                    from_u32_unchecked(code).len_utf8(),
-                    code,
-                    dst.len(),
-                )
-            };
-            from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
-        }
+        let len = self.len_utf8();
+        match (len, &mut dst[..]) {
+            (1, [a, ..]) => {
+                *a = code as u8;
+            }
+            (2, [a, b, ..]) => {
+                *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+                *b = (code & 0x3F) as u8 | TAG_CONT;
+            }
+            (3, [a, b, c, ..]) => {
+                *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+                *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+                *c = (code & 0x3F) as u8 | TAG_CONT;
+            }
+            (4, [a, b, c, d, ..]) => {
+                *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+                *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+                *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+                *d = (code & 0x3F) as u8 | TAG_CONT;
+            }
+            _ => panic!(
+                "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
+                len,
+                code,
+                dst.len(),
+            ),
+        };
+        // SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
+        unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
     }
 
     /// Encodes this character as UTF-16 into the provided `u16` buffer,
diff --git a/src/libcore/lib.rs b/src/libcore/lib.rs
index d12aebb87b975..7d11dd2800fd4 100644
--- a/src/libcore/lib.rs
+++ b/src/libcore/lib.rs
@@ -129,6 +129,7 @@
 #![feature(associated_type_bounds)]
 #![feature(const_type_id)]
 #![feature(const_caller_location)]
+#![feature(slice_patterns)]
 
 #[prelude_import]
 #[allow(unused)]