From 3abcc7c4f537c16ad9937f8cc60fb23cb506ac85 Mon Sep 17 00:00:00 2001 From: Olly Swanson Date: Thu, 6 Jul 2023 17:32:19 +0100 Subject: [PATCH] fix: strip BOM in `Response::text_with_charset` (#1898) The byte order mark (BOM) is now stripped from utf-8 encoded response bodies when calling `Response::text` and `Response::text_with_charset`. This should prevent surprising behaviour when trying to use the returned String. Closes #1897 --- src/async_impl/response.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/async_impl/response.rs b/src/async_impl/response.rs index 340e54174..fc5a5d464 100644 --- a/src/async_impl/response.rs +++ b/src/async_impl/response.rs @@ -1,4 +1,3 @@ -use std::borrow::Cow; use std::fmt; use std::net::SocketAddr; use std::pin::Pin; @@ -130,6 +129,8 @@ impl Response { /// Encoding is determined from the `charset` parameter of `Content-Type` header, /// and defaults to `utf-8` if not presented. /// + /// Note that the BOM is stripped from the returned String. + /// /// # Example /// /// ``` @@ -155,6 +156,8 @@ impl Response { /// `charset` parameter of `Content-Type` header is still prioritized. For more information /// about the possible encoding name, please go to [`encoding_rs`] docs. /// + /// Note that the BOM is stripped from the returned String. + /// /// [`encoding_rs`]: https://docs.rs/encoding_rs/0.8/encoding_rs/#relationship-with-windows-code-pages /// /// # Example @@ -185,14 +188,7 @@ impl Response { let full = self.bytes().await?; let (text, _, _) = encoding.decode(&full); - if let Cow::Owned(s) = text { - return Ok(s); - } - unsafe { - // decoding returned Cow::Borrowed, meaning these bytes - // are already valid utf8 - Ok(String::from_utf8_unchecked(full.to_vec())) - } + Ok(text.into_owned()) } /// Try to deserialize the response body as JSON.