diff --git a/Cargo.toml b/Cargo.toml index 01da60d64..46b3a6b08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,7 @@ unstable-client-transport = [] [dev-dependencies] rustls = { version = "0.21.9" } serde_test = "1.0.130" +serde_json = "1.0.113" serde_yaml = "0.9" tokio = { version = "1", features = ["rt-multi-thread", "io-util", "net"] } tokio-test = "0.4" diff --git a/src/base/charstr.rs b/src/base/charstr.rs index 0ab89b727..c807d38e1 100644 --- a/src/base/charstr.rs +++ b/src/base/charstr.rs @@ -51,6 +51,71 @@ use std::vec::Vec; /// As per [RFC 1035], character strings compare ignoring ASCII case. /// `CharStr`’s implementations of the `std::cmp` traits act accordingly. /// +/// # Presentation format +/// +/// The text representation of a character string comes in two flavors: +/// Quoted and unquoted. In both cases, the content is interpreted as ASCII +/// text and those octets that aren’t printable ASCII characters, as well as +/// some special symbols, are escaped. +/// +/// There are two escaping mechanisms: octets that are printable ASCII +/// characters but need to be escaped anyway use what we call a “simple +/// escape” that precedes the character with an ASCII backslash. For all +/// non-printable octets “decimal escapes” are used: an ASCII backslash is +/// followed by three decimal digits representing the decimal value of the +/// octet. A consequence if this is that you cannot escape the digits 0, 1, +/// and 2 using simple escapes and you probably shouldn’t do it for the other +/// digits. +/// +/// In the quoted form, the content is preceded and followed by exactly one +/// ASCII double quote. Inside, only double quotes, backslashes, and +/// non-printable octets need to be escaped. +/// +/// In the unquoted form, the content is formatted without any explicit +/// delimiters. Instead, it ends at the first ASCII space or any other +/// delimiting symbol, normally ASCII control characters or an ASCII +/// semicolon which marks the start of a comment. These characters, as well +/// as the double quote, also need to be escaped. +/// +/// # `Display` and `FromStr` +/// +/// When formatting a character string using the `Display` trait, a variation +/// of the unquoted form is used where only backslashes and non-printable +/// octets are escaped. Two methods are available that make it possible to +/// format the character string in quoted and unquoted formats, +/// [`display_quoted`][Self::display_quoted] and +/// [`display_unquoted`][Self::display_unquoted]. They return a temporary +/// value that can be given to a formatting macro. +/// +/// The `FromStr` implementation reads a character string from a Rust string +/// in the format created by `Display` but is more relaxed about escape +/// sequences – it accepts all of them as long as they lead to a valid +/// character string. +/// +/// # Serde support +/// +/// When the `serde` feature is enabled, the type supports serialization and +/// deserialization. The format differs for human readable and non-human +/// readable serialization formats. +/// +/// For human readable formats, character strings are serialized as a newtype +/// `CharStr` wrapping a string with the content as an ASCII string. +/// Non-printable ASCII characters (i.e., those with a byte value below 32 +/// and above 176) are escaped using the decimal escape sequences as used by +/// the presentation format. In addition, backslashes are escaped using a +/// simple escape sequence and thus are doubled. +/// +/// This leads to a slightly unfortunate situation in serialization formats +/// that in turn use backslash as an escape sequence marker in their own +/// string representation, such as JSON, where a backslash ends up as four +/// backslashes. +/// +/// When deserializing, escape sequences are excepted for all octets and +/// translated. Non-ASCII characters are not accepted and lead to error. +/// +/// For non-human readable formats, character strings are serialized as a +/// newtype `CharStr` wrapping a byte array with the content as is. +/// /// [RFC 1035]: https://tools.ietf.org/html/rfc1035 #[derive(Clone)] pub struct CharStr(Octs); @@ -292,10 +357,23 @@ impl CharStr { } impl + ?Sized> CharStr { - /// Returns an object that displays the string always quoted. + /// Returns an object that formats in quoted presentation format. + /// + /// The returned object will display the content surrounded by double + /// quotes. It will escape double quotes, backslashes, and non-printable + /// octets only. pub fn display_quoted(&self) -> DisplayQuoted { DisplayQuoted(self.for_slice()) } + + /// Returns an object that formats in unquoted presentation format. + /// + /// The returned object will display the content without explicit + /// delimiters and escapes space, double quotes, semicolons, backslashes, + /// and non-printable octets. + pub fn display_unquoted(&self) -> DisplayUnquoted { + DisplayUnquoted(self.for_slice()) + } } //--- OctetsFrom @@ -430,7 +508,7 @@ impl + ?Sized> hash::Hash for CharStr { impl + ?Sized> fmt::Display for CharStr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for &ch in self.0.as_ref() { - fmt::Display::fmt(&Symbol::from_octet(ch), f)?; + fmt::Display::fmt(&Symbol::display_from_octet(ch), f)?; } Ok(()) } @@ -816,7 +894,7 @@ impl<'a> Iterator for Iter<'a> { //------------ DisplayQuoted ------------------------------------------------- -/// Helper struct for display a character string surrounded by quotes. +/// Helper struct for displaying in quoted presentation format. /// /// A value of this type can be obtained via `CharStr::display_quoted`. #[derive(Clone, Copy, Debug)] @@ -826,12 +904,29 @@ impl<'a> fmt::Display for DisplayQuoted<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str("\"")?; for &ch in self.0.as_ref() { - fmt::Display::fmt(&Symbol::from_quoted_octet(ch), f)?; + fmt::Display::fmt(&Symbol::quoted_from_octet(ch), f)?; } f.write_str("\"") } } +//------------ DisplayUnquoted ----------------------------------------------- + +/// Helper struct for displaying in serialization format. +/// +/// A value of this type can be obtained via `CharStr::display_serialized`. +#[derive(Clone, Copy, Debug)] +pub struct DisplayUnquoted<'a>(&'a CharStr<[u8]>); + +impl<'a> fmt::Display for DisplayUnquoted<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for &ch in self.0.as_ref() { + fmt::Display::fmt(&Symbol::from_octet(ch), f)?; + } + Ok(()) + } +} + //============ Error Types =================================================== //------------ CharStrError -------------------------------------------------- @@ -1055,47 +1150,50 @@ mod test { use serde_test::{assert_tokens, Configure, Token}; assert_tokens( - &CharStr::from_octets(vec![b'f', b'o', 0x12]) + &CharStr::from_octets(Vec::from(b"fo\x12 bar".as_ref())) .unwrap() .compact(), &[ Token::NewtypeStruct { name: "CharStr" }, - Token::ByteBuf(b"fo\x12"), + Token::ByteBuf(b"fo\x12 bar"), ], ); assert_tokens( - &CharStr::from_octets(vec![b'f', b'o', 0x12]) + &CharStr::from_octets(Vec::from(b"fo\x12 bar".as_ref())) .unwrap() .readable(), &[ Token::NewtypeStruct { name: "CharStr" }, - Token::Str("fo\\018"), + Token::Str("fo\\018 bar"), ], ); } + #[cfg(feature = "serde")] + #[test] + fn cycle_serde_json() { + let json = r#""foo bar\" \\018;""#; + + let cstr: CharStr> = serde_json::from_str(json).unwrap(); + assert_eq!(cstr.as_slice(), b"foo bar\" \x12;"); + assert_eq!(serde_json::to_string(&cstr).unwrap(), json); + } + #[test] fn display() { - fn cmp(input: &[u8], normal: &str, quoted: &str) { - assert_eq!( - format!("{}", CharStr::from_octets(input).unwrap()), - normal - ); - assert_eq!( - format!( - "{}", - CharStr::from_octets(input).unwrap().display_quoted() - ), - quoted - ); + fn cmp(input: &[u8], normal: &str, quoted: &str, unquoted: &str) { + let s = CharStr::from_octets(input).unwrap(); + assert_eq!(format!("{}", s), normal); + assert_eq!(format!("{}", s.display_quoted()), quoted); + assert_eq!(format!("{}", s.display_unquoted()), unquoted); } - cmp(b"foo", "foo", "\"foo\""); - cmp(b"f oo", "f\\ oo", "\"f oo\""); - cmp(b"f\"oo", "f\\\"oo", "\"f\\\"oo\""); - cmp(b"f\\oo", "f\\\\oo", "\"f\\\\oo\""); - cmp(b"f;oo", "f\\;oo", "\"f;oo\""); - cmp(b"f\noo", "f\\010oo", "\"f\\010oo\""); + cmp(br#"foo"#, r#"foo"#, r#""foo""#, r#"foo"#); + cmp(br#"f oo"#, r#"f oo"#, r#""f oo""#, r#"f\ oo"#); + cmp(br#"f"oo"#, r#"f"oo"#, r#""f\"oo""#, r#"f\"oo"#); + cmp(br#"f\oo"#, r#"f\\oo"#, r#""f\\oo""#, r#"f\\oo"#); + cmp(br#"f;oo"#, r#"f;oo"#, r#""f;oo""#, r#"f\;oo"#); + cmp(b"f\noo", r#"f\010oo"#, r#""f\010oo""#, r#"f\010oo"#); } } diff --git a/src/base/scan.rs b/src/base/scan.rs index c52df9346..5f8f3f71f 100644 --- a/src/base/scan.rs +++ b/src/base/scan.rs @@ -590,10 +590,10 @@ impl Symbol { /// Provides the best symbol for an octet inside a quoted string. /// - /// The function will only escape a double quote and backslash using the + /// The function will only escape a double quote and backslash using a /// simple escape and all non-printable characters using decimal escapes. #[must_use] - pub fn from_quoted_octet(ch: u8) -> Self { + pub fn quoted_from_octet(ch: u8) -> Self { if ch == b'"' || ch == b'\\' { Symbol::SimpleEscape(ch) } else if !(0x20..0x7F).contains(&ch) { @@ -603,6 +603,21 @@ impl Symbol { } } + /// Provides the best symbol for an octet inside a `Display` impl. + /// + /// The function will only escape a backslash using a simple escape and + /// all non-printable characters using decimal escapes. + #[must_use] + pub fn display_from_octet(ch: u8) -> Self { + if ch == b'\\' { + Symbol::SimpleEscape(ch) + } else if !(0x20..0x7F).contains(&ch) { + Symbol::DecimalEscape(ch) + } else { + Symbol::Char(ch as char) + } + } + /// Converts the symbol into an octet if it represents one. /// /// Both domain names and character strings operate on bytes instead of