From 1ad8bc660616df82c9d726f297c2b4b17306cd95 Mon Sep 17 00:00:00 2001 From: John DiSanti Date: Fri, 4 Jun 2021 08:40:58 -0700 Subject: [PATCH 1/4] Add fuzz harness for smithy-json with json_deserialize target --- rust-runtime/smithy-json/fuzz/.gitignore | 7 + rust-runtime/smithy-json/fuzz/Cargo.toml | 32 +++++ .../fuzz/corpus/json_deserialize/array | 1 + .../corpus/json_deserialize/string_escapes | 1 + .../fuzz/corpus/json_deserialize/struct | 7 + .../smithy-json/fuzz/fuzz_targets/common.rs | 120 ++++++++++++++++++ .../fuzz/fuzz_targets/json_deserialize.rs | 13 ++ .../json_deserialize_corpus_cov.rs | 26 ++++ .../smithy-json/fuzz/show-corpus-coverage.sh | 12 ++ rust-runtime/smithy-json/src/deserialize.rs | 6 +- 10 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 rust-runtime/smithy-json/fuzz/.gitignore create mode 100644 rust-runtime/smithy-json/fuzz/Cargo.toml create mode 100644 rust-runtime/smithy-json/fuzz/corpus/json_deserialize/array create mode 100644 rust-runtime/smithy-json/fuzz/corpus/json_deserialize/string_escapes create mode 100644 rust-runtime/smithy-json/fuzz/corpus/json_deserialize/struct create mode 100644 rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs create mode 100644 rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize.rs create mode 100644 rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize_corpus_cov.rs create mode 100755 rust-runtime/smithy-json/fuzz/show-corpus-coverage.sh diff --git a/rust-runtime/smithy-json/fuzz/.gitignore b/rust-runtime/smithy-json/fuzz/.gitignore new file mode 100644 index 0000000000..4b2272f972 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/.gitignore @@ -0,0 +1,7 @@ + +target +corpus +artifacts +coverage +coverage.profdata +coverage.profraw diff --git a/rust-runtime/smithy-json/fuzz/Cargo.toml b/rust-runtime/smithy-json/fuzz/Cargo.toml new file mode 100644 index 0000000000..57fa77a7da --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/Cargo.toml @@ -0,0 +1,32 @@ + +[package] +name = "smithy-json-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +serde_json = { version = "1", features = ["float_roundtrip"] } +smithy-json = { path = ".." } +smithy-types = { path = "../../smithy-types" } + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "json_deserialize" +path = "fuzz_targets/json_deserialize.rs" +test = false +doc = false + +[[bin]] +name = "json_deserialize_corpus_cov" +path = "fuzz_targets/json_deserialize_corpus_cov.rs" +test = false +doc = false diff --git a/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/array b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/array new file mode 100644 index 0000000000..33872d4b69 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/array @@ -0,0 +1 @@ +[null, true, false, 0, -1, 2.2, -2.2, 2e5, "test", "\t", "\u0078", [], [1], {}, {"foo": 1}] diff --git a/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/string_escapes b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/string_escapes new file mode 100644 index 0000000000..94b2a48316 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/string_escapes @@ -0,0 +1 @@ +"\uD801\uDC37\/\b\n\r\\\t\"\f\u0000" diff --git a/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/struct b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/struct new file mode 100644 index 0000000000..fbae06a92c --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/struct @@ -0,0 +1,7 @@ +{ "some_int": 5, + "some_float": 5.2, + "some_negative": -5, + "some_negative_float": -2.4, + "some_string": "test", + "some_struct": { "nested": "asdf" }, + "some_array": ["one", "two"] } diff --git a/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs b/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs new file mode 100644 index 0000000000..df03067e6e --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs @@ -0,0 +1,120 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +use serde_json::{Map, Value}; +use smithy_json::deserialize::{Error, Token}; +use smithy_types::Number; + +pub fn run_data(data: &[u8]) { + // Parse through with smithy-json first to make sure it doesn't panic on invalid inputs + if let Ok(tokens) = + smithy_json::deserialize::json_token_iter(data).collect::, Error>>() + { + // Exercise string unescaping since the later comparison against Serde + // reserializes, and thus, loses UTF-16 surrogate pairs. + for token in tokens { + if let Token::ValueString(escaped) = token { + if let Ok(unescaped) = escaped.to_unescaped() { + let serde_equiv = serde_json::from_str::(&format!( + "\"{}\"", + escaped.as_escaped_str() + )) + .unwrap(); + assert_eq!(serde_equiv, unescaped); + } + } + } + } + + // Now parse with Serde, and if it's valid, compare the two and panic if different + let serde_value = serde_json::from_slice::(data); + if let Ok(value) = serde_value { + // Re-serialize to normalize the large floating point numbers + let json = serde_json::to_string(&value).unwrap(); + + let tokens = smithy_json::deserialize::json_token_iter(json.as_bytes()) + .collect::, Error>>() + .unwrap(); + let mut token_iter = RewindingTokenIter::new(tokens); + let converted_value = convert_tokens(&mut token_iter); + assert_eq!(None, token_iter.next()); + assert_eq!(value, converted_value); + } +} + +/// Utility for iterating over the tokens while being able to rewind +struct RewindingTokenIter<'a> { + tokens: Vec>, + position: usize, +} + +impl<'a> RewindingTokenIter<'a> { + fn new(tokens: Vec>) -> RewindingTokenIter<'a> { + RewindingTokenIter { + tokens, + position: 0, + } + } + + fn next(&mut self) -> Option<&Token<'a>> { + if self.position < self.tokens.len() { + let value = &self.tokens[self.position]; + self.position += 1; + Some(value) + } else { + None + } + } + + fn rewind_once(&mut self) { + assert!(self.position > 0); + self.position -= 1; + } +} + +/// Converts a token stream into a Serde [Value] +fn convert_tokens(tokens: &mut RewindingTokenIter) -> Value { + match tokens.next().unwrap() { + Token::StartObject => { + let mut map = Map::new(); + loop { + match tokens.next() { + Some(Token::EndObject) => break, + Some(Token::ObjectKey(key)) => { + let key = key.to_unescaped().unwrap().to_string(); + let value = convert_tokens(tokens); + map.insert(key, value); + } + Some(_) => unreachable!(), + None => panic!("should have encountered EndObject before end of stream"), + } + } + Value::Object(map) + } + Token::StartArray => { + let mut list = Vec::new(); + loop { + match tokens.next() { + Some(Token::EndArray) => break, + Some(_) => { + tokens.rewind_once(); + list.push(convert_tokens(tokens)); + } + None => panic!("should have encountered EndArray before end of stream"), + } + } + Value::Array(list) + } + Token::ValueNull => Value::Null, + Token::ValueNumber(num) => Value::Number(match num { + Number::NegInt(value) => serde_json::Number::from(*value), + Number::PosInt(value) => serde_json::Number::from(*value), + Number::Float(value) => serde_json::Number::from_f64(*value).unwrap(), + }), + Token::ValueString(string) => Value::String(string.to_unescaped().unwrap().into()), + Token::ValueBool(bool) => Value::Bool(*bool), + _ => unreachable!(), + } +} diff --git a/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize.rs b/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize.rs new file mode 100644 index 0000000000..6c536aa7b1 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize.rs @@ -0,0 +1,13 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#![no_main] +use libfuzzer_sys::fuzz_target; + +mod common; + +fuzz_target!(|data: &[u8]| { + common::run_data(data); +}); diff --git a/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize_corpus_cov.rs b/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize_corpus_cov.rs new file mode 100644 index 0000000000..60ebbe9f9a --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize_corpus_cov.rs @@ -0,0 +1,26 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +use std::fs::File; +use std::io::Read; + +mod common; + +fn main() { + let current_dir = std::env::current_dir().unwrap(); + println!("cwd: {:?}", current_dir); + + let corpus_path = "corpus/json_deserialize"; + for entry in std::fs::read_dir(corpus_path).unwrap() { + let path = entry.unwrap().path(); + println!("Running {:?}", path); + + let mut data = Vec::new(); + let mut file = File::open(path).unwrap(); + file.read_to_end(&mut data).unwrap(); + + common::run_data(&data); + } +} diff --git a/rust-runtime/smithy-json/fuzz/show-corpus-coverage.sh b/rust-runtime/smithy-json/fuzz/show-corpus-coverage.sh new file mode 100755 index 0000000000..770891e504 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/show-corpus-coverage.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Script that gathers coverage from the entire fuzz corpus and shows covered lines in the terminal. +set -ex + +# Run instrumented json_deserialize_corpus_cov to run the entire fuzz corpus with coverage +RUSTFLAGS="-Zinstrument-coverage" LLVM_PROFILE_FILE=coverage.profraw cargo run --release --bin json_deserialize_corpus_cov + +# Convert raw coverage into profdata +cargo profdata -- merge -sparse coverage.profraw -o coverage.profdata + +# Show coverage +cargo cov -- show --use-color --ignore-filename-regex='/.cargo/registry' --instr-profile=coverage.profdata --object target/release/json_deserialize_corpus_cov --show-instantiations --show-line-counts-or-regions | less -R diff --git a/rust-runtime/smithy-json/src/deserialize.rs b/rust-runtime/smithy-json/src/deserialize.rs index 03906f8ed5..9f82f2d1ed 100644 --- a/rust-runtime/smithy-json/src/deserialize.rs +++ b/rust-runtime/smithy-json/src/deserialize.rs @@ -75,9 +75,9 @@ impl<'a> EscapedStr<'a> { self.0 } - /// Consumes self and returns the string unescaped. + /// Unescapes the string and returns it. /// If the string doesn't need unescaping, it will be returned directly. - pub fn into_unescaped(self) -> Result, EscapeError> { + pub fn to_unescaped(&self) -> Result, EscapeError> { unescape_string(self.0) } } @@ -860,6 +860,6 @@ mod tests { fn escaped_str() { let escaped = EscapedStr::new("foo\\nbar"); assert_eq!("foo\\nbar", escaped.as_escaped_str()); - assert_eq!("foo\nbar", escaped.into_unescaped().unwrap()); + assert_eq!("foo\nbar", escaped.to_unescaped().unwrap()); } } From 9152ef1fe5f598d29a0574bfc052f9c30672f973 Mon Sep 17 00:00:00 2001 From: John DiSanti Date: Fri, 4 Jun 2021 12:50:23 -0700 Subject: [PATCH 2/4] Fix unescaping bug found by fuzzing --- rust-runtime/smithy-json/src/escape.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/rust-runtime/smithy-json/src/escape.rs b/rust-runtime/smithy-json/src/escape.rs index 8f2faf6807..ffd006cf3e 100644 --- a/rust-runtime/smithy-json/src/escape.rs +++ b/rust-runtime/smithy-json/src/escape.rs @@ -156,8 +156,15 @@ fn read_codepoint(rest: &[u8]) -> Result { } let codepoint_str = std::str::from_utf8(&rest[2..6]).map_err(|_| Error::InvalidUtf8)?; - u16::from_str_radix(codepoint_str, 16) - .map_err(|_| Error::InvalidUnicodeEscape(codepoint_str.into())) + + // Error on characters `u16::from_str_radix` would otherwise accept, such as `+` + for byte in codepoint_str.bytes() { + match byte { + b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => {} + _ => return Err(Error::InvalidUnicodeEscape(codepoint_str.into())), + } + } + Ok(u16::from_str_radix(codepoint_str, 16).expect("hex string is valid 16-bit value")) } /// Reads JSON Unicode escape sequences (i.e., "\u1234"). Will also read @@ -252,6 +259,11 @@ mod test { Err(Error::InvalidSurrogatePair(0xD801, 0xC501)), unescape_string("\\uD801\\uC501") ); + + assert_eq!( + Err(Error::InvalidUnicodeEscape("+04D".into())), + unescape_string("\\u+04D") + ); } use proptest::proptest; From 6bb7e89c3e9ad7ab9afe619f42d83b99bc7f6c63 Mon Sep 17 00:00:00 2001 From: John DiSanti Date: Fri, 4 Jun 2021 15:04:54 -0700 Subject: [PATCH 3/4] CR feedback --- rust-runtime/smithy-json/fuzz/Cargo.toml | 3 +- .../smithy-json/fuzz/fuzz_targets/common.rs | 51 +++++-------------- rust-runtime/smithy-json/src/escape.rs | 7 +-- 3 files changed, 15 insertions(+), 46 deletions(-) diff --git a/rust-runtime/smithy-json/fuzz/Cargo.toml b/rust-runtime/smithy-json/fuzz/Cargo.toml index 57fa77a7da..f04125f556 100644 --- a/rust-runtime/smithy-json/fuzz/Cargo.toml +++ b/rust-runtime/smithy-json/fuzz/Cargo.toml @@ -1,8 +1,7 @@ - [package] name = "smithy-json-fuzz" version = "0.0.0" -authors = ["Automatically generated"] +authors = ["AWS Rust SDK Team ", "John DiSanti "] publish = false edition = "2018" diff --git a/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs b/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs index df03067e6e..865f8da83d 100644 --- a/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs +++ b/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs @@ -6,6 +6,7 @@ use serde_json::{Map, Value}; use smithy_json::deserialize::{Error, Token}; use smithy_types::Number; +use std::iter::Peekable; pub fn run_data(data: &[u8]) { // Parse through with smithy-json first to make sure it doesn't panic on invalid inputs @@ -37,45 +38,15 @@ pub fn run_data(data: &[u8]) { let tokens = smithy_json::deserialize::json_token_iter(json.as_bytes()) .collect::, Error>>() .unwrap(); - let mut token_iter = RewindingTokenIter::new(tokens); + let mut token_iter = tokens.into_iter().peekable(); let converted_value = convert_tokens(&mut token_iter); assert_eq!(None, token_iter.next()); assert_eq!(value, converted_value); } } -/// Utility for iterating over the tokens while being able to rewind -struct RewindingTokenIter<'a> { - tokens: Vec>, - position: usize, -} - -impl<'a> RewindingTokenIter<'a> { - fn new(tokens: Vec>) -> RewindingTokenIter<'a> { - RewindingTokenIter { - tokens, - position: 0, - } - } - - fn next(&mut self) -> Option<&Token<'a>> { - if self.position < self.tokens.len() { - let value = &self.tokens[self.position]; - self.position += 1; - Some(value) - } else { - None - } - } - - fn rewind_once(&mut self) { - assert!(self.position > 0); - self.position -= 1; - } -} - /// Converts a token stream into a Serde [Value] -fn convert_tokens(tokens: &mut RewindingTokenIter) -> Value { +fn convert_tokens<'a, I: Iterator>>(tokens: &mut Peekable) -> Value { match tokens.next().unwrap() { Token::StartObject => { let mut map = Map::new(); @@ -96,10 +67,12 @@ fn convert_tokens(tokens: &mut RewindingTokenIter) -> Value { Token::StartArray => { let mut list = Vec::new(); loop { - match tokens.next() { - Some(Token::EndArray) => break, + match tokens.peek() { + Some(Token::EndArray) => { + tokens.next(); + break; + } Some(_) => { - tokens.rewind_once(); list.push(convert_tokens(tokens)); } None => panic!("should have encountered EndArray before end of stream"), @@ -109,12 +82,12 @@ fn convert_tokens(tokens: &mut RewindingTokenIter) -> Value { } Token::ValueNull => Value::Null, Token::ValueNumber(num) => Value::Number(match num { - Number::NegInt(value) => serde_json::Number::from(*value), - Number::PosInt(value) => serde_json::Number::from(*value), - Number::Float(value) => serde_json::Number::from_f64(*value).unwrap(), + Number::NegInt(value) => serde_json::Number::from(value), + Number::PosInt(value) => serde_json::Number::from(value), + Number::Float(value) => serde_json::Number::from_f64(value).unwrap(), }), Token::ValueString(string) => Value::String(string.to_unescaped().unwrap().into()), - Token::ValueBool(bool) => Value::Bool(*bool), + Token::ValueBool(bool) => Value::Bool(bool), _ => unreachable!(), } } diff --git a/rust-runtime/smithy-json/src/escape.rs b/rust-runtime/smithy-json/src/escape.rs index ffd006cf3e..1110e5d127 100644 --- a/rust-runtime/smithy-json/src/escape.rs +++ b/rust-runtime/smithy-json/src/escape.rs @@ -158,11 +158,8 @@ fn read_codepoint(rest: &[u8]) -> Result { let codepoint_str = std::str::from_utf8(&rest[2..6]).map_err(|_| Error::InvalidUtf8)?; // Error on characters `u16::from_str_radix` would otherwise accept, such as `+` - for byte in codepoint_str.bytes() { - match byte { - b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => {} - _ => return Err(Error::InvalidUnicodeEscape(codepoint_str.into())), - } + if codepoint_str.bytes().any(|byte| !matches!(byte, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) { + return Err(Error::InvalidUnicodeEscape(codepoint_str.into())) } Ok(u16::from_str_radix(codepoint_str, 16).expect("hex string is valid 16-bit value")) } From 4e1570b65b1e917732c543ee6be70172d1830ae3 Mon Sep 17 00:00:00 2001 From: John DiSanti Date: Fri, 4 Jun 2021 15:17:51 -0700 Subject: [PATCH 4/4] Run rustfmt --- rust-runtime/smithy-json/src/escape.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rust-runtime/smithy-json/src/escape.rs b/rust-runtime/smithy-json/src/escape.rs index 1110e5d127..2caf317b45 100644 --- a/rust-runtime/smithy-json/src/escape.rs +++ b/rust-runtime/smithy-json/src/escape.rs @@ -158,8 +158,11 @@ fn read_codepoint(rest: &[u8]) -> Result { let codepoint_str = std::str::from_utf8(&rest[2..6]).map_err(|_| Error::InvalidUtf8)?; // Error on characters `u16::from_str_radix` would otherwise accept, such as `+` - if codepoint_str.bytes().any(|byte| !matches!(byte, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) { - return Err(Error::InvalidUnicodeEscape(codepoint_str.into())) + if codepoint_str + .bytes() + .any(|byte| !matches!(byte, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) + { + return Err(Error::InvalidUnicodeEscape(codepoint_str.into())); } Ok(u16::from_str_radix(codepoint_str, 16).expect("hex string is valid 16-bit value")) }