diff --git a/rust-runtime/smithy-json/fuzz/.gitignore b/rust-runtime/smithy-json/fuzz/.gitignore new file mode 100644 index 0000000000..4b2272f972 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/.gitignore @@ -0,0 +1,7 @@ + +target +corpus +artifacts +coverage +coverage.profdata +coverage.profraw diff --git a/rust-runtime/smithy-json/fuzz/Cargo.toml b/rust-runtime/smithy-json/fuzz/Cargo.toml new file mode 100644 index 0000000000..f04125f556 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "smithy-json-fuzz" +version = "0.0.0" +authors = ["AWS Rust SDK Team ", "John DiSanti "] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +serde_json = { version = "1", features = ["float_roundtrip"] } +smithy-json = { path = ".." } +smithy-types = { path = "../../smithy-types" } + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "json_deserialize" +path = "fuzz_targets/json_deserialize.rs" +test = false +doc = false + +[[bin]] +name = "json_deserialize_corpus_cov" +path = "fuzz_targets/json_deserialize_corpus_cov.rs" +test = false +doc = false diff --git a/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/array b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/array new file mode 100644 index 0000000000..33872d4b69 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/array @@ -0,0 +1 @@ +[null, true, false, 0, -1, 2.2, -2.2, 2e5, "test", "\t", "\u0078", [], [1], {}, {"foo": 1}] diff --git a/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/string_escapes b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/string_escapes new file mode 100644 index 0000000000..94b2a48316 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/string_escapes @@ -0,0 +1 @@ +"\uD801\uDC37\/\b\n\r\\\t\"\f\u0000" diff --git a/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/struct b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/struct new file mode 100644 index 0000000000..fbae06a92c --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/corpus/json_deserialize/struct @@ -0,0 +1,7 @@ +{ "some_int": 5, + "some_float": 5.2, + "some_negative": -5, + "some_negative_float": -2.4, + "some_string": "test", + "some_struct": { "nested": "asdf" }, + "some_array": ["one", "two"] } diff --git a/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs b/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs new file mode 100644 index 0000000000..865f8da83d --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/fuzz_targets/common.rs @@ -0,0 +1,93 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +use serde_json::{Map, Value}; +use smithy_json::deserialize::{Error, Token}; +use smithy_types::Number; +use std::iter::Peekable; + +pub fn run_data(data: &[u8]) { + // Parse through with smithy-json first to make sure it doesn't panic on invalid inputs + if let Ok(tokens) = + smithy_json::deserialize::json_token_iter(data).collect::, Error>>() + { + // Exercise string unescaping since the later comparison against Serde + // reserializes, and thus, loses UTF-16 surrogate pairs. + for token in tokens { + if let Token::ValueString(escaped) = token { + if let Ok(unescaped) = escaped.to_unescaped() { + let serde_equiv = serde_json::from_str::(&format!( + "\"{}\"", + escaped.as_escaped_str() + )) + .unwrap(); + assert_eq!(serde_equiv, unescaped); + } + } + } + } + + // Now parse with Serde, and if it's valid, compare the two and panic if different + let serde_value = serde_json::from_slice::(data); + if let Ok(value) = serde_value { + // Re-serialize to normalize the large floating point numbers + let json = serde_json::to_string(&value).unwrap(); + + let tokens = smithy_json::deserialize::json_token_iter(json.as_bytes()) + .collect::, Error>>() + .unwrap(); + let mut token_iter = tokens.into_iter().peekable(); + let converted_value = convert_tokens(&mut token_iter); + assert_eq!(None, token_iter.next()); + assert_eq!(value, converted_value); + } +} + +/// Converts a token stream into a Serde [Value] +fn convert_tokens<'a, I: Iterator>>(tokens: &mut Peekable) -> Value { + match tokens.next().unwrap() { + Token::StartObject => { + let mut map = Map::new(); + loop { + match tokens.next() { + Some(Token::EndObject) => break, + Some(Token::ObjectKey(key)) => { + let key = key.to_unescaped().unwrap().to_string(); + let value = convert_tokens(tokens); + map.insert(key, value); + } + Some(_) => unreachable!(), + None => panic!("should have encountered EndObject before end of stream"), + } + } + Value::Object(map) + } + Token::StartArray => { + let mut list = Vec::new(); + loop { + match tokens.peek() { + Some(Token::EndArray) => { + tokens.next(); + break; + } + Some(_) => { + list.push(convert_tokens(tokens)); + } + None => panic!("should have encountered EndArray before end of stream"), + } + } + Value::Array(list) + } + Token::ValueNull => Value::Null, + Token::ValueNumber(num) => Value::Number(match num { + Number::NegInt(value) => serde_json::Number::from(value), + Number::PosInt(value) => serde_json::Number::from(value), + Number::Float(value) => serde_json::Number::from_f64(value).unwrap(), + }), + Token::ValueString(string) => Value::String(string.to_unescaped().unwrap().into()), + Token::ValueBool(bool) => Value::Bool(bool), + _ => unreachable!(), + } +} diff --git a/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize.rs b/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize.rs new file mode 100644 index 0000000000..6c536aa7b1 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize.rs @@ -0,0 +1,13 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#![no_main] +use libfuzzer_sys::fuzz_target; + +mod common; + +fuzz_target!(|data: &[u8]| { + common::run_data(data); +}); diff --git a/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize_corpus_cov.rs b/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize_corpus_cov.rs new file mode 100644 index 0000000000..60ebbe9f9a --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/fuzz_targets/json_deserialize_corpus_cov.rs @@ -0,0 +1,26 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +use std::fs::File; +use std::io::Read; + +mod common; + +fn main() { + let current_dir = std::env::current_dir().unwrap(); + println!("cwd: {:?}", current_dir); + + let corpus_path = "corpus/json_deserialize"; + for entry in std::fs::read_dir(corpus_path).unwrap() { + let path = entry.unwrap().path(); + println!("Running {:?}", path); + + let mut data = Vec::new(); + let mut file = File::open(path).unwrap(); + file.read_to_end(&mut data).unwrap(); + + common::run_data(&data); + } +} diff --git a/rust-runtime/smithy-json/fuzz/show-corpus-coverage.sh b/rust-runtime/smithy-json/fuzz/show-corpus-coverage.sh new file mode 100755 index 0000000000..770891e504 --- /dev/null +++ b/rust-runtime/smithy-json/fuzz/show-corpus-coverage.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Script that gathers coverage from the entire fuzz corpus and shows covered lines in the terminal. +set -ex + +# Run instrumented json_deserialize_corpus_cov to run the entire fuzz corpus with coverage +RUSTFLAGS="-Zinstrument-coverage" LLVM_PROFILE_FILE=coverage.profraw cargo run --release --bin json_deserialize_corpus_cov + +# Convert raw coverage into profdata +cargo profdata -- merge -sparse coverage.profraw -o coverage.profdata + +# Show coverage +cargo cov -- show --use-color --ignore-filename-regex='/.cargo/registry' --instr-profile=coverage.profdata --object target/release/json_deserialize_corpus_cov --show-instantiations --show-line-counts-or-regions | less -R diff --git a/rust-runtime/smithy-json/src/deserialize.rs b/rust-runtime/smithy-json/src/deserialize.rs index 03906f8ed5..9f82f2d1ed 100644 --- a/rust-runtime/smithy-json/src/deserialize.rs +++ b/rust-runtime/smithy-json/src/deserialize.rs @@ -75,9 +75,9 @@ impl<'a> EscapedStr<'a> { self.0 } - /// Consumes self and returns the string unescaped. + /// Unescapes the string and returns it. /// If the string doesn't need unescaping, it will be returned directly. - pub fn into_unescaped(self) -> Result, EscapeError> { + pub fn to_unescaped(&self) -> Result, EscapeError> { unescape_string(self.0) } } @@ -860,6 +860,6 @@ mod tests { fn escaped_str() { let escaped = EscapedStr::new("foo\\nbar"); assert_eq!("foo\\nbar", escaped.as_escaped_str()); - assert_eq!("foo\nbar", escaped.into_unescaped().unwrap()); + assert_eq!("foo\nbar", escaped.to_unescaped().unwrap()); } } diff --git a/rust-runtime/smithy-json/src/escape.rs b/rust-runtime/smithy-json/src/escape.rs index 8f2faf6807..2caf317b45 100644 --- a/rust-runtime/smithy-json/src/escape.rs +++ b/rust-runtime/smithy-json/src/escape.rs @@ -156,8 +156,15 @@ fn read_codepoint(rest: &[u8]) -> Result { } let codepoint_str = std::str::from_utf8(&rest[2..6]).map_err(|_| Error::InvalidUtf8)?; - u16::from_str_radix(codepoint_str, 16) - .map_err(|_| Error::InvalidUnicodeEscape(codepoint_str.into())) + + // Error on characters `u16::from_str_radix` would otherwise accept, such as `+` + if codepoint_str + .bytes() + .any(|byte| !matches!(byte, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) + { + return Err(Error::InvalidUnicodeEscape(codepoint_str.into())); + } + Ok(u16::from_str_radix(codepoint_str, 16).expect("hex string is valid 16-bit value")) } /// Reads JSON Unicode escape sequences (i.e., "\u1234"). Will also read @@ -252,6 +259,11 @@ mod test { Err(Error::InvalidSurrogatePair(0xD801, 0xC501)), unescape_string("\\uD801\\uC501") ); + + assert_eq!( + Err(Error::InvalidUnicodeEscape("+04D".into())), + unescape_string("\\u+04D") + ); } use proptest::proptest;