From c62afebd50328260cd7ccffbef1cd093c9a8ff07 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Wed, 12 May 2021 13:45:20 -0400 Subject: [PATCH] Redo url encoding (#368) * Redo URL encoding to cover all cases * Fix test that missed a couple of encodings * Should have been escaping ! --- .../aws-hyper/src/test_connection.rs | 2 +- .../smithy/rustsdk/FluentClientGenerator.kt | 2 +- codegen-test/model/rest-json-extras.smithy | 2 +- .../http/RequestBindingGeneratorTest.kt | 2 +- rust-runtime/smithy-http/Cargo.toml | 1 + rust-runtime/smithy-http/src/label.rs | 28 +++++-- rust-runtime/smithy-http/src/lib.rs | 1 + rust-runtime/smithy-http/src/query.rs | 77 ++----------------- rust-runtime/smithy-http/src/urlencode.rs | 34 ++++++++ 9 files changed, 68 insertions(+), 81 deletions(-) create mode 100644 rust-runtime/smithy-http/src/urlencode.rs diff --git a/aws/rust-runtime/aws-hyper/src/test_connection.rs b/aws/rust-runtime/aws-hyper/src/test_connection.rs index d494e9bf53..a2749fb647 100644 --- a/aws/rust-runtime/aws-hyper/src/test_connection.rs +++ b/aws/rust-runtime/aws-hyper/src/test_connection.rs @@ -118,7 +118,7 @@ impl> tower::Service> for TestConnec self.requests .lock() .unwrap() - .push(ValidateRequest { actual, expected }); + .push(ValidateRequest { expected, actual }); std::future::ready(Ok(resp.map(|body| SdkBody::from(body.into())))) } else { std::future::ready(Err("No more data".into())) diff --git a/aws/sdk-codegen/src/main/kotlin/software/amazon/smithy/rustsdk/FluentClientGenerator.kt b/aws/sdk-codegen/src/main/kotlin/software/amazon/smithy/rustsdk/FluentClientGenerator.kt index c394033ce4..8639160eed 100644 --- a/aws/sdk-codegen/src/main/kotlin/software/amazon/smithy/rustsdk/FluentClientGenerator.kt +++ b/aws/sdk-codegen/src/main/kotlin/software/amazon/smithy/rustsdk/FluentClientGenerator.kt @@ -107,7 +107,7 @@ class FluentClientGenerator(protocolConfig: ProtocolConfig) { pub fn from_conf_conn(conf: crate::Config, conn: #{aws_hyper}::conn::Standard) -> Self { let client = #{aws_hyper}::Client::new(conn); - Self { handle: std::sync::Arc::new(Handle { conf, client })} + Self { handle: std::sync::Arc::new(Handle { client, conf })} } pub fn conf(&self) -> &crate::Config { diff --git a/codegen-test/model/rest-json-extras.smithy b/codegen-test/model/rest-json-extras.smithy index 4e687a1c8b..b3d9d8d26a 100644 --- a/codegen-test/model/rest-json-extras.smithy +++ b/codegen-test/model/rest-json-extras.smithy @@ -15,7 +15,7 @@ apply QueryPrecedence @httpRequestTests([ method: "POST", uri: "/Precedence", body: "", - queryParams: ["bar=%26%F0%9F%90%B1", "hello%20there=how's%20your%20encoding?", "a%20%26%20b%20%26%20c=better%20encode%20%3D%20this"], + queryParams: ["bar=%26%F0%9F%90%B1", "hello%20there=how%27s%20your%20encoding%3F", "a%20%26%20b%20%26%20c=better%20encode%20%3D%20this"], params: { foo: "&🐱", baz: { diff --git a/codegen/src/test/kotlin/software/amazon/smithy/rust/codegen/generators/http/RequestBindingGeneratorTest.kt b/codegen/src/test/kotlin/software/amazon/smithy/rust/codegen/generators/http/RequestBindingGeneratorTest.kt index 98c479ce4d..dc8a9e2413 100644 --- a/codegen/src/test/kotlin/software/amazon/smithy/rust/codegen/generators/http/RequestBindingGeneratorTest.kt +++ b/codegen/src/test/kotlin/software/amazon/smithy/rust/codegen/generators/http/RequestBindingGeneratorTest.kt @@ -153,7 +153,7 @@ class RequestBindingGeneratorTest { assert_eq!(o.as_str(), "/somebucket%2Fok/1970-04-28T03%3A58%3A45Z"); o.clear(); inp.uri_query(&mut o); - assert_eq!(o.as_str(), "?paramName=svq!!%25%26&hello=0&hello=1&hello=2&hello=44") + assert_eq!(o.as_str(), "?paramName=svq%21%21%25%26&hello=0&hello=1&hello=2&hello=44") """ ) } diff --git a/rust-runtime/smithy-http/Cargo.toml b/rust-runtime/smithy-http/Cargo.toml index e3b8cd5284..585f578275 100644 --- a/rust-runtime/smithy-http/Cargo.toml +++ b/rust-runtime/smithy-http/Cargo.toml @@ -12,6 +12,7 @@ http-body = "0.4.0" http = "0.2.3" thiserror = "1" pin-project = "1" +percent-encoding = "2.1.0" # We are using hyper for our streaming body implementation, but this is an internal detail. hyper = "0.14.5" diff --git a/rust-runtime/smithy-http/src/label.rs b/rust-runtime/smithy-http/src/label.rs index 4a354e3e6c..48b61755c4 100644 --- a/rust-runtime/smithy-http/src/label.rs +++ b/rust-runtime/smithy-http/src/label.rs @@ -3,24 +3,36 @@ * SPDX-License-Identifier: Apache-2.0. */ -/// Formatting values as Smithy -/// [httpLabel](https://awslabs.github.io/smithy/1.0/spec/core/http-traits.html#httplabel-trait) +//! Formatting values as Smithy +//! [httpLabel](https://awslabs.github.io/smithy/1.0/spec/core/http-traits.html#httplabel-trait) + +use crate::urlencode::BASE_SET; +use percent_encoding::AsciiSet; use smithy_types::Instant; use std::fmt::Debug; +const GREEDY: &AsciiSet = &BASE_SET.remove(b'/'); + pub fn fmt_default(t: T) -> String { format!("{:?}", t) } pub fn fmt_string>(t: T, greedy: bool) -> String { - let s = t.as_ref().replace(":", "%3A"); - if greedy { - s - } else { - s.replace("/", "%2F") - } + let uri_set = if greedy { GREEDY } else { BASE_SET }; + percent_encoding::utf8_percent_encode(t.as_ref(), &uri_set).to_string() } pub fn fmt_timestamp(t: &Instant, format: smithy_types::instant::Format) -> String { crate::query::fmt_string(t.fmt(format)) } + +#[cfg(test)] +mod test { + use crate::label::fmt_string; + + #[test] + fn greedy_params() { + assert_eq!(fmt_string("a/b", false), "a%2Fb"); + assert_eq!(fmt_string("a/b", true), "a/b"); + } +} diff --git a/rust-runtime/smithy-http/src/lib.rs b/rust-runtime/smithy-http/src/lib.rs index fd2ac7881e..9fd661fefe 100644 --- a/rust-runtime/smithy-http/src/lib.rs +++ b/rust-runtime/smithy-http/src/lib.rs @@ -17,3 +17,4 @@ pub mod query; pub mod response; pub mod result; pub mod retry; +mod urlencode; diff --git a/rust-runtime/smithy-http/src/query.rs b/rust-runtime/smithy-http/src/query.rs index 410826b309..8100b8b86f 100644 --- a/rust-runtime/smithy-http/src/query.rs +++ b/rust-runtime/smithy-http/src/query.rs @@ -3,80 +3,25 @@ * SPDX-License-Identifier: Apache-2.0. */ +use crate::urlencode::BASE_SET; +use percent_encoding::utf8_percent_encode; /// Formatting values into the query string as specified in /// [httpQuery](https://awslabs.github.io/smithy/1.0/spec/core/http-traits.html#httpquery-trait) use smithy_types::Instant; use std::fmt::Debug; -const HEX_CHARS: &[u8; 16] = b"0123456789ABCDEF"; - pub fn fmt_default(t: T) -> String { format!("{:?}", t) } pub fn fmt_string>(t: T) -> String { - let bytes = t.as_ref(); - let final_capacity = bytes - .chars() - .map(|c| { - if is_valid_query(c) { - 1 - } else { - c.len_utf8() * 3 - } - }) - .sum(); - let mut out = String::with_capacity(final_capacity); - for char in bytes.chars() { - url_encode(char, &mut out); - } - debug_assert_eq!(out.capacity(), final_capacity); - out + utf8_percent_encode(t.as_ref(), BASE_SET).to_string() } pub fn fmt_timestamp(t: &Instant, format: smithy_types::instant::Format) -> String { fmt_string(t.fmt(format)) } -fn is_valid_query(c: char) -> bool { - // Although & / = are allowed in the query string, we want to escape them - let explicit_invalid = |c: char| !matches!(c, '&' | '='); - let unreserved = |c: char| c.is_alphanumeric() || matches!(c, '-' | '.' | '_' | '~'); - - // RFC-3986 §3.3 allows sub-delims (defined in section2.2) to be in the path component. - // This includes both colon ':' and comma ',' characters. - // Smithy protocol tests percent encode these expected values though whereas `encodeUrlPath()` - // does not and follows the RFC. Fixing the tests was discussed but would adversely affect - // other SDK's and we were asked to work around it. - // Replace any left over sub-delims with the percent encoded value so that tests can proceed - // https://tools.ietf.org/html/rfc3986#section-3.3 - - let sub_delims = |c: char| match c { - '!' | '$' | '\'' | '(' | ')' | '*' | '+' | /*',' |*/ ';' => true, - // TODO: should &/= be url encoded? - '&' | '=' => false, - _ => false, - }; - let p_char = |c: char| unreserved(c) || sub_delims(c) || /* c == ':' || */ c == '@'; - explicit_invalid(c) && (p_char(c) || c == '/' || c == '?') -} - -fn url_encode(c: char, buff: &mut String) { - if is_valid_query(c) { - buff.push(c) - } else { - let mut inner_buff = [0; 4]; - let u8_slice = c.encode_utf8(&mut inner_buff).as_bytes(); - for c in u8_slice { - let upper = (c & 0xf0) >> 4; - let lower = c & 0x0f; - buff.push('%'); - buff.push(HEX_CHARS[upper as usize] as char); - buff.push(HEX_CHARS[lower as usize] as char); - } - } -} - /// Simple abstraction to enable appending params to a string as query params /// /// ```rust @@ -113,22 +58,16 @@ impl<'a> Writer<'a> { #[cfg(test)] mod test { - use crate::query::{fmt_string, is_valid_query}; - - #[test] - fn test_valid_query_chars() { - assert_eq!(is_valid_query(' '), false); - assert_eq!(is_valid_query('a'), true); - assert_eq!(is_valid_query('/'), true); - assert_eq!(is_valid_query('%'), false); - } + use crate::query::fmt_string; #[test] - fn test_url_encode() { + fn url_encode() { assert_eq!(fmt_string("y̆").as_str(), "y%CC%86"); assert_eq!(fmt_string(" ").as_str(), "%20"); - assert_eq!(fmt_string("foo/baz%20").as_str(), "foo/baz%2520"); + assert_eq!(fmt_string("foo/baz%20").as_str(), "foo%2Fbaz%2520"); assert_eq!(fmt_string("&=").as_str(), "%26%3D"); assert_eq!(fmt_string("🐱").as_str(), "%F0%9F%90%B1"); + // `:` needs to be encoded, but only for AWS services + assert_eq!(fmt_string("a:b"), "a%3Ab") } } diff --git a/rust-runtime/smithy-http/src/urlencode.rs b/rust-runtime/smithy-http/src/urlencode.rs new file mode 100644 index 0000000000..44be9dbe9c --- /dev/null +++ b/rust-runtime/smithy-http/src/urlencode.rs @@ -0,0 +1,34 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +use percent_encoding::{AsciiSet, CONTROLS}; + +/// base set of characters that must be URL encoded +pub const BASE_SET: &AsciiSet = &CONTROLS + .add(b' ') + .add(b'/') + // RFC-3986 §3.3 allows sub-delims (defined in section2.2) to be in the path component. + // This includes both colon ':' and comma ',' characters. + // Smithy protocol tests & AWS services percent encode these expected values. Signing + // will fail if these values are not percent encoded + .add(b':') + .add(b',') + .add(b'?') + .add(b'#') + .add(b'[') + .add(b')') + .add(b')') + .add(b'@') + .add(b'!') + .add(b'$') + .add(b'&') + .add(b'\'') + .add(b'(') + .add(b')') + .add(b'*') + .add(b'+') + .add(b';') + .add(b'=') + .add(b'%');