From 49decfbd3d6cb772aae8934a3789f907c6ebfc5d Mon Sep 17 00:00:00 2001 From: Daniele <> Date: Sat, 2 Jul 2022 16:56:53 +0200 Subject: [PATCH 1/4] Override base to use in the exponential backoff --- rust-runtime/aws-smithy-client/src/retry.rs | 54 ++++++------- .../aws-smithy-client/tests/e2e_test.rs | 80 +++++++++++++++++-- rust-runtime/aws-smithy-types/src/retry.rs | 14 ++++ 3 files changed, 116 insertions(+), 32 deletions(-) diff --git a/rust-runtime/aws-smithy-client/src/retry.rs b/rust-runtime/aws-smithy-client/src/retry.rs index eca522a18d..f186939070 100644 --- a/rust-runtime/aws-smithy-client/src/retry.rs +++ b/rust-runtime/aws-smithy-client/src/retry.rs @@ -58,19 +58,19 @@ pub struct Config { timeout_retry_cost: usize, max_attempts: u32, max_backoff: Duration, - base: fn() -> f64, + base: u32, } impl Config { /// Override `b` in the exponential backoff computation /// - /// By default, `base` is a randomly generated value between 0 and 1. In tests, it can + /// By default, `base` is 100ms. In tests, it can /// be helpful to override this: /// ```no_run /// use aws_smithy_client::retry::Config; - /// let conf = Config::default().with_base(||1_f64); + /// let conf = Config::default().with_base(50); /// ``` - pub fn with_base(mut self, base: fn() -> f64) -> Self { + pub fn with_base(mut self, base: u32) -> Self { self.base = base; self } @@ -94,7 +94,7 @@ impl Default for Config { max_attempts: MAX_ATTEMPTS, max_backoff: Duration::from_secs(20), // by default, use a random base for exponential backoff - base: fastrand::f64, + base: BASE, } } } @@ -108,6 +108,7 @@ impl From for Config { const MAX_ATTEMPTS: u32 = 3; const INITIAL_RETRY_TOKENS: usize = 500; const RETRY_COST: usize = 5; +const BASE: u32 = 100; // Defaults to 100 ms for all services except DynamoDB, where it defaults to 50ms /// Manage retries for a service /// @@ -264,16 +265,15 @@ impl RetryHandler { }; /* From the retry spec: - b = random number within the range of: 0 <= b <= 1 + b = default 100 r = 2 t_i = min(br^i, MAX_BACKOFF); */ let r: i32 = 2; - let b = (self.config.base)(); // `self.local.attempts` tracks number of requests made including the initial request // The initial attempt shouldn't count towards backoff calculations so we subtract it - let backoff = b * (r.pow(self.local.attempts - 1) as f64); - let backoff = Duration::from_secs_f64(backoff).min(self.config.max_backoff); + let backoff = self.config.base as f64 * (r.pow(self.local.attempts - 1) as f64); + let backoff = Duration::from_millis(backoff as u64).min(self.config.max_backoff); let next = RetryHandler { local: RequestLocalRetryState { attempts: self.local.attempts + 1, @@ -368,7 +368,7 @@ mod test { use std::time::Duration; fn test_config() -> Config { - Config::default().with_base(|| 1_f64) + Config::default().with_base(1) } #[test] @@ -384,13 +384,13 @@ mod test { let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(1)); + assert_eq!(dur, Duration::from_millis(1)); assert_eq!(policy.retry_quota(), 495); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(2)); + assert_eq!(dur, Duration::from_millis(2)); assert_eq!(policy.retry_quota(), 490); let no_retry = policy.should_retry(&RetryKind::Unnecessary); @@ -404,13 +404,13 @@ mod test { let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(1)); + assert_eq!(dur, Duration::from_millis(1)); assert_eq!(policy.retry_quota(), 495); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(2)); + assert_eq!(dur, Duration::from_millis(2)); assert_eq!(policy.retry_quota(), 490); let no_retry = policy.should_retry(&RetryKind::Error(ErrorKind::ServerError)); @@ -427,7 +427,7 @@ mod test { let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(1)); + assert_eq!(dur, Duration::from_millis(1)); assert_eq!(policy.retry_quota(), 0); let no_retry = policy.should_retry(&RetryKind::Error(ErrorKind::ServerError)); @@ -443,13 +443,13 @@ mod test { let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::TransientError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(1)); + assert_eq!(dur, Duration::from_millis(1)); assert_eq!(policy.retry_quota(), 90); let (policy, dur) = policy - .should_retry(&RetryKind::Explicit(Duration::from_secs(1))) + .should_retry(&RetryKind::Explicit(Duration::from_millis(1))) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(1)); + assert_eq!(dur, Duration::from_millis(1)); assert_eq!( policy.retry_quota(), 90, @@ -472,25 +472,25 @@ mod test { let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(1)); + assert_eq!(dur, Duration::from_millis(1)); assert_eq!(policy.retry_quota(), 495); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(2)); + assert_eq!(dur, Duration::from_millis(2)); assert_eq!(policy.retry_quota(), 490); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(4)); + assert_eq!(dur, Duration::from_millis(4)); assert_eq!(policy.retry_quota(), 485); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(8)); + assert_eq!(dur, Duration::from_millis(8)); assert_eq!(policy.retry_quota(), 480); let no_retry = policy.should_retry(&RetryKind::Error(ErrorKind::ServerError)); @@ -502,30 +502,30 @@ mod test { fn max_backoff_time() { let mut conf = test_config(); conf.max_attempts = 5; - conf.max_backoff = Duration::from_secs(3); + conf.max_backoff = Duration::from_millis(3); let policy = Standard::new(conf).new_request_policy(None); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(1)); + assert_eq!(dur, Duration::from_millis(1)); assert_eq!(policy.retry_quota(), 495); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(2)); + assert_eq!(dur, Duration::from_millis(2)); assert_eq!(policy.retry_quota(), 490); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(3)); + assert_eq!(dur, Duration::from_millis(3)); assert_eq!(policy.retry_quota(), 485); let (policy, dur) = policy .should_retry(&RetryKind::Error(ErrorKind::ServerError)) .expect("should retry"); - assert_eq!(dur, Duration::from_secs(3)); + assert_eq!(dur, Duration::from_millis(3)); assert_eq!(policy.retry_quota(), 480); let no_retry = policy.should_retry(&RetryKind::Error(ErrorKind::ServerError)); diff --git a/rust-runtime/aws-smithy-client/tests/e2e_test.rs b/rust-runtime/aws-smithy-client/tests/e2e_test.rs index 2a11b87df1..2c0d0ef1a1 100644 --- a/rust-runtime/aws-smithy-client/tests/e2e_test.rs +++ b/rust-runtime/aws-smithy-client/tests/e2e_test.rs @@ -100,7 +100,7 @@ fn test_operation() -> Operation http::Request { http::Request::builder() .body(SdkBody::from("request body")) @@ -138,7 +138,77 @@ async fn end_to_end_retry_test() { let conn = TestConnection::new(events); let retry_config = aws_smithy_client::retry::Config::default() .with_max_attempts(4) - .with_base(|| 1_f64); + .with_base(1); + let client = Client::, Identity>::new(conn.clone()) + .with_retry_config(retry_config) + .with_sleep_impl(Arc::new(TokioSleep::new())); + tokio::time::pause(); + let initial = tokio::time::Instant::now(); + let resp = client + .call(test_operation()) + .await + .expect("successful operation"); + assert_time_passed(initial, Duration::from_millis(3)); + assert_eq!(resp, "Hello!"); + // 3 requests should have been made, 2 failing & one success + assert_eq!(conn.requests().len(), 3); + + let initial = tokio::time::Instant::now(); + client + .call(test_operation()) + .await + .expect("successful operation"); + assert_time_passed(initial, Duration::from_millis(1)); + assert_eq!(conn.requests().len(), 5); + let initial = tokio::time::Instant::now(); + let err = client + .call(test_operation()) + .await + .expect_err("all responses failed"); + // 4 more tries followed by failure + assert_eq!(conn.requests().len(), 9); + assert!(matches!(err, SdkError::ServiceError { .. })); + assert_time_passed(initial, Duration::from_millis(7)); +} + +#[tokio::test] +async fn end_to_end_retry_test() { + fn req() -> http::Request { + http::Request::builder() + .body(SdkBody::from("request body")) + .unwrap() + } + + fn ok() -> http::Response<&'static str> { + http::Response::builder() + .status(200) + .body("response body") + .unwrap() + } + + fn err() -> http::Response<&'static str> { + http::Response::builder() + .status(500) + .body("response body") + .unwrap() + } + // 1 failing response followed by 1 successful response + let events = vec![ + // First operation + (req(), err()), + (req(), err()), + (req(), ok()), + // Second operation + (req(), err()), + (req(), ok()), + // Third operation will fail, only errors + (req(), err()), + (req(), err()), + (req(), err()), + (req(), err()), + ]; + let conn = TestConnection::new(events); + let retry_config = aws_smithy_client::retry::Config::default().with_max_attempts(4); let client = Client::, Identity>::new(conn.clone()) .with_retry_config(retry_config) .with_sleep_impl(Arc::new(TokioSleep::new())); @@ -148,7 +218,7 @@ async fn end_to_end_retry_test() { .call(test_operation()) .await .expect("successful operation"); - assert_time_passed(initial, Duration::from_secs(3)); + assert_time_passed(initial, Duration::from_millis(302)); assert_eq!(resp, "Hello!"); // 3 requests should have been made, 2 failing & one success assert_eq!(conn.requests().len(), 3); @@ -158,7 +228,7 @@ async fn end_to_end_retry_test() { .call(test_operation()) .await .expect("successful operation"); - assert_time_passed(initial, Duration::from_secs(1)); + assert_time_passed(initial, Duration::from_millis(101)); assert_eq!(conn.requests().len(), 5); let initial = tokio::time::Instant::now(); let err = client @@ -168,7 +238,7 @@ async fn end_to_end_retry_test() { // 4 more tries followed by failure assert_eq!(conn.requests().len(), 9); assert!(matches!(err, SdkError::ServiceError { .. })); - assert_time_passed(initial, Duration::from_secs(7)); + assert_time_passed(initial, Duration::from_millis(703)); } /// Validate that time has passed with a 5ms tolerance diff --git a/rust-runtime/aws-smithy-types/src/retry.rs b/rust-runtime/aws-smithy-types/src/retry.rs index 3c707732d2..7ab3ec9b4b 100644 --- a/rust-runtime/aws-smithy-types/src/retry.rs +++ b/rust-runtime/aws-smithy-types/src/retry.rs @@ -134,6 +134,7 @@ impl FromStr for RetryMode { pub struct RetryConfigBuilder { mode: Option, max_attempts: Option, + with_base: Option, } impl RetryConfigBuilder { @@ -154,6 +155,12 @@ impl RetryConfigBuilder { self } + /// The base number of milliseconds to use in the exponential backoff for operation retries. Defaults to 100 ms for all services. DynamoDB, should be set to 50ms. + pub fn set_with_base(&mut self, with_base: Option) -> &mut Self { + self.with_base = with_base; + self + } + /// Sets the retry mode. pub fn mode(mut self, mode: RetryMode) -> Self { self.set_mode(Some(mode)); @@ -166,6 +173,12 @@ impl RetryConfigBuilder { self } + /// The base number of milliseconds to use in the exponential backoff for operation retries. Defaults to 100 ms for all services. DynamoDB, should be set to 50ms. + pub fn with_base(&mut self, with_base: Option) -> &mut Self { + self.with_base = with_base; + self + } + /// Merge two builders together. Values from `other` will only be used as a fallback for values /// from `self` Useful for merging configs from different sources together when you want to /// handle "precedence" per value instead of at the config level @@ -186,6 +199,7 @@ impl RetryConfigBuilder { Self { mode: self.mode.or(other.mode), max_attempts: self.max_attempts.or(other.max_attempts), + with_base: self.with_base.or(other.with_base), } } From 0472c8ad9a35ddc5272301c39d9624b4fa7aa361 Mon Sep 17 00:00:00 2001 From: Daniele <> Date: Sat, 2 Jul 2022 17:12:09 +0200 Subject: [PATCH 2/4] base is milliseconds so u64 --- rust-runtime/aws-smithy-client/src/retry.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rust-runtime/aws-smithy-client/src/retry.rs b/rust-runtime/aws-smithy-client/src/retry.rs index f186939070..c86de70d97 100644 --- a/rust-runtime/aws-smithy-client/src/retry.rs +++ b/rust-runtime/aws-smithy-client/src/retry.rs @@ -58,19 +58,21 @@ pub struct Config { timeout_retry_cost: usize, max_attempts: u32, max_backoff: Duration, - base: u32, + base: u64, } impl Config { /// Override `b` in the exponential backoff computation /// - /// By default, `base` is 100ms. In tests, it can + /// By default, `base` is 100ms. + /// Max is 20s + /// In tests, it can /// be helpful to override this: /// ```no_run /// use aws_smithy_client::retry::Config; /// let conf = Config::default().with_base(50); /// ``` - pub fn with_base(mut self, base: u32) -> Self { + pub fn with_base(mut self, base: u64) -> Self { self.base = base; self } @@ -108,7 +110,7 @@ impl From for Config { const MAX_ATTEMPTS: u32 = 3; const INITIAL_RETRY_TOKENS: usize = 500; const RETRY_COST: usize = 5; -const BASE: u32 = 100; // Defaults to 100 ms for all services except DynamoDB, where it defaults to 50ms +const BASE: u64 = 100; // Defaults to 100 ms for all services except DynamoDB, where it defaults to 50ms /// Manage retries for a service /// From c4ec7c322f717aad31f5ffe2ec642992cdcbeb48 Mon Sep 17 00:00:00 2001 From: Daniele <> Date: Tue, 5 Jul 2022 14:12:06 +0200 Subject: [PATCH 3/4] change changelog.next.toml --- CHANGELOG.next.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.next.toml b/CHANGELOG.next.toml index 1dffbf7f63..530e8655a2 100644 --- a/CHANGELOG.next.toml +++ b/CHANGELOG.next.toml @@ -5,8 +5,8 @@ # meta = { "breaking" = false, "tada" = false, "bug" = false } # author = "rcoh" # -# [[smithy-rs]] -# message = "Fix typos in module documentation for generated crates" -# references = ["smithy-rs#920"] -# meta = { "breaking" = false, "tada" = false, "bug" = false } -# author = "rcoh" \ No newline at end of file +[[smithy-rs]] +message = "Fix retry logic" +references = ["aws-sdk-rust#567"] +meta = { "breaking" = false, "tada" = false, "bug" = false } +author = "ymwjbxxq" From de4042a1e26ad8c7133ac249a37bd5d499f0eaaa Mon Sep 17 00:00:00 2001 From: Daniele <> Date: Tue, 5 Jul 2022 16:46:37 +0200 Subject: [PATCH 4/4] change the comment for with_base --- rust-runtime/aws-smithy-client/src/retry.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust-runtime/aws-smithy-client/src/retry.rs b/rust-runtime/aws-smithy-client/src/retry.rs index c86de70d97..0637821684 100644 --- a/rust-runtime/aws-smithy-client/src/retry.rs +++ b/rust-runtime/aws-smithy-client/src/retry.rs @@ -65,6 +65,7 @@ impl Config { /// Override `b` in the exponential backoff computation /// /// By default, `base` is 100ms. + /// DynamoDb should be set to 50ms /// Max is 20s /// In tests, it can /// be helpful to override this: @@ -110,7 +111,7 @@ impl From for Config { const MAX_ATTEMPTS: u32 = 3; const INITIAL_RETRY_TOKENS: usize = 500; const RETRY_COST: usize = 5; -const BASE: u64 = 100; // Defaults to 100 ms for all services except DynamoDB, where it defaults to 50ms +const BASE: u64 = 100; /// Manage retries for a service ///