Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Override smithy client retry config #1528

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions CHANGELOG.next.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
# meta = { "breaking" = false, "tada" = false, "bug" = false }
# author = "rcoh"
#
# [[smithy-rs]]
# message = "Fix typos in module documentation for generated crates"
# references = ["smithy-rs#920"]
# meta = { "breaking" = false, "tada" = false, "bug" = false }
# author = "rcoh"
[[smithy-rs]]
message = "Fix retry logic"
references = ["aws-sdk-rust#567"]
meta = { "breaking" = false, "tada" = false, "bug" = false }
author = "ymwjbxxq"
57 changes: 30 additions & 27 deletions rust-runtime/aws-smithy-client/src/retry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,22 @@ pub struct Config {
timeout_retry_cost: usize,
max_attempts: u32,
max_backoff: Duration,
base: fn() -> f64,
base: u64,
}

impl Config {
/// Override `b` in the exponential backoff computation
///
/// By default, `base` is a randomly generated value between 0 and 1. In tests, it can
/// By default, `base` is 100ms.
/// DynamoDb should be set to 50ms
/// Max is 20s
/// In tests, it can
/// be helpful to override this:
/// ```no_run
/// use aws_smithy_client::retry::Config;
/// let conf = Config::default().with_base(||1_f64);
/// let conf = Config::default().with_base(50);
/// ```
pub fn with_base(mut self, base: fn() -> f64) -> Self {
pub fn with_base(mut self, base: u64) -> Self {
self.base = base;
self
}
Expand All @@ -94,7 +97,7 @@ impl Default for Config {
max_attempts: MAX_ATTEMPTS,
max_backoff: Duration::from_secs(20),
// by default, use a random base for exponential backoff
base: fastrand::f64,
base: BASE,
}
}
}
Expand All @@ -108,6 +111,7 @@ impl From<aws_smithy_types::retry::RetryConfig> for Config {
const MAX_ATTEMPTS: u32 = 3;
const INITIAL_RETRY_TOKENS: usize = 500;
const RETRY_COST: usize = 5;
const BASE: u64 = 100;

/// Manage retries for a service
///
Expand Down Expand Up @@ -264,16 +268,15 @@ impl RetryHandler {
};
/*
From the retry spec:
b = random number within the range of: 0 <= b <= 1
b = default 100
r = 2
t_i = min(br^i, MAX_BACKOFF);
*/
let r: i32 = 2;
let b = (self.config.base)();
// `self.local.attempts` tracks number of requests made including the initial request
// The initial attempt shouldn't count towards backoff calculations so we subtract it
let backoff = b * (r.pow(self.local.attempts - 1) as f64);
let backoff = Duration::from_secs_f64(backoff).min(self.config.max_backoff);
let backoff = self.config.base as f64 * (r.pow(self.local.attempts - 1) as f64);
let backoff = Duration::from_millis(backoff as u64).min(self.config.max_backoff);
let next = RetryHandler {
local: RequestLocalRetryState {
attempts: self.local.attempts + 1,
Expand Down Expand Up @@ -368,7 +371,7 @@ mod test {
use std::time::Duration;

fn test_config() -> Config {
Config::default().with_base(|| 1_f64)
Config::default().with_base(1)
}

#[test]
Expand All @@ -384,13 +387,13 @@ mod test {
let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(1));
assert_eq!(dur, Duration::from_millis(1));
assert_eq!(policy.retry_quota(), 495);

let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(2));
assert_eq!(dur, Duration::from_millis(2));
assert_eq!(policy.retry_quota(), 490);

let no_retry = policy.should_retry(&RetryKind::Unnecessary);
Expand All @@ -404,13 +407,13 @@ mod test {
let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(1));
assert_eq!(dur, Duration::from_millis(1));
assert_eq!(policy.retry_quota(), 495);

let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(2));
assert_eq!(dur, Duration::from_millis(2));
assert_eq!(policy.retry_quota(), 490);

let no_retry = policy.should_retry(&RetryKind::Error(ErrorKind::ServerError));
Expand All @@ -427,7 +430,7 @@ mod test {
let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(1));
assert_eq!(dur, Duration::from_millis(1));
assert_eq!(policy.retry_quota(), 0);

let no_retry = policy.should_retry(&RetryKind::Error(ErrorKind::ServerError));
Expand All @@ -443,13 +446,13 @@ mod test {
let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::TransientError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(1));
assert_eq!(dur, Duration::from_millis(1));
assert_eq!(policy.retry_quota(), 90);

let (policy, dur) = policy
.should_retry(&RetryKind::Explicit(Duration::from_secs(1)))
.should_retry(&RetryKind::Explicit(Duration::from_millis(1)))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(1));
assert_eq!(dur, Duration::from_millis(1));
assert_eq!(
policy.retry_quota(),
90,
Expand All @@ -472,25 +475,25 @@ mod test {
let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(1));
assert_eq!(dur, Duration::from_millis(1));
assert_eq!(policy.retry_quota(), 495);

let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(2));
assert_eq!(dur, Duration::from_millis(2));
assert_eq!(policy.retry_quota(), 490);

let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(4));
assert_eq!(dur, Duration::from_millis(4));
assert_eq!(policy.retry_quota(), 485);

let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(8));
assert_eq!(dur, Duration::from_millis(8));
assert_eq!(policy.retry_quota(), 480);

let no_retry = policy.should_retry(&RetryKind::Error(ErrorKind::ServerError));
Expand All @@ -502,30 +505,30 @@ mod test {
fn max_backoff_time() {
let mut conf = test_config();
conf.max_attempts = 5;
conf.max_backoff = Duration::from_secs(3);
conf.max_backoff = Duration::from_millis(3);
let policy = Standard::new(conf).new_request_policy(None);
let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(1));
assert_eq!(dur, Duration::from_millis(1));
assert_eq!(policy.retry_quota(), 495);

let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(2));
assert_eq!(dur, Duration::from_millis(2));
assert_eq!(policy.retry_quota(), 490);

let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(3));
assert_eq!(dur, Duration::from_millis(3));
assert_eq!(policy.retry_quota(), 485);

let (policy, dur) = policy
.should_retry(&RetryKind::Error(ErrorKind::ServerError))
.expect("should retry");
assert_eq!(dur, Duration::from_secs(3));
assert_eq!(dur, Duration::from_millis(3));
assert_eq!(policy.retry_quota(), 480);

let no_retry = policy.should_retry(&RetryKind::Error(ErrorKind::ServerError));
Expand Down
80 changes: 75 additions & 5 deletions rust-runtime/aws-smithy-client/tests/e2e_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ fn test_operation() -> Operation<test_operation::TestOperationParser, test_opera
}

#[tokio::test]
async fn end_to_end_retry_test() {
async fn end_to_end_retry_test_overriding_base() {
fn req() -> http::Request<SdkBody> {
http::Request::builder()
.body(SdkBody::from("request body"))
Expand Down Expand Up @@ -138,7 +138,77 @@ async fn end_to_end_retry_test() {
let conn = TestConnection::new(events);
let retry_config = aws_smithy_client::retry::Config::default()
.with_max_attempts(4)
.with_base(|| 1_f64);
.with_base(1);
let client = Client::<TestConnection<_>, Identity>::new(conn.clone())
.with_retry_config(retry_config)
.with_sleep_impl(Arc::new(TokioSleep::new()));
tokio::time::pause();
let initial = tokio::time::Instant::now();
let resp = client
.call(test_operation())
.await
.expect("successful operation");
assert_time_passed(initial, Duration::from_millis(3));
assert_eq!(resp, "Hello!");
// 3 requests should have been made, 2 failing & one success
assert_eq!(conn.requests().len(), 3);

let initial = tokio::time::Instant::now();
client
.call(test_operation())
.await
.expect("successful operation");
assert_time_passed(initial, Duration::from_millis(1));
assert_eq!(conn.requests().len(), 5);
let initial = tokio::time::Instant::now();
let err = client
.call(test_operation())
.await
.expect_err("all responses failed");
// 4 more tries followed by failure
assert_eq!(conn.requests().len(), 9);
assert!(matches!(err, SdkError::ServiceError { .. }));
assert_time_passed(initial, Duration::from_millis(7));
}

#[tokio::test]
async fn end_to_end_retry_test() {
fn req() -> http::Request<SdkBody> {
http::Request::builder()
.body(SdkBody::from("request body"))
.unwrap()
}

fn ok() -> http::Response<&'static str> {
http::Response::builder()
.status(200)
.body("response body")
.unwrap()
}

fn err() -> http::Response<&'static str> {
http::Response::builder()
.status(500)
.body("response body")
.unwrap()
}
// 1 failing response followed by 1 successful response
let events = vec![
// First operation
(req(), err()),
(req(), err()),
(req(), ok()),
// Second operation
(req(), err()),
(req(), ok()),
// Third operation will fail, only errors
(req(), err()),
(req(), err()),
(req(), err()),
(req(), err()),
];
let conn = TestConnection::new(events);
let retry_config = aws_smithy_client::retry::Config::default().with_max_attempts(4);
let client = Client::<TestConnection<_>, Identity>::new(conn.clone())
.with_retry_config(retry_config)
.with_sleep_impl(Arc::new(TokioSleep::new()));
Expand All @@ -148,7 +218,7 @@ async fn end_to_end_retry_test() {
.call(test_operation())
.await
.expect("successful operation");
assert_time_passed(initial, Duration::from_secs(3));
assert_time_passed(initial, Duration::from_millis(302));
assert_eq!(resp, "Hello!");
// 3 requests should have been made, 2 failing & one success
assert_eq!(conn.requests().len(), 3);
Expand All @@ -158,7 +228,7 @@ async fn end_to_end_retry_test() {
.call(test_operation())
.await
.expect("successful operation");
assert_time_passed(initial, Duration::from_secs(1));
assert_time_passed(initial, Duration::from_millis(101));
assert_eq!(conn.requests().len(), 5);
let initial = tokio::time::Instant::now();
let err = client
Expand All @@ -168,7 +238,7 @@ async fn end_to_end_retry_test() {
// 4 more tries followed by failure
assert_eq!(conn.requests().len(), 9);
assert!(matches!(err, SdkError::ServiceError { .. }));
assert_time_passed(initial, Duration::from_secs(7));
assert_time_passed(initial, Duration::from_millis(703));
}

/// Validate that time has passed with a 5ms tolerance
Expand Down
14 changes: 14 additions & 0 deletions rust-runtime/aws-smithy-types/src/retry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ impl FromStr for RetryMode {
pub struct RetryConfigBuilder {
mode: Option<RetryMode>,
max_attempts: Option<u32>,
with_base: Option<u32>,
}

impl RetryConfigBuilder {
Expand All @@ -154,6 +155,12 @@ impl RetryConfigBuilder {
self
}

/// The base number of milliseconds to use in the exponential backoff for operation retries. Defaults to 100 ms for all services. DynamoDB, should be set to 50ms.
pub fn set_with_base(&mut self, with_base: Option<u32>) -> &mut Self {
self.with_base = with_base;
self
}

/// Sets the retry mode.
pub fn mode(mut self, mode: RetryMode) -> Self {
self.set_mode(Some(mode));
Expand All @@ -166,6 +173,12 @@ impl RetryConfigBuilder {
self
}

/// The base number of milliseconds to use in the exponential backoff for operation retries. Defaults to 100 ms for all services. DynamoDB, should be set to 50ms.
pub fn with_base(&mut self, with_base: Option<u32>) -> &mut Self {
self.with_base = with_base;
self
}

/// Merge two builders together. Values from `other` will only be used as a fallback for values
/// from `self` Useful for merging configs from different sources together when you want to
/// handle "precedence" per value instead of at the config level
Expand All @@ -186,6 +199,7 @@ impl RetryConfigBuilder {
Self {
mode: self.mode.or(other.mode),
max_attempts: self.max_attempts.or(other.max_attempts),
with_base: self.with_base.or(other.with_base),
}
}

Expand Down