diff --git a/api/envoy/config/route/v3/route_components.proto b/api/envoy/config/route/v3/route_components.proto index 3643d449272e..857bb3a6d827 100644 --- a/api/envoy/config/route/v3/route_components.proto +++ b/api/envoy/config/route/v3/route_components.proto @@ -1050,10 +1050,15 @@ message RouteAction { } // HTTP retry :ref:`architecture overview `. -// [#next-free-field: 11] +// [#next-free-field: 12] message RetryPolicy { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.route.RetryPolicy"; + enum ResetHeaderFormat { + SECONDS = 0; + UNIX_TIMESTAMP = 1; + } + message RetryPriority { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.route.RetryPolicy.RetryPriority"; @@ -1104,6 +1109,69 @@ message RetryPolicy { google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {}}]; } + message ResetHeader { + string name = 1 + [(validate.rules).string = {min_bytes: 1 well_known_regex: HTTP_HEADER_NAME strict: false}]; + + ResetHeaderFormat format = 2 [(validate.rules).enum = {defined_only: true}]; + } + + // A retry back-off strategy that applies when the upstream server rate limits + // the request. + // + // Given this configuration: + // + // .. code-block:: yaml + // + // rate_limited_retry_back_off: + // reset_headers: + // - name: Retry-After + // format: SECONDS + // - name: X-RateLimit-Reset + // format: UNIX_TIMESTAMP + // max_interval: "300s" + // + // The following algorithm will apply: + // + // 1. If the response contains the header ``Retry-After`` its value must be on + // the form ``120`` (an integer that represents the number of seconds to + // wait before retrying). If so, this value is used as the back-off interval. + // 2. Otherwise, if the response contains the header ``X-RateLimit-Reset`` its + // value must be on the form ``1595320702`` (an integer that represents the + // point in time at which to retry, as a Unix timestamp in seconds). If so, + // the current time is subtracted from this value and the result is used as + // the back-off interval. + // 3. Otherwise, Envoy will use the default + // :ref:`exponential back-off ` + // strategy. + // + // No matter which format is used, if the resulting back-off interval exceeds + // ``max_interval`` it is discarded and the next header in ``reset_headers`` + // is tried. If a request timeout is configured for the route it will further + // limit how long the request will be allowed to run. + // + // To prevent many clients retrying at the same point in time jitter is added + // to the back-off interval, so the resulting interval is decided by taking: + // ``random(interval, interval * 1.5)``. + // + // .. attention:: + // + // Configuring ``rate_limited_retry_back_off`` will not by itself cause a request + // to be retried. You will still need to configure the right retry policy to match + // the responses from the upstream server. + message RateLimitedRetryBackOff { + // Specifies the reset headers (like ``Retry-After`` or ``X-RateLimit-Reset``) + // to match against the response. Headers are tried in order, and matched case + // insensitive. The first header to be parsed successfully is used. If no headers + // match the default exponential back-off is used instead. + repeated ResetHeader reset_headers = 1 [(validate.rules).repeated = {min_items: 1}]; + + // Specifies the maximum back off interval that Envoy will allow. If a reset + // header contains an interval longer than this then it will be discarded and + // the next header will be tried. Defaults to 300 seconds. + google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {}}]; + } + // Specifies the conditions under which retry takes place. These are the same // conditions documented for :ref:`config_http_filters_router_x-envoy-retry-on` and // :ref:`config_http_filters_router_x-envoy-retry-grpc-on`. @@ -1147,13 +1215,22 @@ message RetryPolicy { // HTTP status codes that should trigger a retry in addition to those specified by retry_on. repeated uint32 retriable_status_codes = 7; - // Specifies parameters that control retry back off. This parameter is optional, in which case the + // Specifies parameters that control exponential retry back off. This parameter is optional, in which case the // default base interval is 25 milliseconds or, if set, the current value of the // `upstream.base_retry_backoff_ms` runtime parameter. The default maximum interval is 10 times // the base interval. The documentation for :ref:`config_http_filters_router_x-envoy-max-retries` // describes Envoy's back-off algorithm. RetryBackOff retry_back_off = 8; + // Specifies parameters that control a retry back-off strategy that is used + // when the request is rate limited by the upstream server. The server may + // return a response header like ``Retry-After`` or ``X-RateLimit-Reset`` to + // provide feedback to the client on how long to wait before retrying. If + // configured, this back-off strategy will be used instead of the + // default exponential back off strategy (configured using `retry_back_off`) + // whenever a response includes the matching headers. + RateLimitedRetryBackOff rate_limited_retry_back_off = 11; + // HTTP response headers that trigger a retry if present in the response. A retry will be // triggered if any of the header matches match the upstream response headers. // The field is only consulted if 'retriable-headers' retry policy is active. diff --git a/api/envoy/config/route/v4alpha/route_components.proto b/api/envoy/config/route/v4alpha/route_components.proto index d57cdef06def..3aac37fbab3d 100644 --- a/api/envoy/config/route/v4alpha/route_components.proto +++ b/api/envoy/config/route/v4alpha/route_components.proto @@ -1028,10 +1028,15 @@ message RouteAction { } // HTTP retry :ref:`architecture overview `. -// [#next-free-field: 11] +// [#next-free-field: 12] message RetryPolicy { option (udpa.annotations.versioning).previous_message_type = "envoy.config.route.v3.RetryPolicy"; + enum ResetHeaderFormat { + SECONDS = 0; + UNIX_TIMESTAMP = 1; + } + message RetryPriority { option (udpa.annotations.versioning).previous_message_type = "envoy.config.route.v3.RetryPolicy.RetryPriority"; @@ -1082,6 +1087,75 @@ message RetryPolicy { google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {}}]; } + message ResetHeader { + option (udpa.annotations.versioning).previous_message_type = + "envoy.config.route.v3.RetryPolicy.ResetHeader"; + + string name = 1 + [(validate.rules).string = {min_bytes: 1 well_known_regex: HTTP_HEADER_NAME strict: false}]; + + ResetHeaderFormat format = 2 [(validate.rules).enum = {defined_only: true}]; + } + + // A retry back-off strategy that applies when the upstream server rate limits + // the request. + // + // Given this configuration: + // + // .. code-block:: yaml + // + // rate_limited_retry_back_off: + // reset_headers: + // - name: Retry-After + // format: SECONDS + // - name: X-RateLimit-Reset + // format: UNIX_TIMESTAMP + // max_interval: "300s" + // + // The following algorithm will apply: + // + // 1. If the response contains the header ``Retry-After`` its value must be on + // the form ``120`` (an integer that represents the number of seconds to + // wait before retrying). If so, this value is used as the back-off interval. + // 2. Otherwise, if the response contains the header ``X-RateLimit-Reset`` its + // value must be on the form ``1595320702`` (an integer that represents the + // point in time at which to retry, as a Unix timestamp in seconds). If so, + // the current time is subtracted from this value and the result is used as + // the back-off interval. + // 3. Otherwise, Envoy will use the default + // :ref:`exponential back-off ` + // strategy. + // + // No matter which format is used, if the resulting back-off interval exceeds + // ``max_interval`` it is discarded and the next header in ``reset_headers`` + // is tried. If a request timeout is configured for the route it will further + // limit how long the request will be allowed to run. + // + // To prevent many clients retrying at the same point in time jitter is added + // to the back-off interval, so the resulting interval is decided by taking: + // ``random(interval, interval * 1.5)``. + // + // .. attention:: + // + // Configuring ``rate_limited_retry_back_off`` will not by itself cause a request + // to be retried. You will still need to configure the right retry policy to match + // the responses from the upstream server. + message RateLimitedRetryBackOff { + option (udpa.annotations.versioning).previous_message_type = + "envoy.config.route.v3.RetryPolicy.RateLimitedRetryBackOff"; + + // Specifies the reset headers (like ``Retry-After`` or ``X-RateLimit-Reset``) + // to match against the response. Headers are tried in order, and matched case + // insensitive. The first header to be parsed successfully is used. If no headers + // match the default exponential back-off is used instead. + repeated ResetHeader reset_headers = 1 [(validate.rules).repeated = {min_items: 1}]; + + // Specifies the maximum back off interval that Envoy will allow. If a reset + // header contains an interval longer than this then it will be discarded and + // the next header will be tried. Defaults to 300 seconds. + google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {}}]; + } + // Specifies the conditions under which retry takes place. These are the same // conditions documented for :ref:`config_http_filters_router_x-envoy-retry-on` and // :ref:`config_http_filters_router_x-envoy-retry-grpc-on`. @@ -1124,13 +1198,22 @@ message RetryPolicy { // HTTP status codes that should trigger a retry in addition to those specified by retry_on. repeated uint32 retriable_status_codes = 7; - // Specifies parameters that control retry back off. This parameter is optional, in which case the + // Specifies parameters that control exponential retry back off. This parameter is optional, in which case the // default base interval is 25 milliseconds or, if set, the current value of the // `upstream.base_retry_backoff_ms` runtime parameter. The default maximum interval is 10 times // the base interval. The documentation for :ref:`config_http_filters_router_x-envoy-max-retries` // describes Envoy's back-off algorithm. RetryBackOff retry_back_off = 8; + // Specifies parameters that control a retry back-off strategy that is used + // when the request is rate limited by the upstream server. The server may + // return a response header like ``Retry-After`` or ``X-RateLimit-Reset`` to + // provide feedback to the client on how long to wait before retrying. If + // configured, this back-off strategy will be used instead of the + // default exponential back off strategy (configured using `retry_back_off`) + // whenever a response includes the matching headers. + RateLimitedRetryBackOff rate_limited_retry_back_off = 11; + // HTTP response headers that trigger a retry if present in the response. A retry will be // triggered if any of the header matches match the upstream response headers. // The field is only consulted if 'retriable-headers' retry policy is active. diff --git a/docs/root/configuration/http/http_filters/router_filter.rst b/docs/root/configuration/http/http_filters/router_filter.rst index 4ca285e9eda7..0c5ecb353783 100644 --- a/docs/root/configuration/http/http_filters/router_filter.rst +++ b/docs/root/configuration/http/http_filters/router_filter.rst @@ -42,7 +42,7 @@ A few notes on how Envoy does retries: retries. Thus if the request timeout is set to 3s, and the first request attempt takes 2.7s, the retry (including back-off) has .3s to complete. This is by design to avoid an exponential retry/timeout explosion. -* Envoy uses a fully jittered exponential back-off algorithm for retries with a default base +* By default, Envoy uses a fully jittered exponential back-off algorithm for retries with a default base interval of 25ms. Given a base interval B and retry number N, the back-off for the retry is in the range :math:`\big[0, (2^N-1)B\big)`. For example, given the default interval, the first retry will be delayed randomly by 0-24ms, the 2nd by 0-74ms, the 3rd by 0-174ms, and so on. The @@ -51,6 +51,13 @@ A few notes on how Envoy does retries: upstream.base_retry_backoff_ms runtime parameter. The back-off intervals can also be modified by configuring the retry policy's :ref:`retry back-off `. +* Envoy can also be configured to use feedback from the upstream server to decide the interval between + retries. Response headers like ``Retry-After`` or ``X-RateLimit-Reset`` instruct the client how long + to wait before re-trying. The retry policy's + :ref:`rate limited retry back off ` + strategy can be configured to expect a particular header, and if that header is present in the response Envoy + will use its value to decide the back-off. If the header is not present, or if it cannot be parsed + successfully, Envoy will use the default exponential back-off algorithm instead. .. _config_http_filters_router_x-envoy-retry-on: diff --git a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst index 5d956c28d2b3..e4ba160313e7 100644 --- a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst +++ b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst @@ -74,6 +74,8 @@ Every cluster has a statistics tree rooted at *cluster..* with the followi upstream_rq_rx_reset, Counter, Total requests that were reset remotely upstream_rq_tx_reset, Counter, Total requests that were reset locally upstream_rq_retry, Counter, Total request retries + upstream_rq_retry_backoff_exponential, Counter, Total retries using the exponential backoff strategy + upstream_rq_retry_backoff_ratelimited, Counter, Total retries using the ratelimited backoff strategy upstream_rq_retry_limit_exceeded, Counter, Total requests not retried due to exceeding :ref:`the configured number of maximum retries ` upstream_rq_retry_success, Counter, Total request retry successes upstream_rq_retry_overflow, Counter, Total requests not retried due to circuit breaking or exceeding the :ref:`retry budget ` diff --git a/docs/root/intro/arch_overview/http/http_routing.rst b/docs/root/intro/arch_overview/http/http_routing.rst index ff24b0fd512e..884963dfcb80 100644 --- a/docs/root/intro/arch_overview/http/http_routing.rst +++ b/docs/root/intro/arch_overview/http/http_routing.rst @@ -106,8 +106,9 @@ Envoy allows retries to be configured both in the :ref:`route configuration ` as well as for specific requests via :ref:`request headers `. The following configurations are possible: -* **Maximum number of retries**: Envoy will continue to retry any number of times. An exponential - backoff algorithm is used between each retry. Additionally, *all retries are contained within the +* **Maximum number of retries**: Envoy will continue to retry any number of times. The intervals between + retries are decided either by an exponential backoff algorithm (the default), or based on feedback + from the upstream server via headers (if present). Additionally, *all retries are contained within the overall request timeout*. This avoids long request times due to a large number of retries. * **Retry conditions**: Envoy can retry on different types of conditions depending on application requirements. For example, network failure, all 5xx response codes, idempotent 4xx response codes, diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index 9ca7c87a0c55..f7c2f197efce 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -63,6 +63,7 @@ New Features * ratelimit: added :ref:`enable_x_ratelimit_headers ` option to enable `X-RateLimit-*` headers as defined in `draft RFC `_. * rbac filter: added a log action to the :ref:`RBAC filter ` which sets dynamic metadata to inform access loggers whether to log. * redis: added fault injection support :ref:`fault injection for redis proxy `, described further in :ref:`configuration documentation `. +* router: added a new :ref:`rate limited retry back off ` strategy that uses headers like `Retry-After` or `X-RateLimit-Reset` to decide the back off interval. * router: added new :ref:`envoy-ratelimited` retry policy, which allows retrying envoy's own rate limited responses. diff --git a/generated_api_shadow/envoy/config/route/v3/route_components.proto b/generated_api_shadow/envoy/config/route/v3/route_components.proto index 0b8fe7908603..1c7e26120697 100644 --- a/generated_api_shadow/envoy/config/route/v3/route_components.proto +++ b/generated_api_shadow/envoy/config/route/v3/route_components.proto @@ -1061,10 +1061,15 @@ message RouteAction { } // HTTP retry :ref:`architecture overview `. -// [#next-free-field: 11] +// [#next-free-field: 12] message RetryPolicy { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.route.RetryPolicy"; + enum ResetHeaderFormat { + SECONDS = 0; + UNIX_TIMESTAMP = 1; + } + message RetryPriority { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.route.RetryPolicy.RetryPriority"; @@ -1111,6 +1116,69 @@ message RetryPolicy { google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {}}]; } + message ResetHeader { + string name = 1 + [(validate.rules).string = {min_bytes: 1 well_known_regex: HTTP_HEADER_NAME strict: false}]; + + ResetHeaderFormat format = 2 [(validate.rules).enum = {defined_only: true}]; + } + + // A retry back-off strategy that applies when the upstream server rate limits + // the request. + // + // Given this configuration: + // + // .. code-block:: yaml + // + // rate_limited_retry_back_off: + // reset_headers: + // - name: Retry-After + // format: SECONDS + // - name: X-RateLimit-Reset + // format: UNIX_TIMESTAMP + // max_interval: "300s" + // + // The following algorithm will apply: + // + // 1. If the response contains the header ``Retry-After`` its value must be on + // the form ``120`` (an integer that represents the number of seconds to + // wait before retrying). If so, this value is used as the back-off interval. + // 2. Otherwise, if the response contains the header ``X-RateLimit-Reset`` its + // value must be on the form ``1595320702`` (an integer that represents the + // point in time at which to retry, as a Unix timestamp in seconds). If so, + // the current time is subtracted from this value and the result is used as + // the back-off interval. + // 3. Otherwise, Envoy will use the default + // :ref:`exponential back-off ` + // strategy. + // + // No matter which format is used, if the resulting back-off interval exceeds + // ``max_interval`` it is discarded and the next header in ``reset_headers`` + // is tried. If a request timeout is configured for the route it will further + // limit how long the request will be allowed to run. + // + // To prevent many clients retrying at the same point in time jitter is added + // to the back-off interval, so the resulting interval is decided by taking: + // ``random(interval, interval * 1.5)``. + // + // .. attention:: + // + // Configuring ``rate_limited_retry_back_off`` will not by itself cause a request + // to be retried. You will still need to configure the right retry policy to match + // the responses from the upstream server. + message RateLimitedRetryBackOff { + // Specifies the reset headers (like ``Retry-After`` or ``X-RateLimit-Reset``) + // to match against the response. Headers are tried in order, and matched case + // insensitive. The first header to be parsed successfully is used. If no headers + // match the default exponential back-off is used instead. + repeated ResetHeader reset_headers = 1 [(validate.rules).repeated = {min_items: 1}]; + + // Specifies the maximum back off interval that Envoy will allow. If a reset + // header contains an interval longer than this then it will be discarded and + // the next header will be tried. Defaults to 300 seconds. + google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {}}]; + } + // Specifies the conditions under which retry takes place. These are the same // conditions documented for :ref:`config_http_filters_router_x-envoy-retry-on` and // :ref:`config_http_filters_router_x-envoy-retry-grpc-on`. @@ -1154,13 +1222,22 @@ message RetryPolicy { // HTTP status codes that should trigger a retry in addition to those specified by retry_on. repeated uint32 retriable_status_codes = 7; - // Specifies parameters that control retry back off. This parameter is optional, in which case the + // Specifies parameters that control exponential retry back off. This parameter is optional, in which case the // default base interval is 25 milliseconds or, if set, the current value of the // `upstream.base_retry_backoff_ms` runtime parameter. The default maximum interval is 10 times // the base interval. The documentation for :ref:`config_http_filters_router_x-envoy-max-retries` // describes Envoy's back-off algorithm. RetryBackOff retry_back_off = 8; + // Specifies parameters that control a retry back-off strategy that is used + // when the request is rate limited by the upstream server. The server may + // return a response header like ``Retry-After`` or ``X-RateLimit-Reset`` to + // provide feedback to the client on how long to wait before retrying. If + // configured, this back-off strategy will be used instead of the + // default exponential back off strategy (configured using `retry_back_off`) + // whenever a response includes the matching headers. + RateLimitedRetryBackOff rate_limited_retry_back_off = 11; + // HTTP response headers that trigger a retry if present in the response. A retry will be // triggered if any of the header matches match the upstream response headers. // The field is only consulted if 'retriable-headers' retry policy is active. diff --git a/generated_api_shadow/envoy/config/route/v4alpha/route_components.proto b/generated_api_shadow/envoy/config/route/v4alpha/route_components.proto index 0cad79e8aa77..beaafe707ddd 100644 --- a/generated_api_shadow/envoy/config/route/v4alpha/route_components.proto +++ b/generated_api_shadow/envoy/config/route/v4alpha/route_components.proto @@ -1056,10 +1056,15 @@ message RouteAction { } // HTTP retry :ref:`architecture overview `. -// [#next-free-field: 11] +// [#next-free-field: 12] message RetryPolicy { option (udpa.annotations.versioning).previous_message_type = "envoy.config.route.v3.RetryPolicy"; + enum ResetHeaderFormat { + SECONDS = 0; + UNIX_TIMESTAMP = 1; + } + message RetryPriority { option (udpa.annotations.versioning).previous_message_type = "envoy.config.route.v3.RetryPolicy.RetryPriority"; @@ -1110,6 +1115,75 @@ message RetryPolicy { google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {}}]; } + message ResetHeader { + option (udpa.annotations.versioning).previous_message_type = + "envoy.config.route.v3.RetryPolicy.ResetHeader"; + + string name = 1 + [(validate.rules).string = {min_bytes: 1 well_known_regex: HTTP_HEADER_NAME strict: false}]; + + ResetHeaderFormat format = 2 [(validate.rules).enum = {defined_only: true}]; + } + + // A retry back-off strategy that applies when the upstream server rate limits + // the request. + // + // Given this configuration: + // + // .. code-block:: yaml + // + // rate_limited_retry_back_off: + // reset_headers: + // - name: Retry-After + // format: SECONDS + // - name: X-RateLimit-Reset + // format: UNIX_TIMESTAMP + // max_interval: "300s" + // + // The following algorithm will apply: + // + // 1. If the response contains the header ``Retry-After`` its value must be on + // the form ``120`` (an integer that represents the number of seconds to + // wait before retrying). If so, this value is used as the back-off interval. + // 2. Otherwise, if the response contains the header ``X-RateLimit-Reset`` its + // value must be on the form ``1595320702`` (an integer that represents the + // point in time at which to retry, as a Unix timestamp in seconds). If so, + // the current time is subtracted from this value and the result is used as + // the back-off interval. + // 3. Otherwise, Envoy will use the default + // :ref:`exponential back-off ` + // strategy. + // + // No matter which format is used, if the resulting back-off interval exceeds + // ``max_interval`` it is discarded and the next header in ``reset_headers`` + // is tried. If a request timeout is configured for the route it will further + // limit how long the request will be allowed to run. + // + // To prevent many clients retrying at the same point in time jitter is added + // to the back-off interval, so the resulting interval is decided by taking: + // ``random(interval, interval * 1.5)``. + // + // .. attention:: + // + // Configuring ``rate_limited_retry_back_off`` will not by itself cause a request + // to be retried. You will still need to configure the right retry policy to match + // the responses from the upstream server. + message RateLimitedRetryBackOff { + option (udpa.annotations.versioning).previous_message_type = + "envoy.config.route.v3.RetryPolicy.RateLimitedRetryBackOff"; + + // Specifies the reset headers (like ``Retry-After`` or ``X-RateLimit-Reset``) + // to match against the response. Headers are tried in order, and matched case + // insensitive. The first header to be parsed successfully is used. If no headers + // match the default exponential back-off is used instead. + repeated ResetHeader reset_headers = 1 [(validate.rules).repeated = {min_items: 1}]; + + // Specifies the maximum back off interval that Envoy will allow. If a reset + // header contains an interval longer than this then it will be discarded and + // the next header will be tried. Defaults to 300 seconds. + google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {}}]; + } + // Specifies the conditions under which retry takes place. These are the same // conditions documented for :ref:`config_http_filters_router_x-envoy-retry-on` and // :ref:`config_http_filters_router_x-envoy-retry-grpc-on`. @@ -1152,13 +1226,22 @@ message RetryPolicy { // HTTP status codes that should trigger a retry in addition to those specified by retry_on. repeated uint32 retriable_status_codes = 7; - // Specifies parameters that control retry back off. This parameter is optional, in which case the + // Specifies parameters that control exponential retry back off. This parameter is optional, in which case the // default base interval is 25 milliseconds or, if set, the current value of the // `upstream.base_retry_backoff_ms` runtime parameter. The default maximum interval is 10 times // the base interval. The documentation for :ref:`config_http_filters_router_x-envoy-max-retries` // describes Envoy's back-off algorithm. RetryBackOff retry_back_off = 8; + // Specifies parameters that control a retry back-off strategy that is used + // when the request is rate limited by the upstream server. The server may + // return a response header like ``Retry-After`` or ``X-RateLimit-Reset`` to + // provide feedback to the client on how long to wait before retrying. If + // configured, this back-off strategy will be used instead of the + // default exponential back off strategy (configured using `retry_back_off`) + // whenever a response includes the matching headers. + RateLimitedRetryBackOff rate_limited_retry_back_off = 11; + // HTTP response headers that trigger a retry if present in the response. A retry will be // triggered if any of the header matches match the upstream response headers. // The field is only consulted if 'retriable-headers' retry policy is active. diff --git a/include/envoy/router/BUILD b/include/envoy/router/BUILD index 85b6058ed878..f3e5c450d04e 100644 --- a/include/envoy/router/BUILD +++ b/include/envoy/router/BUILD @@ -58,6 +58,7 @@ envoy_cc_library( "//include/envoy/access_log:access_log_interface", "//include/envoy/common:conn_pool_interface", "//include/envoy/common:matchers_interface", + "//include/envoy/common:time_interface", "//include/envoy/config:typed_metadata_interface", "//include/envoy/http:codec_interface", "//include/envoy/http:codes_interface", diff --git a/include/envoy/router/router.h b/include/envoy/router/router.h index 7d37cf02cf32..271d838d7b4f 100644 --- a/include/envoy/router/router.h +++ b/include/envoy/router/router.h @@ -150,6 +150,23 @@ class CorsPolicy { virtual bool shadowEnabled() const PURE; }; +/** + * An interface to be implemented by rate limited reset header parsers. + */ +class ResetHeaderParser { +public: + virtual ~ResetHeaderParser() = default; + + /** + * Iterate over the headers, choose the first one that matches by name, and try to parse its + * value. + */ + virtual absl::optional + parseInterval(TimeSource& time_source, const Http::HeaderMap& headers) const PURE; +}; + +using ResetHeaderParserSharedPtr = std::shared_ptr; + /** * Route level retry policy. */ @@ -235,6 +252,18 @@ class RetryPolicy { * @return absl::optional maximum retry interval */ virtual absl::optional maxInterval() const PURE; + + /** + * @return std::vector& list of reset header + * parsers that will be used to extract a retry back-off interval from response headers. + */ + virtual const std::vector& resetHeaders() const PURE; + + /** + * @return std::chrono::milliseconds upper limit placed on a retry + * back-off interval parsed from response headers. + */ + virtual std::chrono::milliseconds resetMaxInterval() const PURE; }; /** @@ -294,6 +323,14 @@ class RetryState { */ virtual bool enabled() PURE; + /** + * Attempts to parse any matching rate limited reset headers (RFC 7231), either in the form of an + * interval directly, or in the form of a unix timestamp relative to the current system time. + * @return the interval if parsing was successful. + */ + virtual absl::optional + parseResetInterval(const Http::ResponseHeaderMap& response_headers) const PURE; + /** * Determine whether a request should be retried based on the response headers. * @param response_headers supplies the response headers. diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index ebdc1575eb8f..9330b0e8fdb5 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -577,6 +577,8 @@ class PrioritySet { COUNTER(upstream_rq_pending_total) \ COUNTER(upstream_rq_per_try_timeout) \ COUNTER(upstream_rq_retry) \ + COUNTER(upstream_rq_retry_backoff_exponential) \ + COUNTER(upstream_rq_retry_backoff_ratelimited) \ COUNTER(upstream_rq_retry_limit_exceeded) \ COUNTER(upstream_rq_retry_overflow) \ COUNTER(upstream_rq_retry_success) \ diff --git a/source/common/common/backoff_strategy.cc b/source/common/common/backoff_strategy.cc index c9b5b61b733b..4f08e4332fa6 100644 --- a/source/common/common/backoff_strategy.cc +++ b/source/common/common/backoff_strategy.cc @@ -2,15 +2,15 @@ namespace Envoy { -JitteredBackOffStrategy::JitteredBackOffStrategy(uint64_t base_interval, uint64_t max_interval, - Random::RandomGenerator& random) +JitteredExponentialBackOffStrategy::JitteredExponentialBackOffStrategy( + uint64_t base_interval, uint64_t max_interval, Random::RandomGenerator& random) : base_interval_(base_interval), max_interval_(max_interval), next_interval_(base_interval), random_(random) { ASSERT(base_interval_ > 0); ASSERT(base_interval_ <= max_interval_); } -uint64_t JitteredBackOffStrategy::nextBackOffMs() { +uint64_t JitteredExponentialBackOffStrategy::nextBackOffMs() { const uint64_t backoff = next_interval_; ASSERT(backoff > 0); // Set next_interval_ to max_interval_ if doubling the interval would exceed the max or overflow. @@ -22,7 +22,16 @@ uint64_t JitteredBackOffStrategy::nextBackOffMs() { return std::min(random_.random() % backoff, max_interval_); } -void JitteredBackOffStrategy::reset() { next_interval_ = base_interval_; } +void JitteredExponentialBackOffStrategy::reset() { next_interval_ = base_interval_; } + +JitteredLowerBoundBackOffStrategy::JitteredLowerBoundBackOffStrategy( + uint64_t min_interval, Random::RandomGenerator& random) + : min_interval_(min_interval), random_(random) {} + +uint64_t JitteredLowerBoundBackOffStrategy::nextBackOffMs() { + // random(min_interval_, 1.5 * min_interval_) + return (random_.random() % (min_interval_ >> 1)) + min_interval_; +} FixedBackOffStrategy::FixedBackOffStrategy(uint64_t interval_ms) : interval_ms_(interval_ms) { ASSERT(interval_ms_ > 0); diff --git a/source/common/common/backoff_strategy.h b/source/common/common/backoff_strategy.h index 2484f3e11b20..c1928632979a 100644 --- a/source/common/common/backoff_strategy.h +++ b/source/common/common/backoff_strategy.h @@ -13,7 +13,7 @@ namespace Envoy { /** * Implementation of BackOffStrategy that uses a fully jittered exponential backoff algorithm. */ -class JitteredBackOffStrategy : public BackOffStrategy { +class JitteredExponentialBackOffStrategy : public BackOffStrategy { public: /** @@ -23,8 +23,8 @@ class JitteredBackOffStrategy : public BackOffStrategy { * @param max_interval the cap on the next backoff value. * @param random the random generator. */ - JitteredBackOffStrategy(uint64_t base_interval, uint64_t max_interval, - Random::RandomGenerator& random); + JitteredExponentialBackOffStrategy(uint64_t base_interval, uint64_t max_interval, + Random::RandomGenerator& random); // BackOffStrategy methods uint64_t nextBackOffMs() override; @@ -37,6 +37,28 @@ class JitteredBackOffStrategy : public BackOffStrategy { Random::RandomGenerator& random_; }; +/** + * Implementation of BackOffStrategy that returns random values in the range + * [min_interval, 1.5 * min_interval). + */ +class JitteredLowerBoundBackOffStrategy : public BackOffStrategy { +public: + /** + * Constructs fully jittered backoff strategy. + * @param min_interval the lower bound on the next backoff value. + * @param random the random generator. + */ + JitteredLowerBoundBackOffStrategy(uint64_t min_interval, Random::RandomGenerator& random); + + // BackOffStrategy methods + uint64_t nextBackOffMs() override; + void reset() override {} + +private: + const uint64_t min_interval_; + Random::RandomGenerator& random_; +}; + /** * Implementation of BackOffStrategy that uses a fixed backoff. */ diff --git a/source/common/config/datasource.cc b/source/common/config/datasource.cc index d3e286d0b27a..776061a61be2 100644 --- a/source/common/config/datasource.cc +++ b/source/common/config/datasource.cc @@ -64,8 +64,8 @@ RemoteAsyncDataProvider::RemoteAsyncDataProvider( } } - backoff_strategy_ = - std::make_unique(base_interval_ms, max_interval_ms, random); + backoff_strategy_ = std::make_unique(base_interval_ms, + max_interval_ms, random); retry_timer_ = dispatcher.createTimer([this]() -> void { start(); }); manager.add(init_target_); diff --git a/source/common/config/grpc_stream.h b/source/common/config/grpc_stream.h index b922d8c09f0f..f6673402df4f 100644 --- a/source/common/config/grpc_stream.h +++ b/source/common/config/grpc_stream.h @@ -48,8 +48,8 @@ class GrpcStream : public Grpc::AsyncStreamCallbacks, // TODO(htuch): Make this configurable. static constexpr uint32_t RetryInitialDelayMs = 500; static constexpr uint32_t RetryMaxDelayMs = 30000; // Do not cross more than 30s - backoff_strategy_ = - std::make_unique(RetryInitialDelayMs, RetryMaxDelayMs, random_); + backoff_strategy_ = std::make_unique( + RetryInitialDelayMs, RetryMaxDelayMs, random_); } void establishNewStream() { diff --git a/source/common/config/utility.h b/source/common/config/utility.h index 0e5f42c6c847..61b629f927f1 100644 --- a/source/common/config/utility.h +++ b/source/common/config/utility.h @@ -431,7 +431,8 @@ class Utility { "dns_failure_refresh_rate must have max_interval greater than " "or equal to the base_interval"); } - return std::make_unique(base_interval_ms, max_interval_ms, random); + return std::make_unique(base_interval_ms, max_interval_ms, + random); } return std::make_unique(dns_refresh_rate_ms); } diff --git a/source/common/http/async_client_impl.h b/source/common/http/async_client_impl.h index a4e2e7c86b84..6342b0a3af1f 100644 --- a/source/common/http/async_client_impl.h +++ b/source/common/http/async_client_impl.h @@ -152,10 +152,17 @@ class AsyncStreamImpl : public AsyncClient::Stream, return absl::nullopt; } absl::optional maxInterval() const override { return absl::nullopt; } + const std::vector& resetHeaders() const override { + return reset_headers_; + } + std::chrono::milliseconds resetMaxInterval() const override { + return std::chrono::milliseconds(300000); + } const std::vector retriable_status_codes_{}; const std::vector retriable_headers_{}; const std::vector retriable_request_headers_{}; + const std::vector reset_headers_{}; }; struct NullConfig : public Router::Config { diff --git a/source/common/router/BUILD b/source/common/router/BUILD index 5f9e2b8cbc96..a2825b775dda 100644 --- a/source/common/router/BUILD +++ b/source/common/router/BUILD @@ -37,6 +37,7 @@ envoy_cc_library( ":header_formatter_lib", ":header_parser_lib", ":metadatamatchcriteria_lib", + ":reset_header_parser_lib", ":retry_state_lib", ":router_ratelimit_lib", ":tls_context_match_criteria_lib", @@ -378,6 +379,20 @@ envoy_cc_library( ], ) +envoy_cc_library( + name = "reset_header_parser_lib", + srcs = ["reset_header_parser.cc"], + hdrs = ["reset_header_parser.h"], + deps = [ + "//include/envoy/common:time_interface", + "//include/envoy/http:header_map_interface", + "//include/envoy/router:router_interface", + "//source/common/http:headers_lib", + "//source/common/protobuf:utility_lib", + "@envoy_api//envoy/config/route/v3:pkg_cc_proto", + ], +) + envoy_cc_library( name = "string_accessor_lib", hdrs = ["string_accessor_impl.h"], diff --git a/source/common/router/config_impl.cc b/source/common/router/config_impl.cc index 0cf162262a2f..d63c1711ac8a 100644 --- a/source/common/router/config_impl.cc +++ b/source/common/router/config_impl.cc @@ -33,6 +33,7 @@ #include "common/http/utility.h" #include "common/protobuf/protobuf.h" #include "common/protobuf/utility.h" +#include "common/router/reset_header_parser.h" #include "common/router/retry_state_impl.h" #include "common/runtime/runtime_features.h" #include "common/tracing/http_tracer_impl.h" @@ -120,6 +121,21 @@ RetryPolicyImpl::RetryPolicyImpl(const envoy::config::route::v3::RetryPolicy& re } } } + + if (retry_policy.has_rate_limited_retry_back_off()) { + reset_headers_ = ResetHeaderParserImpl::buildResetHeaderParserVector( + retry_policy.rate_limited_retry_back_off().reset_headers()); + + absl::optional reset_max_interval = + PROTOBUF_GET_OPTIONAL_MS(retry_policy.rate_limited_retry_back_off(), max_interval); + if (reset_max_interval.has_value()) { + std::chrono::milliseconds max_interval = reset_max_interval.value(); + if (max_interval.count() < 1) { + max_interval = std::chrono::milliseconds(1); + } + reset_max_interval_ = max_interval; + } + } } std::vector RetryPolicyImpl::retryHostPredicates() const { diff --git a/source/common/router/config_impl.h b/source/common/router/config_impl.h index 4f4c0c8a7e1d..192da070ce53 100644 --- a/source/common/router/config_impl.h +++ b/source/common/router/config_impl.h @@ -272,6 +272,10 @@ class RetryPolicyImpl : public RetryPolicy { } absl::optional baseInterval() const override { return base_interval_; } absl::optional maxInterval() const override { return max_interval_; } + const std::vector& resetHeaders() const override { + return reset_headers_; + } + std::chrono::milliseconds resetMaxInterval() const override { return reset_max_interval_; } private: std::chrono::milliseconds per_try_timeout_{0}; @@ -294,6 +298,8 @@ class RetryPolicyImpl : public RetryPolicy { std::vector retriable_request_headers_; absl::optional base_interval_; absl::optional max_interval_; + std::vector reset_headers_{}; + std::chrono::milliseconds reset_max_interval_{300000}; ProtobufMessage::ValidationVisitor* validation_visitor_{}; }; diff --git a/source/common/router/reset_header_parser.cc b/source/common/router/reset_header_parser.cc new file mode 100644 index 000000000000..f5d61e9ca714 --- /dev/null +++ b/source/common/router/reset_header_parser.cc @@ -0,0 +1,68 @@ +#include "common/router/reset_header_parser.h" + +#include + +#include "common/common/assert.h" + +#include "absl/strings/numbers.h" + +namespace Envoy { +namespace Router { + +ResetHeaderParserImpl::ResetHeaderParserImpl( + const envoy::config::route::v3::RetryPolicy::ResetHeader& config) + : name_(config.name()) { + switch (config.format()) { + case envoy::config::route::v3::RetryPolicy::SECONDS: + format_ = ResetHeaderFormat::Seconds; + break; + case envoy::config::route::v3::RetryPolicy::UNIX_TIMESTAMP: + format_ = ResetHeaderFormat::UnixTimestamp; + break; + default: + NOT_REACHED_GCOVR_EXCL_LINE; + } +} + +absl::optional +ResetHeaderParserImpl::parseInterval(TimeSource& time_source, + const Http::HeaderMap& headers) const { + const Http::HeaderEntry* header = headers.get(name_); + + if (header == nullptr) { + return absl::nullopt; + } + + const auto& header_value = header->value().getStringView(); + uint64_t num_seconds{}; + + switch (format_) { + case ResetHeaderFormat::Seconds: + if (absl::SimpleAtoi(header_value, &num_seconds)) { + return absl::optional(num_seconds * 1000UL); + } + break; + + case ResetHeaderFormat::UnixTimestamp: + if (absl::SimpleAtoi(header_value, &num_seconds)) { + const auto time_now = time_source.systemTime().time_since_epoch(); + const uint64_t timestamp = std::chrono::duration_cast(time_now).count(); + + if (num_seconds < timestamp) { + return absl::nullopt; + } + + const uint64_t interval = num_seconds - timestamp; + return absl::optional(interval * 1000UL); + } + break; + + default: + NOT_REACHED_GCOVR_EXCL_LINE; + } + + return absl::nullopt; +} + +} // namespace Router +} // namespace Envoy diff --git a/source/common/router/reset_header_parser.h b/source/common/router/reset_header_parser.h new file mode 100644 index 000000000000..776486d339b1 --- /dev/null +++ b/source/common/router/reset_header_parser.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include + +#include "envoy/common/time.h" +#include "envoy/config/route/v3/route_components.pb.h" +#include "envoy/http/header_map.h" +#include "envoy/router/router.h" + +#include "common/protobuf/protobuf.h" + +#include "absl/types/optional.h" + +namespace Envoy { +namespace Router { + +enum class ResetHeaderFormat { Seconds, UnixTimestamp }; + +/** + * A ResetHeaderParser specifies a header name and a format to match against + * response headers that are used to signal a rate limit interval reset, such + * as Retry-After or X-RateLimit-Reset. + */ +class ResetHeaderParserImpl : public ResetHeaderParser { +public: + /** + * Build a vector of ResetHeaderParserSharedPtr given input config. + */ + static std::vector buildResetHeaderParserVector( + const Protobuf::RepeatedPtrField& + reset_headers) { + std::vector ret; + for (const auto& reset_header : reset_headers) { + ret.emplace_back(std::make_shared(reset_header)); + } + return ret; + } + + ResetHeaderParserImpl(const envoy::config::route::v3::RetryPolicy::ResetHeader& config); + + const Http::LowerCaseString& name() const { return name_; } + ResetHeaderFormat format() const { return format_; } + + /** + * Iterate over the headers, choose the first one that matches by name, and try to parse its + * value. + */ + absl::optional + parseInterval(TimeSource& time_source, const Http::HeaderMap& headers) const override; + +private: + const Http::LowerCaseString name_; + ResetHeaderFormat format_; +}; + +} // namespace Router +} // namespace Envoy diff --git a/source/common/router/retry_state_impl.cc b/source/common/router/retry_state_impl.cc index 9912f041709e..938850c47f42 100644 --- a/source/common/router/retry_state_impl.cc +++ b/source/common/router/retry_state_impl.cc @@ -38,14 +38,14 @@ RetryStatePtr RetryStateImpl::create(const RetryPolicy& route_policy, const Upstream::ClusterInfo& cluster, const VirtualCluster* vcluster, Runtime::Loader& runtime, Random::RandomGenerator& random, Event::Dispatcher& dispatcher, - Upstream::ResourcePriority priority) { + TimeSource& time_source, Upstream::ResourcePriority priority) { RetryStatePtr ret; // We short circuit here and do not bother with an allocation if there is no chance we will retry. if (request_headers.EnvoyRetryOn() || request_headers.EnvoyRetryGrpcOn() || route_policy.retryOn()) { ret.reset(new RetryStateImpl(route_policy, request_headers, cluster, vcluster, runtime, random, - dispatcher, priority)); + dispatcher, time_source, priority)); } // Consume all retry related headers to avoid them being propagated to the upstream @@ -66,14 +66,17 @@ RetryStateImpl::RetryStateImpl(const RetryPolicy& route_policy, Http::RequestHeaderMap& request_headers, const Upstream::ClusterInfo& cluster, const VirtualCluster* vcluster, Runtime::Loader& runtime, Random::RandomGenerator& random, - Event::Dispatcher& dispatcher, Upstream::ResourcePriority priority) + Event::Dispatcher& dispatcher, TimeSource& time_source, + Upstream::ResourcePriority priority) : cluster_(cluster), vcluster_(vcluster), runtime_(runtime), random_(random), - dispatcher_(dispatcher), retry_on_(route_policy.retryOn()), + dispatcher_(dispatcher), time_source_(time_source), retry_on_(route_policy.retryOn()), retries_remaining_(route_policy.numRetries()), priority_(priority), retry_host_predicates_(route_policy.retryHostPredicates()), retry_priority_(route_policy.retryPriority()), retriable_status_codes_(route_policy.retriableStatusCodes()), - retriable_headers_(route_policy.retriableHeaders()) { + retriable_headers_(route_policy.retriableHeaders()), + reset_headers_(route_policy.resetHeaders()), + reset_max_interval_(route_policy.resetMaxInterval()) { std::chrono::milliseconds base_interval( runtime_.snapshot().getInteger("upstream.base_retry_backoff_ms", 25)); @@ -88,8 +91,8 @@ RetryStateImpl::RetryStateImpl(const RetryPolicy& route_policy, max_interval = *route_policy.maxInterval(); } - backoff_strategy_ = std::make_unique(base_interval.count(), - max_interval.count(), random_); + backoff_strategy_ = std::make_unique( + base_interval.count(), max_interval.count(), random_); host_selection_max_attempts_ = route_policy.hostSelectionMaxAttempts(); // Merge in the headers. @@ -156,8 +159,23 @@ void RetryStateImpl::enableBackoffTimer() { retry_timer_ = dispatcher_.createTimer([this]() -> void { callback_(); }); } - // We use a fully jittered exponential backoff algorithm. - retry_timer_->enableTimer(std::chrono::milliseconds(backoff_strategy_->nextBackOffMs())); + if (ratelimited_backoff_strategy_ != nullptr) { + // If we have a backoff strategy based on rate limit feedback from the response we use it. + retry_timer_->enableTimer( + std::chrono::milliseconds(ratelimited_backoff_strategy_->nextBackOffMs())); + + // The strategy is only valid for the response that sent the ratelimit reset header and cannot + // be reused. + ratelimited_backoff_strategy_.reset(); + + cluster_.stats().upstream_rq_retry_backoff_ratelimited_.inc(); + + } else { + // Otherwise we use a fully jittered exponential backoff algorithm. + retry_timer_->enableTimer(std::chrono::milliseconds(backoff_strategy_->nextBackOffMs())); + + cluster_.stats().upstream_rq_retry_backoff_exponential_.inc(); + } } std::pair RetryStateImpl::parseRetryOn(absl::string_view config) { @@ -212,6 +230,18 @@ std::pair RetryStateImpl::parseRetryGrpcOn(absl::string_view ret return {ret, all_fields_valid}; } +absl::optional +RetryStateImpl::parseResetInterval(const Http::ResponseHeaderMap& response_headers) const { + for (const auto& reset_header : reset_headers_) { + const auto interval = reset_header->parseInterval(time_source_, response_headers); + if (interval.has_value() && interval.value() <= reset_max_interval_) { + return interval; + } + } + + return absl::nullopt; +} + void RetryStateImpl::resetRetry() { if (callback_) { cluster_.resourceManager(priority_).retries().dec(); @@ -273,7 +303,19 @@ RetryStatus RetryStateImpl::shouldRetry(bool would_retry, DoRetryCallback callba RetryStatus RetryStateImpl::shouldRetryHeaders(const Http::ResponseHeaderMap& response_headers, DoRetryCallback callback) { - return shouldRetry(wouldRetryFromHeaders(response_headers), callback); + const bool would_retry = wouldRetryFromHeaders(response_headers); + + // Yes, we will retry based on the headers - try to parse a rate limited reset interval from the + // response. + if (would_retry && !reset_headers_.empty()) { + const auto backoff_interval = parseResetInterval(response_headers); + if (backoff_interval.has_value()) { + ratelimited_backoff_strategy_ = std::make_unique( + backoff_interval.value().count(), random_); + } + } + + return shouldRetry(would_retry, callback); } RetryStatus RetryStateImpl::shouldRetryReset(Http::StreamResetReason reset_reason, diff --git a/source/common/router/retry_state_impl.h b/source/common/router/retry_state_impl.h index 9b0a19a77911..8931b5bf3751 100644 --- a/source/common/router/retry_state_impl.h +++ b/source/common/router/retry_state_impl.h @@ -29,7 +29,8 @@ class RetryStateImpl : public RetryState { Http::RequestHeaderMap& request_headers, const Upstream::ClusterInfo& cluster, const VirtualCluster* vcluster, Runtime::Loader& runtime, Random::RandomGenerator& random, - Event::Dispatcher& dispatcher, Upstream::ResourcePriority priority); + Event::Dispatcher& dispatcher, TimeSource& time_source, + Upstream::ResourcePriority priority); ~RetryStateImpl() override; /** @@ -52,6 +53,8 @@ class RetryStateImpl : public RetryState { // Router::RetryState bool enabled() override { return retry_on_ != 0; } + absl::optional + parseResetInterval(const Http::ResponseHeaderMap& response_headers) const override; RetryStatus shouldRetryHeaders(const Http::ResponseHeaderMap& response_headers, DoRetryCallback callback) override; // Returns true if the retry policy would retry the passed headers. Does not @@ -92,7 +95,8 @@ class RetryStateImpl : public RetryState { RetryStateImpl(const RetryPolicy& route_policy, Http::RequestHeaderMap& request_headers, const Upstream::ClusterInfo& cluster, const VirtualCluster* vcluster, Runtime::Loader& runtime, Random::RandomGenerator& random, - Event::Dispatcher& dispatcher, Upstream::ResourcePriority priority); + Event::Dispatcher& dispatcher, TimeSource& time_source, + Upstream::ResourcePriority priority); void enableBackoffTimer(); void resetRetry(); @@ -104,17 +108,21 @@ class RetryStateImpl : public RetryState { Runtime::Loader& runtime_; Random::RandomGenerator& random_; Event::Dispatcher& dispatcher_; + TimeSource& time_source_; uint32_t retry_on_{}; uint32_t retries_remaining_{}; DoRetryCallback callback_; Event::TimerPtr retry_timer_; Upstream::ResourcePriority priority_; BackOffStrategyPtr backoff_strategy_; + BackOffStrategyPtr ratelimited_backoff_strategy_{}; std::vector retry_host_predicates_; Upstream::RetryPrioritySharedPtr retry_priority_; uint32_t host_selection_max_attempts_; std::vector retriable_status_codes_; std::vector retriable_headers_; + std::vector reset_headers_{}; + std::chrono::milliseconds reset_max_interval_{}; }; } // namespace Router diff --git a/source/common/router/router.cc b/source/common/router/router.cc index 88db133d6e2c..9e2e00128a4a 100644 --- a/source/common/router/router.cc +++ b/source/common/router/router.cc @@ -569,9 +569,9 @@ Http::FilterHeadersStatus Filter::decodeHeaders(Http::RequestHeaderMap& headers, // Ensure an http transport scheme is selected before continuing with decoding. ASSERT(headers.Scheme()); - retry_state_ = createRetryState(route_entry_->retryPolicy(), headers, *cluster_, - request_vcluster_, config_.runtime_, config_.random_, - callbacks_->dispatcher(), route_entry_->priority()); + retry_state_ = createRetryState( + route_entry_->retryPolicy(), headers, *cluster_, request_vcluster_, config_.runtime_, + config_.random_, callbacks_->dispatcher(), config_.timeSource(), route_entry_->priority()); // Determine which shadow policies to use. It's possible that we don't do any shadowing due to // runtime keys. @@ -1575,13 +1575,15 @@ uint32_t Filter::numRequestsAwaitingHeaders() { [](const auto& req) -> bool { return req->awaitingHeaders(); }); } -RetryStatePtr -ProdFilter::createRetryState(const RetryPolicy& policy, Http::RequestHeaderMap& request_headers, - const Upstream::ClusterInfo& cluster, const VirtualCluster* vcluster, - Runtime::Loader& runtime, Random::RandomGenerator& random, - Event::Dispatcher& dispatcher, Upstream::ResourcePriority priority) { +RetryStatePtr ProdFilter::createRetryState(const RetryPolicy& policy, + Http::RequestHeaderMap& request_headers, + const Upstream::ClusterInfo& cluster, + const VirtualCluster* vcluster, Runtime::Loader& runtime, + Random::RandomGenerator& random, + Event::Dispatcher& dispatcher, TimeSource& time_source, + Upstream::ResourcePriority priority) { return RetryStateImpl::create(policy, request_headers, cluster, vcluster, runtime, random, - dispatcher, priority); + dispatcher, time_source, priority); } } // namespace Router diff --git a/source/common/router/router.h b/source/common/router/router.h index 65ef129ccf3a..e1be0571e2b6 100644 --- a/source/common/router/router.h +++ b/source/common/router/router.h @@ -473,11 +473,13 @@ class Filter : Logger::Loggable, bool dropped); void chargeUpstreamAbort(Http::Code code, bool dropped, UpstreamRequest& upstream_request); void cleanup(); - virtual RetryStatePtr - createRetryState(const RetryPolicy& policy, Http::RequestHeaderMap& request_headers, - const Upstream::ClusterInfo& cluster, const VirtualCluster* vcluster, - Runtime::Loader& runtime, Random::RandomGenerator& random, - Event::Dispatcher& dispatcher, Upstream::ResourcePriority priority) PURE; + virtual RetryStatePtr createRetryState(const RetryPolicy& policy, + Http::RequestHeaderMap& request_headers, + const Upstream::ClusterInfo& cluster, + const VirtualCluster* vcluster, Runtime::Loader& runtime, + Random::RandomGenerator& random, + Event::Dispatcher& dispatcher, TimeSource& time_source, + Upstream::ResourcePriority priority) PURE; std::unique_ptr createConnPool(); UpstreamRequestPtr createUpstreamRequest(); @@ -567,6 +569,7 @@ class ProdFilter : public Filter { const Upstream::ClusterInfo& cluster, const VirtualCluster* vcluster, Runtime::Loader& runtime, Random::RandomGenerator& random, Event::Dispatcher& dispatcher, + TimeSource& time_source, Upstream::ResourcePriority priority) override; }; diff --git a/source/common/upstream/health_discovery_service.cc b/source/common/upstream/health_discovery_service.cc index 21fa74e34588..4d66e0f6c63f 100644 --- a/source/common/upstream/health_discovery_service.cc +++ b/source/common/upstream/health_discovery_service.cc @@ -50,8 +50,8 @@ HdsDelegate::HdsDelegate(Stats::Scope& scope, Grpc::RawAsyncClientPtr async_clie api_(api) { health_check_request_.mutable_health_check_request()->mutable_node()->MergeFrom( local_info_.node()); - backoff_strategy_ = std::make_unique(RetryInitialDelayMilliseconds, - RetryMaxDelayMilliseconds, random_); + backoff_strategy_ = std::make_unique( + RetryInitialDelayMilliseconds, RetryMaxDelayMilliseconds, random_); hds_retry_timer_ = dispatcher.createTimer([this]() -> void { establishNewStream(); }); hds_stream_response_timer_ = dispatcher.createTimer([this]() -> void { sendResponse(); }); diff --git a/test/common/common/backoff_strategy_test.cc b/test/common/common/backoff_strategy_test.cc index 20ffe937065c..bfc540234c7b 100644 --- a/test/common/common/backoff_strategy_test.cc +++ b/test/common/common/backoff_strategy_test.cc @@ -9,20 +9,20 @@ using testing::Return; namespace Envoy { -TEST(BackOffStrategyTest, JitteredBackOffBasicFlow) { +TEST(ExponentialBackOffStrategyTest, JitteredBackOffBasicFlow) { NiceMock random; ON_CALL(random, random()).WillByDefault(Return(27)); - JitteredBackOffStrategy jittered_back_off(25, 30, random); + JitteredExponentialBackOffStrategy jittered_back_off(25, 30, random); EXPECT_EQ(2, jittered_back_off.nextBackOffMs()); EXPECT_EQ(27, jittered_back_off.nextBackOffMs()); } -TEST(BackOffStrategyTest, JitteredBackOffBasicReset) { +TEST(ExponentialBackOffStrategyTest, JitteredBackOffBasicReset) { NiceMock random; ON_CALL(random, random()).WillByDefault(Return(27)); - JitteredBackOffStrategy jittered_back_off(25, 30, random); + JitteredExponentialBackOffStrategy jittered_back_off(25, 30, random); EXPECT_EQ(2, jittered_back_off.nextBackOffMs()); EXPECT_EQ(27, jittered_back_off.nextBackOffMs()); @@ -30,22 +30,23 @@ TEST(BackOffStrategyTest, JitteredBackOffBasicReset) { EXPECT_EQ(2, jittered_back_off.nextBackOffMs()); // Should start from start } -TEST(BackOffStrategyTest, JitteredBackOffDoesntOverflow) { +TEST(ExponentialBackOffStrategyTest, JitteredBackOffDoesntOverflow) { NiceMock random; ON_CALL(random, random()).WillByDefault(Return(std::numeric_limits::max() - 1)); - JitteredBackOffStrategy jittered_back_off(1, std::numeric_limits::max(), random); + JitteredExponentialBackOffStrategy jittered_back_off(1, std::numeric_limits::max(), + random); for (int iter = 0; iter < 100; ++iter) { EXPECT_GE(std::numeric_limits::max(), jittered_back_off.nextBackOffMs()); } EXPECT_EQ(std::numeric_limits::max() - 1, jittered_back_off.nextBackOffMs()); } -TEST(BackOffStrategyTest, JitteredBackOffWithMaxInterval) { +TEST(ExponentialBackOffStrategyTest, JitteredBackOffWithMaxInterval) { NiceMock random; ON_CALL(random, random()).WillByDefault(Return(9999)); - JitteredBackOffStrategy jittered_back_off(5, 100, random); + JitteredExponentialBackOffStrategy jittered_back_off(5, 100, random); EXPECT_EQ(4, jittered_back_off.nextBackOffMs()); EXPECT_EQ(9, jittered_back_off.nextBackOffMs()); EXPECT_EQ(19, jittered_back_off.nextBackOffMs()); @@ -55,11 +56,11 @@ TEST(BackOffStrategyTest, JitteredBackOffWithMaxInterval) { EXPECT_EQ(99, jittered_back_off.nextBackOffMs()); } -TEST(BackOffStrategyTest, JitteredBackOffWithMaxIntervalReset) { +TEST(ExponentialBackOffStrategyTest, JitteredBackOffWithMaxIntervalReset) { NiceMock random; ON_CALL(random, random()).WillByDefault(Return(9999)); - JitteredBackOffStrategy jittered_back_off(5, 100, random); + JitteredExponentialBackOffStrategy jittered_back_off(5, 100, random); EXPECT_EQ(4, jittered_back_off.nextBackOffMs()); EXPECT_EQ(9, jittered_back_off.nextBackOffMs()); EXPECT_EQ(19, jittered_back_off.nextBackOffMs()); @@ -78,7 +79,23 @@ TEST(BackOffStrategyTest, JitteredBackOffWithMaxIntervalReset) { EXPECT_EQ(99, jittered_back_off.nextBackOffMs()); } -TEST(BackOffStrategyTest, FixedBackOffBasicReset) { +TEST(LowerBoundBackOffStrategyTest, JitteredBackOffWithLowRandomValue) { + NiceMock random; + ON_CALL(random, random()).WillByDefault(Return(22)); + + JitteredLowerBoundBackOffStrategy jittered_lower_bound_back_off(500, random); + EXPECT_EQ(522, jittered_lower_bound_back_off.nextBackOffMs()); +} + +TEST(LowerBoundBackOffStrategyTest, JitteredBackOffWithHighRandomValue) { + NiceMock random; + ON_CALL(random, random()).WillByDefault(Return(9999)); + + JitteredLowerBoundBackOffStrategy jittered_lower_bound_back_off(500, random); + EXPECT_EQ(749, jittered_lower_bound_back_off.nextBackOffMs()); +} + +TEST(FixedBackOffStrategyTest, FixedBackOffBasicReset) { FixedBackOffStrategy fixed_back_off(30); EXPECT_EQ(30, fixed_back_off.nextBackOffMs()); EXPECT_EQ(30, fixed_back_off.nextBackOffMs()); diff --git a/test/common/config/utility_test.cc b/test/common/config/utility_test.cc index 23ab3e0b0235..d755486a872b 100644 --- a/test/common/config/utility_test.cc +++ b/test/common/config/utility_test.cc @@ -265,7 +265,7 @@ TEST(UtilityTest, PrepareDnsRefreshStrategy) { BackOffStrategyPtr strategy = Utility::prepareDnsRefreshStrategy(cluster, 5000, random); - EXPECT_NE(nullptr, dynamic_cast(strategy.get())); + EXPECT_NE(nullptr, dynamic_cast(strategy.get())); } { diff --git a/test/common/router/BUILD b/test/common/router/BUILD index a377e5672fdd..0fad319d411c 100644 --- a/test/common/router/BUILD +++ b/test/common/router/BUILD @@ -61,6 +61,18 @@ envoy_cc_fuzz_test( ], ) +envoy_cc_test( + name = "reset_header_parser_test", + srcs = ["reset_header_parser_test.cc"], + deps = [ + "//source/common/http:header_utility_lib", + "//source/common/router:reset_header_parser_lib", + "//test/test_common:simulated_time_system_lib", + "//test/test_common:utility_lib", + "@envoy_api//envoy/config/route/v3:pkg_cc_proto", + ], +) + envoy_cc_test( name = "rds_impl_test", srcs = ["rds_impl_test.cc"], @@ -155,6 +167,7 @@ envoy_cc_test( srcs = ["retry_state_impl_test.cc"], deps = [ "//source/common/http:header_map_lib", + "//source/common/router:reset_header_parser_lib", "//source/common/router:retry_state_lib", "//source/common/upstream:resource_manager_lib", "//test/mocks/router:router_mocks", diff --git a/test/common/router/config_impl_test.cc b/test/common/router/config_impl_test.cc index 7585b4ec0524..97bd2fd21528 100644 --- a/test/common/router/config_impl_test.cc +++ b/test/common/router/config_impl_test.cc @@ -306,6 +306,7 @@ most_specific_header_mutations_wins: {0} Stats::TestSymbolTable symbol_table_; Api::ApiPtr api_; NiceMock factory_context_; + Event::SimulatedTimeSystem test_time_; }; class RouteMatcherTest : public testing::Test, public ConfigImplTestBase {}; @@ -3580,6 +3581,86 @@ TEST_F(RouteMatcherTest, InvalidRetryBackOff) { "retry_policy.max_interval must greater than or equal to the base_interval"); } +TEST_F(RouteMatcherTest, RateLimitedRetryBackOff) { + const std::string yaml = R"EOF( +virtual_hosts: +- name: www + domains: + - www.lyft.com + routes: + - match: + prefix: "/no-backoff" + route: + cluster: www + - match: + prefix: "/sub-ms-interval" + route: + cluster: www + retry_policy: + rate_limited_retry_back_off: + reset_headers: + - name: Retry-After + format: SECONDS + max_interval: 0.0001s # < 1 ms + - match: + prefix: "/typical-backoff" + route: + cluster: www + retry_policy: + rate_limited_retry_back_off: + reset_headers: + - name: Retry-After + format: SECONDS + - name: RateLimit-Reset + format: UNIX_TIMESTAMP + max_interval: 0.050s + )EOF"; + + const time_t known_date_time = 1000000000; + test_time_.setSystemTime(std::chrono::system_clock::from_time_t(known_date_time)); + + TestConfigImpl config(parseRouteConfigurationFromYaml(yaml), factory_context_, true); + + // has no ratelimit retry back off + EXPECT_EQ(true, config.route(genHeaders("www.lyft.com", "/no-backoff", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .resetHeaders() + .empty()); + EXPECT_EQ(std::chrono::milliseconds(300000), + config.route(genHeaders("www.lyft.com", "/no-backoff", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .resetMaxInterval()); + + // has sub millisecond interval + EXPECT_EQ(1, config.route(genHeaders("www.lyft.com", "/sub-ms-interval", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .resetHeaders() + .size()); + EXPECT_EQ(std::chrono::milliseconds(1), + config.route(genHeaders("www.lyft.com", "/sub-ms-interval", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .resetMaxInterval()); + + // a typical configuration + Http::TestRequestHeaderMapImpl headers = genHeaders("www.lyft.com", "/typical-backoff", "GET"); + const auto& retry_policy = config.route(headers, 0)->routeEntry()->retryPolicy(); + EXPECT_EQ(2, retry_policy.resetHeaders().size()); + + Http::TestResponseHeaderMapImpl expected_0{{"Retry-After", "2"}}; + Http::TestResponseHeaderMapImpl expected_1{{"RateLimit-Reset", "1000000005"}}; + + EXPECT_EQ(std::chrono::milliseconds(2000), + retry_policy.resetHeaders()[0]->parseInterval(test_time_.timeSystem(), expected_0)); + EXPECT_EQ(std::chrono::milliseconds(5000), + retry_policy.resetHeaders()[1]->parseInterval(test_time_.timeSystem(), expected_1)); + + EXPECT_EQ(std::chrono::milliseconds(50), retry_policy.resetMaxInterval()); +} + TEST_F(RouteMatcherTest, HedgeRouteLevel) { const std::string yaml = R"EOF( virtual_hosts: diff --git a/test/common/router/reset_header_parser_test.cc b/test/common/router/reset_header_parser_test.cc new file mode 100644 index 000000000000..bb6135390db4 --- /dev/null +++ b/test/common/router/reset_header_parser_test.cc @@ -0,0 +1,210 @@ +#include "envoy/config/route/v3/route_components.pb.h" +#include "envoy/http/protocol.h" +#include "envoy/json/json_object.h" + +#include "common/json/json_loader.h" +#include "common/router/reset_header_parser.h" + +#include "test/test_common/simulated_time_system.h" +#include "test/test_common/utility.h" + +#include "gtest/gtest.h" + +namespace Envoy { +namespace Router { +namespace { + +envoy::config::route::v3::RetryPolicy::ResetHeader +parseResetHeaderParserFromYaml(const std::string& yaml) { + envoy::config::route::v3::RetryPolicy::ResetHeader reset_header; + TestUtility::loadFromYaml(yaml, reset_header); + return reset_header; +} + +TEST(ResetHeaderParserConstructorTest, FormatUnset) { + const std::string yaml = R"EOF( +name: retry-after + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + EXPECT_EQ("retry-after", reset_header_parser.name().get()); + EXPECT_EQ(ResetHeaderFormat::Seconds, reset_header_parser.format()); +} + +TEST(ResetHeaderParserConstructorTest, FormatSeconds) { + const std::string yaml = R"EOF( +name: retry-after +format: SECONDS + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + EXPECT_EQ("retry-after", reset_header_parser.name().get()); + EXPECT_EQ(ResetHeaderFormat::Seconds, reset_header_parser.format()); +} + +TEST(ResetHeaderParserConstructorTest, FormatUnixTimestamp) { + const std::string yaml = R"EOF( +name: retry-after +format: UNIX_TIMESTAMP + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + EXPECT_EQ("retry-after", reset_header_parser.name().get()); + EXPECT_EQ(ResetHeaderFormat::UnixTimestamp, reset_header_parser.format()); +} + +class ResetHeaderParserParseIntervalTest : public testing::Test { +public: + ResetHeaderParserParseIntervalTest() { + const time_t known_date_time = 1000000000; + test_time_.setSystemTime(std::chrono::system_clock::from_time_t(known_date_time)); + } + + Event::SimulatedTimeSystem test_time_; +}; + +TEST_F(ResetHeaderParserParseIntervalTest, NoHeaderMatches) { + const std::string yaml = R"EOF( +name: retry-after +format: SECONDS + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{{":status", "200"}}; + + EXPECT_EQ(absl::nullopt, + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +TEST_F(ResetHeaderParserParseIntervalTest, HeaderMatchesButUnsupportedFormatDate) { + const std::string yaml = R"EOF( +name: retry-after +format: SECONDS + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{ + {"retry-after", "Fri, 17 Jul 2020 11:59:51 GMT"}}; + + EXPECT_EQ(absl::nullopt, + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +TEST_F(ResetHeaderParserParseIntervalTest, HeaderMatchesButUnsupportedFormatFloat) { + const std::string yaml = R"EOF( +name: retry-after +format: SECONDS + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{{"retry-after", "2.5"}}; + + EXPECT_EQ(absl::nullopt, + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +TEST_F(ResetHeaderParserParseIntervalTest, HeaderMatchesSupportedFormatSeconds) { + const std::string yaml = R"EOF( +name: retry-after +format: SECONDS + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{{"retry-after", "5"}}; + + EXPECT_EQ(absl::optional(5000), + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +TEST_F(ResetHeaderParserParseIntervalTest, HeaderMatchesSupportedFormatSecondsCaseInsensitive) { + const std::string yaml = R"EOF( +name: retry-after +format: SECONDS + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{{"Retry-After", "5"}}; + + EXPECT_EQ(absl::optional(5000), + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +TEST_F(ResetHeaderParserParseIntervalTest, HeaderMatchesButUnsupportedFormatTimestampFloat) { + const std::string yaml = R"EOF( +name: retry-after +format: UNIX_TIMESTAMP + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{{"retry-after", "1595320702.1234"}}; + + EXPECT_EQ(absl::nullopt, + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +TEST_F(ResetHeaderParserParseIntervalTest, HeaderMatchesSupportedFormatTimestampButInThePast) { + const std::string yaml = R"EOF( +name: retry-after +format: UNIX_TIMESTAMP + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{{"retry-after", "999999999"}}; + + EXPECT_EQ(absl::nullopt, + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +TEST_F(ResetHeaderParserParseIntervalTest, HeaderMatchesSupportedFormatTimestampEmptyInterval) { + const std::string yaml = R"EOF( +name: retry-after +format: UNIX_TIMESTAMP + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{{"retry-after", "1000000000"}}; + + EXPECT_EQ(absl::optional(0), + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +TEST_F(ResetHeaderParserParseIntervalTest, HeaderMatchesSupportedFormatTimestampNonEmptyInterval) { + const std::string yaml = R"EOF( +name: retry-after +format: UNIX_TIMESTAMP + )EOF"; + + ResetHeaderParserImpl reset_header_parser = + ResetHeaderParserImpl(parseResetHeaderParserFromYaml(yaml)); + + Http::TestResponseHeaderMapImpl response_headers{{"retry-after", "1000000007"}}; + + EXPECT_EQ(absl::optional(7000), + reset_header_parser.parseInterval(test_time_.timeSystem(), response_headers)); +} + +} // namespace +} // namespace Router +} // namespace Envoy diff --git a/test/common/router/retry_state_impl_test.cc b/test/common/router/retry_state_impl_test.cc index 6f3d6441baaf..f7bc5cb87c36 100644 --- a/test/common/router/retry_state_impl_test.cc +++ b/test/common/router/retry_state_impl_test.cc @@ -4,6 +4,7 @@ #include "envoy/stats/stats.h" #include "common/http/header_map_impl.h" +#include "common/router/reset_header_parser.h" #include "common/router/retry_state_impl.h" #include "common/upstream/resource_manager_impl.h" @@ -42,7 +43,8 @@ class RouterRetryStateImplTest : public testing::Test { void setup(Http::RequestHeaderMap& request_headers) { state_ = RetryStateImpl::create(policy_, request_headers, cluster_, &virtual_cluster_, runtime_, - random_, dispatcher_, Upstream::ResourcePriority::Default); + random_, dispatcher_, test_time_.timeSystem(), + Upstream::ResourcePriority::Default); } void expectTimerCreateAndEnable() { @@ -123,6 +125,7 @@ class RouterRetryStateImplTest : public testing::Test { void TearDown() override { cleanupOutstandingResources(); } + Event::SimulatedTimeSystem test_time_; NiceMock policy_; NiceMock cluster_; TestVirtualCluster virtual_cluster_; @@ -925,6 +928,128 @@ TEST_F(RouterRetryStateImplTest, CustomBackOffIntervalDefaultMax) { retry_timer_->invokeCallback(); } +TEST_F(RouterRetryStateImplTest, ParseRateLimitedResetInterval) { + // Set a fixed system time to be used for all these tests + const time_t known_date_time = 1000000000; + test_time_.setSystemTime(std::chrono::system_clock::from_time_t(known_date_time)); + + Protobuf::RepeatedPtrField reset_headers; + auto* reset_header_1 = reset_headers.Add(); + reset_header_1->set_name("Retry-After"); + reset_header_1->set_format(envoy::config::route::v3::RetryPolicy::SECONDS); + + auto* reset_header_2 = reset_headers.Add(); + reset_header_2->set_name("X-RateLimit-Reset"); + reset_header_2->set_format(envoy::config::route::v3::RetryPolicy::UNIX_TIMESTAMP); + + policy_.reset_headers_ = ResetHeaderParserImpl::buildResetHeaderParserVector(reset_headers); + + // Failure case: Matches reset header (seconds) but exceeds max_interval (>5min) + { + Http::TestRequestHeaderMapImpl request_headers{{"x-envoy-retry-on", "5xx"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + Http::TestResponseHeaderMapImpl response_headers{{":status", "429"}, {"Retry-After", "301"}}; + EXPECT_EQ(absl::nullopt, state_->parseResetInterval(response_headers)); + } + + // Failure case: Matches reset header (timestamp) but exceeds max_interval (>5min) + { + Http::TestRequestHeaderMapImpl request_headers{{"x-envoy-retry-on", "5xx"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + Http::TestResponseHeaderMapImpl response_headers{{":status", "429"}, + {"X-RateLimit-Reset", "1000000301"}}; + EXPECT_EQ(absl::nullopt, state_->parseResetInterval(response_headers)); + } + + // The only reset header matches (seconds) and the header value is in within range + { + Http::TestRequestHeaderMapImpl request_headers{{"x-envoy-retry-on", "5xx"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + Http::TestResponseHeaderMapImpl response_headers{{":status", "429"}, {"Retry-After", "300"}}; + EXPECT_EQ(absl::optional(300000), + state_->parseResetInterval(response_headers)); + } + + // The only reset header matches (timestamp) and the header value is in within range + { + Http::TestRequestHeaderMapImpl request_headers{{"x-envoy-retry-on", "5xx"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + Http::TestResponseHeaderMapImpl response_headers{{":status", "429"}, + {"x-ratelimit-reset", "1000000300"}}; + EXPECT_EQ(absl::optional(300000), + state_->parseResetInterval(response_headers)); + } + + // The second (timestamp) and third (seconds) reset headers match but Retry-After comes first in + // reset_headers so it is used + { + Http::TestRequestHeaderMapImpl request_headers{{"x-envoy-retry-on", "5xx"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + Http::TestResponseHeaderMapImpl response_headers{ + {":status", "429"}, {"x-ratelimit-reset", "1000000002"}, {"retry-after", "3"}}; + EXPECT_EQ(absl::optional(3000), + state_->parseResetInterval(response_headers)); + } +} + +TEST_F(RouterRetryStateImplTest, RateLimitedRetryBackoffStrategy) { + Protobuf::RepeatedPtrField reset_headers; + auto* reset_header = reset_headers.Add(); + reset_header->set_name("Retry-After"); + reset_header->set_format(envoy::config::route::v3::RetryPolicy::SECONDS); + + policy_.num_retries_ = 3; + policy_.reset_headers_ = ResetHeaderParserImpl::buildResetHeaderParserVector(reset_headers); + + Http::TestRequestHeaderMapImpl request_headers{{"x-envoy-retry-on", "5xx"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + retry_timer_ = new Event::MockTimer(&dispatcher_); + Http::TestResponseHeaderMapImpl response_headers_reset_1{{":status", "500"}, + {"retry-after", "2"}}; + Http::TestResponseHeaderMapImpl response_headers_plain{{":status", "500"}}; + Http::TestResponseHeaderMapImpl response_headers_reset_2{{":status", "500"}, + {"retry-after", "5"}}; + + // reset header present -> ratelimit backoff used + EXPECT_CALL(random_, random()).WillOnce(Return(190)); + EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(2190), _)); + EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryHeaders(response_headers_reset_1, callback_)); + EXPECT_CALL(callback_ready_, ready()); + retry_timer_->invokeCallback(); + + // reset header not present -> exponential backoff used + EXPECT_CALL(random_, random()).WillOnce(Return(190)); + EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(15), _)); + EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryHeaders(response_headers_plain, callback_)); + EXPECT_CALL(callback_ready_, ready()); + retry_timer_->invokeCallback(); + + // reset header present -> ratelimit backoff used + EXPECT_CALL(random_, random()).WillOnce(Return(2190)); + EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(7190), _)); + EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryHeaders(response_headers_reset_2, callback_)); + EXPECT_CALL(callback_ready_, ready()); + retry_timer_->invokeCallback(); + + EXPECT_EQ(RetryStatus::NoRetryLimitExceeded, + state_->shouldRetryHeaders(response_headers_reset_2, callback_)); + + EXPECT_EQ(2UL, cluster_.stats().upstream_rq_retry_backoff_ratelimited_.value()); + EXPECT_EQ(1UL, cluster_.stats().upstream_rq_retry_backoff_exponential_.value()); +} + TEST_F(RouterRetryStateImplTest, HostSelectionAttempts) { policy_.host_selection_max_attempts_ = 2; policy_.retry_on_ = RetryPolicy::RETRY_ON_CONNECT_FAILURE; diff --git a/test/common/router/router_test.cc b/test/common/router/router_test.cc index 0e829d87914e..6cc60aad3c01 100644 --- a/test/common/router/router_test.cc +++ b/test/common/router/router_test.cc @@ -71,7 +71,7 @@ class RouterTestFilter : public Filter { RetryStatePtr createRetryState(const RetryPolicy&, Http::RequestHeaderMap&, const Upstream::ClusterInfo&, const VirtualCluster*, Runtime::Loader&, Random::RandomGenerator&, Event::Dispatcher&, - Upstream::ResourcePriority) override { + TimeSource&, Upstream::ResourcePriority) override { EXPECT_EQ(nullptr, retry_state_); retry_state_ = new NiceMock(); if (reject_all_hosts_) { diff --git a/test/common/router/router_upstream_log_test.cc b/test/common/router/router_upstream_log_test.cc index 8662e760364a..42ed52b60d83 100644 --- a/test/common/router/router_upstream_log_test.cc +++ b/test/common/router/router_upstream_log_test.cc @@ -64,7 +64,7 @@ class TestFilter : public Filter { RetryStatePtr createRetryState(const RetryPolicy&, Http::RequestHeaderMap&, const Upstream::ClusterInfo&, const VirtualCluster*, Runtime::Loader&, Random::RandomGenerator&, Event::Dispatcher&, - Upstream::ResourcePriority) override { + TimeSource&, Upstream::ResourcePriority) override { EXPECT_EQ(nullptr, retry_state_); retry_state_ = new NiceMock(); return RetryStatePtr{retry_state_}; diff --git a/test/extensions/common/dynamic_forward_proxy/dns_cache_impl_test.cc b/test/extensions/common/dynamic_forward_proxy/dns_cache_impl_test.cc index c12f94d4e99b..332f9e7651e8 100644 --- a/test/extensions/common/dynamic_forward_proxy/dns_cache_impl_test.cc +++ b/test/extensions/common/dynamic_forward_proxy/dns_cache_impl_test.cc @@ -757,7 +757,7 @@ TEST(UtilityTest, PrepareDnsRefreshStrategy) { BackOffStrategyPtr strategy = Config::Utility::prepareDnsRefreshStrategy< envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig>(dns_cache_config, 5000, random); - EXPECT_NE(nullptr, dynamic_cast(strategy.get())); + EXPECT_NE(nullptr, dynamic_cast(strategy.get())); } { diff --git a/test/integration/stats_integration_test.cc b/test/integration/stats_integration_test.cc index f66a9b8a14bf..2ffc1ade58c9 100644 --- a/test/integration/stats_integration_test.cc +++ b/test/integration/stats_integration_test.cc @@ -288,6 +288,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // 2020/07/21 12034 44811 46000 Add configurable histogram buckets. // 2020/07/31 12035 45002 46000 Init manager store unready targets in hash map. // 2020/08/10 12275 44949 46000 Re-organize tls histogram maps to improve continuity. + // 2020/08/11 12202 44949 46500 router: add new retry back-off strategy // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -308,7 +309,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // https://github.com/envoyproxy/envoy/issues/12209 // EXPECT_MEMORY_EQ(m_per_cluster, 44949); } - EXPECT_MEMORY_LE(m_per_cluster, 46000); // Round up to allow platform variations. + EXPECT_MEMORY_LE(m_per_cluster, 46500); // Round up to allow platform variations. } TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { @@ -366,6 +367,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // 2020/07/21 12034 36923 38000 Add configurable histogram buckets. // 2020/07/31 12035 37114 38000 Init manager store unready targets in hash map. // 2020/08/10 12275 37061 38000 Re-organize tls histogram maps to improve continuity. + // 2020/08/11 12202 37061 38500 router: add new retry back-off strategy // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -386,7 +388,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // https://github.com/envoyproxy/envoy/issues/12209 // EXPECT_MEMORY_EQ(m_per_cluster, 37061); } - EXPECT_MEMORY_LE(m_per_cluster, 38000); // Round up to allow platform variations. + EXPECT_MEMORY_LE(m_per_cluster, 38500); // Round up to allow platform variations. } TEST_P(ClusterMemoryTestRunner, MemoryLargeHostSizeWithStats) { diff --git a/test/mocks/router/mocks.h b/test/mocks/router/mocks.h index 890a6ccef893..f0c35d2e72fc 100644 --- a/test/mocks/router/mocks.h +++ b/test/mocks/router/mocks.h @@ -119,6 +119,10 @@ class TestRetryPolicy : public RetryPolicy { absl::optional baseInterval() const override { return base_interval_; } absl::optional maxInterval() const override { return max_interval_; } + std::chrono::milliseconds resetMaxInterval() const override { return reset_max_interval_; } + const std::vector& resetHeaders() const override { + return reset_headers_; + } std::chrono::milliseconds per_try_timeout_{0}; uint32_t num_retries_{}; @@ -129,6 +133,8 @@ class TestRetryPolicy : public RetryPolicy { std::vector retriable_request_headers_; absl::optional base_interval_{}; absl::optional max_interval_{}; + std::vector reset_headers_{}; + std::chrono::milliseconds reset_max_interval_{300000}; }; class MockInternalRedirectPolicy : public InternalRedirectPolicy { @@ -157,6 +163,8 @@ class MockRetryState : public RetryState { void expectResetRetry(); MOCK_METHOD(bool, enabled, ()); + MOCK_METHOD(absl::optional, parseResetInterval, + (const Http::ResponseHeaderMap& response_headers), (const)); MOCK_METHOD(RetryStatus, shouldRetryHeaders, (const Http::ResponseHeaderMap& response_headers, DoRetryCallback callback)); MOCK_METHOD(bool, wouldRetryFromHeaders, (const Http::ResponseHeaderMap& response_headers));