Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1698,17 +1698,21 @@

<property>
<name>fs.s3a.retry.throttle.limit</name>
<value>${fs.s3a.attempts.maximum}</value>
<value>20</value>
<description>
Number of times to retry any throttled request.
</description>
</property>

<property>
<name>fs.s3a.retry.throttle.interval</name>
<value>1000ms</value>
<value>5000ms</value>
<description>
Interval between retry attempts on throttled requests.
Initial interval between retry attempts on throttled requests, +/- 50%, chosen at random.
i.e. for an initial value of 3000ms, the initial delay would be in the range 1500ms to 4500ms.
Backoffs are exponential; again randomness is used to avoid the thundering herd problem.
Given that throttling in S3 is per-second, very short delays will not initially spread
out work and so continue to create the problem.
</description>
</property>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -733,8 +733,7 @@ private Constants() {
/**
* Default throttled retry limit: {@value}.
*/
public static final int RETRY_THROTTLE_LIMIT_DEFAULT =
DEFAULT_MAX_ERROR_RETRIES;
public static final int RETRY_THROTTLE_LIMIT_DEFAULT = 20;

/**
* Interval between retry attempts on throttled requests: {@value}.
Expand All @@ -745,7 +744,7 @@ private Constants() {
/**
* Default throttled retry interval: {@value}.
*/
public static final String RETRY_THROTTLE_INTERVAL_DEFAULT = "500ms";
public static final String RETRY_THROTTLE_INTERVAL_DEFAULT = "5000ms";

/**
* Should etags be exposed as checksums?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ public AmazonS3 createS3Client(URI name,
Configuration conf = getConf();
final ClientConfiguration awsConf = S3AUtils
.createAwsConf(getConf(), bucket, Constants.AWS_SERVICE_IDENTIFIER_S3);

// throttling is explicitly disabled on the S3 client so that
// all failures are collected
awsConf.setUseThrottleRetries(false);

if (!StringUtils.isEmpty(userAgentSuffix)) {
awsConf.setUserAgentSuffix(userAgentSuffix);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1647,10 +1647,11 @@ protected void incrementGauge(Statistic statistic, long count) {
* @param ex exception.
*/
public void operationRetried(Exception ex) {
  if (isThrottleException(ex)) {
    // Throttle events are recorded exactly once, via operationThrottled(),
    // which updates both the STORE_IO_THROTTLED counter and the
    // STORE_IO_THROTTLE_RATE quantile. Incrementing STORE_IO_THROTTLED
    // here as well would double-count the event.
    operationThrottled(false);
  } else {
    // Any other retried failure is counted as an ignored error.
    incrementStatistic(IGNORED_ERRORS);
  }
}

/**
Expand Down Expand Up @@ -1683,11 +1684,28 @@ public void operationRetried(
public void metastoreOperationRetried(Exception ex,
    int retries,
    boolean idempotent) {
  // Every metastore retry is counted, whatever caused it.
  incrementStatistic(S3GUARD_METADATASTORE_RETRY);
  if (isThrottleException(ex)) {
    // Attribute the throttling to the metastore only. Delegating to
    // operationRetried(ex) as well would also charge the event to the
    // S3 IO throttle statistics.
    operationThrottled(true);
  } else {
    // Non-throttle failures are counted once as ignored errors.
    incrementStatistic(IGNORED_ERRORS);
  }
}

/**
 * Note that an operation was throttled -this will update
 * specific counters/metrics.
 * Exactly one counter increment and one quantile sample are recorded
 * per call, against either the S3Guard metastore statistics or the
 * S3 store IO statistics.
 * @param metastore was the throttling observed in the S3Guard metastore?
 */
private void operationThrottled(boolean metastore) {
  // The label must match the flag: metastore == true means the
  // S3Guard (DynamoDB) store was throttled, otherwise it was S3 itself.
  LOG.debug("Request throttled on {}", metastore ? "DynamoDB" : "S3");
  if (metastore) {
    incrementStatistic(S3GUARD_METADATASTORE_THROTTLED);
    instrumentation.addValueToQuantiles(S3GUARD_METADATASTORE_THROTTLE_RATE,
        1);
  } else {
    incrementStatistic(STORE_IO_THROTTLED);
    instrumentation.addValueToQuantiles(STORE_IO_THROTTLE_RATE, 1);
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource {
private final MutableCounterLong ignoredErrors;
private final MutableQuantiles putLatencyQuantile;
private final MutableQuantiles throttleRateQuantile;
private final MutableQuantiles s3GuardThrottleRateQuantile;
private final MutableCounterLong numberOfFilesCreated;
private final MutableCounterLong numberOfFilesCopied;
private final MutableCounterLong bytesOfFilesCopied;
Expand Down Expand Up @@ -248,7 +249,9 @@ public S3AInstrumentation(URI name) {
int interval = 1;
putLatencyQuantile = quantiles(S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
"ops", "latency", interval);
throttleRateQuantile = quantiles(S3GUARD_METADATASTORE_THROTTLE_RATE,
s3GuardThrottleRateQuantile = quantiles(S3GUARD_METADATASTORE_THROTTLE_RATE,
"events", "frequency (Hz)", interval);
throttleRateQuantile = quantiles(STORE_IO_THROTTLE_RATE,
"events", "frequency (Hz)", interval);

registerAsMetricsSource(name);
Expand Down Expand Up @@ -617,6 +620,7 @@ public void close() {
// task in a shared thread pool.
putLatencyQuantile.stop();
throttleRateQuantile.stop();
s3GuardThrottleRateQuantile.stop();
metricsSystem.unregisterSource(metricsSourceName);
int activeSources = --metricsSourceActiveCounter;
if (activeSources == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ public enum Statistic {
"S3Guard metadata store authoritative directories updated from S3"),

STORE_IO_THROTTLED("store_io_throttled", "Requests throttled and retried"),
STORE_IO_THROTTLE_RATE("store_io_throttle_rate",
"Rate of S3 request throttling"),

DELEGATION_TOKENS_ISSUED("delegation_tokens_issued",
"Number of delegation tokens issued");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -439,8 +439,8 @@ private void dumpEntry(CsvFile csv, DDBPathMetadata md) {
private Pair<Long, Long> scanMetastore(CsvFile csv) {
S3GuardTableAccess tableAccess = new S3GuardTableAccess(getStore());
ExpressionSpecBuilder builder = new ExpressionSpecBuilder();
Iterable<DDBPathMetadata> results = tableAccess.scanMetadata(
builder);
Iterable<DDBPathMetadata> results =
getStore().wrapWithRetries(tableAccess.scanMetadata(builder));
long live = 0;
long tombstone = 0;
for (DDBPathMetadata md : results) {
Expand Down
Loading