Skip to content

Commit

Permalink
Assorted remote metrics fixes. (Cherry-pick of #15914) (#15976)
Browse files Browse the repository at this point in the history
* Fix the `RemoteStoreReadBlobTimeMicros` metric, which was not including the actual fetch time.
* Add the relevant unit suffix to time-based metrics which don't have one.
* Move `RemoteStoreBlobBytes{Up,Down}loaded` to observation metrics, in order to record histograms.
    * Additionally, report a `sum` value for histograms.
* Simplify the "time to first byte" calculation by using an `Option<Instant>` for the start time.

[ci skip-build-wheels]
  • Loading branch information
stuhood authored Jun 28, 2022
1 parent cf0db49 commit d21b332
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 24 deletions.
1 change: 1 addition & 0 deletions src/python/pants/goal/stats_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def __call__(
f" mean: {histogram.get_mean_value():.3f}\n"
f" std dev: {histogram.get_stddev():.3f}\n"
f" total observations: {histogram.total_count}\n"
f" sum: {int(histogram.get_mean_value() * histogram.total_count)}\n"
f"{percentile_to_vals}"
)

Expand Down
25 changes: 10 additions & 15 deletions src/rust/engine/fs/store/src/remote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use remexec::{
ServerCapabilities,
};
use tonic::{Code, Request, Status};
use workunit_store::{in_workunit, Metric, ObservationMetric};
use workunit_store::{in_workunit, ObservationMetric};

use crate::StoreError;

Expand Down Expand Up @@ -324,7 +324,7 @@ impl ByteStore {
|workunit| async move {
let result = result_future.await;
if result.is_ok() {
workunit.increment_counter(Metric::RemoteStoreBlobBytesUploaded, len as u64);
workunit.record_observation(ObservationMetric::RemoteStoreBlobBytesUploaded, len as u64);
}
result
},
Expand Down Expand Up @@ -356,7 +356,7 @@ impl ByteStore {
let mut client = self.byte_stream_client.as_ref().clone();

let result_future = async move {
let start_time = Instant::now();
let mut start_opt = Some(Instant::now());

let stream_result = client
.read({
Expand All @@ -380,22 +380,17 @@ impl ByteStore {
};

let read_result_closure = async {
let mut got_first_response = false;
let mut buf = BytesMut::with_capacity(digest.size_bytes);
while let Some(response) = stream.next().await {
// Record the observed time to receive the first response for this read.
if !got_first_response {
got_first_response = true;

if let Some(start) = start_opt.take() {
if let Some(workunit_store_handle) = workunit_store::get_workunit_store_handle() {
let timing: Result<u64, _> = Instant::now()
.duration_since(start_time)
.as_micros()
.try_into();
let timing: Result<u64, _> =
Instant::now().duration_since(start).as_micros().try_into();
if let Ok(obs) = timing {
workunit_store_handle
.store
.record_observation(ObservationMetric::RemoteStoreTimeToFirstByte, obs);
.record_observation(ObservationMetric::RemoteStoreTimeToFirstByteMicros, obs);
}
}
}
Expand Down Expand Up @@ -429,14 +424,14 @@ impl ByteStore {
Level::Trace,
desc = Some(workunit_desc),
|workunit| async move {
let result = result_future.await;
workunit.record_observation(
ObservationMetric::RemoteStoreReadBlobTimeMicros,
start.elapsed().as_micros() as u64,
);
let result = result_future.await;
if result.is_ok() {
workunit.increment_counter(
Metric::RemoteStoreBlobBytesDownloaded,
workunit.record_observation(
ObservationMetric::RemoteStoreBlobBytesDownloaded,
digest.size_bytes as u64,
);
}
Expand Down
7 changes: 4 additions & 3 deletions src/rust/engine/process_execution/src/remote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,10 @@ impl CommandRunner {
.as_micros()
.try_into();
if let Ok(obs) = timing {
context
.workunit_store
.record_observation(ObservationMetric::RemoteExecutionRPCFirstResponseTime, obs);
context.workunit_store.record_observation(
ObservationMetric::RemoteExecutionRPCFirstResponseTimeMicros,
obs,
);
}
}

Expand Down
12 changes: 6 additions & 6 deletions src/rust/engine/workunit_store/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,6 @@ pub enum Metric {
RemoteExecutionSuccess,
RemoteExecutionTimeouts,
RemoteStoreMissingDigest,
/// Total number of bytes of blobs downloaded from a remote CAS.
RemoteStoreBlobBytesDownloaded,
/// Total number of bytes of blobs uploaded to a remote CAS.
RemoteStoreBlobBytesUploaded,
/// Number of times that we backtracked due to missing digests.
BacktrackAttempts,
}
Expand All @@ -71,9 +67,13 @@ pub enum ObservationMetric {
LocalStoreReadBlobSize,
LocalStoreReadBlobTimeMicros,
RemoteProcessTimeRunMs,
RemoteExecutionRPCFirstResponseTime,
RemoteStoreTimeToFirstByte,
RemoteExecutionRPCFirstResponseTimeMicros,
RemoteStoreTimeToFirstByteMicros,
RemoteStoreReadBlobTimeMicros,
/// Total number of bytes of blobs downloaded from a remote CAS.
RemoteStoreBlobBytesDownloaded,
/// Total number of bytes of blobs uploaded to a remote CAS.
RemoteStoreBlobBytesUploaded,
/// The time saved (in milliseconds) thanks to a local cache hit instead of running the process
/// directly.
LocalCacheTimeSavedMs,
Expand Down

0 comments on commit d21b332

Please sign in to comment.