From ee18e58c58595295dd09b2a6b6fb976e39453345 Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Tue, 10 Aug 2021 16:21:23 -0300 Subject: [PATCH 1/5] monitor: Create realtime ratio histogram metric This will allow for simpler queries to be performed to evaluate our transcoding performance. Using Prometheus' histogram_quantile() [1] function, we can actually plot and alert on a concrete p99 value for the transcoding realtime ratio. [1] https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile --- monitor/census.go | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/monitor/census.go b/monitor/census.go index dccde33f1e..f8d0ad4e9e 100644 --- a/monitor/census.go +++ b/monitor/census.go @@ -126,6 +126,7 @@ type ( mSourceSegmentDuration *stats.Float64Measure mHTTPClientTimeout1 *stats.Int64Measure mHTTPClientTimeout2 *stats.Int64Measure + mRealtimeRatio *stats.Float64Measure mRealtime3x *stats.Int64Measure mRealtime2x *stats.Int64Measure mRealtime1x *stats.Int64Measure @@ -229,6 +230,7 @@ func InitCensus(nodeType NodeType, version string) { } census.mHTTPClientTimeout1 = stats.Int64("http_client_timeout_1", "Number of times HTTP connection was dropped before transcoding complete", "tot") census.mHTTPClientTimeout2 = stats.Int64("http_client_timeout_2", "Number of times HTTP connection was dropped before transcoded segments was sent back to client", "tot") + census.mRealtimeRatio = stats.Float64("http_client_segment_transcoded_realtime_ratio", "Ratio of source segment duration / transcode time as measured on broadcaster API", "rat") census.mRealtime3x = stats.Int64("http_client_segment_transcoded_realtime_3x", "Number of segment transcoded 3x faster than realtime", "tot") census.mRealtime2x = stats.Int64("http_client_segment_transcoded_realtime_2x", "Number of segment transcoded 2x faster than realtime", "tot") census.mRealtime1x = stats.Int64("http_client_segment_transcoded_realtime_1x", "Number of segment transcoded 1x faster than realtime", "tot") @@ -378,6 +380,13 @@ func InitCensus(nodeType NodeType, version string) { TagKeys: baseTags, Aggregation: view.Count(), }, + { + Name: "http_client_segment_transcoded_realtime_ratio", + Measure: census.mRealtimeRatio, + Description: "Ratio of source segment duration / transcode time as measured on broadcaster API", + TagKeys: baseTags, + Aggregation: view.Distribution(0.5, 1, 2, 3, 5, 10), + }, { Name: "http_client_segment_transcoded_realtime_3x", Measure: census.mRealtime3x, @@ -1118,19 +1127,21 @@ func SegmentFullyProcessed(segDur, processDur float64) { if processDur == 0 { return } - xRealtime := processDur / segDur + ratio := segDur / processDur + var bucketM stats.Measurement switch { - case xRealtime < 1.0/3.0: - stats.Record(census.ctx, census.mRealtime3x.M(1)) - case xRealtime < 1.0/2.0: - stats.Record(census.ctx, census.mRealtime2x.M(1)) - case xRealtime < 1.0: - stats.Record(census.ctx, census.mRealtime1x.M(1)) - case xRealtime < 2.0: - stats.Record(census.ctx, census.mRealtimeHalf.M(1)) + case ratio > 3: + bucketM = census.mRealtime3x.M(1) + case ratio > 2: + bucketM = census.mRealtime2x.M(1) + case ratio > 1: + bucketM = census.mRealtime1x.M(1) + case ratio > 0.5: + bucketM = census.mRealtimeHalf.M(1) default: - stats.Record(census.ctx, census.mRealtimeSlow.M(1)) + bucketM = census.mRealtimeSlow.M(1) } + stats.Record(census.ctx, bucketM, census.mRealtimeRatio.M(ratio)) } func AuthWebhookFinished(dur time.Duration) { From 7f3b0eb2de1058148ce0436fa4cdc85c47e6735b Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Tue, 10 Aug 2021 16:49:26 -0300 Subject: [PATCH 2/5] monitor: Add some additional histogram buckets --- monitor/census.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitor/census.go b/monitor/census.go index f8d0ad4e9e..7d5483890a 100644 --- a/monitor/census.go +++ b/monitor/census.go @@ -385,7 +385,7 @@ func InitCensus(nodeType NodeType, version string) { Measure: census.mRealtimeRatio, Description: "Ratio of source segment duration / transcode time as measured on broadcaster API", TagKeys: baseTags, - Aggregation: view.Distribution(0.5, 1, 2, 3, 5, 10), + Aggregation: view.Distribution(0.5, 1, 2, 3, 5, 10, 50, 100), }, { Name: "http_client_segment_transcoded_realtime_3x", From adbed8a53f286271444fa8f5616abf580f3b5019 Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Mon, 23 Aug 2021 18:50:40 -0300 Subject: [PATCH 3/5] Update monitor/census.go Co-authored-by: Yondon Fu --- monitor/census.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitor/census.go b/monitor/census.go index 7d5483890a..e82f2245f2 100644 --- a/monitor/census.go +++ b/monitor/census.go @@ -383,7 +383,7 @@ func InitCensus(nodeType NodeType, version string) { { Name: "http_client_segment_transcoded_realtime_ratio", Measure: census.mRealtimeRatio, - Description: "Ratio of source segment duration / transcode time as measured on broadcaster API", + Description: "Ratio of source segment duration / transcode time as measured on HTTP client", TagKeys: baseTags, Aggregation: view.Distribution(0.5, 1, 2, 3, 5, 10, 50, 100), }, From e2d297538dd8302d3c7a4924da286f35c8c5100f Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Mon, 23 Aug 2021 18:50:46 -0300 Subject: [PATCH 4/5] Update monitor/census.go Co-authored-by: Yondon Fu --- monitor/census.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitor/census.go b/monitor/census.go index e82f2245f2..0a6e7096e8 100644 --- a/monitor/census.go +++ b/monitor/census.go @@ -230,7 +230,7 @@ func InitCensus(nodeType NodeType, version string) { } census.mHTTPClientTimeout1 = stats.Int64("http_client_timeout_1", "Number of times HTTP connection was dropped before transcoding complete", "tot") census.mHTTPClientTimeout2 = stats.Int64("http_client_timeout_2", "Number of times HTTP connection was dropped before transcoded segments was sent back to client", "tot") - census.mRealtimeRatio = stats.Float64("http_client_segment_transcoded_realtime_ratio", "Ratio of source segment duration / transcode time as measured on broadcaster API", "rat") + census.mRealtimeRatio = stats.Float64("http_client_segment_transcoded_realtime_ratio", "Ratio of source segment duration / transcode time as measured on HTTP client", "rat") census.mRealtime3x = stats.Int64("http_client_segment_transcoded_realtime_3x", "Number of segment transcoded 3x faster than realtime", "tot") census.mRealtime2x = stats.Int64("http_client_segment_transcoded_realtime_2x", "Number of segment transcoded 2x faster than realtime", "tot") census.mRealtime1x = stats.Int64("http_client_segment_transcoded_realtime_1x", "Number of segment transcoded 1x faster than realtime", "tot") From 8913d605eeb8c0ba9a2438eba5002a08b0709f13 Mon Sep 17 00:00:00 2001 From: Victor Elias Date: Tue, 4 Jan 2022 12:47:02 -0300 Subject: [PATCH 5/5] Update CHANGELOG_PENDING --- CHANGELOG_PENDING.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index e18ed1cf5f..1503d85a74 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -24,6 +24,8 @@ #### Broadcaster +- \#1989 Record realtime ratio metric as a histogram (@victorges) + #### Orchestrator #### Transcoder