Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix metrics reporting period #14019

Merged
merged 2 commits into from
May 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion config/core/configmaps/observability.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ metadata:
app.kubernetes.io/component: observability
app.kubernetes.io/version: devel
annotations:
knative.dev/example-checksum: "fed4756e"
knative.dev/example-checksum: "54abd711"
data:
_example: |
################################
Expand Down Expand Up @@ -95,11 +95,22 @@ data:
# It supports either prometheus (the default) or opencensus.
metrics.backend-destination: prometheus

# metrics.reporting-period-seconds specifies the global metrics reporting period for control and data plane components.
# If a zero or negative value is passed the default reporting period is used (10 secs).
# If the attribute is not specified a default value is used per metrics backend.
# For the prometheus backend the default reporting period is 5s while for opencensus it is 60s.
metrics.reporting-period-seconds: "5"

# metrics.request-metrics-backend-destination specifies the request metrics
# destination. It enables queue proxy to send request metrics.
# Currently supported values: prometheus (the default), opencensus.
metrics.request-metrics-backend-destination: prometheus

# metrics.request-metrics-reporting-period-seconds specifies the request metrics reporting period in sec at queue proxy.
# If a zero or negative value is passed the default reporting period is used (10 secs).
# If the attribute is not specified, it is overridden by the value of metrics.reporting-period-seconds.
metrics.request-metrics-reporting-period-seconds: "5"

# profiling.enable indicates whether it is allowed to retrieve runtime profiling data from
# the pods via an HTTP server in the format expected by the pprof visualization tool. When
# enabled, the Knative Serving pods expose the profiling data on an alternate HTTP port 8008.
Expand Down
14 changes: 8 additions & 6 deletions pkg/queue/sharedmain/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,9 @@ type config struct {
ServingEnableProbeRequestLog bool `split_words:"true"` // optional

// Metrics configuration
ServingRequestMetricsBackend string `split_words:"true"` // optional
MetricsCollectorAddress string `split_words:"true"` // optional
ServingRequestMetricsBackend string `split_words:"true"` // optional
ServingRequestMetricsReportingPeriodSeconds int `split_words:"true"` // optional
MetricsCollectorAddress string `split_words:"true"` // optional

// Tracing configuration
TracingConfigDebug bool `split_words:"true"` // optional
Expand Down Expand Up @@ -366,7 +367,7 @@ func supportsMetrics(ctx context.Context, logger *zap.SugaredLogger, env config)
if env.ServingRequestMetricsBackend == "" {
return false
}
if err := setupMetricsExporter(ctx, logger, env.ServingRequestMetricsBackend, env.MetricsCollectorAddress); err != nil {
if err := setupMetricsExporter(ctx, logger, env.ServingRequestMetricsBackend, env.ServingRequestMetricsReportingPeriodSeconds, env.MetricsCollectorAddress); err != nil {
logger.Errorw("Error setting up request metrics exporter. Request metrics will be unavailable.", zap.Error(err))
return false
}
Expand Down Expand Up @@ -412,7 +413,7 @@ func requestAppMetricsHandler(logger *zap.SugaredLogger, currentHandler http.Han
return h
}

func setupMetricsExporter(ctx context.Context, logger *zap.SugaredLogger, backend string, collectorAddress string) error {
func setupMetricsExporter(ctx context.Context, logger *zap.SugaredLogger, backend string, reportingPeriod int, collectorAddress string) error {
// Set up OpenCensus exporter.
// NOTE: We use revision as the component instead of queue because queue is
// implementation specific. The current metrics are request relative. Using
Expand All @@ -424,8 +425,9 @@ func setupMetricsExporter(ctx context.Context, logger *zap.SugaredLogger, backen
Component: "revision",
PrometheusPort: networking.UserQueueMetricsPort,
ConfigMap: map[string]string{
metrics.BackendDestinationKey: backend,
"metrics.opencensus-address": collectorAddress,
metrics.BackendDestinationKey: backend,
"metrics.opencensus-address": collectorAddress,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to make constants public at the knative/pkg repo to avoid hardcoding values. Will fix in another PR.

"metrics.reporting-period-seconds": strconv.Itoa(reportingPeriod),
},
}
return metrics.UpdateExporter(ctx, ops, logger)
Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/revision/resources/deploy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ var (
}, {
Name: "SERVING_REQUEST_METRICS_BACKEND",
Value: "",
}, {
Name: "SERVING_REQUEST_METRICS_REPORTING_PERIOD_SECONDS",
Value: "0",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be a default value?

Copy link
Contributor Author

@skonto skonto May 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Zero or negative value is translated by Opencensus to the default value which is 10s (imho it is implicit, implementation specific and it is better to have an explicit value per case). Here we have the default of 5sec for Prometheus and 60sec for Opencensus (this is taken from knative/pkg constants). They can be changed in the future if we feel that some other values work better in practice.

}, {
Name: "TRACING_CONFIG_BACKEND",
Value: "",
Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/revision/resources/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,9 @@ func makeQueueContainer(rev *v1.Revision, cfg *config.Config) (*corev1.Container
}, {
Name: "SERVING_REQUEST_METRICS_BACKEND",
Value: cfg.Observability.RequestMetricsBackend,
}, {
Name: "SERVING_REQUEST_METRICS_REPORTING_PERIOD_SECONDS",
Value: strconv.Itoa(cfg.Observability.RequestMetricsReportingPeriodSeconds),
}, {
Name: "TRACING_CONFIG_BACKEND",
Value: string(cfg.Tracing.Backend),
Expand Down
55 changes: 28 additions & 27 deletions pkg/reconciler/revision/resources/queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -903,33 +903,34 @@ func TestTCPProbeGeneration(t *testing.T) {
}

var defaultEnv = map[string]string{
"CONTAINER_CONCURRENCY": "0",
"ENABLE_HTTP2_AUTO_DETECTION": "false",
"ENABLE_PROFILING": "false",
"METRICS_DOMAIN": metrics.Domain(),
"METRICS_COLLECTOR_ADDRESS": "",
"QUEUE_SERVING_PORT": "8012",
"QUEUE_SERVING_TLS_PORT": "8112",
"REVISION_TIMEOUT_SECONDS": "45",
"REVISION_RESPONSE_START_TIMEOUT_SECONDS": "0",
"REVISION_IDLE_TIMEOUT_SECONDS": "0",
"SERVING_CONFIGURATION": "",
"SERVING_ENABLE_PROBE_REQUEST_LOG": "false",
"SERVING_ENABLE_REQUEST_LOG": "false",
"SERVING_LOGGING_CONFIG": "",
"SERVING_LOGGING_LEVEL": "",
"SERVING_NAMESPACE": "foo",
"SERVING_REQUEST_LOG_TEMPLATE": "",
"SERVING_REQUEST_METRICS_BACKEND": "",
"SERVING_REVISION": "bar",
"SERVING_SERVICE": "",
"SYSTEM_NAMESPACE": system.Namespace(),
"TRACING_CONFIG_BACKEND": "",
"TRACING_CONFIG_DEBUG": "false",
"TRACING_CONFIG_SAMPLE_RATE": "0",
"TRACING_CONFIG_ZIPKIN_ENDPOINT": "",
"USER_PORT": strconv.Itoa(v1.DefaultUserPort),
"ROOT_CA": "",
"CONTAINER_CONCURRENCY": "0",
"ENABLE_HTTP2_AUTO_DETECTION": "false",
"ENABLE_PROFILING": "false",
"METRICS_DOMAIN": metrics.Domain(),
"METRICS_COLLECTOR_ADDRESS": "",
"QUEUE_SERVING_PORT": "8012",
"QUEUE_SERVING_TLS_PORT": "8112",
"REVISION_TIMEOUT_SECONDS": "45",
"REVISION_RESPONSE_START_TIMEOUT_SECONDS": "0",
"REVISION_IDLE_TIMEOUT_SECONDS": "0",
"SERVING_CONFIGURATION": "",
"SERVING_ENABLE_PROBE_REQUEST_LOG": "false",
"SERVING_ENABLE_REQUEST_LOG": "false",
"SERVING_LOGGING_CONFIG": "",
"SERVING_LOGGING_LEVEL": "",
"SERVING_NAMESPACE": "foo",
"SERVING_REQUEST_LOG_TEMPLATE": "",
"SERVING_REQUEST_METRICS_BACKEND": "",
"SERVING_REQUEST_METRICS_REPORTING_PERIOD_SECONDS": "0",
"SERVING_REVISION": "bar",
"SERVING_SERVICE": "",
"SYSTEM_NAMESPACE": system.Namespace(),
"TRACING_CONFIG_BACKEND": "",
"TRACING_CONFIG_DEBUG": "false",
"TRACING_CONFIG_SAMPLE_RATE": "0",
"TRACING_CONFIG_ZIPKIN_ENDPOINT": "",
"USER_PORT": strconv.Itoa(v1.DefaultUserPort),
"ROOT_CA": "",
}

func probeJSON(container *corev1.Container) string {
Expand Down