Skip to content

Commit

Permalink
[8.14](backport #4961) Introduce agent.monitoring.metrics_period (#5003)
Browse files Browse the repository at this point in the history
* Introduce agent.monitoring.metrics_period (#4961)

* feat: introduce agent.monitoring.metrics_period

* doc: add changelog/fragments

* fix: TestDiagnosticLocalConfig unit-test

* doc: reword summary in changelog fragment

(cherry picked from commit 6a45256)

# Conflicts:
#	_meta/config/common.p2.yml.tmpl
#	_meta/config/common.reference.p2.yml.tmpl
#	_meta/config/elastic-agent.docker.yml.tmpl
#	elastic-agent.docker.yml
#	elastic-agent.reference.yml
#	elastic-agent.yml
#	internal/pkg/agent/application/monitoring/v1_monitor.go
#	internal/pkg/agent/application/monitoring/v1_monitor_test.go

* fix conflicts

---------

Co-authored-by: Panos Koutsovasilis <panos.koutsovasilis@elastic.co>
  • Loading branch information
mergify[bot] and pkoutsovasilis committed Jun 26, 2024
1 parent dba6a86 commit fce8a9c
Show file tree
Hide file tree
Showing 11 changed files with 196 additions and 68 deletions.
2 changes: 2 additions & 0 deletions _meta/config/common.p2.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ inputs:
# logs: true
# # enables metrics monitoring
# metrics: true
# # metrics_period defines how frequently we should sample monitoring metrics. Default is 60 seconds.
# metrics_period: 60s
# # exposes /debug/pprof/ endpoints
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
# pprof.enabled: false
Expand Down
2 changes: 2 additions & 0 deletions _meta/config/common.reference.p2.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ inputs:
# logs: false
# # enables metrics monitoring
# metrics: false
# # metrics_period defines how frequently we should sample monitoring metrics. Default is 60 seconds.
# metrics_period: 60s
# # exposes /debug/pprof/ endpoints
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
# pprof.enabled: false
Expand Down
10 changes: 6 additions & 4 deletions _meta/config/elastic-agent.docker.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@ inputs:
data_stream.namespace: default
use_output: default
streams:
- metricsets:
- metricsets:
- cpu
# Dataset name must conform to the naming conventions for Elasticsearch indices, cannot contain dashes (-), and cannot exceed 100 bytes
data_stream.dataset: system.cpu
- metricsets:
- metricsets:
- memory
data_stream.dataset: system.memory
- metricsets:
- metricsets:
- network
data_stream.dataset: system.network
- metricsets:
- metricsets:
- filesystem
data_stream.dataset: system.filesystem

Expand Down Expand Up @@ -112,6 +112,8 @@ inputs:
# logs: false
# # enables metrics monitoring
# metrics: false
# # metrics_period defines how frequently we should sample monitoring metrics. Default is 60 seconds.
# metrics_period: 60s
# # exposes /debug/pprof/ endpoints
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
# pprof.enabled: false
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: feature

# Change summary; an 80ish-character description of the change.
summary: Allow configuring `agent.monitoring.metrics_period`.

# Long description; in case the summary is not enough to describe the change
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/4961

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
10 changes: 6 additions & 4 deletions elastic-agent.docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@ inputs:
data_stream.namespace: default
use_output: default
streams:
- metricsets:
- metricsets:
- cpu
# Dataset name must conform to the naming conventions for Elasticsearch indices, cannot contain dashes (-), and cannot exceed 100 bytes
data_stream.dataset: system.cpu
- metricsets:
- metricsets:
- memory
data_stream.dataset: system.memory
- metricsets:
- metricsets:
- network
data_stream.dataset: system.network
- metricsets:
- metricsets:
- filesystem
data_stream.dataset: system.filesystem

Expand Down Expand Up @@ -112,6 +112,8 @@ inputs:
# logs: false
# # enables metrics monitoring
# metrics: false
# # metrics_period defines how frequently we should sample monitoring metrics. Default is 60 seconds.
# metrics_period: 60s
# # exposes /debug/pprof/ endpoints
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
# pprof.enabled: false
Expand Down
2 changes: 2 additions & 0 deletions elastic-agent.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ inputs:
# logs: false
# # enables metrics monitoring
# metrics: false
# # metrics_period defines how frequently we should sample monitoring metrics. Default is 60 seconds.
# metrics_period: 60s
# # exposes /debug/pprof/ endpoints
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
# pprof.enabled: false
Expand Down
2 changes: 2 additions & 0 deletions elastic-agent.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ inputs:
# logs: true
# # enables metrics monitoring
# metrics: true
# # metrics_period defines how frequently we should sample monitoring metrics. Default is 60 seconds.
# metrics_period: 60s
# # exposes /debug/pprof/ endpoints
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
# pprof.enabled: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ agent:
http: null
logs: false
metrics: false
metrics_period: ""
namespace: ""
pprof: null
traces: true
Expand Down
20 changes: 16 additions & 4 deletions internal/pkg/agent/application/monitoring/v1_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const (
agentKey = "agent"
monitoringKey = "monitoring"
useOutputKey = "use_output"
monitoringMetricsPeriodKey = "metrics_period"
monitoringOutput = "monitoring"
defaultMonitoringNamespace = "default"
agentName = "elastic-agent"
Expand All @@ -58,7 +59,7 @@ const (

// metricset execution period used for the monitoring metrics inputs
// we set this to 60s to reduce the load/data volume on the monitoring cluster
metricsCollectionInterval = 60 * time.Second
defaultMetricsCollectionInterval = 60 * time.Second
)

var (
Expand Down Expand Up @@ -122,6 +123,7 @@ func (b *BeatsMonitor) MonitoringConfig(
cfg := make(map[string]interface{})

monitoringOutputName := defaultOutputName
metricsCollectionIntervalString := b.config.C.MetricsPeriod
if agentCfg, found := policy[agentKey]; found {
// The agent section is required for feature flags
cfg[agentKey] = agentCfg
Expand All @@ -136,6 +138,12 @@ func (b *BeatsMonitor) MonitoringConfig(
monitoringOutputName = useStr
}
}

if metricsPeriod, found := monitoringMap[monitoringMetricsPeriodKey]; found {
if metricsPeriodStr, ok := metricsPeriod.(string); ok {
metricsCollectionIntervalString = metricsPeriodStr
}
}
}
}
}
Expand All @@ -158,7 +166,7 @@ func (b *BeatsMonitor) MonitoringConfig(
}

if b.config.C.MonitorMetrics {
if err := b.injectMetricsInput(cfg, componentIDToBinary, monitoringOutput, components); err != nil {
if err := b.injectMetricsInput(cfg, componentIDToBinary, components, metricsCollectionIntervalString); err != nil {
return nil, errors.New(err, "failed to inject monitoring output")
}
}
Expand Down Expand Up @@ -534,8 +542,12 @@ func (b *BeatsMonitor) monitoringNamespace() string {
return defaultMonitoringNamespace
}

func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentIDToBinary map[string]string, monitoringOutputName string, componentList []component.Component) error {
metricsCollectionIntervalString := metricsCollectionInterval.String()
// injectMetricsInput injects monitoring config for agent monitoring into the `cfg` object.
func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentIDToBinary map[string]string, componentList []component.Component, metricsCollectionIntervalString string) error {
if metricsCollectionIntervalString == "" {
metricsCollectionIntervalString = defaultMetricsCollectionInterval.String()
}

monitoringNamespace := b.monitoringNamespace()
fixedAgentName := strings.ReplaceAll(agentName, "-", "_")
beatsStreams := make([]interface{}, 0, len(componentIDToBinary))
Expand Down
Loading

0 comments on commit fce8a9c

Please sign in to comment.