diff --git a/.chloggen/k8sclusterreceiver-add-k8s-node-condition.yaml b/.chloggen/k8sclusterreceiver-add-k8s-node-condition.yaml new file mode 100755 index 000000000000..1ecdd6e5c6ce --- /dev/null +++ b/.chloggen/k8sclusterreceiver-add-k8s-node-condition.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: "enhancement" + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: k8sclusterreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "add optional k8s.node.condition metric (disabled by default)" + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [27617] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/receiver/k8sclusterreceiver/documentation.md b/receiver/k8sclusterreceiver/documentation.md index e2fffca2edae..5cbe995baaa0 100644 --- a/receiver/k8sclusterreceiver/documentation.md +++ b/receiver/k8sclusterreceiver/documentation.md @@ -396,6 +396,20 @@ metrics: enabled: true ``` +### k8s.node.condition + +The condition of a particular Node. 
+ +| Unit | Metric Type | Value Type | +| ---- | ----------- | ---------- | +| {condition} | Gauge | Int | + +#### Attributes + +| Name | Description | Values | +| ---- | ----------- | ------ | +| condition | the name of Kubernetes Node condition. Example: Ready, Memory, PID, DiskPressure | Any Str | + ### k8s.pod.status_reason Current status reason of the pod (1 - Evicted, 2 - NodeAffinity, 3 - NodeLost, 4 - Shutdown, 5 - UnexpectedAdmissionError, 6 - Unknown) diff --git a/receiver/k8sclusterreceiver/internal/collection/collector.go b/receiver/k8sclusterreceiver/internal/collection/collector.go index c32a2fa57042..4269d4f112ae 100644 --- a/receiver/k8sclusterreceiver/internal/collection/collector.go +++ b/receiver/k8sclusterreceiver/internal/collection/collector.go @@ -69,6 +69,7 @@ func (dc *DataCollector) CollectMetricData(currentTime time.Time) pmetric.Metric if crm.ScopeMetrics().Len() > 0 { crm.MoveTo(customRMs.AppendEmpty()) } + node.RecordMetrics(dc.metricsBuilder, o.(*corev1.Node), ts) }) dc.metadataStore.ForEach(gvk.Namespace, func(o any) { namespace.RecordMetrics(dc.metricsBuilder, o.(*corev1.Namespace), ts) diff --git a/receiver/k8sclusterreceiver/internal/metadata/generated_config.go b/receiver/k8sclusterreceiver/internal/metadata/generated_config.go index 784d34de376b..163b8b4bbddf 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/generated_config.go +++ b/receiver/k8sclusterreceiver/internal/metadata/generated_config.go @@ -52,6 +52,7 @@ type MetricsConfig struct { K8sJobMaxParallelPods MetricConfig `mapstructure:"k8s.job.max_parallel_pods"` K8sJobSuccessfulPods MetricConfig `mapstructure:"k8s.job.successful_pods"` K8sNamespacePhase MetricConfig `mapstructure:"k8s.namespace.phase"` + K8sNodeCondition MetricConfig `mapstructure:"k8s.node.condition"` K8sPodPhase MetricConfig `mapstructure:"k8s.pod.phase"` K8sPodStatusReason MetricConfig `mapstructure:"k8s.pod.status_reason"` K8sReplicasetAvailable MetricConfig 
`mapstructure:"k8s.replicaset.available"` @@ -153,6 +154,9 @@ func DefaultMetricsConfig() MetricsConfig { K8sNamespacePhase: MetricConfig{ Enabled: true, }, + K8sNodeCondition: MetricConfig{ + Enabled: false, + }, K8sPodPhase: MetricConfig{ Enabled: true, }, diff --git a/receiver/k8sclusterreceiver/internal/metadata/generated_config_test.go b/receiver/k8sclusterreceiver/internal/metadata/generated_config_test.go index 1bccc30933af..c6178f76878c 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/generated_config_test.go +++ b/receiver/k8sclusterreceiver/internal/metadata/generated_config_test.go @@ -53,6 +53,7 @@ func TestMetricsBuilderConfig(t *testing.T) { K8sJobMaxParallelPods: MetricConfig{Enabled: true}, K8sJobSuccessfulPods: MetricConfig{Enabled: true}, K8sNamespacePhase: MetricConfig{Enabled: true}, + K8sNodeCondition: MetricConfig{Enabled: true}, K8sPodPhase: MetricConfig{Enabled: true}, K8sPodStatusReason: MetricConfig{Enabled: true}, K8sReplicasetAvailable: MetricConfig{Enabled: true}, @@ -138,6 +139,7 @@ func TestMetricsBuilderConfig(t *testing.T) { K8sJobMaxParallelPods: MetricConfig{Enabled: false}, K8sJobSuccessfulPods: MetricConfig{Enabled: false}, K8sNamespacePhase: MetricConfig{Enabled: false}, + K8sNodeCondition: MetricConfig{Enabled: false}, K8sPodPhase: MetricConfig{Enabled: false}, K8sPodStatusReason: MetricConfig{Enabled: false}, K8sReplicasetAvailable: MetricConfig{Enabled: false}, diff --git a/receiver/k8sclusterreceiver/internal/metadata/generated_metrics.go b/receiver/k8sclusterreceiver/internal/metadata/generated_metrics.go index 65f97a8fec2f..3d1bf97bcc37 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/generated_metrics.go +++ b/receiver/k8sclusterreceiver/internal/metadata/generated_metrics.go @@ -1335,6 +1335,57 @@ func newMetricK8sNamespacePhase(cfg MetricConfig) metricK8sNamespacePhase { return m } +type metricK8sNodeCondition struct { + data pmetric.Metric // data buffer for generated metric. 
+ config MetricConfig // metric config provided by user. + capacity int // max observed number of data points added to the metric. +} + +// init fills k8s.node.condition metric with initial data. +func (m *metricK8sNodeCondition) init() { + m.data.SetName("k8s.node.condition") + m.data.SetDescription("The condition of a particular Node.") + m.data.SetUnit("{condition}") + m.data.SetEmptyGauge() + m.data.Gauge().DataPoints().EnsureCapacity(m.capacity) +} + +func (m *metricK8sNodeCondition) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64, conditionAttributeValue string) { + if !m.config.Enabled { + return + } + dp := m.data.Gauge().DataPoints().AppendEmpty() + dp.SetStartTimestamp(start) + dp.SetTimestamp(ts) + dp.SetIntValue(val) + dp.Attributes().PutStr("condition", conditionAttributeValue) +} + +// updateCapacity saves max length of data point slices that will be used for the slice capacity. +func (m *metricK8sNodeCondition) updateCapacity() { + if m.data.Gauge().DataPoints().Len() > m.capacity { + m.capacity = m.data.Gauge().DataPoints().Len() + } +} + +// emit appends recorded metric data to a metrics slice and prepares it for recording another set of data points. +func (m *metricK8sNodeCondition) emit(metrics pmetric.MetricSlice) { + if m.config.Enabled && m.data.Gauge().DataPoints().Len() > 0 { + m.updateCapacity() + m.data.MoveTo(metrics.AppendEmpty()) + m.init() + } +} + +func newMetricK8sNodeCondition(cfg MetricConfig) metricK8sNodeCondition { + m := metricK8sNodeCondition{config: cfg} + if cfg.Enabled { + m.data = pmetric.NewMetric() + m.init() + } + return m +} + type metricK8sPodPhase struct { data pmetric.Metric // data buffer for generated metric. config MetricConfig // metric config provided by user. 
@@ -2168,6 +2219,7 @@ type MetricsBuilder struct { metricK8sJobMaxParallelPods metricK8sJobMaxParallelPods metricK8sJobSuccessfulPods metricK8sJobSuccessfulPods metricK8sNamespacePhase metricK8sNamespacePhase + metricK8sNodeCondition metricK8sNodeCondition metricK8sPodPhase metricK8sPodPhase metricK8sPodStatusReason metricK8sPodStatusReason metricK8sReplicasetAvailable metricK8sReplicasetAvailable @@ -2229,6 +2281,7 @@ func NewMetricsBuilder(mbc MetricsBuilderConfig, settings receiver.CreateSetting metricK8sJobMaxParallelPods: newMetricK8sJobMaxParallelPods(mbc.Metrics.K8sJobMaxParallelPods), metricK8sJobSuccessfulPods: newMetricK8sJobSuccessfulPods(mbc.Metrics.K8sJobSuccessfulPods), metricK8sNamespacePhase: newMetricK8sNamespacePhase(mbc.Metrics.K8sNamespacePhase), + metricK8sNodeCondition: newMetricK8sNodeCondition(mbc.Metrics.K8sNodeCondition), metricK8sPodPhase: newMetricK8sPodPhase(mbc.Metrics.K8sPodPhase), metricK8sPodStatusReason: newMetricK8sPodStatusReason(mbc.Metrics.K8sPodStatusReason), metricK8sReplicasetAvailable: newMetricK8sReplicasetAvailable(mbc.Metrics.K8sReplicasetAvailable), @@ -2334,6 +2387,7 @@ func (mb *MetricsBuilder) EmitForResource(rmo ...ResourceMetricsOption) { mb.metricK8sJobMaxParallelPods.emit(ils.Metrics()) mb.metricK8sJobSuccessfulPods.emit(ils.Metrics()) mb.metricK8sNamespacePhase.emit(ils.Metrics()) + mb.metricK8sNodeCondition.emit(ils.Metrics()) mb.metricK8sPodPhase.emit(ils.Metrics()) mb.metricK8sPodStatusReason.emit(ils.Metrics()) mb.metricK8sReplicasetAvailable.emit(ils.Metrics()) @@ -2505,6 +2559,11 @@ func (mb *MetricsBuilder) RecordK8sNamespacePhaseDataPoint(ts pcommon.Timestamp, mb.metricK8sNamespacePhase.recordDataPoint(mb.startTime, ts, val) } +// RecordK8sNodeConditionDataPoint adds a data point to k8s.node.condition metric. 
+func (mb *MetricsBuilder) RecordK8sNodeConditionDataPoint(ts pcommon.Timestamp, val int64, conditionAttributeValue string) { + mb.metricK8sNodeCondition.recordDataPoint(mb.startTime, ts, val, conditionAttributeValue) +} + // RecordK8sPodPhaseDataPoint adds a data point to k8s.pod.phase metric. func (mb *MetricsBuilder) RecordK8sPodPhaseDataPoint(ts pcommon.Timestamp, val int64) { mb.metricK8sPodPhase.recordDataPoint(mb.startTime, ts, val) diff --git a/receiver/k8sclusterreceiver/internal/metadata/generated_metrics_test.go b/receiver/k8sclusterreceiver/internal/metadata/generated_metrics_test.go index 2123b700d69f..1883a0be42fa 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/generated_metrics_test.go +++ b/receiver/k8sclusterreceiver/internal/metadata/generated_metrics_test.go @@ -163,6 +163,9 @@ func TestMetricsBuilder(t *testing.T) { allMetricsCount++ mb.RecordK8sNamespacePhaseDataPoint(ts, 1) + allMetricsCount++ + mb.RecordK8sNodeConditionDataPoint(ts, 1, "condition-val") + defaultMetricsCount++ allMetricsCount++ mb.RecordK8sPodPhaseDataPoint(ts, 1) @@ -606,6 +609,21 @@ func TestMetricsBuilder(t *testing.T) { assert.Equal(t, ts, dp.Timestamp()) assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) assert.Equal(t, int64(1), dp.IntValue()) + case "k8s.node.condition": + assert.False(t, validatedMetrics["k8s.node.condition"], "Found a duplicate in the metrics slice: k8s.node.condition") + validatedMetrics["k8s.node.condition"] = true + assert.Equal(t, pmetric.MetricTypeGauge, ms.At(i).Type()) + assert.Equal(t, 1, ms.At(i).Gauge().DataPoints().Len()) + assert.Equal(t, "The condition of a particular Node.", ms.At(i).Description()) + assert.Equal(t, "{condition}", ms.At(i).Unit()) + dp := ms.At(i).Gauge().DataPoints().At(0) + assert.Equal(t, start, dp.StartTimestamp()) + assert.Equal(t, ts, dp.Timestamp()) + assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) + assert.Equal(t, int64(1), dp.IntValue()) + attrVal, ok := 
dp.Attributes().Get("condition") + assert.True(t, ok) + assert.EqualValues(t, "condition-val", attrVal.Str()) case "k8s.pod.phase": assert.False(t, validatedMetrics["k8s.pod.phase"], "Found a duplicate in the metrics slice: k8s.pod.phase") validatedMetrics["k8s.pod.phase"] = true diff --git a/receiver/k8sclusterreceiver/internal/metadata/testdata/config.yaml b/receiver/k8sclusterreceiver/internal/metadata/testdata/config.yaml index 69eb73f82c3f..11f9f2c95cb4 100644 --- a/receiver/k8sclusterreceiver/internal/metadata/testdata/config.yaml +++ b/receiver/k8sclusterreceiver/internal/metadata/testdata/config.yaml @@ -55,6 +55,8 @@ all_set: enabled: true k8s.namespace.phase: enabled: true + k8s.node.condition: + enabled: true k8s.pod.phase: enabled: true k8s.pod.status_reason: @@ -210,6 +212,8 @@ none_set: enabled: false k8s.namespace.phase: enabled: false + k8s.node.condition: + enabled: false k8s.pod.phase: enabled: false k8s.pod.status_reason: diff --git a/receiver/k8sclusterreceiver/internal/node/nodes.go b/receiver/k8sclusterreceiver/internal/node/nodes.go index 9d939d1711be..eceed30bb806 100644 --- a/receiver/k8sclusterreceiver/internal/node/nodes.go +++ b/receiver/k8sclusterreceiver/internal/node/nodes.go @@ -18,6 +18,7 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/maps" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/experimentalmetricmetadata" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata" + imetadata "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata" ) const ( @@ -47,6 +48,19 @@ func Transform(node *corev1.Node) *corev1.Node { return newNode } +func RecordMetrics(mb *imetadata.MetricsBuilder, node *corev1.Node, ts pcommon.Timestamp) { + for _, c := range node.Status.Conditions { + mb.RecordK8sNodeConditionDataPoint(ts, nodeConditionValues[c.Status], string(c.Type)) + } + rb := 
mb.NewResourceBuilder() + rb.SetK8sNodeUID(string(node.UID)) + rb.SetK8sNodeName(node.Name) + rb.SetK8sKubeletVersion(node.Status.NodeInfo.KubeletVersion) + rb.SetK8sKubeproxyVersion(node.Status.NodeInfo.KubeProxyVersion) + + mb.EmitForResource(imetadata.WithResource(rb.Emit())) +} + func CustomMetrics(set receiver.CreateSettings, rb *metadata.ResourceBuilder, node *corev1.Node, nodeConditionTypesToReport, allocatableTypesToReport []string, ts pcommon.Timestamp) pmetric.ResourceMetrics { rm := pmetric.NewResourceMetrics() diff --git a/receiver/k8sclusterreceiver/internal/node/nodes_test.go b/receiver/k8sclusterreceiver/internal/node/nodes_test.go index 670a2be2eb54..c3bc5ae5c544 100644 --- a/receiver/k8sclusterreceiver/internal/node/nodes_test.go +++ b/receiver/k8sclusterreceiver/internal/node/nodes_test.go @@ -158,6 +158,30 @@ func TestNodeConditionValue(t *testing.T) { } } +func TestNodeMetrics(t *testing.T) { + n := testutils.NewNode("1") + + ts := pcommon.Timestamp(time.Now().UnixNano()) + mbc := metadata.DefaultMetricsBuilderConfig() + mbc.Metrics.K8sNodeCondition.Enabled = true + mb := metadata.NewMetricsBuilder(mbc, receivertest.NewNopCreateSettings()) + RecordMetrics(mb, n, ts) + m := mb.Emit() + + expectedFile := filepath.Join("testdata", "expected_mdatagen.yaml") + expected, err := golden.ReadMetrics(expectedFile) + require.NoError(t, err) + require.NoError(t, pmetrictest.CompareMetrics(expected, m, + pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreStartTimestamp(), + pmetrictest.IgnoreResourceMetricsOrder(), + pmetrictest.IgnoreMetricsOrder(), + pmetrictest.IgnoreScopeMetricsOrder(), + pmetrictest.IgnoreMetricDataPointsOrder(), + ), + ) +} + func TestTransform(t *testing.T) { originalNode := &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ diff --git a/receiver/k8sclusterreceiver/internal/node/testdata/expected_mdatagen.yaml b/receiver/k8sclusterreceiver/internal/node/testdata/expected_mdatagen.yaml new file mode 100644 index 000000000000..5a36ba746f7f 
--- /dev/null +++ b/receiver/k8sclusterreceiver/internal/node/testdata/expected_mdatagen.yaml @@ -0,0 +1,47 @@ +resourceMetrics: + - resource: + attributes: + - key: k8s.node.name + value: + stringValue: test-node-1 + - key: k8s.node.uid + value: + stringValue: test-node-1-uid + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: The condition of a particular Node. + gauge: + dataPoints: + - asInt: "1" + attributes: + - key: condition + value: + stringValue: "Ready" + - asInt: "0" + attributes: + - key: condition + value: + stringValue: "PIDPressure" + - asInt: "0" + attributes: + - key: condition + value: + stringValue: "DiskPressure" + - asInt: "0" + attributes: + - key: condition + value: + stringValue: "MemoryPressure" + - asInt: "0" + attributes: + - key: condition + value: + stringValue: "NetworkUnavailable" + + name: k8s.node.condition + unit: "{condition}" + + scope: + name: otelcol/k8sclusterreceiver + version: latest diff --git a/receiver/k8sclusterreceiver/metadata.yaml b/receiver/k8sclusterreceiver/metadata.yaml index 40fde1e957e6..433fde5e5a51 100644 --- a/receiver/k8sclusterreceiver/metadata.yaml +++ b/receiver/k8sclusterreceiver/metadata.yaml @@ -186,6 +186,10 @@ attributes: description: the name of the resource on which the quota is applied type: string enabled: true + condition: + description: "the name of Kubernetes Node condition. Example: Ready, Memory, PID, DiskPressure" + type: string + enabled: true metrics: k8s.container.cpu_request: @@ -478,8 +482,15 @@ metrics: attributes: - k8s.namespace.name - resource - - # k8s.node.condition_* metrics (k8s.node.condition_ready, k8s.node.condition_memory_pressure, etc) are controlled + k8s.node.condition: + enabled: false + description: The condition of a particular Node. 
+ unit: "{condition}" + gauge: + value_type: int + attributes: + - condition + # k8s.node.condition_* metrics (k8s.node.condition_ready, k8s.node.condition_memory_pressure, etc) are controlled # by node_conditions_to_report config option. By default, only k8s.node.condition_ready is enabled. # k8s.node.allocatable_* metrics (k8s.node.allocatable_cpu, k8s.node.allocatable_memory, etc) are controlled