Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify node_draining metric by switching to just-in-time collection. #153

Merged
merged 1 commit into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions controllers/metrics.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,3 @@
package controllers

import (
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// MetricsNamespace is the name prefix used for the Prometheus metrics defined
// in this package.
const MetricsNamespace = "openshift_upgrade_controller"

// nodeDraining is a gauge vector holding one series per node, identified by
// the "node" label. Its fully qualified name is MetricsNamespace followed by
// "_node_draining".
var nodeDraining = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: MetricsNamespace,
		Name:      "node_draining",
		Help:      "Node draining status",
	},
	[]string{"node"},
)

// init registers the nodeDraining gauge vector with the controller-runtime
// metrics registry at package load time.
func init() {
// MustRegister follows the Must* convention and panics if registration fails.
metrics.Registry.MustRegister(nodeDraining)
}
66 changes: 36 additions & 30 deletions controllers/node_controller.go → controllers/node_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ package controllers

import (
"context"
"fmt"

"github.com/prometheus/client_golang/prometheus"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand All @@ -34,41 +34,47 @@ const (
LastAppliedDrainerAnnotationKey = "machineconfiguration.openshift.io/lastAppliedDrain"
)

// NodeReconciler reconciles a Node object
type NodeReconciler struct {
// nodeDrainingDesc describes the per-node draining gauge. The metric carries a
// single variable label, "node", and no constant labels.
var nodeDrainingDesc = prometheus.NewDesc(
	prometheus.BuildFQName(MetricsNamespace, "", "node_draining"),
	"Node draining status",
	[]string{"node"},
	nil,
)

// NodeCollector is a prometheus.Collector that reports the draining status of
// Nodes. It reads the Nodes through its embedded client when Collect is called.
type NodeCollector struct {
// Client is embedded; Collect uses it to list Nodes.
client.Client
// NOTE(review): Scheme is not read by Describe or Collect; it looks like a
// leftover from the removed NodeReconciler in this diff view. Confirm whether
// it is still needed.
Scheme *runtime.Scheme
}

//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch

// Reconcile reacts to Node changes and updates the node draining metric.
func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
var node corev1.Node
if err := r.Get(ctx, req.NamespacedName, &node); err != nil {
nodeDraining.DeleteLabelValues(req.Name)
return ctrl.Result{}, client.IgnoreNotFound(err)
}
if !node.DeletionTimestamp.IsZero() {
nodeDraining.DeleteLabelValues(node.Name)
return ctrl.Result{}, nil
}
// Compile-time check that *NodeCollector satisfies prometheus.Collector.
var _ prometheus.Collector = (*NodeCollector)(nil)

desiredDrain := node.Annotations[DesiredDrainerAnnotationKey]
lastAppliedDrain := node.Annotations[LastAppliedDrainerAnnotationKey]
// Describe implements prometheus.Collector.
// It emits the one fixed descriptor of the node draining metric on ch.
func (c *NodeCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- nodeDrainingDesc
}

if desiredDrain == lastAppliedDrain {
nodeDraining.WithLabelValues(node.Name).Set(0)
return ctrl.Result{}, nil
}
// Collect implements prometheus.Collector.
// Sends a metric with the current value of the Node draining status to the provided channel.
func (c *NodeCollector) Collect(ch chan<- prometheus.Metric) {
ctx := context.Background()

nodeDraining.WithLabelValues(node.Name).Set(1)
return ctrl.Result{}, nil
}
var nodes corev1.NodeList
if err := c.Client.List(ctx, &nodes); err != nil {
err := fmt.Errorf("failed list to nodes: %w", err)
ch <- prometheus.NewInvalidMetric(nodeDrainingDesc, err)
}

// SetupWithManager sets up the controller with the Manager.
func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&corev1.Node{}).
Complete(r)
for _, node := range nodes.Items {
ch <- prometheus.MustNewConstMetric(
nodeDrainingDesc,
prometheus.GaugeValue,
boolToFloat64(node.Annotations[DesiredDrainerAnnotationKey] != node.Annotations[LastAppliedDrainerAnnotationKey]),
node.Name,
)
}
}
64 changes: 64 additions & 0 deletions controllers/node_collector_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package controllers

import (
"strings"
"testing"

"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// Test_NodeCollector checks the samples NodeCollector emits for a fixed set of
// Nodes served by a fake client: Nodes with empty, missing or equal drain
// annotations are expected to report 0, Nodes whose annotations differ 1.
func Test_NodeCollector(t *testing.T) {
	scheme := runtime.NewScheme()
	require.NoError(t, clientgoscheme.AddToScheme(scheme))

	fixtures := []runtime.Object{
		nodeWithDrainAnnotation("new-node1", "", ""),
		// A Node without any annotations at all.
		&corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "new-node2"}},
		nodeWithDrainAnnotation("node", "a", "a"),
		nodeWithDrainAnnotation("node-draining1", "a", "b"),
		nodeWithDrainAnnotation("node-draining2", "a", ""),
	}

	builder := fake.NewClientBuilder().WithScheme(scheme)
	fakeClient := builder.WithRuntimeObjects(fixtures...).Build()

	collector := &NodeCollector{Client: fakeClient}

	const expected = `
# HELP openshift_upgrade_controller_node_draining Node draining status
# TYPE openshift_upgrade_controller_node_draining gauge
openshift_upgrade_controller_node_draining{node="new-node1"} 0
openshift_upgrade_controller_node_draining{node="new-node2"} 0
openshift_upgrade_controller_node_draining{node="node"} 0
openshift_upgrade_controller_node_draining{node="node-draining1"} 1
openshift_upgrade_controller_node_draining{node="node-draining2"} 1
`

	err := testutil.CollectAndCompare(
		collector,
		strings.NewReader(expected),
		"openshift_upgrade_controller_node_draining",
	)
	require.NoError(t, err)
}

// nodeWithDrainAnnotation builds a Node called name whose desired-drain and
// last-applied-drain annotations are set to desiredDrainer and
// lastAppliedDrainer respectively. Both annotations are always present, even
// when the given value is empty.
func nodeWithDrainAnnotation(name, desiredDrainer, lastAppliedDrainer string) *corev1.Node {
	node := &corev1.Node{}
	node.Name = name
	node.Annotations = map[string]string{
		DesiredDrainerAnnotationKey:     desiredDrainer,
		LastAppliedDrainerAnnotationKey: lastAppliedDrainer,
	}
	return node
}
71 changes: 0 additions & 71 deletions controllers/node_controller_test.go

This file was deleted.

10 changes: 3 additions & 7 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,10 @@ func main() {
ManagedClusterVersionName: managedClusterVersionName,
ManagedClusterVersionNamespace: managedClusterVersionNamespace,
})

if err = (&controllers.NodeReconciler{
metrics.Registry.MustRegister(&controllers.NodeCollector{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Node")
os.Exit(1)
}
})

if err = (&controllers.ClusterVersionReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Expand Down