Skip to content

Commit

Permalink
Fixed Otel Controller - Auxiliary resources deletion (#2575)
Browse files Browse the repository at this point in the history
* Fixed HPA deletion

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fix e2e tests

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Added reconciliation test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Added owned objects function

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed controller test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed controller test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed controller test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Add PodDisruptionBudget

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Change vars setting

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Change vars setting

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Update e2e tests to chainsaw

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Added ingress e2e tests

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Added Volumes, changed function's place

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Added Volumes, changed function's place

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Added Volumes, changed function's place

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed Suite Test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Added RBAC for Volumes

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Added RBAC for Volumes

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed Suite Test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed Suite Test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed Suite Test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Add a sleep

* Fixed Chainsaw test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed Chainsaw test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed Chainsaw test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

* Fixed Chainsaw test

Signed-off-by: Yuri Sa <yurimsa@gmail.com>

---------

Signed-off-by: Yuri Sa <yurimsa@gmail.com>
Co-authored-by: Jacob Aronoff <jacobaronoff45@gmail.com>
  • Loading branch information
yuriolisa and jaronoff97 authored Feb 28, 2024
1 parent aa994e8 commit 0ef71e3
Show file tree
Hide file tree
Showing 25 changed files with 416 additions and 41 deletions.
16 changes: 16 additions & 0 deletions .chloggen/fix-hpa-delete.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
component: operator

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fixed HPA deletion

# One or more tracking issues related to the change
issues: [2568, 2587, 2651]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ endif

START_KIND_CLUSTER ?= true

KUBE_VERSION ?= 1.24
KUBE_VERSION ?= 1.29
KIND_CONFIG ?= kind-$(KUBE_VERSION).yaml
KIND_CLUSTER_NAME ?= "otel-operator"

Expand Down Expand Up @@ -367,7 +367,7 @@ KUSTOMIZE_VERSION ?= v5.0.3
CONTROLLER_TOOLS_VERSION ?= v0.12.0
GOLANGCI_LINT_VERSION ?= v1.54.0
KIND_VERSION ?= v0.20.0
CHAINSAW_VERSION ?= v0.1.6
CHAINSAW_VERSION ?= v0.1.7

.PHONY: install-tools
install-tools: kustomize golangci-lint kind controller-gen envtest crdoc kind operator-sdk chainsaw
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ metadata:
categories: Logging & Tracing,Monitoring
certified: "false"
containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
createdAt: "2024-02-15T18:18:46Z"
createdAt: "2024-02-27T11:16:56Z"
description: Provides the OpenTelemetry components, including the Collector
operators.operatorframework.io/builder: operator-sdk-v1.29.0
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
Expand Down Expand Up @@ -175,6 +175,8 @@ spec:
- ""
resources:
- configmaps
- persistentvolumeclaims
- persistentvolumes
- pods
- serviceaccounts
- services
Expand Down
2 changes: 2 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ rules:
- ""
resources:
- configmaps
- persistentvolumeclaims
- persistentvolumes
- pods
- serviceaccounts
- services
Expand Down
3 changes: 3 additions & 0 deletions controllers/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,9 @@ service:
"app.kubernetes.io/instance": "test.test",
"app.kubernetes.io/managed-by": "opentelemetry-operator",
"app.kubernetes.io/name": "test-ingress",
"app.kubernetes.io/component": "opentelemetry-collector",
"app.kubernetes.io/part-of": "opentelemetry",
"app.kubernetes.io/version": "latest",
},
Annotations: map[string]string{
"something": "true",
Expand Down
24 changes: 22 additions & 2 deletions controllers/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/retry"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -77,7 +78,7 @@ func BuildOpAMPBridge(params manifests.Params) ([]client.Object, error) {
}

// reconcileDesiredObjects runs the reconcile process using the mutateFn over the given list of objects.
func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects ...client.Object) error {
func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects []client.Object, ownedObjects map[types.UID]client.Object) error {
var errs []error
for _, desired := range desiredObjects {
l := logger.WithValues(
Expand All @@ -91,7 +92,6 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
continue
}
}

// existing is an object the controller runtime will hydrate for us
// we obtain the existing object by deep copying the desired object because it's the most convenient way
existing := desired.DeepCopyObject().(client.Object)
Expand All @@ -116,9 +116,29 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
}

l.V(1).Info(fmt.Sprintf("desired has been %s", op))
// This object is still managed by the operator, remove it from the list of objects to prune
delete(ownedObjects, existing.GetUID())
}
if len(errs) > 0 {
return fmt.Errorf("failed to create objects for %s: %w", owner.GetName(), errors.Join(errs...))
}
// Prune owned objects in the cluster which should not be present after the reconciliation.
pruneErrs := []error{}
for _, obj := range ownedObjects {
l := logger.WithValues(
"object_name", obj.GetName(),
"object_kind", obj.GetObjectKind().GroupVersionKind(),
)

l.Info("pruning unmanaged resource")
err := kubeClient.Delete(ctx, obj)
if err != nil {
l.Error(err, "failed to delete resource")
pruneErrs = append(pruneErrs, err)
}
}
if len(pruneErrs) > 0 {
return fmt.Errorf("failed to prune objects for %s: %w", owner.GetName(), errors.Join(pruneErrs...))
}
return nil
}
2 changes: 1 addition & 1 deletion controllers/opampbridge_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func (r *OpAMPBridgeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
if buildErr != nil {
return ctrl.Result{}, buildErr
}
err := reconcileDesiredObjects(ctx, r.Client, log, &params.OpAMPBridge, params.Scheme, desiredObjects...)
err := reconcileDesiredObjects(ctx, r.Client, log, &params.OpAMPBridge, params.Scheme, desiredObjects, nil)
return opampbridgeStatus.HandleReconcileStatus(ctx, log, params, err)
}

Expand Down
91 changes: 87 additions & 4 deletions controllers/opentelemetrycollector_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,32 @@ package controllers

import (
"context"
"fmt"

"github.com/go-logr/logr"
routev1 "github.com/openshift/api/route/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
policyV1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
"github.com/open-telemetry/opentelemetry-operator/internal/api/convert"
"github.com/open-telemetry/opentelemetry-operator/internal/autodetect/openshift"
"github.com/open-telemetry/opentelemetry-operator/internal/config"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
collectorStatus "github.com/open-telemetry/opentelemetry-operator/internal/status/collector"
"github.com/open-telemetry/opentelemetry-operator/pkg/featuregate"
)
Expand All @@ -57,6 +65,71 @@ type Params struct {
Config config.Config
}

// findOtelOwnedObjects collects the auxiliary resources currently present in
// the collector's namespace that carry this collector's selector labels, so
// the reconciler can later prune any that are no longer part of the desired
// state. The result is keyed by object UID.
//
// Kinds gathered: HorizontalPodAutoscaler, Ingress, PodDisruptionBudget;
// plus ServiceMonitor and PodMonitor when the Prometheus-operator feature
// gate is enabled, and Route when OpenShift routes are available.
// Returns an error as soon as any List call fails.
func (r *OpenTelemetryCollectorReconciler) findOtelOwnedObjects(ctx context.Context, params manifests.Params) (map[types.UID]client.Object, error) {
ownedObjects := map[types.UID]client.Object{}

// Restrict every List below to the collector's namespace and to objects
// labeled with this collector's selector labels.
listOps := &client.ListOptions{
Namespace: params.OtelCol.Namespace,
LabelSelector: labels.SelectorFromSet(manifestutils.SelectorLabels(params.OtelCol.ObjectMeta, collector.ComponentOpenTelemetryCollector)),
}
hpaList := &autoscalingv2.HorizontalPodAutoscalerList{}
err := r.List(ctx, hpaList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing HorizontalPodAutoscalers: %w", err)
}
// Index into Items (rather than ranging by value) so the stored pointer
// refers to the slice element, not a per-iteration copy.
for i := range hpaList.Items {
ownedObjects[hpaList.Items[i].GetUID()] = &hpaList.Items[i]
}
// Monitoring CRDs are only queried when the Prometheus operator is known
// to be installed; listing them otherwise would fail.
if featuregate.PrometheusOperatorIsAvailable.IsEnabled() {
servicemonitorList := &monitoringv1.ServiceMonitorList{}
err = r.List(ctx, servicemonitorList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing ServiceMonitors: %w", err)
}
// No address-of here: the monitoring list types appear to hold
// pointers in Items already — NOTE(review): confirm against the
// monitoringv1 API types.
for i := range servicemonitorList.Items {
ownedObjects[servicemonitorList.Items[i].GetUID()] = servicemonitorList.Items[i]
}

podMonitorList := &monitoringv1.PodMonitorList{}
err = r.List(ctx, podMonitorList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing PodMonitors: %w", err)
}
for i := range podMonitorList.Items {
ownedObjects[podMonitorList.Items[i].GetUID()] = podMonitorList.Items[i]
}
}
ingressList := &networkingv1.IngressList{}
err = r.List(ctx, ingressList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing Ingresses: %w", err)
}
for i := range ingressList.Items {
ownedObjects[ingressList.Items[i].GetUID()] = &ingressList.Items[i]
}

// Routes exist only on OpenShift; guard the List on detected availability.
if params.Config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable {
routesList := &routev1.RouteList{}
err = r.List(ctx, routesList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing Routes: %w", err)
}
for i := range routesList.Items {
ownedObjects[routesList.Items[i].GetUID()] = &routesList.Items[i]
}
}
pdbList := &policyV1.PodDisruptionBudgetList{}
err = r.List(ctx, pdbList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing PodDisruptionBudgets: %w", err)
}
for i := range pdbList.Items {
ownedObjects[pdbList.Items[i].GetUID()] = &pdbList.Items[i]
}

return ownedObjects, nil
}

func (r *OpenTelemetryCollectorReconciler) getParams(instance v1alpha1.OpenTelemetryCollector) (manifests.Params, error) {
otelCol, err := convert.V1Alpha1to2(instance)
if err != nil {
Expand Down Expand Up @@ -84,7 +157,7 @@ func NewReconciler(p Params) *OpenTelemetryCollectorReconciler {
return r
}

// +kubebuilder:rbac:groups="",resources=pods;configmaps;services;serviceaccounts,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=pods;configmaps;services;serviceaccounts;persistentvolumeclaims;persistentvolumes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
// +kubebuilder:rbac:groups=apps,resources=daemonsets;deployments;statefulsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -134,9 +207,13 @@ func (r *OpenTelemetryCollectorReconciler) Reconcile(ctx context.Context, req ct
if buildErr != nil {
return ctrl.Result{}, buildErr
}
// TODO: https://github.com/open-telemetry/opentelemetry-operator/issues/2620
// TODO: Change &instance to use params.OtelCol
err = reconcileDesiredObjects(ctx, r.Client, log, &instance, params.Scheme, desiredObjects...)

ownedObjects, err := r.findOtelOwnedObjects(ctx, params)
if err != nil {
return ctrl.Result{}, err
}

err = reconcileDesiredObjects(ctx, r.Client, log, &instance, params.Scheme, desiredObjects, ownedObjects)
return collectorStatus.HandleReconcileStatus(ctx, log, params, instance, err)
}

Expand All @@ -150,6 +227,9 @@ func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) er
Owns(&appsv1.Deployment{}).
Owns(&appsv1.DaemonSet{}).
Owns(&appsv1.StatefulSet{}).
Owns(&corev1.PersistentVolume{}).
Owns(&corev1.PersistentVolumeClaim{}).
Owns(&networkingv1.Ingress{}).
Owns(&autoscalingv2.HorizontalPodAutoscaler{}).
Owns(&policyV1.PodDisruptionBudget{})

Expand All @@ -162,6 +242,9 @@ func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) er
builder.Owns(&monitoringv1.ServiceMonitor{})
builder.Owns(&monitoringv1.PodMonitor{})
}
if r.config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable {
builder.Owns(&routev1.Route{})
}

return builder.Complete(r)
}
14 changes: 11 additions & 3 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ import (
"time"

routev1 "github.com/openshift/api/route/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -104,7 +106,7 @@ func TestMain(m *testing.M) {

testEnv = &envtest.Environment{
CRDDirectoryPaths: []string{filepath.Join("..", "config", "crd", "bases")},
CRDs: []*apiextensionsv1.CustomResourceDefinition{testdata.OpenShiftRouteCRD},
CRDs: []*apiextensionsv1.CustomResourceDefinition{testdata.OpenShiftRouteCRD, testdata.ServiceMonitorCRD, testdata.PodMonitorCRD},
WebhookInstallOptions: envtest.WebhookInstallOptions{
Paths: []string{filepath.Join("..", "config", "webhook")},
},
Expand All @@ -114,12 +116,18 @@ func TestMain(m *testing.M) {
fmt.Printf("failed to start testEnv: %v", err)
os.Exit(1)
}

if err = monitoringv1.AddToScheme(testScheme); err != nil {
fmt.Printf("failed to register scheme: %v", err)
os.Exit(1)
}
if err = networkingv1.AddToScheme(testScheme); err != nil {
fmt.Printf("failed to register scheme: %v", err)
os.Exit(1)
}
if err = routev1.AddToScheme(testScheme); err != nil {
fmt.Printf("failed to register scheme: %v", err)
os.Exit(1)
}

if err = v1alpha1.AddToScheme(testScheme); err != nil {
fmt.Printf("failed to register scheme: %v", err)
os.Exit(1)
Expand Down
6 changes: 6 additions & 0 deletions internal/api/convert/v1alpha.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ func V1Alpha1to2(in v1alpha1.OpenTelemetryCollector) (v1beta1.OpenTelemetryColle
Pods: m.Pods,
}
}
if copy.Spec.MaxReplicas != nil && copy.Spec.Autoscaler.MaxReplicas == nil {
copy.Spec.Autoscaler.MaxReplicas = copy.Spec.MaxReplicas
}
if copy.Spec.MinReplicas != nil && copy.Spec.Autoscaler.MinReplicas == nil {
copy.Spec.Autoscaler.MinReplicas = copy.Spec.MinReplicas
}
out.Spec.OpenTelemetryCommonFields.Autoscaler = &v1beta1.AutoscalerSpec{
MinReplicas: copy.Spec.Autoscaler.MinReplicas,
MaxReplicas: copy.Spec.Autoscaler.MaxReplicas,
Expand Down
5 changes: 2 additions & 3 deletions internal/manifests/collector/horizontalpodautoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,22 @@ import (
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
"github.com/open-telemetry/opentelemetry-operator/internal/naming"
)

func HorizontalPodAutoscaler(params manifests.Params) (client.Object, error) {
func HorizontalPodAutoscaler(params manifests.Params) (*autoscalingv2.HorizontalPodAutoscaler, error) {
name := naming.Collector(params.OtelCol.Name)
labels := manifestutils.Labels(params.OtelCol.ObjectMeta, name, params.OtelCol.Spec.Image, ComponentOpenTelemetryCollector, params.Config.LabelsFilter())
annotations, err := Annotations(params.OtelCol)
if err != nil {
return nil, err
}

var result client.Object
var result *autoscalingv2.HorizontalPodAutoscaler

objectMeta := metav1.ObjectMeta{
Name: naming.HorizontalPodAutoscaler(params.OtelCol.Name),
Expand Down
5 changes: 1 addition & 4 deletions internal/manifests/collector/horizontalpodautoscaler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

Expand Down Expand Up @@ -95,11 +94,9 @@ func TestHPA(t *testing.T) {
},
Log: logger,
}
raw, err := HorizontalPodAutoscaler(params)
hpa, err := HorizontalPodAutoscaler(params)
require.NoError(t, err)

hpa := raw.(*autoscalingv2.HorizontalPodAutoscaler)

// verify
assert.Equal(t, "my-instance-collector", hpa.Name)
assert.Equal(t, "my-instance-collector", hpa.Labels["app.kubernetes.io/name"])
Expand Down
Loading

0 comments on commit 0ef71e3

Please sign in to comment.