Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPVE-627: fix: concurrent apply / status checks -> LVMCluster #391

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions controllers/internal/multierror.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package internal

import (
"strings"
)

const DefaultMultiErrorSeparator = ";"

// NewMultiError creates a MultiError that uses the default separator for each error.
func NewMultiError(errs []error) error {
return &MultiError{Errors: errs, Separator: DefaultMultiErrorSeparator}
}

// MultiError is an error that aggregates multiple errors together and uses
// a separator to aggregate them when called with Error.
type MultiError struct {
Errors []error
Separator string
}

func (m *MultiError) Error() string {
errs := make([]string, len(m.Errors))
for i, err := range m.Errors {
errs[i] = err.Error()
}
return strings.Join(errs, m.Separator)
}
16 changes: 8 additions & 8 deletions controllers/lvm_volumegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package controllers
import (
"context"
"errors"
"fmt"

lvmv1alpha1 "github.com/openshift/lvm-operator/api/v1alpha1"
k8serror "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -39,6 +40,7 @@ func (c lvmVG) getName() string {
}

func (c lvmVG) ensureCreated(r *LVMClusterReconciler, ctx context.Context, lvmCluster *lvmv1alpha1.LVMCluster) error {
unitLogger := r.Log.WithValues("topolvmNode", c.getName())

lvmVolumeGroups := lvmVolumeGroups(r.Namespace, lvmCluster.Spec.Storage.DeviceClasses)

Expand All @@ -50,10 +52,8 @@ func (c lvmVG) ensureCreated(r *LVMClusterReconciler, ctx context.Context, lvmCl
},
}

err := cutil.SetControllerReference(lvmCluster, existingVolumeGroup, r.Scheme)
if err != nil {
r.Log.Error(err, "failed to set controller reference to LVMVolumeGroup with name", volumeGroup.Name)
return err
if err := cutil.SetControllerReference(lvmCluster, existingVolumeGroup, r.Scheme); err != nil {
return fmt.Errorf("failed to set controller reference to LVMVolumeGroup: %w", err)
}

result, err := cutil.CreateOrUpdate(ctx, r.Client, existingVolumeGroup, func() error {
Expand All @@ -63,10 +63,10 @@ func (c lvmVG) ensureCreated(r *LVMClusterReconciler, ctx context.Context, lvmCl
})

if err != nil {
r.Log.Error(err, "failed to reconcile LVMVolumeGroup", "name", volumeGroup.Name)
return err
return fmt.Errorf("%s failed to reconcile: %w", c.getName(), err)
}
r.Log.Info("successfully reconciled LVMVolumeGroup", "operation", result, "name", volumeGroup.Name)

unitLogger.Info("LVMVolumeGroup applied to cluster", "operation", result, "name", volumeGroup.Name)
}
return nil
}
Expand Down Expand Up @@ -138,7 +138,7 @@ func lvmVolumeGroups(namespace string, deviceClasses []lvmv1alpha1.DeviceClass)
NodeSelector: deviceClass.NodeSelector,
DeviceSelector: deviceClass.DeviceSelector,
ThinPoolConfig: deviceClass.ThinPoolConfig,
Default: len(deviceClasses) == 1 || deviceClass.Default, //True if there is only one device class or default is explicitly set.
Default: len(deviceClasses) == 1 || deviceClass.Default, // True if there is only one device class or default is explicitly set.
},
}
lvmVolumeGroups = append(lvmVolumeGroups, lvmVolumeGroup)
Expand Down
34 changes: 24 additions & 10 deletions controllers/lvmcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ import (
"github.com/go-logr/logr"
configv1 "github.com/openshift/api/config/v1"
secv1client "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1"

lvmv1alpha1 "github.com/openshift/lvm-operator/api/v1alpha1"
"github.com/openshift/lvm-operator/controllers/internal"

topolvmv1 "github.com/topolvm/topolvm/api/v1"

Expand Down Expand Up @@ -165,7 +165,7 @@ func (r *LVMClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// errors returned by this will be updated in the reconcileSucceeded condition of the LVMCluster
func (r *LVMClusterReconciler) reconcile(ctx context.Context, instance *lvmv1alpha1.LVMCluster) (ctrl.Result, error) {

//The resource was deleted
// The resource was deleted
if !instance.DeletionTimestamp.IsZero() {
// Check for existing LogicalVolumes
lvsExist, err := r.logicalVolumesExist(ctx, instance)
Expand Down Expand Up @@ -197,7 +197,7 @@ func (r *LVMClusterReconciler) reconcile(ctx context.Context, instance *lvmv1alp
r.Log.Info("successfully added finalizer")
}

resourceCreationList := []resourceManager{
resources := []resourceManager{
&csiDriver{},
&topolvmController{r.TopoLVMLeaderElectionPassthrough},
&openshiftSccs{},
Expand All @@ -208,16 +208,30 @@ func (r *LVMClusterReconciler) reconcile(ctx context.Context, instance *lvmv1alp
&topolvmVolumeSnapshotClass{},
}

// handle create/update
for _, unit := range resourceCreationList {
err := unit.ensureCreated(r, ctx, instance)
if err != nil {
r.Log.Error(err, "failed to reconcile", "resource", unit.getName())
return ctrl.Result{}, err
resourceSyncStart := time.Now()
results := make(chan error, len(resources))
create := func(i int) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Errgroup cancel the context on the first error. We dont want the creation of other resources to abort in case of an error on one of the resources.

results <- resources[i].ensureCreated(r, ctx, instance)
}

for i := range resources {
go create(i)
}

var errs []error
for i := 0; i < len(resources); i++ {
if err := <-results; err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if something goes wrong and we will stuck there forever?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you look at the code path there should not be a way to get stuck here. This will always return exactly the amount of times that a resource is attempted to be created. This means that there will always be the correct amount of results, no matter the amount of failures in the goroutines.

errs = append(errs, err)
}
}

r.Log.Info("successfully reconciled LVMCluster")
resourceSyncElapsedTime := time.Since(resourceSyncStart)
if len(errs) > 0 {
return ctrl.Result{}, fmt.Errorf("failed to reconcile resources managed by LVMCluster within %v: %w",
resourceSyncElapsedTime, internal.NewMultiError(errs))
}

r.Log.Info("successfully reconciled LVMCluster", "resourceSyncElapsedTime", resourceSyncElapsedTime)

return ctrl.Result{}, nil
}
Expand Down
29 changes: 15 additions & 14 deletions controllers/scc.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,29 @@ func (c openshiftSccs) getName() string {
}

func (c openshiftSccs) ensureCreated(r *LVMClusterReconciler, ctx context.Context, lvmCluster *lvmv1alpha1.LVMCluster) error {
unitLogger := r.Log.WithValues("resourceManager", c.getName())
if !IsOpenshift(r) {
r.Log.Info("not creating SCCs as this is not an Openshift cluster")
unitLogger.Info("not creating SCCs as this is not an Openshift cluster")
return nil
}
sccs := getAllSCCs(r.Namespace)
for _, scc := range sccs {
_, err := r.SecurityClient.SecurityContextConstraints().Get(ctx, scc.Name, metav1.GetOptions{})
if err != nil && errors.IsNotFound(err) {
r.Log.Info("creating SecurityContextConstraint", "SecurityContextConstraint", scc.Name)
_, err := r.SecurityClient.SecurityContextConstraints().Create(ctx, scc, metav1.CreateOptions{})
if err != nil {
r.Log.Error(err, "failed to create SCC", "SecurityContextConstraint", scc.Name)
return err
}
r.Log.Info("successfully created SCC", "SecurityContextConstraint", scc.Name)
} else if err == nil {
if err == nil {
// Don't update the SCC
r.Log.Info("already exists", "SecurityContextConstraint", scc.Name)
} else {
r.Log.Error(err, "something went wrong when checking for SecurityContextConstraint", "SecurityContextConstraint", scc.Name)
return err
unitLogger.Info("SecurityContextConstraint exists, skipping creation", "name", scc.Name)
continue
}

if !errors.IsNotFound(err) {
return fmt.Errorf("something went wrong when checking for SecurityContextConstraint: %w", err)
}

if _, err := r.SecurityClient.SecurityContextConstraints().Create(ctx, scc, metav1.CreateOptions{}); err != nil {
return fmt.Errorf("%s failed to reconcile: %w", c.getName(), err)
}

unitLogger.Info("SecurityContextConstraint created", "name", scc.Name)
}

return nil
Expand Down
16 changes: 10 additions & 6 deletions controllers/topolvm_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ package controllers
import (
"context"
"fmt"
v1 "github.com/openshift/api/config/v1"
"path/filepath"

v1 "github.com/openshift/api/config/v1"
lvmv1alpha1 "github.com/openshift/lvm-operator/api/v1alpha1"
"github.com/pkg/errors"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -52,6 +52,7 @@ func (c topolvmController) getName() string {
//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=create;update;delete;get;list;watch

func (c topolvmController) ensureCreated(r *LVMClusterReconciler, ctx context.Context, lvmCluster *lvmv1alpha1.LVMCluster) error {
unitLogger := r.Log.WithValues("resourceManager", c.getName())

// get the desired state of topolvm controller deployment
desiredDeployment := getControllerDeployment(lvmCluster, r.Namespace, r.ImageName, c.topoLVMLeaderElectionPassthrough)
Expand All @@ -64,20 +65,23 @@ func (c topolvmController) ensureCreated(r *LVMClusterReconciler, ctx context.Co

err := cutil.SetControllerReference(lvmCluster, existingDeployment, r.Scheme)
if err != nil {
r.Log.Error(err, "failed to set controller reference to topolvm controller deployment with name", existingDeployment.Name)
return err
return fmt.Errorf("failed to set controller reference for csi controller: %w", err)
}

result, err := cutil.CreateOrUpdate(ctx, r.Client, existingDeployment, func() error {
return c.setTopolvmControllerDesiredState(existingDeployment, desiredDeployment)
})

if err != nil {
r.Log.Error(err, "csi controller reconcile failure", "name", desiredDeployment.Name)
return err
return fmt.Errorf("could not create/update csi controller: %w", err)
}
unitLogger.Info("Deployment applied to cluster", "operation", result, "name", desiredDeployment.Name)

if err := verifyDeploymentReadiness(existingDeployment); err != nil {
return fmt.Errorf("csi controller is not ready: %w", err)
}
unitLogger.Info("Deployment is ready", "name", desiredDeployment.Name)

r.Log.Info("csi controller", "operation", result, "name", desiredDeployment.Name)
return nil
}

Expand Down
8 changes: 4 additions & 4 deletions controllers/topolvm_csi_driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package controllers

import (
"context"
"fmt"

lvmv1alpha1 "github.com/openshift/lvm-operator/api/v1alpha1"
"github.com/pkg/errors"
Expand All @@ -44,6 +45,7 @@ func (c csiDriver) getName() string {
//+kubebuilder:rbac:groups=storage.k8s.io,resources=csidrivers,verbs=get;create;delete;watch;list

func (c csiDriver) ensureCreated(r *LVMClusterReconciler, ctx context.Context, lvmCluster *lvmv1alpha1.LVMCluster) error {
unitLogger := r.Log.WithValues("resourceManager", c.getName())
csiDriverResource := getCSIDriverResource()

result, err := cutil.CreateOrUpdate(ctx, r.Client, csiDriverResource, func() error {
Expand All @@ -52,11 +54,9 @@ func (c csiDriver) ensureCreated(r *LVMClusterReconciler, ctx context.Context, l
})

if err != nil {
r.Log.Error(err, "csi driver reconcile failure", "name", csiDriverResource.Name)
return err
return fmt.Errorf("%s failed to reconcile: %w", c.getName(), err)
}

r.Log.Info("csi driver", "operation", result, "name", csiDriverResource.Name)
unitLogger.Info("CSIDriver applied to cluster", "operation", result, "name", csiDriverResource.Name)
return nil
}

Expand Down
17 changes: 11 additions & 6 deletions controllers/topolvm_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ func (n topolvmNode) ensureCreated(r *LVMClusterReconciler, ctx context.Context,

err := cutil.SetControllerReference(lvmCluster, ds, r.Scheme)
if err != nil {
r.Log.Error(err, "failed to set controller reference to topolvm node daemonset with name", ds.Name)
return err
return fmt.Errorf("failed to set controller reference to topolvm node daemonset: %w", err)
}

unitLogger.Info("running CreateOrUpdate")
Expand Down Expand Up @@ -93,11 +92,17 @@ func (n topolvmNode) ensureCreated(r *LVMClusterReconciler, ctx context.Context,
})

if err != nil {
r.Log.Error(err, fmt.Sprintf("%s reconcile failure", topolvmNodeName), "name", ds.Name)
} else {
r.Log.Info(topolvmNodeName, "operation", result, "name", ds.Name)
return fmt.Errorf("%s failed to reconcile: %w", n.getName(), err)
}
return err

unitLogger.Info("DaemonSet applied to cluster", "operation", result, "name", ds.Name)

if err := verifyDaemonSetReadiness(ds); err != nil {
return fmt.Errorf("DaemonSet is not considered ready: %w", err)
}
unitLogger.Info("DaemonSet is ready", "name", ds.Name)

return nil
}

// ensureDeleted should wait for the resources to be cleaned up
Expand Down
11 changes: 4 additions & 7 deletions controllers/topolvm_snapshotclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,21 @@ func (s topolvmVolumeSnapshotClass) getName() string {
//+kubebuilder:rbac:groups=snapshot.storage.k8s.io,resources=volumesnapshotclasses,verbs=get;create;delete;watch;list

func (s topolvmVolumeSnapshotClass) ensureCreated(r *LVMClusterReconciler, ctx context.Context, lvmCluster *lvmv1alpha1.LVMCluster) error {

unitLogger := r.Log.WithValues("resourceManager", s.getName())
// one volume snapshot class for every deviceClass based on CR is created
topolvmSnapshotClasses := getTopolvmSnapshotClasses(lvmCluster)
for _, vsc := range topolvmSnapshotClasses {

// we anticipate no edits to volume snapshot class
result, err := cutil.CreateOrUpdate(ctx, r.Client, vsc, func() error { return nil })
if err != nil {
// this is necessary in case the VolumeSnapshotClass CRDs are not registered in the Distro, e.g. for OpenShift Local
if discovery.IsGroupDiscoveryFailedError(errors.Unwrap(err)) {
r.Log.Info("topolvm volume snapshot classes do not exist on the cluster, ignoring", "VolumeSnapshotClass", vscName)
r.Log.Info("volume snapshot class CRDs do not exist on the cluster, ignoring", "VolumeSnapshotClass", vscName)
return nil
}
r.Log.Error(err, "topolvm volume snapshot class reconcile failure", "name", vsc.Name)
return err
} else {
r.Log.Info("topolvm volume snapshot class", "operation", result, "name", vsc.Name)
return fmt.Errorf("%s failed to reconcile: %w", s.getName(), err)
}
unitLogger.Info("VolumeSnapshotClass applied to cluster", "operation", result, "name", vsc.Name)
}
return nil
}
Expand Down
8 changes: 3 additions & 5 deletions controllers/topolvm_storageclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,17 @@ func (s topolvmStorageClass) getName() string {
//+kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;create;delete;watch;list

func (s topolvmStorageClass) ensureCreated(r *LVMClusterReconciler, ctx context.Context, lvmCluster *lvmv1alpha1.LVMCluster) error {
unitLogger := r.Log.WithValues("resourceManager", s.getName())

// one storage class for every deviceClass based on CR is created
topolvmStorageClasses := getTopolvmStorageClasses(r, ctx, lvmCluster)
for _, sc := range topolvmStorageClasses {

// we anticipate no edits to storage class
result, err := cutil.CreateOrUpdate(ctx, r.Client, sc, func() error { return nil })
if err != nil {
r.Log.Error(err, "topolvm storage class reconcile failure", "name", sc.Name)
return err
} else {
r.Log.Info("topolvm storage class", "operation", result, "name", sc.Name)
return fmt.Errorf("%s failed to reconcile: %w", s.getName(), err)
}
unitLogger.Info("StorageClass applied to cluster", "operation", result, "name", sc.Name)
}
return nil
}
Expand Down
Loading