From af73fca5a3afb5c7a3cd5ba027db8ebcc2ec423f Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Mon, 11 Nov 2024 12:42:48 +0100 Subject: [PATCH] rbd: add locking for VolumeGroupSnapshot operations Add VolumeGroupLocks in the CSI Controller Server so that operations are protected against concurrent requests for the same VolumeGroupSnapshot. Signed-off-by: Niels de Vos --- internal/rbd/controllerserver.go | 4 +++ internal/rbd/driver/driver.go | 1 + internal/rbd/group_controllerserver.go | 37 +++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index 1745dbc3793..e721bc7c51f 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -53,6 +53,10 @@ type ControllerServer struct { // A map storing all volumes/snapshots with ongoing operations. OperationLocks *util.OperationLock + // A map storing all volumes with ongoing operations so that additional operations + // for that same volume (as defined by volumegroup ID/volumegroup name) return an Aborted error + VolumeGroupLocks *util.VolumeLocks + // Cluster name ClusterName string diff --git a/internal/rbd/driver/driver.go b/internal/rbd/driver/driver.go index 4377d5e7629..7d58321caff 100644 --- a/internal/rbd/driver/driver.go +++ b/internal/rbd/driver/driver.go @@ -62,6 +62,7 @@ func NewControllerServer(d *csicommon.CSIDriver) *rbd.ControllerServer { DefaultControllerServer: csicommon.NewDefaultControllerServer(d), VolumeLocks: util.NewVolumeLocks(), SnapshotLocks: util.NewVolumeLocks(), + VolumeGroupLocks: util.NewVolumeLocks(), OperationLocks: util.NewOperationLock(), } } diff --git a/internal/rbd/group_controllerserver.go b/internal/rbd/group_controllerserver.go index 6107b606251..4e4d3c408bc 100644 --- a/internal/rbd/group_controllerserver.go +++ b/internal/rbd/group_controllerserver.go @@ -24,6 +24,7 @@ import ( "google.golang.org/grpc/status" "github.com/ceph/ceph-csi/internal/rbd/types" + "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" ) @@ -50,6 +51,14 @@ func (cs *ControllerServer) CreateVolumeGroupSnapshot( vgsName = req.GetName() ) + // Existence and conflict checks + if acquired := cs.VolumeGroupLocks.TryAcquire(vgsName); !acquired { + log.ErrorLog(ctx, util.SnapshotOperationAlreadyExistsFmt, vgsName) + + return nil, status.Errorf(codes.Aborted, util.SnapshotOperationAlreadyExistsFmt, vgsName) + } + defer cs.VolumeGroupLocks.Release(vgsName) + mgr := NewManager(cs.Driver.GetInstanceID(), req.GetParameters(), req.GetSecrets()) defer mgr.Destroy(ctx) @@ -166,15 +175,25 @@ func (cs *ControllerServer) DeleteVolumeGroupSnapshot( // 1. verify that all snapshots in the request are all snapshots in the group // 2. delete the group + groupSnapshotID := req.GetGroupSnapshotId() + + // Existence and conflict checks + if acquired := cs.VolumeGroupLocks.TryAcquire(groupSnapshotID); !acquired { + log.ErrorLog(ctx, util.SnapshotOperationAlreadyExistsFmt, groupSnapshotID) + + return nil, status.Errorf(codes.Aborted, util.SnapshotOperationAlreadyExistsFmt, groupSnapshotID) + } + defer cs.VolumeGroupLocks.Release(groupSnapshotID) + mgr := NewManager(cs.Driver.GetInstanceID(), nil, req.GetSecrets()) defer mgr.Destroy(ctx) - groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, req.GetGroupSnapshotId()) + groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, groupSnapshotID) if err != nil { return nil, status.Errorf( codes.Internal, "failed to get volume group snapshot with id %q: %v", - req.GetGroupSnapshotId(), err) + groupSnapshotID, err) } defer groupSnapshot.Destroy(ctx) @@ -195,15 +214,25 @@ func (cs *ControllerServer) GetVolumeGroupSnapshot( ctx context.Context, req *csi.GetVolumeGroupSnapshotRequest, ) (*csi.GetVolumeGroupSnapshotResponse, error) { + groupSnapshotID := req.GetGroupSnapshotId() + + // Existence and conflict checks + if acquired := cs.VolumeGroupLocks.TryAcquire(groupSnapshotID); !acquired { + log.ErrorLog(ctx, util.SnapshotOperationAlreadyExistsFmt, groupSnapshotID) + + return nil, status.Errorf(codes.Aborted, util.SnapshotOperationAlreadyExistsFmt, groupSnapshotID) + } + defer cs.VolumeGroupLocks.Release(groupSnapshotID) + mgr := NewManager(cs.Driver.GetInstanceID(), nil, req.GetSecrets()) defer mgr.Destroy(ctx) - groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, req.GetGroupSnapshotId()) + groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, groupSnapshotID) if err != nil { return nil, status.Errorf( codes.Internal, "failed to get volume group snapshot with id %q: %v", - req.GetGroupSnapshotId(), err) + groupSnapshotID, err) } defer groupSnapshot.Destroy(ctx)