Skip to content

Commit

Permalink
DeviceManager: Return no error if the device not found while deleting
Browse files Browse the repository at this point in the history
Discussion on issue intel#413, raised a requirement that PMEM-CSI should handle the
DeleteVolume() call for volumes that the underlined device already deleted
manually by the admin.  So for such a volume we just ignore and return no error
in DeviceManager.DeleteVolume(). This also makes DeleteVolume() idempotent.
  • Loading branch information
avalluri committed Oct 29, 2019
1 parent 248ee6b commit c123983
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pkg/pmem-csi-driver/controllerserver-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ func (cs *masterController) DeleteVolume(ctx context.Context, req *csi.DeleteVol
defer conn.Close() // nolint:errcheck
klog.V(4).Infof("Asking node %s to delete volume name:%s id:%s", node, vol.name, vol.id)
if _, err := csi.NewControllerClient(conn).DeleteVolume(ctx, req); err != nil {
return nil, status.Error(codes.Internal, "Failed to delete volume name:"+vol.name+" id:"+vol.id+" on "+node+": "+err.Error())
return nil, err
}
}
cs.mutex.Lock()
Expand Down
4 changes: 4 additions & 0 deletions pkg/pmem-csi-driver/controllerserver-node.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package pmemcsidriver
import (
"crypto/sha1"
"encoding/hex"
"errors"
"fmt"
"strconv"
"sync"
Expand Down Expand Up @@ -273,6 +274,9 @@ func (cs *nodeControllerServer) DeleteVolume(ctx context.Context, req *csi.Delet
}

if err := cs.dm.DeleteDevice(req.VolumeId, eraseafter); err != nil {
if errors.Is(err, pmdmanager.ErrDeviceInUse) {
return nil, status.Errorf(codes.FailedPrecondition, err.Error())
}
return nil, status.Errorf(codes.Internal, "Failed to delete volume: %s", err.Error())
}
if cs.sm != nil {
Expand Down
15 changes: 13 additions & 2 deletions pkg/pmem-device-manager/pmd-lvm.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package pmdmanager

import (
"errors"
"fmt"
"strconv"
"strings"
Expand Down Expand Up @@ -148,11 +149,21 @@ func (lvm *pmemLvm) DeleteDevice(volumeId string, flush bool) error {
lvmMutex.Lock()
defer lvmMutex.Unlock()

device, err := lvm.getDevice(volumeId)
if err != nil {
var err error
var device *PmemDeviceInfo

if device, err = lvm.getDevice(volumeId); err != nil {
if errors.Is(err, ErrDeviceNotFound) {
return nil
}
return err
}
if err := clearDevice(device, flush); err != nil {
if errors.Is(err, ErrDeviceNotFound) {
// Remove device from cache
delete(lvm.devices, volumeId)
return nil
}
return err
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/pmem-device-manager/pmd-ndctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,15 @@ func (pmem *pmemNdctl) DeleteDevice(volumeId string, flush bool) error {

device, err := pmem.getDevice(volumeId)
if err != nil {
if errors.Is(err, ErrDeviceNotFound) {
return nil
}
return err
}
if err := clearDevice(device, flush); err != nil {
if errors.Is(err, ErrDeviceNotFound) {
return nil
}
return err
}
return pmem.ctx.DestroyNamespaceByName(volumeId)
Expand Down
17 changes: 10 additions & 7 deletions pkg/pmem-device-manager/pmd-util.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
"time"

pmemexec "github.com/intel/pmem-csi/pkg/pmem-exec"
"golang.org/x/sys/unix"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/volume/util/hostutil"
)

const (
Expand All @@ -33,17 +33,20 @@ func clearDevice(dev *PmemDeviceInfo, flush bool) error {
klog.Errorf("clearDevice: %s does not exist", dev.Path)
return err
}

// Check if device
if (fileinfo.Mode() & os.ModeDevice) == 0 {
klog.Errorf("clearDevice: %s is not device", dev.Path)
return fmt.Errorf("%s is not device", dev.Path)
}
devOpen, err := hostutil.NewHostUtil().DeviceOpened(dev.Path)

fd, err := unix.Open(dev.Path, unix.O_RDONLY|unix.O_EXCL|unix.O_CLOEXEC, 0)
defer unix.Close(fd)

if err != nil {
return err
}
if devOpen {
return fmt.Errorf("%s is in use", dev.Path)
return fmt.Errorf("failed to clear device %q as %w", dev.Path, ErrDeviceInUse)
}

if blocks == 0 {
klog.V(5).Infof("Wiping entire device: %s", dev.Path)
// use one iteration instead of shred's default=3 for speed
Expand Down Expand Up @@ -75,5 +78,5 @@ func waitDeviceAppears(dev *PmemDeviceInfo) error {
i, dev.Path, retryStatTimeout)
time.Sleep(retryStatTimeout)
}
return fmt.Errorf("device %s did not appear after multiple retries", dev.Path)
return fmt.Errorf("%w(%s)", ErrDeviceNotReady, dev.Path)
}
26 changes: 24 additions & 2 deletions test/e2e/storage/sanity.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import (
"github.com/kubernetes-csi/csi-test/pkg/sanity"
sanityutils "github.com/kubernetes-csi/csi-test/utils"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand All @@ -43,9 +45,9 @@ import (
clientset "k8s.io/client-go/kubernetes"
clientexec "k8s.io/client-go/util/exec"
"k8s.io/kubernetes/test/e2e/framework"
testutils "k8s.io/kubernetes/test/utils"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
testutils "k8s.io/kubernetes/test/utils"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
Expand All @@ -67,7 +69,7 @@ var _ = Describe("sanity", func() {
// and deletes all extra entries that it does not know about.
TargetPath: "/var/lib/kubelet/plugins/kubernetes.io/csi/pv/pmem-sanity-target.XXXXXX",
StagingPath: "/var/lib/kubelet/plugins/kubernetes.io/csi/pv/pmem-sanity-staging.XXXXXX",
IDGen: &sanity.DefaultIDGenerator{},
IDGen: &sanity.DefaultIDGenerator{},
}

f := framework.NewDefaultFramework("pmem")
Expand Down Expand Up @@ -352,6 +354,26 @@ var _ = Describe("sanity", func() {
Expect(resp.AvailableCapacity).To(Equal(nodeCapacity), "capacity mismatch")
})

It("delete volume should fail with appropriate error", func() {
v.namePrefix = "delete-volume"

name, vol := v.create(2*1024*1024, nodeID)
// Publish for the second time.
nodeID := v.publish(name, vol)

_, err := v.cc.DeleteVolume(v.ctx, &csi.DeleteVolumeRequest{
VolumeId: vol.GetVolumeId(),
})
Expect(err).ShouldNot(BeNil(), fmt.Sprintf("Volume(%s) in use cannot be deleted", name))
s, ok := status.FromError(err)
Expect(ok).Should(BeTrue(), "Expected a status error")
Expect(s.Code()).Should(BeEquivalentTo(codes.FailedPrecondition), "Expected device busy error")

v.unpublish(vol, nodeID)

v.remove(vol, name)
})

var (
numWorkers = flag.Int("pmem.sanity.workers", 10, "number of worker creating volumes in parallel and thus also the maximum number of volumes at any time")
numVolumes = flag.Int("pmem.sanity.volumes",
Expand Down

0 comments on commit c123983

Please sign in to comment.