From 975d0a4bb9089f04607d3c7ccfda07686af5b9b8 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Mon, 5 Mar 2018 09:18:49 +0100 Subject: [PATCH 1/3] Lock subPath volumes Users must not be allowed to step outside the volume with subPath. Therefore the final subPath directory must be "locked" somehow and checked if it's inside volume. On Windows, we lock the directories. On Linux, we bind-mount the final subPath into /var/lib/kubelet/pods//volume-subpaths//, it can't be changed to symlink user once it's bind-mounted. --- .../cm/container_manager_linux_test.go | 12 + pkg/kubelet/container/helpers.go | 2 +- .../container/testing/fake_runtime_helper.go | 4 +- pkg/kubelet/kubelet_pods.go | 67 +- pkg/kubelet/kubelet_pods_test.go | 8 +- .../kuberuntime/kuberuntime_container.go | 17 +- .../kuberuntime/kuberuntime_container_test.go | 8 +- .../kuberuntime/kuberuntime_manager_test.go | 2 +- pkg/kubelet/rkt/rkt.go | 2 +- pkg/kubelet/util/util.go | 13 + pkg/kubelet/util/util_unsupported.go | 9 + .../volumemanager/reconciler/reconciler.go | 2 +- pkg/util/io/BUILD | 1 + pkg/util/io/consistentread.go | 45 + pkg/util/mount/BUILD | 6 +- pkg/util/mount/fake.go | 12 + pkg/util/mount/mount.go | 60 +- pkg/util/mount/mount_linux.go | 481 +++++++ pkg/util/mount/mount_linux_test.go | 1224 +++++++++++++++++ pkg/util/mount/mount_unsupported.go | 18 +- pkg/util/mount/mount_windows.go | 311 +++++ pkg/util/mount/mount_windows_test.go | 446 ++++++ pkg/util/mount/nsenter_mount.go | 51 + pkg/util/mount/nsenter_mount_unsupported.go | 16 + pkg/util/removeall/removeall_test.go | 12 + .../operationexecutor/operation_executor.go | 7 +- .../operation_executor_test.go | 4 +- .../operationexecutor/operation_generator.go | 14 +- 28 files changed, 2798 insertions(+), 56 deletions(-) create mode 100644 pkg/util/io/consistentread.go create mode 100644 pkg/util/mount/mount_windows.go create mode 100644 pkg/util/mount/mount_windows_test.go diff --git a/pkg/kubelet/cm/container_manager_linux_test.go b/pkg/kubelet/cm/container_manager_linux_test.go index d1cba37ae4727..efaf3b3294bee 100644 --- a/pkg/kubelet/cm/container_manager_linux_test.go +++ b/pkg/kubelet/cm/container_manager_linux_test.go @@ -75,6 +75,18 @@ func (mi *fakeMountInterface) PathIsDevice(pathname string) (bool, error) { return true, nil } +func (mi *fakeMountInterface) PrepareSafeSubpath(subPath mount.Subpath) (newHostPath string, cleanupAction func(), err error) { + return "", nil, nil +} + +func (mi *fakeMountInterface) CleanSubPaths(_, _ string) error { + return nil +} + +func (mi *fakeMountInterface) SafeMakeDir(_, _ string, _ os.FileMode) error { + return nil +} + func fakeContainerMgrMountInt() mount.Interface { return &fakeMountInterface{ []mount.MountPoint{ diff --git a/pkg/kubelet/container/helpers.go b/pkg/kubelet/container/helpers.go index 7930fed552bd9..bccf1ace01930 100644 --- a/pkg/kubelet/container/helpers.go +++ b/pkg/kubelet/container/helpers.go @@ -48,7 +48,7 @@ type HandlerRunner interface { // RuntimeHelper wraps kubelet to make container runtime // able to get necessary informations like the RunContainerOptions, DNS settings, Host IP. type RuntimeHelper interface { - GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, useClusterFirstPolicy bool, err error) + GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, useClusterFirstPolicy bool, cleanupAction func(), err error) GetClusterDNS(pod *v1.Pod) (dnsServers []string, dnsSearches []string, useClusterFirstPolicy bool, err error) // GetPodCgroupParent returns the the CgroupName identifer, and its literal cgroupfs form on the host // of a pod. diff --git a/pkg/kubelet/container/testing/fake_runtime_helper.go b/pkg/kubelet/container/testing/fake_runtime_helper.go index 76bbd0a4bc37c..703ca80953b2a 100644 --- a/pkg/kubelet/container/testing/fake_runtime_helper.go +++ b/pkg/kubelet/container/testing/fake_runtime_helper.go @@ -32,12 +32,12 @@ type FakeRuntimeHelper struct { Err error } -func (f *FakeRuntimeHelper) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, bool, error) { +func (f *FakeRuntimeHelper) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, bool, func(), error) { var opts kubecontainer.RunContainerOptions if len(container.TerminationMessagePath) != 0 { opts.PodContainerDir = f.PodContainerDir } - return &opts, false, nil + return &opts, false, nil, nil } func (f *FakeRuntimeHelper) GetPodCgroupParent(pod *v1.Pod) string { diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 8f70d375cf3ae..ca44368312887 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -61,6 +61,7 @@ import ( kubetypes "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/kubelet/util/format" "k8s.io/kubernetes/pkg/util" + mountutil "k8s.io/kubernetes/pkg/util/mount" "k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/volume/util/volumehelper" volumevalidation "k8s.io/kubernetes/pkg/volume/validation" @@ -110,7 +111,7 @@ func (kl *Kubelet) makeDevices(pod *v1.Pod, container *v1.Container) ([]kubecont } // makeMounts determines the mount points for the given container. -func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap) ([]kubecontainer.Mount, error) { +func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap, mounter mountutil.Interface) ([]kubecontainer.Mount, func(), error) { // Kubernetes only mounts on /etc/hosts if : // - container does not use hostNetwork and // - container is not an infrastructure(pause) container @@ -120,7 +121,8 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h mountEtcHostsFile := !pod.Spec.HostNetwork && len(podIP) > 0 && runtime.GOOS != "windows" glog.V(3).Infof("container: %v/%v/%v podIP: %q creating hosts mount: %v", pod.Namespace, pod.Name, container.Name, podIP, mountEtcHostsFile) mounts := []kubecontainer.Mount{} - for _, mount := range container.VolumeMounts { + var cleanupAction func() = nil + for i, mount := range container.VolumeMounts { mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath) vol, ok := podVolumes[mount.Name] if !ok || vol.Mounter == nil { @@ -138,25 +140,29 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h } hostPath, err := volume.GetPath(vol.Mounter) if err != nil { - return nil, err + return nil, cleanupAction, err } if mount.SubPath != "" { if filepath.IsAbs(mount.SubPath) { - return nil, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath) + return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath) } err = volumevalidation.ValidatePathNoBacksteps(mount.SubPath) if err != nil { - return nil, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err) + return nil, cleanupAction, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err) } fileinfo, err := os.Lstat(hostPath) if err != nil { - return nil, err + return nil, cleanupAction, err } perm := fileinfo.Mode() - hostPath = filepath.Join(hostPath, mount.SubPath) + volumePath, err := filepath.EvalSymlinks(hostPath) + if err != nil { + return nil, cleanupAction, err + } + hostPath = filepath.Join(volumePath, mount.SubPath) if subPathExists, err := util.FileOrSymlinkExists(hostPath); err != nil { glog.Errorf("Could not determine if subPath %s exists; will not attempt to change its permissions", hostPath) @@ -165,17 +171,25 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h // incorrect ownership and mode. For example, the sub path directory must have at least g+rwx // when the pod specifies an fsGroup, and if the directory is not created here, Docker will // later auto-create it with the incorrect mode 0750 - if err := os.MkdirAll(hostPath, perm); err != nil { - glog.Errorf("failed to mkdir:%s", hostPath) - return nil, err - } - - // chmod the sub path because umask may have prevented us from making the sub path with the same - // permissions as the mounter path - if err := os.Chmod(hostPath, perm); err != nil { - return nil, err + // Make extra care not to escape the volume! + if err := mounter.SafeMakeDir(hostPath, volumePath, perm); err != nil { + glog.Errorf("failed to mkdir %q: %v", hostPath, err) + return nil, cleanupAction, err } } + hostPath, cleanupAction, err = mounter.PrepareSafeSubpath(mountutil.Subpath{ + VolumeMountIndex: i, + Path: hostPath, + VolumeName: mount.Name, + VolumePath: volumePath, + PodDir: podDir, + ContainerName: container.Name, + }) + if err != nil { + // Don't pass detailed error back to the user because it could give information about host filesystem + glog.Errorf("failed to prepare subPath for volumeMount %q of container %q: %v", mount.Name, container.Name, err) + return nil, cleanupAction, fmt.Errorf("failed to prepare subPath for volumeMount %q of container %q", mount.Name, container.Name) + } } // Docker Volume Mounts fail on Windows if it is not of the form C:/ @@ -203,11 +217,11 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h hostAliases := pod.Spec.HostAliases hostsMount, err := makeHostsMount(podDir, podIP, hostName, hostDomain, hostAliases) if err != nil { - return nil, err + return nil, cleanupAction, err } mounts = append(mounts, *hostsMount) } - return mounts, nil + return mounts, cleanupAction, nil } // makeHostsMount makes the mountpoint for the hosts file that the containers @@ -314,14 +328,14 @@ func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string { // GenerateRunContainerOptions generates the RunContainerOptions, which can be used by // the container runtime to set parameters for launching a container. -func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, bool, error) { +func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, bool, func(), error) { var err error useClusterFirstPolicy := false cgroupParent := kl.GetPodCgroupParent(pod) opts := &kubecontainer.RunContainerOptions{CgroupParent: cgroupParent} hostname, hostDomainName, err := kl.GeneratePodHostNameAndDomain(pod) if err != nil { - return nil, false, err + return nil, false, nil, err } opts.Hostname = hostname podName := volumehelper.GetUniquePodName(pod) @@ -331,16 +345,17 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai // TODO(random-liu): Move following convert functions into pkg/kubelet/container opts.Devices, err = kl.makeDevices(pod, container) if err != nil { - return nil, false, err + return nil, false, nil, err } - opts.Mounts, err = makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes) + var cleanupAction func() + opts.Mounts, cleanupAction, err = makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes, kl.mounter) if err != nil { - return nil, false, err + return nil, false, cleanupAction, err } opts.Envs, err = kl.makeEnvironmentVariables(pod, container, podIP) if err != nil { - return nil, false, err + return nil, false, cleanupAction, err } // Disabling adding TerminationMessagePath on Windows as these files would be mounted as docker volume and @@ -356,7 +371,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai opts.DNS, opts.DNSSearch, useClusterFirstPolicy, err = kl.GetClusterDNS(pod) if err != nil { - return nil, false, err + return nil, false, cleanupAction, err } // only do this check if the experimental behavior is enabled, otherwise allow it to default to false @@ -364,7 +379,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai opts.EnableHostUserNamespace = kl.enableHostUserNamespace(pod) } - return opts, useClusterFirstPolicy, nil + return opts, useClusterFirstPolicy, cleanupAction, nil } var masterServices = sets.NewString("kubernetes") diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index ef3cb60cb3b55..a970315440460 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -39,6 +39,7 @@ import ( containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" "k8s.io/kubernetes/pkg/kubelet/server/portforward" "k8s.io/kubernetes/pkg/kubelet/server/remotecommand" + "k8s.io/kubernetes/pkg/util/mount" ) func TestMakeMounts(t *testing.T) { @@ -149,13 +150,14 @@ func TestMakeMounts(t *testing.T) { for name, tc := range testCases { t.Run(name, func(t *testing.T) { + fm := &mount.FakeMounter{} pod := v1.Pod{ Spec: v1.PodSpec{ HostNetwork: true, }, } - mounts, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", "", tc.podVolumes) + mounts, _, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", "", tc.podVolumes, fm) // validate only the error if we expect an error if tc.expectErr { @@ -338,7 +340,7 @@ func TestGenerateRunContainerOptions_DNSConfigurationParams(t *testing.T) { options := make([]*kubecontainer.RunContainerOptions, 4) for i, pod := range pods { var err error - options[i], _, err = kubelet.GenerateRunContainerOptions(pod, &v1.Container{}, "") + options[i], _, _, err = kubelet.GenerateRunContainerOptions(pod, &v1.Container{}, "") if err != nil { t.Fatalf("failed to generate container options: %v", err) } @@ -371,7 +373,7 @@ func TestGenerateRunContainerOptions_DNSConfigurationParams(t *testing.T) { kubelet.resolverConfig = "/etc/resolv.conf" for i, pod := range pods { var err error - options[i], _, err = kubelet.GenerateRunContainerOptions(pod, &v1.Container{}, "") + options[i], _, _, err = kubelet.GenerateRunContainerOptions(pod, &v1.Container{}, "") if err != nil { t.Fatalf("failed to generate container options: %v", err) } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 30901f58dd202..04ad50559ed0c 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -99,7 +99,10 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb restartCount = containerStatus.RestartCount + 1 } - containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef) + containerConfig, cleanupAction, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef) + if cleanupAction != nil { + defer cleanupAction() + } if err != nil { m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err)) return "Generate Container Config Failed", err @@ -157,20 +160,20 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb } // generateContainerConfig generates container config for kubelet runtime v1. -func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string) (*runtimeapi.ContainerConfig, error) { - opts, _, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) +func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string) (*runtimeapi.ContainerConfig, func(), error) { + opts, _, cleanupAction, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) if err != nil { - return nil, err + return nil, nil, err } uid, username, err := m.getImageUser(container.Image) if err != nil { - return nil, err + return nil, cleanupAction, err } // Verify RunAsNonRoot. Non-root verification only supports numeric user. if err := verifyRunAsNonRoot(pod, container, uid, username); err != nil { - return nil, err + return nil, cleanupAction, err } command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs) @@ -207,7 +210,7 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai } config.Envs = envs - return config, nil + return config, cleanupAction, nil } // generateLinuxContainerConfig generates linux container config for kubelet runtime v1. diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_test.go b/pkg/kubelet/kuberuntime/kuberuntime_container_test.go index f00dc4d7a680e..59b230db490b1 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_test.go @@ -170,7 +170,7 @@ func makeExpetectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIn container := &pod.Spec.Containers[containerIndex] podIP := "" restartCount := 0 - opts, _, _ := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) + opts, _, _, _ := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) containerLogsPath := buildContainerLogsPath(container.Name, restartCount) restartCountUint32 := uint32(restartCount) envs := make([]*runtimeapi.KeyValue, len(opts.Envs)) @@ -222,7 +222,7 @@ func TestGenerateContainerConfig(t *testing.T) { } expectedConfig := makeExpetectedConfig(m, pod, 0) - containerConfig, err := m.generateContainerConfig(&pod.Spec.Containers[0], pod, 0, "", pod.Spec.Containers[0].Image) + containerConfig, _, err := m.generateContainerConfig(&pod.Spec.Containers[0], pod, 0, "", pod.Spec.Containers[0].Image) assert.NoError(t, err) assert.Equal(t, expectedConfig, containerConfig, "generate container config for kubelet runtime v1.") @@ -252,7 +252,7 @@ func TestGenerateContainerConfig(t *testing.T) { } expectedConfig = makeExpetectedConfig(m, podWithContainerSecurityContext, 0) - containerConfig, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image) + containerConfig, _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image) assert.Error(t, err) imageId, _ := imageService.PullImage(&runtimeapi.ImageSpec{Image: "busybox"}, nil) @@ -264,6 +264,6 @@ func TestGenerateContainerConfig(t *testing.T) { podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsUser = nil podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsNonRoot = &runAsNonRootTrue - _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image) + _, _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image) assert.Error(t, err, "RunAsNonRoot should fail for non-numeric username") } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 8ad095858f223..bba9afa32167d 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -141,7 +141,7 @@ func makeFakeContainer(t *testing.T, m *kubeGenericRuntimeManager, template cont sandboxConfig, err := m.generatePodSandboxConfig(template.pod, template.sandboxAttempt) assert.NoError(t, err, "generatePodSandboxConfig for container template %+v", template) - containerConfig, err := m.generateContainerConfig(template.container, template.pod, template.attempt, "", template.container.Image) + containerConfig, _, err := m.generateContainerConfig(template.container, template.pod, template.attempt, "", template.container.Image) assert.NoError(t, err, "generateContainerConfig for container template %+v", template) podSandboxID := apitest.BuildSandboxName(sandboxConfig.Metadata) diff --git a/pkg/kubelet/rkt/rkt.go b/pkg/kubelet/rkt/rkt.go index d16c92db311a3..a5353a20bf984 100644 --- a/pkg/kubelet/rkt/rkt.go +++ b/pkg/kubelet/rkt/rkt.go @@ -832,7 +832,7 @@ func (r *Runtime) newAppcRuntimeApp(pod *v1.Pod, podIP string, c v1.Container, r } // TODO: determine how this should be handled for rkt - opts, _, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, podIP) + opts, _, _, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, podIP) if err != nil { return err } diff --git a/pkg/kubelet/util/util.go b/pkg/kubelet/util/util.go index eb7cf142754bb..cb70c07f86f78 100644 --- a/pkg/kubelet/util/util.go +++ b/pkg/kubelet/util/util.go @@ -19,6 +19,8 @@ package util import ( "fmt" "net/url" + "path/filepath" + "strings" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -45,3 +47,14 @@ func parseEndpoint(endpoint string) (string, string, error) { return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme) } } + +func pathWithinBase(fullPath, basePath string) bool { + rel, err := filepath.Rel(basePath, fullPath) + if err != nil { + return false + } + if strings.HasPrefix(rel, "..") { + return false + } + return true +} diff --git a/pkg/kubelet/util/util_unsupported.go b/pkg/kubelet/util/util_unsupported.go index 616aabfe3b3bf..a08abb276156c 100644 --- a/pkg/kubelet/util/util_unsupported.go +++ b/pkg/kubelet/util/util_unsupported.go @@ -31,3 +31,12 @@ func CreateListener(endpoint string) (net.Listener, error) { func GetAddressAndDialer(endpoint string) (string, func(addr string, timeout time.Duration) (net.Conn, error), error) { return "", nil, fmt.Errorf("GetAddressAndDialer is unsupported in this build") } + +// LockAndCheckSubPath empty implementation +func LockAndCheckSubPath(volumePath, subPath string) ([]uintptr, error) { + return []uintptr{}, nil +} + +// UnlockPath empty implementation +func UnlockPath(fileHandles []uintptr) { +} diff --git a/pkg/kubelet/volumemanager/reconciler/reconciler.go b/pkg/kubelet/volumemanager/reconciler/reconciler.go index 77ac02db5911c..94ace4ba2f6a3 100644 --- a/pkg/kubelet/volumemanager/reconciler/reconciler.go +++ b/pkg/kubelet/volumemanager/reconciler/reconciler.go @@ -174,7 +174,7 @@ func (rc *reconciler) reconcile() { // Volume is mounted, unmount it glog.V(12).Infof(mountedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmountVolume", "")) err := rc.operationExecutor.UnmountVolume( - mountedVolume.MountedVolume, rc.actualStateOfWorld) + mountedVolume.MountedVolume, rc.actualStateOfWorld, rc.kubeletPodsDir) if err != nil && !nestedpendingoperations.IsAlreadyExists(err) && !exponentialbackoff.IsExponentialBackoff(err) { diff --git a/pkg/util/io/BUILD b/pkg/util/io/BUILD index def8f7bee2582..e456b74f19c70 100644 --- a/pkg/util/io/BUILD +++ b/pkg/util/io/BUILD @@ -11,6 +11,7 @@ load( go_library( name = "go_default_library", srcs = [ + "consistentread.go", "io.go", "writer.go", ], diff --git a/pkg/util/io/consistentread.go b/pkg/util/io/consistentread.go new file mode 100644 index 0000000000000..6e1f17b098551 --- /dev/null +++ b/pkg/util/io/consistentread.go @@ -0,0 +1,45 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package io + +import ( + "bytes" + "fmt" + "io/ioutil" +) + +// ConsistentRead repeatedly reads a file until it gets the same content twice. +// This is useful when reading files in /proc that are larger than page size +// and kernel may modify them between individual read() syscalls. +func ConsistentRead(filename string, attempts int) ([]byte, error) { + oldContent, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + for i := 0; i < attempts; i++ { + newContent, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + if bytes.Compare(oldContent, newContent) == 0 { + return newContent, nil + } + // Files are different, continue reading + oldContent = newContent + } + return nil, fmt.Errorf("could not get consistent content of %s after %d attempts", filename, attempts) +} diff --git a/pkg/util/mount/BUILD b/pkg/util/mount/BUILD index 906c610f98de3..04226945de33a 100644 --- a/pkg/util/mount/BUILD +++ b/pkg/util/mount/BUILD @@ -20,6 +20,7 @@ go_library( tags = ["automanaged"], deps = [ "//pkg/util/exec:go_default_library", + "//pkg/util/io:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", ], @@ -33,7 +34,10 @@ go_test( ], library = ":go_default_library", tags = ["automanaged"], - deps = ["//pkg/util/exec:go_default_library"], + deps = [ + "//pkg/util/exec:go_default_library", + "//vendor/github.com/golang/glog:go_default_library", + ], ) filegroup( diff --git a/pkg/util/mount/fake.go b/pkg/util/mount/fake.go index 2b71fa0a72851..2ad5f0f3af03c 100644 --- a/pkg/util/mount/fake.go +++ b/pkg/util/mount/fake.go @@ -17,6 +17,7 @@ limitations under the License. package mount import ( + "os" "path/filepath" "sync" @@ -171,3 +172,14 @@ func (f *FakeMounter) PathIsDevice(pathname string) (bool, error) { func (f *FakeMounter) GetDeviceNameFromMount(mountPath, pluginDir string) (string, error) { return getDeviceNameFromMount(f, mountPath, pluginDir) } + +func (f *FakeMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, nil +} + +func (f *FakeMounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} +func (mounter *FakeMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return nil +} diff --git a/pkg/util/mount/mount.go b/pkg/util/mount/mount.go index 0c458d64b4995..45a29fdbd5b12 100644 --- a/pkg/util/mount/mount.go +++ b/pkg/util/mount/mount.go @@ -20,9 +20,11 @@ package mount import ( "fmt" + "os" "path" "path/filepath" "strings" + "syscall" "github.com/golang/glog" "k8s.io/kubernetes/pkg/util/exec" @@ -68,6 +70,42 @@ type Interface interface { // GetDeviceNameFromMount finds the device name by checking the mount path // to get the global mount path which matches its plugin directory GetDeviceNameFromMount(mountPath, pluginDir string) (string, error) + // SafeMakeDir makes sure that the created directory does not escape given + // base directory mis-using symlinks. The directory is created in the same + // mount namespace as where kubelet is running. Note that the function makes + // sure that it creates the directory somewhere under the base, nothing + // else. E.g. if the directory already exists, it may exists outside of the + // base due to symlinks. + SafeMakeDir(pathname string, base string, perm os.FileMode) error + // CleanSubPaths removes any bind-mounts created by PrepareSafeSubpath in given + // pod volume directory. + CleanSubPaths(podDir string, volumeName string) error + // PrepareSafeSubpath does everything that's necessary to prepare a subPath + // that's 1) inside given volumePath and 2) immutable after this call. + // + // newHostPath - location of prepared subPath. It should be used instead of + // hostName when running the container. + // cleanupAction - action to run when the container is running or it failed to start. + // + // CleanupAction must be called immediately after the container with given + // subpath starts. On the other hand, Interface.CleanSubPaths must be called + // when the pod finishes. + PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) +} + +type Subpath struct { + // index of the VolumeMount for this container + VolumeMountIndex int + // Full path to the subpath directory on the host + Path string + // name of the volume that is a valid directory name. + VolumeName string + // Full path to the volume path + VolumePath string + // Path to the pod's directory, including pod UID + PodDir string + // Name of the container + ContainerName string } // Compile-time check to ensure all Mounter implementations satisfy @@ -213,6 +251,13 @@ func getDeviceNameFromMount(mounter Interface, mountPath, pluginDir string) (str return path.Base(mountPath), nil } +func isNotDirErr(err error) bool { + if e, ok := err.(*os.PathError); ok && e.Err == syscall.ENOTDIR { + return true + } + return false +} + // IsNotMountPoint determines if a directory is a mountpoint. // It should return ErrNotExist when the directory does not exist. // This method uses the List() of all mountpoints @@ -222,7 +267,7 @@ func IsNotMountPoint(mounter Interface, file string) (bool, error) { // IsLikelyNotMountPoint provides a quick check // to determine whether file IS A mountpoint notMnt, notMntErr := mounter.IsLikelyNotMountPoint(file) - if notMntErr != nil { + if notMntErr != nil && isNotDirErr(notMntErr) { return notMnt, notMntErr } // identified as mountpoint, so return this fact @@ -243,3 +288,16 @@ func IsNotMountPoint(mounter Interface, file string) (bool, error) { } return notMnt, nil } + +// pathWithinBase checks if give path is withing given base directory. +func pathWithinBase(fullPath, basePath string) bool { + rel, err := filepath.Rel(basePath, fullPath) + if err != nil { + return false + } + if strings.HasPrefix(rel, "..") { + // Needed to escape the base path + return false + } + return true +} diff --git a/pkg/util/mount/mount_linux.go b/pkg/util/mount/mount_linux.go index 4c141ad5b0a95..7ba3630fd5348 100644 --- a/pkg/util/mount/mount_linux.go +++ b/pkg/util/mount/mount_linux.go @@ -23,8 +23,10 @@ import ( "fmt" "hash/fnv" "io" + "io/ioutil" "os" "os/exec" + "path/filepath" "strconv" "strings" "syscall" @@ -32,6 +34,7 @@ import ( "github.com/golang/glog" "k8s.io/apimachinery/pkg/util/sets" utilexec "k8s.io/kubernetes/pkg/util/exec" + utilio "k8s.io/kubernetes/pkg/util/io" ) const ( @@ -48,6 +51,11 @@ const ( fsckErrorsCorrected = 1 // 'fsck' found errors but exited without correcting them fsckErrorsUncorrected = 4 + + // place for subpath mounts + containerSubPathDirectoryName = "volume-subpaths" + // syscall.Openat flags used to traverse directories not following symlinks + nofollowFlags = syscall.O_RDONLY | syscall.O_NOFOLLOW ) // Mounter provides the default implementation of mount.Interface @@ -297,6 +305,7 @@ func readProcMountsFrom(file io.Reader, out *[]MountPoint) (uint32, error) { if err == io.EOF { break } + // See `man proc` for authoritative description of format of the file. fields := strings.Fields(line) if len(fields) != expectedNumFieldsPerLine { return 0, fmt.Errorf("wrong number of fields (expected %d, got %d): %s", expectedNumFieldsPerLine, len(fields), line) @@ -430,3 +439,475 @@ func (mounter *SafeFormatAndMount) getDiskFormat(disk string) (string, error) { // and MD RAID are reported as FSTYPE and caught above). return "unknown data, probably partitions", nil } + +func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + newHostPath, err = doBindSubPath(mounter, subPath, os.Getpid()) + // There is no action when the container starts. Bind-mount will be cleaned + // when container stops by CleanSubPaths. + cleanupAction = nil + return newHostPath, cleanupAction, err +} + +// This implementation is shared between Linux and NsEnterMounter +// kubeletPid is PID of kubelet in the PID namespace where bind-mount is done, +// i.e. pid on the *host* if kubelet runs in a container. +func doBindSubPath(mounter Interface, subpath Subpath, kubeletPid int) (hostPath string, err error) { + // Check early for symlink. This is just a pre-check to avoid bind-mount + // before the final check. + evalSubPath, err := filepath.EvalSymlinks(subpath.Path) + if err != nil { + return "", fmt.Errorf("evalSymlinks %q failed: %v", subpath.Path, err) + } + glog.V(5).Infof("doBindSubPath %q, full subpath %q for volumepath %q", subpath.Path, evalSubPath, subpath.VolumePath) + + evalSubPath = filepath.Clean(evalSubPath) + if !pathWithinBase(evalSubPath, subpath.VolumePath) { + return "", fmt.Errorf("subpath %q not within volume path %q", evalSubPath, subpath.VolumePath) + } + + // Prepare directory for bind mounts + // containerName is DNS label, i.e. safe as a directory name. + bindDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName) + err = os.MkdirAll(bindDir, 0750) + if err != nil && !os.IsExist(err) { + return "", fmt.Errorf("error creating directory %s: %s", bindDir, err) + } + bindPathTarget := filepath.Join(bindDir, strconv.Itoa(subpath.VolumeMountIndex)) + + success := false + defer func() { + // Cleanup subpath on error + if !success { + glog.V(4).Infof("doBindSubPath() failed for %q, cleaning up subpath", bindPathTarget) + if cleanErr := cleanSubPath(mounter, subpath); cleanErr != nil { + glog.Errorf("Failed to clean subpath %q: %v", bindPathTarget, cleanErr) + } + } + }() + + // Check it's not already bind-mounted + notMount, err := IsNotMountPoint(mounter, bindPathTarget) + if err != nil { + if !os.IsNotExist(err) { + return "", fmt.Errorf("error checking path %s for mount: %s", bindPathTarget, err) + } + // Ignore ErrorNotExist: the file/directory will be created below if it does not exist yet. + notMount = true + } + if !notMount { + // It's already mounted + glog.V(5).Infof("Skipping bind-mounting subpath %s: already mounted", bindPathTarget) + success = true + return bindPathTarget, nil + } + + // Create target of the bind mount. A directory for directories, empty file + // for everything else. + t, err := os.Lstat(subpath.Path) + if err != nil { + return "", fmt.Errorf("lstat %s failed: %s", subpath.Path, err) + } + if t.Mode()&os.ModeDir > 0 { + if err = os.Mkdir(bindPathTarget, 0750); err != nil && !os.IsExist(err) { + return "", fmt.Errorf("error creating directory %s: %s", bindPathTarget, err) + } + } else { + // "/bin/touch ". + // A file is enough for all possible targets (symlink, device, pipe, + // socket, ...), bind-mounting them into a file correctly changes type + // of the target file. + if err = ioutil.WriteFile(bindPathTarget, []byte{}, 0640); err != nil { + return "", fmt.Errorf("error creating file %s: %s", bindPathTarget, err) + } + } + + // Safe open subpath and get the fd + fd, err := doSafeOpen(evalSubPath, subpath.VolumePath) + if err != nil { + return "", fmt.Errorf("error opening subpath %v: %v", evalSubPath, err) + } + defer syscall.Close(fd) + + mountSource := fmt.Sprintf("/proc/%d/fd/%v", kubeletPid, fd) + + // Do the bind mount + glog.V(5).Infof("bind mounting %q at %q", mountSource, bindPathTarget) + if err = mounter.Mount(mountSource, bindPathTarget, "" /*fstype*/, []string{"bind"}); err != nil { + return "", fmt.Errorf("error mounting %s: %s", subpath.Path, err) + } + + success = true + glog.V(3).Infof("Bound SubPath %s into %s", subpath.Path, bindPathTarget) + return bindPathTarget, nil +} + +func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error { + return doCleanSubPaths(mounter, podDir, volumeName) +} + +// This implementation is shared between Linux and NsEnterMounter +func doCleanSubPaths(mounter Interface, podDir string, volumeName string) error { + glog.V(4).Infof("Cleaning up subpath mounts for %s", podDir) + // scan /var/lib/kubelet/pods//volume-subpaths//* + subPathDir := filepath.Join(podDir, containerSubPathDirectoryName, volumeName) + containerDirs, err := ioutil.ReadDir(subPathDir) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return fmt.Errorf("error reading %s: %s", subPathDir, err) + } + + for _, containerDir := range containerDirs { + if !containerDir.IsDir() { + glog.V(4).Infof("Container file is not a directory: %s", containerDir.Name()) + continue + } + glog.V(4).Infof("Cleaning up subpath mounts for container %s", containerDir.Name()) + + // scan /var/lib/kubelet/pods//volume-subpaths///* + fullContainerDirPath := filepath.Join(subPathDir, containerDir.Name()) + subPaths, err := ioutil.ReadDir(fullContainerDirPath) + if err != nil { + return fmt.Errorf("error reading %s: %s", fullContainerDirPath, err) + } + for _, subPath := range subPaths { + if err = doCleanSubPath(mounter, fullContainerDirPath, subPath.Name()); err != nil { + return err + } + } + // Whole container has been processed, remove its directory. + if err := os.Remove(fullContainerDirPath); err != nil { + return fmt.Errorf("error deleting %s: %s", fullContainerDirPath, err) + } + glog.V(5).Infof("Removed %s", fullContainerDirPath) + } + // Whole pod volume subpaths have been cleaned up, remove its subpath directory. + if err := os.Remove(subPathDir); err != nil { + return fmt.Errorf("error deleting %s: %s", subPathDir, err) + } + glog.V(5).Infof("Removed %s", subPathDir) + + // Remove entire subpath directory if it's the last one + podSubPathDir := filepath.Join(podDir, containerSubPathDirectoryName) + if err := os.Remove(podSubPathDir); err != nil && !os.IsExist(err) { + return fmt.Errorf("error deleting %s: %s", podSubPathDir, err) + } + glog.V(5).Infof("Removed %s", podSubPathDir) + return nil +} + +// doCleanSubPath tears down the single subpath bind mount +func doCleanSubPath(mounter Interface, fullContainerDirPath, subPathIndex string) error { + // process /var/lib/kubelet/pods//volume-subpaths/// + glog.V(4).Infof("Cleaning up subpath mounts for subpath %v", subPathIndex) + fullSubPath := filepath.Join(fullContainerDirPath, subPathIndex) + notMnt, err := IsNotMountPoint(mounter, fullSubPath) + if err != nil { + return fmt.Errorf("error checking %s for mount: %s", fullSubPath, err) + } + // Unmount it + if !notMnt { + if err = mounter.Unmount(fullSubPath); err != nil { + return fmt.Errorf("error unmounting %s: %s", fullSubPath, err) + } + glog.V(5).Infof("Unmounted %s", fullSubPath) + } + // Remove it *non*-recursively, just in case there were some hiccups. + if err = os.Remove(fullSubPath); err != nil { + return fmt.Errorf("error deleting %s: %s", fullSubPath, err) + } + glog.V(5).Infof("Removed %s", fullSubPath) + return nil +} + +// cleanSubPath will teardown the subpath bind mount and any remove any directories if empty +func cleanSubPath(mounter Interface, subpath Subpath) error { + containerDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName) + + // Clean subdir bindmount + if err := doCleanSubPath(mounter, containerDir, strconv.Itoa(subpath.VolumeMountIndex)); err != nil && !os.IsNotExist(err) { + return err + } + + // Recusively remove directories if empty + if err := removeEmptyDirs(subpath.PodDir, containerDir); err != nil { + return err + } + + return nil +} + +// removeEmptyDirs works backwards from endDir to baseDir and removes each directory +// if it is empty. It stops once it encounters a directory that has content +func removeEmptyDirs(baseDir, endDir string) error { + if !pathWithinBase(endDir, baseDir) { + return fmt.Errorf("endDir %q is not within baseDir %q", endDir, baseDir) + } + + for curDir := endDir; curDir != baseDir; curDir = filepath.Dir(curDir) { + s, err := os.Stat(curDir) + if err != nil { + if os.IsNotExist(err) { + glog.V(5).Infof("curDir %q doesn't exist, skipping", curDir) + continue + } + return fmt.Errorf("error stat %q: %v", curDir, err) + } + if !s.IsDir() { + return fmt.Errorf("path %q not a directory", curDir) + } + + err = os.Remove(curDir) + if os.IsExist(err) { + glog.V(5).Infof("Directory %q not empty, not removing", curDir) + break + } else if err != nil { + return fmt.Errorf("error removing directory %q: %v", curDir, err) + } + glog.V(5).Infof("Removed directory %q", curDir) + } + return nil +} + +func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return doSafeMakeDir(pathname, base, perm) +} + +// This implementation is shared between Linux and NsEnterMounter +func doSafeMakeDir(pathname string, base string, perm os.FileMode) error { + glog.V(4).Infof("Creating directory %q within base %q", pathname, base) + + if !pathWithinBase(pathname, base) { + return fmt.Errorf("path %s is outside of allowed base %s", pathname, base) + } + + // Quick check if the directory already exists + s, err := os.Stat(pathname) + if err == nil { + // Path exists + if s.IsDir() { + // The directory already exists. It can be outside of the parent, + // but there is no race-proof check. + glog.V(4).Infof("Directory %s already exists", pathname) + return nil + } + return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR} + } + + // Find all existing directories + existingPath, toCreate, err := findExistingPrefix(base, pathname) + if err != nil { + return fmt.Errorf("error opening directory %s: %s", pathname, err) + } + // Ensure the existing directory is inside allowed base + fullExistingPath, err := filepath.EvalSymlinks(existingPath) + if err != nil { + return fmt.Errorf("error opening directory %s: %s", existingPath, err) + } + if !pathWithinBase(fullExistingPath, base) { + return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, err) + } + + glog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...)) + parentFD, err := doSafeOpen(fullExistingPath, base) + if err != nil { + return fmt.Errorf("cannot open directory %s: %s", existingPath, err) + } + childFD := -1 + defer func() { + if parentFD != -1 { + if err = syscall.Close(parentFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err) + } + } + if childFD != -1 { + if err = syscall.Close(childFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", childFD, pathname, err) + } + } + }() + + currentPath := fullExistingPath + // create the directories one by one, making sure nobody can change + // created directory into symlink. + for _, dir := range toCreate { + currentPath = filepath.Join(currentPath, dir) + glog.V(4).Infof("Creating %s", dir) + err = syscall.Mkdirat(parentFD, currentPath, uint32(perm)) + if err != nil { + return fmt.Errorf("cannot create directory %s: %s", currentPath, err) + } + // Dive into the created directory + childFD, err := syscall.Openat(parentFD, dir, nofollowFlags, 0) + if err != nil { + return fmt.Errorf("cannot open %s: %s", currentPath, err) + } + // We can be sure that childFD is safe to use. It could be changed + // by user after Mkdirat() and before Openat(), however: + // - it could not be changed to symlink - we use nofollowFlags + // - it could be changed to a file (or device, pipe, socket, ...) + // but either subsequent Mkdirat() fails or we mount this file + // to user's container. Security is no violated in both cases + // and user either gets error or the file that it can already access. + + if err = syscall.Close(parentFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err) + } + parentFD = childFD + childFD = -1 + } + + // Everything was created. mkdirat(..., perm) above was affected by current + // umask and we must apply the right permissions to the last directory + // (that's the one that will be available to the container as subpath) + // so user can read/write it. This is the behavior of previous code. + // TODO: chmod all created directories, not just the last one. + // parentFD is the last created directory. + if err = syscall.Fchmod(parentFD, uint32(perm)&uint32(os.ModePerm)); err != nil { + return fmt.Errorf("chmod %q failed: %s", currentPath, err) + } + return nil +} + +// findExistingPrefix finds prefix of pathname that exists. In addition, it +// returns list of remaining directories that don't exist yet. +func findExistingPrefix(base, pathname string) (string, []string, error) { + rel, err := filepath.Rel(base, pathname) + if err != nil { + return base, nil, err + } + dirs := strings.Split(rel, string(filepath.Separator)) + + // Do OpenAt in a loop to find the first non-existing dir. Resolve symlinks. + // This should be faster than looping through all dirs and calling os.Stat() + // on each of them, as the symlinks are resolved only once with OpenAt(). + currentPath := base + fd, err := syscall.Open(currentPath, syscall.O_RDONLY, 0) + if err != nil { + return pathname, nil, fmt.Errorf("error opening %s: %s", currentPath, err) + } + defer func() { + if err = syscall.Close(fd); err != nil { + glog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err) + } + }() + for i, dir := range dirs { + childFD, err := syscall.Openat(fd, dir, syscall.O_RDONLY, 0) + if err != nil { + if os.IsNotExist(err) { + return currentPath, dirs[i:], nil + } + return base, nil, err + } + if err = syscall.Close(fd); err != nil { + glog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err) + } + fd = childFD + currentPath = filepath.Join(currentPath, dir) + } + return pathname, []string{}, nil +} + +// This implementation is shared between Linux and NsEnterMounter +// Open path and return its fd. +// Symlinks are disallowed (pathname must already resolve symlinks), +// and the path must be within the base directory. +func doSafeOpen(pathname string, base string) (int, error) { + // Calculate segments to follow + subpath, err := filepath.Rel(base, pathname) + if err != nil { + return -1, err + } + segments := strings.Split(subpath, string(filepath.Separator)) + + // Assumption: base is the only directory that we have under control. + // Base dir is not allowed to be a symlink. + parentFD, err := syscall.Open(base, nofollowFlags, 0) + if err != nil { + return -1, fmt.Errorf("cannot open directory %s: %s", base, err) + } + defer func() { + if parentFD != -1 { + if err = syscall.Close(parentFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", parentFD, pathname, err) + } + } + }() + + childFD := -1 + defer func() { + if childFD != -1 { + if err = syscall.Close(childFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", childFD, pathname, err) + } + } + }() + + currentPath := base + + // Follow the segments one by one using openat() to make + // sure the user cannot change already existing directories into symlinks. + for _, seg := range segments { + currentPath = filepath.Join(currentPath, seg) + if !pathWithinBase(currentPath, base) { + return -1, fmt.Errorf("path %s is outside of allowed base %s", currentPath, base) + } + + glog.V(5).Infof("Opening path %s", currentPath) + childFD, err = syscall.Openat(parentFD, seg, nofollowFlags, 0) + if err != nil { + return -1, fmt.Errorf("cannot open %s: %s", currentPath, err) + } + + // Close parentFD + if err = syscall.Close(parentFD); err != nil { + return -1, fmt.Errorf("closing fd for %q failed: %v", filepath.Dir(currentPath), err) + } + // Set child to new parent + parentFD = childFD + childFD = -1 + } + + // We made it to the end, return this fd, don't close it + finalFD := parentFD + parentFD = -1 + + return finalFD, nil +} + +type mountInfo struct { + mountPoint string + // list of "optional parameters", mount propagation is one of them + optional []string +} + +// parseMountInfo parses /proc/xxx/mountinfo. +func parseMountInfo(filename string) ([]mountInfo, error) { + content, err := utilio.ConsistentRead(filename, maxListTries) + if err != nil { + return []mountInfo{}, err + } + contentStr := string(content) + infos := []mountInfo{} + + for _, line := range strings.Split(contentStr, "\n") { + if line == "" { + // the last split() item is empty string following the last \n + continue + } + fields := strings.Fields(line) + if len(fields) < 7 { + return nil, fmt.Errorf("wrong number of fields in (expected %d, got %d): %s", 8, len(fields), line) + } + info := mountInfo{ + mountPoint: fields[4], + optional: []string{}, + } + for i := 6; i < len(fields) && fields[i] != "-"; i++ { + info.optional = append(info.optional, fields[i]) + } + infos = append(infos, info) + } + return infos, nil +} diff --git a/pkg/util/mount/mount_linux_test.go b/pkg/util/mount/mount_linux_test.go index e13cb7a0f9886..778d9e502b587 100644 --- a/pkg/util/mount/mount_linux_test.go +++ b/pkg/util/mount/mount_linux_test.go @@ -19,8 +19,18 @@ limitations under the License. package mount import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "reflect" "strings" + "syscall" "testing" + + "strconv" + + "github.com/golang/glog" ) func TestReadProcMountsFrom(t *testing.T) { @@ -181,3 +191,1217 @@ func TestGetDeviceNameFromMount(t *testing.T) { } } } + +func TestPathWithinBase(t *testing.T) { + tests := []struct { + name string + fullPath string + basePath string + expected bool + }{ + { + name: "good subpath", + fullPath: "/a/b/c", + basePath: "/a", + expected: true, + }, + { + name: "good subpath 2", + fullPath: "/a/b/c", + basePath: "/a/b", + expected: true, + }, + { + name: "good subpath end slash", + fullPath: "/a/b/c/", + basePath: "/a/b", + expected: true, + }, + { + name: "good subpath backticks", + fullPath: "/a/b/../c", + basePath: "/a", + expected: true, + }, + { + name: "good subpath equal", + fullPath: "/a/b/c", + basePath: "/a/b/c", + expected: true, + }, + { + name: "good subpath equal 2", + fullPath: "/a/b/c/", + basePath: "/a/b/c", + expected: true, + }, + { + name: "good subpath root", + fullPath: "/a", + basePath: "/", + expected: true, + }, + { + name: "bad subpath parent", + fullPath: "/a/b/c", + basePath: "/a/b/c/d", + expected: false, + }, + { + name: "bad subpath outside", + fullPath: "/b/c", + basePath: "/a/b/c", + expected: false, + }, + { + name: "bad subpath prefix", + fullPath: "/a/b/cd", + basePath: "/a/b/c", + expected: false, + }, + { + name: "bad subpath backticks", + fullPath: "/a/../b", + basePath: "/a", + expected: false, + }, + } + for _, test := range tests { + if pathWithinBase(test.fullPath, test.basePath) != test.expected { + t.Errorf("test %q failed: expected %v", test.name, test.expected) + } + + } +} + +func TestSafeMakeDir(t *testing.T) { + defaultPerm := os.FileMode(0750) + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) error + path string + checkPath string + expectError bool + }{ + { + "directory-does-not-exist", + func(base string) error { + return nil + }, + "test/directory", + "test/directory", + false, + }, + { + "directory-exists", + func(base string) error { + return os.MkdirAll(filepath.Join(base, "test/directory"), 0750) + }, + "test/directory", + "test/directory", + false, + }, + { + "create-base", + func(base string) error { + return nil + }, + "", + "", + false, + }, + { + "escape-base-using-dots", + func(base string) error { + return nil + }, + "..", + "", + true, + }, + { + "escape-base-using-dots-2", + func(base string) error { + return nil + }, + "test/../../..", + "", + true, + }, + { + "follow-symlinks", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "destination"), defaultPerm); err != nil { + return err + } + return os.Symlink("destination", filepath.Join(base, "test")) + }, + "test/directory", + "destination/directory", + false, + }, + { + "follow-symlink-loop", + func(base string) error { + return os.Symlink("test", filepath.Join(base, "test")) + }, + "test/directory", + "", + true, + }, + { + "follow-symlink-multiple follow", + func(base string) error { + /* test1/dir points to test2 and test2/dir points to test1 */ + if err := os.MkdirAll(filepath.Join(base, "test1"), defaultPerm); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(base, "test2"), defaultPerm); err != nil { + return err + } + if err := os.Symlink(filepath.Join(base, "test2"), filepath.Join(base, "test1/dir")); err != nil { + return err + } + if err := os.Symlink(filepath.Join(base, "test1"), filepath.Join(base, "test2/dir")); err != nil { + return err + } + return nil + }, + "test1/dir/dir/dir/dir/dir/dir/dir/foo", + "test2/foo", + false, + }, + { + "danglink-symlink", + func(base string) error { + return os.Symlink("non-existing", filepath.Join(base, "test")) + }, + "test/directory", + "", + true, + }, + { + "non-directory", + func(base string) error { + return ioutil.WriteFile(filepath.Join(base, "test"), []byte{}, defaultPerm) + }, + "test/directory", + "", + true, + }, + { + "non-directory-final", + func(base string) error { + return ioutil.WriteFile(filepath.Join(base, "test"), []byte{}, defaultPerm) + }, + "test", + "", + true, + }, + { + "escape-with-relative-symlink", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir"), defaultPerm); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(base, "exists"), defaultPerm); err != nil { + return err + } + return os.Symlink("../exists", filepath.Join(base, "dir/test")) + }, + "dir/test", + "", + false, + }, + { + "escape-with-relative-symlink-not-exists", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir"), defaultPerm); err != nil { + return err + } + return os.Symlink("../not-exists", filepath.Join(base, "dir/test")) + }, + "dir/test", + "", + true, + }, + { + "escape-with-symlink", + func(base string) error { + return os.Symlink("/", filepath.Join(base, "test")) + }, + "test/directory", + "", + true, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "safe-make-dir-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + test.prepare(base) + pathToCreate := filepath.Join(base, test.path) + err = doSafeMakeDir(pathToCreate, base, defaultPerm) + if err != nil && !test.expectError { + t.Errorf("test %q: %s", test.name, err) + } + if err != nil { + glog.Infof("got error: %s", err) + } + if err == nil && test.expectError { + t.Errorf("test %q: expected error, got none", test.name) + } + + if test.checkPath != "" { + if _, err := os.Stat(filepath.Join(base, test.checkPath)); err != nil { + t.Errorf("test %q: cannot read path %s", test.name, test.checkPath) + } + } + + os.RemoveAll(base) + } + +} + +func validateDirEmpty(dir string) error { + files, err := ioutil.ReadDir(dir) + if err != nil { + return err + } + + if len(files) != 0 { + return fmt.Errorf("Directory %q is not empty", dir) + } + return nil +} + +func validateDirExists(dir string) error { + _, err := ioutil.ReadDir(dir) + if err != nil { + return err + } + return nil +} + +func validateDirNotExists(dir string) error { + _, err := ioutil.ReadDir(dir) + if os.IsNotExist(err) { + return nil + } + if err != nil { + return err + } + return fmt.Errorf("dir %q still exists", dir) +} + +func validateFileExists(file string) error { + if _, err := os.Stat(file); err != nil { + return err + } + return nil +} + +func TestRemoveEmptyDirs(t *testing.T) { + defaultPerm := os.FileMode(0750) + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) error + // Function that validates directory structure after the test + validate func(base string) error + baseDir string + endDir string + expectError bool + }{ + { + name: "all-empty", + prepare: func(base string) error { + return os.MkdirAll(filepath.Join(base, "a/b/c"), defaultPerm) + }, + validate: func(base string) error { + return validateDirEmpty(filepath.Join(base, "a")) + }, + baseDir: "a", + endDir: "a/b/c", + expectError: false, + }, + { + name: "dir-not-empty", + prepare: func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "a/b/c"), defaultPerm); err != nil { + return err + } + return os.Mkdir(filepath.Join(base, "a/b/d"), defaultPerm) + }, + validate: func(base string) error { + if err := validateDirNotExists(filepath.Join(base, "a/b/c")); err != nil { + return err + } + return validateDirExists(filepath.Join(base, "a/b")) + }, + baseDir: "a", + endDir: "a/b/c", + expectError: false, + }, + { + name: "path-not-within-base", + prepare: func(base string) error { + return os.MkdirAll(filepath.Join(base, "a/b/c"), defaultPerm) + }, + validate: func(base string) error { + return validateDirExists(filepath.Join(base, "a")) + }, + baseDir: "a", + endDir: "b/c", + expectError: true, + }, + { + name: "path-already-deleted", + prepare: func(base string) error { + return nil + }, + validate: func(base string) error { + return nil + }, + baseDir: "a", + endDir: "a/b/c", + expectError: false, + }, + { + name: "path-not-dir", + prepare: func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "a/b"), defaultPerm); err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(base, "a/b", "c"), []byte{}, defaultPerm) + }, + validate: func(base string) error { + if err := validateDirExists(filepath.Join(base, "a/b")); err != nil { + return err + } + return validateFileExists(filepath.Join(base, "a/b/c")) + }, + baseDir: "a", + endDir: "a/b/c", + expectError: true, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "remove-empty-dirs-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + if err = test.prepare(base); err != nil { + os.RemoveAll(base) + t.Fatalf("failed to prepare test %q: %v", test.name, err.Error()) + } + + err = removeEmptyDirs(filepath.Join(base, test.baseDir), filepath.Join(base, test.endDir)) + if err != nil && !test.expectError { + t.Errorf("test %q failed: %v", test.name, err) + } + if err == nil && test.expectError { + t.Errorf("test %q failed: expected error, got success", test.name) + } + + if err = test.validate(base); err != nil { + t.Errorf("test %q failed validation: %v", test.name, err) + } + + os.RemoveAll(base) + } +} + +func TestCleanSubPaths(t *testing.T) { + defaultPerm := os.FileMode(0750) + testVol := "vol1" + + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) ([]MountPoint, error) + // Function that validates directory structure after the test + validate func(base string) error + expectError bool + }{ + { + name: "not-exists", + prepare: func(base string) ([]MountPoint, error) { + return nil, nil + }, + validate: func(base string) error { + return nil + }, + expectError: false, + }, + { + name: "subpath-not-mount", + prepare: func(base string) ([]MountPoint, error) { + return nil, os.MkdirAll(filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "0"), defaultPerm) + }, + validate: func(base string) error { + return validateDirNotExists(filepath.Join(base, containerSubPathDirectoryName)) + }, + expectError: false, + }, + { + name: "subpath-file", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1") + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + return nil, ioutil.WriteFile(filepath.Join(path, "0"), []byte{}, defaultPerm) + }, + validate: func(base string) error { + return validateDirNotExists(filepath.Join(base, containerSubPathDirectoryName)) + }, + expectError: false, + }, + { + name: "subpath-container-not-dir", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol) + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + return nil, ioutil.WriteFile(filepath.Join(path, "container1"), []byte{}, defaultPerm) + }, + validate: func(base string) error { + return validateDirExists(filepath.Join(base, containerSubPathDirectoryName, testVol)) + }, + expectError: true, + }, + { + name: "subpath-multiple-container-not-dir", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol) + if err := os.MkdirAll(filepath.Join(path, "container1"), defaultPerm); err != nil { + return nil, err + } + return nil, ioutil.WriteFile(filepath.Join(path, "container2"), []byte{}, defaultPerm) + }, + validate: func(base string) error { + path := filepath.Join(base, containerSubPathDirectoryName, testVol) + if err := validateDirNotExists(filepath.Join(path, "container1")); err != nil { + return err + } + return validateFileExists(filepath.Join(path, "container2")) + }, + expectError: true, + }, + { + name: "subpath-mount", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "0") + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + mounts := []MountPoint{{Device: "/dev/sdb", Path: path}} + return mounts, nil + }, + validate: func(base string) error { + return validateDirNotExists(filepath.Join(base, containerSubPathDirectoryName)) + }, + }, + { + name: "subpath-mount-multiple", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "0") + path2 := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "1") + path3 := filepath.Join(base, containerSubPathDirectoryName, testVol, "container2", "1") + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + if err := os.MkdirAll(path2, defaultPerm); err != nil { + return nil, err + } + if err := os.MkdirAll(path3, defaultPerm); err != nil { + return nil, err + } + mounts := []MountPoint{ + {Device: "/dev/sdb", Path: path}, + {Device: "/dev/sdb", Path: path3}, + } + return mounts, nil + }, + validate: func(base string) error { + return validateDirNotExists(filepath.Join(base, containerSubPathDirectoryName)) + }, + }, + { + name: "subpath-mount-multiple-vols", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "0") + path2 := filepath.Join(base, containerSubPathDirectoryName, "vol2", "container1", "1") + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + if err := os.MkdirAll(path2, defaultPerm); err != nil { + return nil, err + } + mounts := []MountPoint{ + {Device: "/dev/sdb", Path: path}, + } + return mounts, nil + }, + validate: func(base string) error { + baseSubdir := filepath.Join(base, containerSubPathDirectoryName) + if err := validateDirNotExists(filepath.Join(baseSubdir, testVol)); err != nil { + return err + } + return validateDirExists(baseSubdir) + }, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "clean-subpaths-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + mounts, err := test.prepare(base) + if err != nil { + os.RemoveAll(base) + t.Fatalf("failed to prepare test %q: %v", test.name, err.Error()) + } + + fm := &FakeMounter{MountPoints: mounts} + + err = doCleanSubPaths(fm, base, testVol) + if err != nil && !test.expectError { + t.Errorf("test %q failed: %v", test.name, err) + } + if err == nil && test.expectError { + t.Errorf("test %q failed: expected error, got success", test.name) + } + if err = test.validate(base); err != nil { + t.Errorf("test %q failed validation: %v", test.name, err) + } + + os.RemoveAll(base) + } +} + +var ( + testVol = "vol1" + testPod = "pod0" + testContainer = "container0" + testSubpath = 1 +) + +func setupFakeMounter(testMounts []string) *FakeMounter { + mounts := []MountPoint{} + for _, mountPoint := range testMounts { + mounts = append(mounts, MountPoint{Device: "/foo", Path: mountPoint}) + } + return &FakeMounter{MountPoints: mounts} +} + +func getTestPaths(base string) (string, string) { + return filepath.Join(base, testVol), + filepath.Join(base, testPod, containerSubPathDirectoryName, testVol, testContainer, strconv.Itoa(testSubpath)) +} + +func TestBindSubPath(t *testing.T) { + defaultPerm := os.FileMode(0750) + + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) ([]string, string, string, error) + expectError bool + }{ + { + name: "subpath-dir", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "dir0") + return nil, volpath, subpath, os.MkdirAll(subpath, defaultPerm) + }, + expectError: false, + }, + { + name: "subpath-dir-symlink", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "dir0") + if err := os.MkdirAll(subpath, defaultPerm); err != nil { + return nil, "", "", err + } + subpathLink := filepath.Join(volpath, "dirLink") + return nil, volpath, subpath, os.Symlink(subpath, subpathLink) + }, + expectError: false, + }, + { + name: "subpath-file", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "file0") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + return nil, volpath, subpath, ioutil.WriteFile(subpath, []byte{}, defaultPerm) + }, + expectError: false, + }, + { + name: "subpath-not-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "file0") + return nil, volpath, subpath, nil + }, + expectError: true, + }, + { + name: "subpath-outside", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "dir0") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + return nil, volpath, subpath, os.Symlink(base, subpath) + }, + expectError: true, + }, + { + name: "subpath-symlink-child-outside", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpathDir := filepath.Join(volpath, "dir0") + subpath := filepath.Join(subpathDir, "child0") + if err := os.MkdirAll(subpathDir, defaultPerm); err != nil { + return nil, "", "", err + } + return nil, volpath, subpath, os.Symlink(base, subpath) + }, + expectError: true, + }, + { + name: "subpath-child-outside-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpathDir := filepath.Join(volpath, "dir0") + child := filepath.Join(base, "child0") + subpath := filepath.Join(subpathDir, "child0") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + // touch file outside + if err := ioutil.WriteFile(child, []byte{}, defaultPerm); err != nil { + return nil, "", "", err + } + + // create symlink for subpath dir + return nil, volpath, subpath, os.Symlink(base, subpathDir) + }, + expectError: true, + }, + { + name: "subpath-child-outside-not-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpathDir := filepath.Join(volpath, "dir0") + subpath := filepath.Join(subpathDir, "child0") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + // create symlink for subpath dir + return nil, volpath, subpath, os.Symlink(base, subpathDir) + }, + expectError: true, + }, + { + name: "subpath-child-outside-exists-middle-dir-symlink", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpathDir := filepath.Join(volpath, "dir0") + symlinkDir := filepath.Join(subpathDir, "linkDir0") + child := filepath.Join(base, "child0") + subpath := filepath.Join(symlinkDir, "child0") + if err := os.MkdirAll(subpathDir, defaultPerm); err != nil { + return nil, "", "", err + } + // touch file outside + if err := ioutil.WriteFile(child, []byte{}, defaultPerm); err != nil { + return nil, "", "", err + } + + // create symlink for middle dir + return nil, volpath, subpath, os.Symlink(base, symlinkDir) + }, + expectError: true, + }, + { + name: "subpath-backstepping", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "dir0") + symlinkBase := filepath.Join(volpath, "..") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + + // create symlink for subpath + return nil, volpath, subpath, os.Symlink(symlinkBase, subpath) + }, + expectError: true, + }, + { + name: "subpath-mountdir-already-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, subpathMount := getTestPaths(base) + if err := os.MkdirAll(subpathMount, defaultPerm); err != nil { + return nil, "", "", err + } + + subpath := filepath.Join(volpath, "dir0") + return nil, volpath, subpath, os.MkdirAll(subpath, defaultPerm) + }, + expectError: false, + }, + { + name: "subpath-mount-already-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, subpathMount := getTestPaths(base) + mounts := []string{subpathMount} + if err := os.MkdirAll(subpathMount, defaultPerm); err != nil { + return nil, "", "", err + } + + subpath := filepath.Join(volpath, "dir0") + return mounts, volpath, subpath, os.MkdirAll(subpath, defaultPerm) + }, + expectError: false, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "bind-subpath-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + mounts, volPath, subPath, err := test.prepare(base) + if err != nil { + os.RemoveAll(base) + t.Fatalf("failed to prepare test %q: %v", test.name, err.Error()) + } + + fm := setupFakeMounter(mounts) + + subpath := Subpath{ + VolumeMountIndex: testSubpath, + Path: subPath, + VolumeName: testVol, + VolumePath: volPath, + PodDir: filepath.Join(base, "pod0"), + ContainerName: testContainer, + } + + _, subpathMount := getTestPaths(base) + bindPathTarget, err := doBindSubPath(fm, subpath, 1) + if test.expectError { + if err == nil { + t.Errorf("test %q failed: expected error, got success", test.name) + } + if bindPathTarget != "" { + t.Errorf("test %q failed: expected empty bindPathTarget, got %v", test.name, bindPathTarget) + } + if err = validateDirNotExists(subpathMount); err != nil { + t.Errorf("test %q failed: %v", test.name, err) + } + } + if !test.expectError { + if err != nil { + t.Errorf("test %q failed: %v", test.name, err) + } + if bindPathTarget != subpathMount { + t.Errorf("test %q failed: expected bindPathTarget %v, got %v", test.name, subpathMount, bindPathTarget) + } + if err = validateFileExists(subpathMount); err != nil { + t.Errorf("test %q failed: %v", test.name, err) + } + } + + os.RemoveAll(base) + } +} + +func TestParseMountInfo(t *testing.T) { + info := + `62 0 253:0 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered +78 62 0:41 / /tmp rw,nosuid,nodev shared:30 - tmpfs tmpfs rw,seclabel +80 62 0:42 / /var/lib/nfs/rpc_pipefs rw,relatime shared:31 - rpc_pipefs sunrpc rw +82 62 0:43 / /var/lib/foo rw,relatime shared:32 - tmpfs tmpfs rw +83 63 0:44 / /var/lib/bar rw,relatime - tmpfs tmpfs rw +227 62 253:0 /var/lib/docker/devicemapper /var/lib/docker/devicemapper rw,relatime - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered +224 62 253:0 /var/lib/docker/devicemapper/test/shared /var/lib/docker/devicemapper/test/shared rw,relatime master:1 shared:44 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered +76 17 8:1 / /mnt/stateful_partition rw,nosuid,nodev,noexec,relatime - ext4 /dev/sda1 rw,commit=30,data=ordered +80 17 8:1 /var /var rw,nosuid,nodev,noexec,relatime shared:30 - ext4 /dev/sda1 rw,commit=30,data=ordered +189 80 8:1 /var/lib/kubelet /var/lib/kubelet rw,relatime shared:30 - ext4 /dev/sda1 rw,commit=30,data=ordered +818 77 8:40 / /var/lib/kubelet/pods/c25464af-e52e-11e7-ab4d-42010a800002/volumes/kubernetes.io~gce-pd/vol1 rw,relatime shared:290 - ext4 /dev/sdc rw,data=ordered +819 78 8:48 / /var/lib/kubelet/pods/c25464af-e52e-11e7-ab4d-42010a800002/volumes/kubernetes.io~gce-pd/vol1 rw,relatime shared:290 - ext4 /dev/sdd rw,data=ordered +900 100 8:48 /dir1 /var/lib/kubelet/pods/c25464af-e52e-11e7-ab4d-42010a800002/volume-subpaths/vol1/subpath1/0 rw,relatime shared:290 - ext4 /dev/sdd rw,data=ordered +901 101 8:1 /dir1 /var/lib/kubelet/pods/c25464af-e52e-11e7-ab4d-42010a800002/volume-subpaths/vol1/subpath1/1 rw,relatime shared:290 - ext4 /dev/sdd rw,data=ordered +902 102 8:1 /var/lib/kubelet/pods/d4076f24-e53a-11e7-ba15-42010a800002/volumes/kubernetes.io~empty-dir/vol1/dir1 /var/lib/kubelet/pods/d4076f24-e53a-11e7-ba15-42010a800002/volume-subpaths/vol1/subpath1/0 rw,relatime shared:30 - ext4 /dev/sda1 rw,commit=30,data=ordered +903 103 8:1 /var/lib/kubelet/pods/d4076f24-e53a-11e7-ba15-42010a800002/volumes/kubernetes.io~empty-dir/vol2/dir1 /var/lib/kubelet/pods/d4076f24-e53a-11e7-ba15-42010a800002/volume-subpaths/vol1/subpath1/1 rw,relatime shared:30 - ext4 /dev/sda1 rw,commit=30,data=ordered +178 25 253:0 /etc/bar /var/lib/kubelet/pods/12345/volume-subpaths/vol1/subpath1/0 rw,relatime shared:1 - ext4 /dev/sdb2 rw,errors=remount-ro,data=ordered +698 186 0:41 /tmp1/dir1 /var/lib/kubelet/pods/41135147-e697-11e7-9342-42010a800002/volume-subpaths/vol1/subpath1/0 rw shared:26 - tmpfs tmpfs rw +918 77 8:50 / /var/lib/kubelet/pods/2345/volumes/kubernetes.io~gce-pd/vol1 rw,relatime shared:290 - ext4 /dev/sdc rw,data=ordered +919 78 8:58 / /var/lib/kubelet/pods/2345/volumes/kubernetes.io~gce-pd/vol1 rw,relatime shared:290 - ext4 /dev/sdd rw,data=ordered +920 100 8:50 /dir1 /var/lib/kubelet/pods/2345/volume-subpaths/vol1/subpath1/0 rw,relatime shared:290 - ext4 /dev/sdc rw,data=ordered +150 23 1:58 / /media/nfs_vol rw,relatime shared:89 - nfs4 172.18.4.223:/srv/nfs rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +151 24 1:58 / /media/nfs_bindmount rw,relatime shared:89 - nfs4 172.18.4.223:/srv/nfs/foo rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +134 23 0:58 / /var/lib/kubelet/pods/43219158-e5e1-11e7-a392-0e858b8eaf40/volumes/kubernetes.io~nfs/nfs1 rw,relatime shared:89 - nfs4 172.18.4.223:/srv/nfs rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +187 23 0:58 / /var/lib/kubelet/pods/1fc5ea21-eff4-11e7-ac80-0e858b8eaf40/volumes/kubernetes.io~nfs/nfs2 rw,relatime shared:96 - nfs4 172.18.4.223:/srv/nfs2 rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +188 24 0:58 / /var/lib/kubelet/pods/43219158-e5e1-11e7-a392-0e858b8eaf40/volume-subpaths/nfs1/subpath1/0 rw,relatime shared:89 - nfs4 172.18.4.223:/srv/nfs/foo rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +347 60 0:71 / /var/lib/kubelet/pods/13195d46-f9fa-11e7-bbf1-5254007a695a/volumes/kubernetes.io~nfs/vol2 rw,relatime shared:170 - nfs 172.17.0.3:/exports/2 rw,vers=3,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=172.17.0.3,mountvers=3,mountport=20048,mountproto=udp,local_lock=none,addr=172.17.0.3 +` + tempDir, filename, err := writeFile(info) + if err != nil { + t.Fatalf("cannot create temporary file: %v", err) + } + defer os.RemoveAll(tempDir) + + tests := []struct { + name string + mountPoint string + expectedInfo mountInfo + }{ + { + "simple bind mount", + "/var/lib/kubelet", + mountInfo{ + mountPoint: "/var/lib/kubelet", + optional: []string{"shared:30"}, + }, + }, + } + + infos, err := parseMountInfo(filename) + if err != nil { + t.Fatalf("Cannot parse %s: %s", filename, err) + } + + for _, test := range tests { + found := false + for _, info := range infos { + if info.mountPoint == test.mountPoint { + found = true + if !reflect.DeepEqual(info, test.expectedInfo) { + t.Errorf("Test case %q:\n expected: %+v\n got: %+v", test.name, test.expectedInfo, info) + } + break + } + } + if !found { + t.Errorf("Test case %q: mountPoint %s not found", test.name, test.mountPoint) + } + } +} + +func TestSafeOpen(t *testing.T) { + defaultPerm := os.FileMode(0750) + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) error + path string + expectError bool + }{ + { + "directory-does-not-exist", + func(base string) error { + return nil + }, + "test/directory", + true, + }, + { + "directory-exists", + func(base string) error { + return os.MkdirAll(filepath.Join(base, "test/directory"), 0750) + }, + "test/directory", + false, + }, + { + "escape-base-using-dots", + func(base string) error { + return nil + }, + "..", + true, + }, + { + "escape-base-using-dots-2", + func(base string) error { + return os.MkdirAll(filepath.Join(base, "test"), 0750) + }, + "test/../../..", + true, + }, + { + "symlink", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "destination"), defaultPerm); err != nil { + return err + } + return os.Symlink("destination", filepath.Join(base, "test")) + }, + "test", + true, + }, + { + "symlink-nested", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir1/dir2"), defaultPerm); err != nil { + return err + } + return os.Symlink("dir1", filepath.Join(base, "dir1/dir2/test")) + }, + "test", + true, + }, + { + "symlink-loop", + func(base string) error { + return os.Symlink("test", filepath.Join(base, "test")) + }, + "test", + true, + }, + { + "symlink-not-exists", + func(base string) error { + return os.Symlink("non-existing", filepath.Join(base, "test")) + }, + "test", + true, + }, + { + "non-directory", + func(base string) error { + return ioutil.WriteFile(filepath.Join(base, "test"), []byte{}, defaultPerm) + }, + "test/directory", + true, + }, + { + "non-directory-final", + func(base string) error { + return ioutil.WriteFile(filepath.Join(base, "test"), []byte{}, defaultPerm) + }, + "test", + false, + }, + { + "escape-with-relative-symlink", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir"), defaultPerm); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(base, "exists"), defaultPerm); err != nil { + return err + } + return os.Symlink("../exists", filepath.Join(base, "dir/test")) + }, + "dir/test", + true, + }, + { + "escape-with-relative-symlink-not-exists", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir"), defaultPerm); err != nil { + return err + } + return os.Symlink("../not-exists", filepath.Join(base, "dir/test")) + }, + "dir/test", + true, + }, + { + "escape-with-symlink", + func(base string) error { + return os.Symlink("/", filepath.Join(base, "test")) + }, + "test", + true, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "safe-open-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + test.prepare(base) + pathToCreate := filepath.Join(base, test.path) + fd, err := doSafeOpen(pathToCreate, base) + if err != nil && !test.expectError { + t.Errorf("test %q: %s", test.name, err) + } + if err != nil { + glog.Infof("got error: %s", err) + } + if err == nil && test.expectError { + t.Errorf("test %q: expected error, got none", test.name) + } + + syscall.Close(fd) + os.RemoveAll(base) + } +} + +func TestFindExistingPrefix(t *testing.T) { + defaultPerm := os.FileMode(0750) + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) error + path string + expectedPath string + expectedDirs []string + expectError bool + }{ + { + "directory-does-not-exist", + func(base string) error { + return nil + }, + "directory", + "", + []string{"directory"}, + false, + }, + { + "directory-exists", + func(base string) error { + return os.MkdirAll(filepath.Join(base, "test/directory"), 0750) + }, + "test/directory", + "test/directory", + []string{}, + false, + }, + { + "follow-symlinks", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "destination/directory"), defaultPerm); err != nil { + return err + } + return os.Symlink("destination", filepath.Join(base, "test")) + }, + "test/directory", + "test/directory", + []string{}, + false, + }, + { + "follow-symlink-loop", + func(base string) error { + return os.Symlink("test", filepath.Join(base, "test")) + }, + "test/directory", + "", + nil, + true, + }, + { + "follow-symlink-multiple follow", + func(base string) error { + /* test1/dir points to test2 and test2/dir points to test1 */ + if err := os.MkdirAll(filepath.Join(base, "test1"), defaultPerm); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(base, "test2"), defaultPerm); err != nil { + return err + } + if err := os.Symlink(filepath.Join(base, "test2"), filepath.Join(base, "test1/dir")); err != nil { + return err + } + if err := os.Symlink(filepath.Join(base, "test1"), filepath.Join(base, "test2/dir")); err != nil { + return err + } + return nil + }, + "test1/dir/dir/foo/bar", + "test1/dir/dir", + []string{"foo", "bar"}, + false, + }, + { + "danglink-symlink", + func(base string) error { + return os.Symlink("non-existing", filepath.Join(base, "test")) + }, + // OS returns IsNotExist error both for dangling symlink and for + // non-existing directory. + "test/directory", + "", + []string{"test", "directory"}, + false, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "find-prefix-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + test.prepare(base) + path := filepath.Join(base, test.path) + existingPath, dirs, err := findExistingPrefix(base, path) + if err != nil && !test.expectError { + t.Errorf("test %q: %s", test.name, err) + } + if err != nil { + glog.Infof("got error: %s", err) + } + if err == nil && test.expectError { + t.Errorf("test %q: expected error, got none", test.name) + } + + fullExpectedPath := filepath.Join(base, test.expectedPath) + if existingPath != fullExpectedPath { + t.Errorf("test %q: expected path %q, got %q", test.name, fullExpectedPath, existingPath) + } + if !reflect.DeepEqual(dirs, test.expectedDirs) { + t.Errorf("test %q: expected dirs %v, got %v", test.name, test.expectedDirs, dirs) + } + os.RemoveAll(base) + } +} + +func writeFile(content string) (string, string, error) { + tempDir, err := ioutil.TempDir("", "mounter_shared_test") + if err != nil { + return "", "", err + } + filename := filepath.Join(tempDir, "mountinfo") + err = ioutil.WriteFile(filename, []byte(content), 0600) + if err != nil { + os.RemoveAll(tempDir) + return "", "", err + } + return tempDir, filename, nil +} diff --git a/pkg/util/mount/mount_unsupported.go b/pkg/util/mount/mount_unsupported.go index 632ad0606ee29..c722305acccf1 100644 --- a/pkg/util/mount/mount_unsupported.go +++ b/pkg/util/mount/mount_unsupported.go @@ -1,4 +1,4 @@ -// +build !linux +// +build !linux,!windows /* Copyright 2014 The Kubernetes Authors. @@ -18,6 +18,10 @@ limitations under the License. package mount +import ( + "os" +) + type Mounter struct { mounterPath string } @@ -65,3 +69,15 @@ func (mounter *SafeFormatAndMount) formatAndMount(source string, target string, func (mounter *SafeFormatAndMount) diskLooksUnformatted(disk string) (bool, error) { return true, nil } + +func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, nil +} + +func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} + +func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return nil +} diff --git a/pkg/util/mount/mount_windows.go b/pkg/util/mount/mount_windows.go new file mode 100644 index 0000000000000..97c0dda2a3b80 --- /dev/null +++ b/pkg/util/mount/mount_windows.go @@ -0,0 +1,311 @@ +// +build windows + +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mount + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "syscall" + + "github.com/golang/glog" +) + +type Mounter struct { + mounterPath string +} + +func (mounter *Mounter) Mount(source string, target string, fstype string, options []string) error { + return nil +} + +func (mounter *Mounter) Unmount(target string) error { + return nil +} + +func (mounter *Mounter) List() ([]MountPoint, error) { + return []MountPoint{}, nil +} + +func (mounter *Mounter) IsMountPointMatch(mp MountPoint, dir string) bool { + return (mp.Path == dir) +} + +func (mounter *Mounter) IsNotMountPoint(dir string) (bool, error) { + return IsNotMountPoint(mounter, dir) +} + +func (mounter *Mounter) IsLikelyNotMountPoint(file string) (bool, error) { + return true, nil +} + +func (mounter *Mounter) GetDeviceNameFromMount(mountPath, pluginDir string) (string, error) { + return "", nil +} + +func (mounter *Mounter) DeviceOpened(pathname string) (bool, error) { + return false, nil +} + +func (mounter *Mounter) PathIsDevice(pathname string) (bool, error) { + return true, nil +} + +func (mounter *SafeFormatAndMount) formatAndMount(source string, target string, fstype string, options []string) error { + return nil +} + +func (mounter *SafeFormatAndMount) diskLooksUnformatted(disk string) (bool, error) { + return true, nil +} + +// check whether hostPath is within volume path +// this func will lock all intermediate subpath directories, need to close handle outside of this func after container started +func lockAndCheckSubPath(volumePath, hostPath string) ([]uintptr, error) { + if len(volumePath) == 0 || len(hostPath) == 0 { + return []uintptr{}, nil + } + + finalSubPath, err := filepath.EvalSymlinks(hostPath) + if err != nil { + return []uintptr{}, fmt.Errorf("cannot read link %s: %s", hostPath, err) + } + finalVolumePath, err := filepath.EvalSymlinks(volumePath) + if err != nil { + return []uintptr{}, fmt.Errorf("cannot read link %s: %s", volumePath, err) + } + + return lockAndCheckSubPathWithoutSymlink(finalVolumePath, finalSubPath) +} + +// lock all intermediate subPath directories and check they are all within volumePath +// volumePath & subPath should not contain any symlink, otherwise it will return error +func lockAndCheckSubPathWithoutSymlink(volumePath, subPath string) ([]uintptr, error) { + if len(volumePath) == 0 || len(subPath) == 0 { + return []uintptr{}, nil + } + + // get relative path to volumePath + relSubPath, err := filepath.Rel(volumePath, subPath) + if err != nil { + return []uintptr{}, fmt.Errorf("Rel(%s, %s) error: %v", volumePath, subPath, err) + } + if strings.HasPrefix(relSubPath, "..") { + return []uintptr{}, fmt.Errorf("SubPath %q not within volume path %q", subPath, volumePath) + } + + if relSubPath == "." { + // volumePath and subPath are equal + return []uintptr{}, nil + } + + fileHandles := []uintptr{} + var errorResult error + + currentFullPath := volumePath + dirs := strings.Split(relSubPath, string(os.PathSeparator)) + for _, dir := range dirs { + // lock intermediate subPath directory first + currentFullPath = filepath.Join(currentFullPath, dir) + handle, err := lockPath(currentFullPath) + if err != nil { + errorResult = fmt.Errorf("cannot lock path %s: %s", currentFullPath, err) + break + } + fileHandles = append(fileHandles, handle) + + // make sure intermediate subPath directory does not contain symlink any more + stat, err := os.Lstat(currentFullPath) + if err != nil { + errorResult = fmt.Errorf("Lstat(%q) error: %v", currentFullPath, err) + break + } + if stat.Mode()&os.ModeSymlink != 0 { + errorResult = fmt.Errorf("subpath %q is an unexpected symlink after EvalSymlinks", currentFullPath) + break + } + + if !pathWithinBase(currentFullPath, volumePath) { + errorResult = fmt.Errorf("SubPath %q not within volume path %q", currentFullPath, volumePath) + break + } + } + + return fileHandles, errorResult +} + +// unlockPath unlock directories +func unlockPath(fileHandles []uintptr) { + if fileHandles != nil { + for _, handle := range fileHandles { + syscall.CloseHandle(syscall.Handle(handle)) + } + } +} + +// lockPath locks a directory or symlink, return handle, exec "syscall.CloseHandle(handle)" to unlock the path +func lockPath(path string) (uintptr, error) { + if len(path) == 0 { + return uintptr(syscall.InvalidHandle), syscall.ERROR_FILE_NOT_FOUND + } + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return uintptr(syscall.InvalidHandle), err + } + access := uint32(syscall.GENERIC_READ) + sharemode := uint32(syscall.FILE_SHARE_READ) + createmode := uint32(syscall.OPEN_EXISTING) + flags := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS | syscall.FILE_FLAG_OPEN_REPARSE_POINT) + fd, err := syscall.CreateFile(pathp, access, sharemode, nil, createmode, flags, 0) + return uintptr(fd), err +} + +// Lock all directories in subPath and check they're not symlinks. +func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + handles, err := lockAndCheckSubPath(subPath.VolumePath, subPath.Path) + + // Unlock the directories when the container starts + cleanupAction = func() { + unlockPath(handles) + } + return subPath.Path, cleanupAction, err +} + +// No bind-mounts for subpaths are necessary on Windows +func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} + +// SafeMakeDir makes sure that the created directory does not escape given base directory mis-using symlinks. +func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return doSafeMakeDir(pathname, base, perm) +} + +func doSafeMakeDir(pathname string, base string, perm os.FileMode) error { + glog.V(4).Infof("Creating directory %q within base %q", pathname, base) + + if !pathWithinBase(pathname, base) { + return fmt.Errorf("path %s is outside of allowed base %s", pathname, base) + } + + // Quick check if the directory already exists + s, err := os.Stat(pathname) + if err == nil { + // Path exists + if s.IsDir() { + // The directory already exists. It can be outside of the parent, + // but there is no race-proof check. + glog.V(4).Infof("Directory %s already exists", pathname) + return nil + } + return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR} + } + + // Find all existing directories + existingPath, toCreate, err := findExistingPrefix(base, pathname) + if err != nil { + return fmt.Errorf("error opening directory %s: %s", pathname, err) + } + if len(toCreate) == 0 { + return nil + } + + // Ensure the existing directory is inside allowed base + fullExistingPath, err := filepath.EvalSymlinks(existingPath) + if err != nil { + return fmt.Errorf("error opening existing directory %s: %s", existingPath, err) + } + fullBasePath, err := filepath.EvalSymlinks(base) + if err != nil { + return fmt.Errorf("cannot read link %s: %s", base, err) + } + if !pathWithinBase(fullExistingPath, fullBasePath) { + return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, err) + } + + // lock all intermediate directories from fullBasePath to fullExistingPath (top to bottom) + fileHandles, err := lockAndCheckSubPathWithoutSymlink(fullBasePath, fullExistingPath) + defer unlockPath(fileHandles) + if err != nil { + return err + } + + glog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...)) + currentPath := fullExistingPath + // create the directories one by one, making sure nobody can change + // created directory into symlink by lock that directory immediately + for _, dir := range toCreate { + currentPath = filepath.Join(currentPath, dir) + glog.V(4).Infof("Creating %s", dir) + if err := os.Mkdir(currentPath, perm); err != nil { + return fmt.Errorf("cannot create directory %s: %s", currentPath, err) + } + handle, err := lockPath(currentPath) + if err != nil { + return fmt.Errorf("cannot lock path %s: %s", currentPath, err) + } + defer syscall.CloseHandle(syscall.Handle(handle)) + // make sure newly created directory does not contain symlink after lock + stat, err := os.Lstat(currentPath) + if err != nil { + return fmt.Errorf("Lstat(%q) error: %v", currentPath, err) + } + if stat.Mode()&os.ModeSymlink != 0 { + return fmt.Errorf("subpath %q is an unexpected symlink after Mkdir", currentPath) + } + } + + return nil +} + +// findExistingPrefix finds prefix of pathname that exists. In addition, it +// returns list of remaining directories that don't exist yet. +func findExistingPrefix(base, pathname string) (string, []string, error) { + rel, err := filepath.Rel(base, pathname) + if err != nil { + return base, nil, err + } + + if strings.HasPrefix(rel, "..") { + return base, nil, fmt.Errorf("pathname(%s) is not within base(%s)", pathname, base) + } + + if rel == "." { + // base and pathname are equal + return pathname, []string{}, nil + } + + dirs := strings.Split(rel, string(filepath.Separator)) + + parent := base + currentPath := base + for i, dir := range dirs { + parent = currentPath + currentPath = filepath.Join(parent, dir) + if _, err := os.Lstat(currentPath); err != nil { + if os.IsNotExist(err) { + return parent, dirs[i:], nil + } + return base, nil, err + } + } + + return pathname, []string{}, nil +} diff --git a/pkg/util/mount/mount_windows_test.go b/pkg/util/mount/mount_windows_test.go new file mode 100644 index 0000000000000..312238cafe7a4 --- /dev/null +++ b/pkg/util/mount/mount_windows_test.go @@ -0,0 +1,446 @@ +// +build windows + +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mount + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func makeLink(link, target string) error { + if output, err := exec.Command("cmd", "/c", "mklink", "/D", link, target).CombinedOutput(); err != nil { + return fmt.Errorf("mklink failed: %v, link(%q) target(%q) output: %q", err, link, target, string(output)) + } + return nil +} + +func TestDoSafeMakeDir(t *testing.T) { + const testingVolumePath = `c:\tmp\DoSafeMakeDirTest` + os.MkdirAll(testingVolumePath, 0755) + defer os.RemoveAll(testingVolumePath) + + tests := []struct { + volumePath string + subPath string + expectError bool + symlinkTarget string + }{ + { + volumePath: testingVolumePath, + subPath: ``, + expectError: true, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `x`), + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d`), + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink`), + expectError: false, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink\c\d`), + expectError: true, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink\y926`), + expectError: true, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\symlink`), + expectError: false, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\x\symlink`), + expectError: false, + symlinkTarget: filepath.Join(testingVolumePath, `a`), + }, + } + + for _, test := range tests { + if len(test.volumePath) > 0 && len(test.subPath) > 0 && len(test.symlinkTarget) > 0 { + // make all parent sub directories + if parent := filepath.Dir(test.subPath); parent != "." { + os.MkdirAll(parent, 0755) + } + + // make last element as symlink + linkPath := test.subPath + if _, err := os.Stat(linkPath); err != nil && os.IsNotExist(err) { + if err := makeLink(linkPath, test.symlinkTarget); err != nil { + t.Fatalf("unexpected error: %v", fmt.Errorf("mklink link(%q) target(%q) error: %q", linkPath, test.symlinkTarget, err)) + } + } + } + + err := doSafeMakeDir(test.subPath, test.volumePath, os.FileMode(0755)) + if test.expectError { + assert.NotNil(t, err, "Expect error during doSafeMakeDir(%s, %s)", test.subPath, test.volumePath) + continue + } + assert.Nil(t, err, "Expect no error during doSafeMakeDir(%s, %s)", test.subPath, test.volumePath) + if _, err := os.Stat(test.subPath); os.IsNotExist(err) { + t.Errorf("subPath should exists after doSafeMakeDir(%s, %s)", test.subPath, test.volumePath) + } + } +} + +func TestLockAndCheckSubPath(t *testing.T) { + const testingVolumePath = `c:\tmp\LockAndCheckSubPathTest` + + tests := []struct { + volumePath string + subPath string + expectedHandleCount int + expectError bool + symlinkTarget string + }{ + { + volumePath: `c:\`, + subPath: ``, + expectedHandleCount: 0, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: ``, + subPath: `a`, + expectedHandleCount: 0, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a`), + expectedHandleCount: 1, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d`), + expectedHandleCount: 4, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink`), + expectedHandleCount: 0, + expectError: true, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\symlink`), + expectedHandleCount: 0, + expectError: true, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d\symlink`), + expectedHandleCount: 2, + expectError: false, + symlinkTarget: filepath.Join(testingVolumePath, `a\b`), + }, + } + + for _, test := range tests { + if len(test.volumePath) > 0 && len(test.subPath) > 0 { + os.MkdirAll(test.volumePath, 0755) + if len(test.symlinkTarget) == 0 { + // make all intermediate sub directories + os.MkdirAll(test.subPath, 0755) + } else { + // make all parent sub directories + if parent := filepath.Dir(test.subPath); parent != "." { + os.MkdirAll(parent, 0755) + } + + // make last element as symlink + linkPath := test.subPath + if _, err := os.Stat(linkPath); err != nil && os.IsNotExist(err) { + if err := makeLink(linkPath, test.symlinkTarget); err != nil { + t.Fatalf("unexpected error: %v", fmt.Errorf("mklink link(%q) target(%q) error: %q", linkPath, test.symlinkTarget, err)) + } + } + } + } + + fileHandles, err := lockAndCheckSubPath(test.volumePath, test.subPath) + unlockPath(fileHandles) + assert.Equal(t, test.expectedHandleCount, len(fileHandles)) + if test.expectError { + assert.NotNil(t, err, "Expect error during LockAndCheckSubPath(%s, %s)", test.volumePath, test.subPath) + continue + } + assert.Nil(t, err, "Expect no error during LockAndCheckSubPath(%s, %s)", test.volumePath, test.subPath) + } + + // remove dir will happen after closing all file handles + assert.Nil(t, os.RemoveAll(testingVolumePath), "Expect no error during remove dir %s", testingVolumePath) +} + +func TestLockAndCheckSubPathWithoutSymlink(t *testing.T) { + const testingVolumePath = `c:\tmp\LockAndCheckSubPathWithoutSymlinkTest` + + tests := []struct { + volumePath string + subPath string + expectedHandleCount int + expectError bool + symlinkTarget string + }{ + { + volumePath: `c:\`, + subPath: ``, + expectedHandleCount: 0, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: ``, + subPath: `a`, + expectedHandleCount: 0, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a`), + expectedHandleCount: 1, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d`), + expectedHandleCount: 4, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink`), + expectedHandleCount: 1, + expectError: true, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\symlink`), + expectedHandleCount: 4, + expectError: true, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d\symlink`), + expectedHandleCount: 5, + expectError: true, + symlinkTarget: filepath.Join(testingVolumePath, `a\b`), + }, + } + + for _, test := range tests { + if len(test.volumePath) > 0 && len(test.subPath) > 0 { + os.MkdirAll(test.volumePath, 0755) + if len(test.symlinkTarget) == 0 { + // make all intermediate sub directories + os.MkdirAll(test.subPath, 0755) + } else { + // make all parent sub directories + if parent := filepath.Dir(test.subPath); parent != "." { + os.MkdirAll(parent, 0755) + } + + // make last element as symlink + linkPath := test.subPath + if _, err := os.Stat(linkPath); err != nil && os.IsNotExist(err) { + if err := makeLink(linkPath, test.symlinkTarget); err != nil { + t.Fatalf("unexpected error: %v", fmt.Errorf("mklink link(%q) target(%q) error: %q", linkPath, test.symlinkTarget, err)) + } + } + } + } + + fileHandles, err := lockAndCheckSubPathWithoutSymlink(test.volumePath, test.subPath) + unlockPath(fileHandles) + assert.Equal(t, test.expectedHandleCount, len(fileHandles)) + if test.expectError { + assert.NotNil(t, err, "Expect error during LockAndCheckSubPath(%s, %s)", test.volumePath, test.subPath) + continue + } + assert.Nil(t, err, "Expect no error during LockAndCheckSubPath(%s, %s)", test.volumePath, test.subPath) + } + + // remove dir will happen after closing all file handles + assert.Nil(t, os.RemoveAll(testingVolumePath), "Expect no error during remove dir %s", testingVolumePath) +} + +func TestFindExistingPrefix(t *testing.T) { + const testingVolumePath = `c:\tmp\FindExistingPrefixTest` + + tests := []struct { + base string + pathname string + expectError bool + expectedExistingPath string + expectedToCreateDirs []string + createSubPathBeforeTest bool + }{ + { + base: `c:\tmp\a`, + pathname: `c:\tmp\b`, + expectError: true, + expectedExistingPath: "", + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: false, + }, + { + base: ``, + pathname: `c:\tmp\b`, + expectError: true, + expectedExistingPath: "", + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: false, + }, + { + base: `c:\tmp\a`, + pathname: `d:\tmp\b`, + expectError: true, + expectedExistingPath: "", + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: false, + }, + { + base: testingVolumePath, + pathname: testingVolumePath, + expectError: false, + expectedExistingPath: testingVolumePath, + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: false, + }, + { + base: testingVolumePath, + pathname: filepath.Join(testingVolumePath, `a\b`), + expectError: false, + expectedExistingPath: filepath.Join(testingVolumePath, `a\b`), + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: true, + }, + { + base: testingVolumePath, + pathname: filepath.Join(testingVolumePath, `a\b\c\`), + expectError: false, + expectedExistingPath: filepath.Join(testingVolumePath, `a\b`), + expectedToCreateDirs: []string{`c`}, + createSubPathBeforeTest: false, + }, + { + base: testingVolumePath, + pathname: filepath.Join(testingVolumePath, `a\b\c\d`), + expectError: false, + expectedExistingPath: filepath.Join(testingVolumePath, `a\b`), + expectedToCreateDirs: []string{`c`, `d`}, + createSubPathBeforeTest: false, + }, + } + + for _, test := range tests { + if test.createSubPathBeforeTest { + os.MkdirAll(test.pathname, 0755) + } + + existingPath, toCreate, err := findExistingPrefix(test.base, test.pathname) + if test.expectError { + assert.NotNil(t, err, "Expect error during findExistingPrefix(%s, %s)", test.base, test.pathname) + continue + } + assert.Nil(t, err, "Expect no error during findExistingPrefix(%s, %s)", test.base, test.pathname) + + assert.Equal(t, test.expectedExistingPath, existingPath, "Expect result not equal with findExistingPrefix(%s, %s) return: %q, expected: %q", + test.base, test.pathname, existingPath, test.expectedExistingPath) + + assert.Equal(t, test.expectedToCreateDirs, toCreate, "Expect result not equal with findExistingPrefix(%s, %s) return: %q, expected: %q", + test.base, test.pathname, toCreate, test.expectedToCreateDirs) + + } + // remove dir will happen after closing all file handles + assert.Nil(t, os.RemoveAll(testingVolumePath), "Expect no error during remove dir %s", testingVolumePath) +} + +func TestPathWithinBase(t *testing.T) { + tests := []struct { + fullPath string + basePath string + expectedResult bool + }{ + { + fullPath: `c:\tmp\a\b\c`, + basePath: `c:\tmp`, + expectedResult: true, + }, + { + fullPath: `c:\tmp1`, + basePath: `c:\tmp2`, + expectedResult: false, + }, + { + fullPath: `c:\tmp`, + basePath: `c:\tmp`, + expectedResult: true, + }, + { + fullPath: `c:\tmp`, + basePath: `c:\tmp\a\b\c`, + expectedResult: false, + }, + } + + for _, test := range tests { + result := pathWithinBase(test.fullPath, test.basePath) + assert.Equal(t, result, test.expectedResult, "Expect result not equal with pathWithinBase(%s, %s) return: %q, expected: %q", + test.fullPath, test.basePath, result, test.expectedResult) + } +} diff --git a/pkg/util/mount/nsenter_mount.go b/pkg/util/mount/nsenter_mount.go index 4af8ef0d82ea0..908e40280cdef 100644 --- a/pkg/util/mount/nsenter_mount.go +++ b/pkg/util/mount/nsenter_mount.go @@ -22,10 +22,23 @@ import ( "fmt" "os" "path/filepath" + "regexp" + "strconv" "strings" "github.com/golang/glog" "k8s.io/kubernetes/pkg/util/exec" + utilio "k8s.io/kubernetes/pkg/util/io" +) + +const ( + // hostProcSelfStatusPath is the default path to /proc/self/status on the host + hostProcSelfStatusPath = "/rootfs/proc/self/status" +) + +var ( + // pidRegExp matches "Pid: " in /proc/self/status + pidRegExp = regexp.MustCompile(`\nPid:\t([0-9]*)\n`) ) // NsenterMounter is part of experimental support for running the kubelet @@ -239,3 +252,41 @@ func (n *NsenterMounter) absHostPath(command string) string { } return path } + +func (mounter *NsenterMounter) CleanSubPaths(podDir string, volumeName string) error { + return doCleanSubPaths(mounter, podDir, volumeName) +} + +// getPidOnHost returns kubelet's pid in the host pid namespace +func (mounter *NsenterMounter) getPidOnHost(procStatusPath string) (int, error) { + // Get the PID from /rootfs/proc/self/status + statusBytes, err := utilio.ConsistentRead(procStatusPath, maxListTries) + if err != nil { + return 0, fmt.Errorf("error reading %s: %s", procStatusPath, err) + } + matches := pidRegExp.FindSubmatch(statusBytes) + if len(matches) < 2 { + return 0, fmt.Errorf("cannot parse %s: no Pid:", procStatusPath) + } + return strconv.Atoi(string(matches[1])) +} + +func (mounter *NsenterMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + hostPid, err := mounter.getPidOnHost(hostProcSelfStatusPath) + if err != nil { + return "", nil, err + } + glog.V(4).Infof("Kubelet's PID on the host is %d", hostPid) + + // Bind-mount the subpath to avoid using symlinks in subpaths. + newHostPath, err = doBindSubPath(mounter, subPath, hostPid) + + // There is no action when the container starts. Bind-mount will be cleaned + // when container stops by CleanSubPaths. + cleanupAction = nil + return newHostPath, cleanupAction, err +} + +func (mounter *NsenterMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return doSafeMakeDir(pathname, base, perm) +} diff --git a/pkg/util/mount/nsenter_mount_unsupported.go b/pkg/util/mount/nsenter_mount_unsupported.go index e955e1b781b4f..6039ce4523c5f 100644 --- a/pkg/util/mount/nsenter_mount_unsupported.go +++ b/pkg/util/mount/nsenter_mount_unsupported.go @@ -18,6 +18,10 @@ limitations under the License. package mount +import ( + "os" +) + type NsenterMounter struct{} func NewNsenterMounter() *NsenterMounter { @@ -61,3 +65,15 @@ func (*NsenterMounter) PathIsDevice(pathname string) (bool, error) { func (*NsenterMounter) GetDeviceNameFromMount(mountPath, pluginDir string) (string, error) { return "", nil } + +func (*NsenterMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return nil +} + +func (*NsenterMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, nil +} + +func (*NsenterMounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} diff --git a/pkg/util/removeall/removeall_test.go b/pkg/util/removeall/removeall_test.go index a5b19fe41a207..2dbf906be571c 100644 --- a/pkg/util/removeall/removeall_test.go +++ b/pkg/util/removeall/removeall_test.go @@ -161,3 +161,15 @@ func TestRemoveAllOneFilesystem(t *testing.T) { } } } + +func (mounter *fakeMounter) PrepareSafeSubpath(subPath mount.Subpath) (newHostPath string, cleanupAction func(), err error) { + return "", nil, nil +} + +func (mounter *fakeMounter) CleanSubPaths(_, _ string) error { + return nil +} + +func (mounter *fakeMounter) SafeMakeDir(_, _ string, _ os.FileMode) error { + return nil +} diff --git a/pkg/volume/util/operationexecutor/operation_executor.go b/pkg/volume/util/operationexecutor/operation_executor.go index 0c1569095f504..f6215b7b3deb1 100644 --- a/pkg/volume/util/operationexecutor/operation_executor.go +++ b/pkg/volume/util/operationexecutor/operation_executor.go @@ -96,7 +96,7 @@ type OperationExecutor interface { // UnmountVolume unmounts the volume from the pod specified in // volumeToUnmount and updates the actual state of the world to reflect that. - UnmountVolume(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) error + UnmountVolume(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) error // UnmountDevice unmounts the volumes global mount path from the device (for // attachable volumes only, freeing it for detach. It then updates the @@ -677,10 +677,11 @@ func (oe *operationExecutor) MountVolume( func (oe *operationExecutor) UnmountVolume( volumeToUnmount MountedVolume, - actualStateOfWorld ActualStateOfWorldMounterUpdater) error { + actualStateOfWorld ActualStateOfWorldMounterUpdater, + podsDir string) error { unmountFunc, err := - oe.operationGenerator.GenerateUnmountVolumeFunc(volumeToUnmount, actualStateOfWorld) + oe.operationGenerator.GenerateUnmountVolumeFunc(volumeToUnmount, actualStateOfWorld, podsDir) if err != nil { return err } diff --git a/pkg/volume/util/operationexecutor/operation_executor_test.go b/pkg/volume/util/operationexecutor/operation_executor_test.go index b312b29d451b7..99ff69ea6c7a4 100644 --- a/pkg/volume/util/operationexecutor/operation_executor_test.go +++ b/pkg/volume/util/operationexecutor/operation_executor_test.go @@ -120,7 +120,7 @@ func TestOperationExecutor_UnmountVolume_ConcurrentUnmountForAllPlugins(t *testi PodUID: pod.UID, } } - oe.UnmountVolume(volumesToUnmount[i], nil /* actualStateOfWorldMounterUpdater */) + oe.UnmountVolume(volumesToUnmount[i], nil /* actualStateOfWorldMounterUpdater */, "" /*podsDir*/) } // Assert @@ -245,7 +245,7 @@ func (fopg *fakeOperationGenerator) GenerateMountVolumeFunc(waitForAttachTimeout return nil }, nil } -func (fopg *fakeOperationGenerator) GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) (func() error, error) { +func (fopg *fakeOperationGenerator) GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) (func() error, error) { return func() error { startOperationAndBlock(fopg.ch, fopg.quit) return nil diff --git a/pkg/volume/util/operationexecutor/operation_generator.go b/pkg/volume/util/operationexecutor/operation_generator.go index 2633649278eb0..4d12eb3543586 100644 --- a/pkg/volume/util/operationexecutor/operation_generator.go +++ b/pkg/volume/util/operationexecutor/operation_generator.go @@ -18,6 +18,7 @@ package operationexecutor import ( "fmt" + "path" "time" "github.com/golang/glog" @@ -76,7 +77,7 @@ type OperationGenerator interface { GenerateMountVolumeFunc(waitForAttachTimeout time.Duration, volumeToMount VolumeToMount, actualStateOfWorldMounterUpdater ActualStateOfWorldMounterUpdater, isRemount bool) (func() error, error) // Generates the UnmountVolume function needed to perform the unmount of a volume plugin - GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) (func() error, error) + GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) (func() error, error) // Generates the AttachVolume function needed to perform attach of a volume plugin GenerateAttachVolumeFunc(volumeToAttach VolumeToAttach, actualStateOfWorld ActualStateOfWorldAttacherUpdater) (func() error, error) @@ -494,7 +495,8 @@ func (og *operationGenerator) GenerateMountVolumeFunc( func (og *operationGenerator) GenerateUnmountVolumeFunc( volumeToUnmount MountedVolume, - actualStateOfWorld ActualStateOfWorldMounterUpdater) (func() error, error) { + actualStateOfWorld ActualStateOfWorldMounterUpdater, + podsDir string) (func() error, error) { // Get mountable plugin volumePlugin, err := og.volumePluginMgr.FindPluginByName(volumeToUnmount.PluginName) @@ -509,6 +511,14 @@ func (og *operationGenerator) GenerateUnmountVolumeFunc( } return func() error { + mounter := og.volumePluginMgr.Host.GetMounter() + + // Remove all bind-mounts for subPaths + podDir := path.Join(podsDir, string(volumeToUnmount.PodUID)) + if err := mounter.CleanSubPaths(podDir, volumeToUnmount.OuterVolumeSpecName); err != nil { + return volumeToUnmount.GenerateErrorDetailed("error cleaning subPath mounts", err) + } + // Execute unmount unmountErr := volumeUnmounter.TearDown() if unmountErr != nil { From 6b7688cf50a211f1bdf76c5383870209246fdf46 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Mon, 5 Mar 2018 09:18:49 +0100 Subject: [PATCH 2/3] Add subpath e2e tests --- test/e2e/framework/util.go | 8 +- test/e2e/framework/volume_util.go | 67 ++- test/e2e/storage/BUILD | 3 + test/e2e/storage/subpath.go | 741 ++++++++++++++++++++++++++++++ 4 files changed, 813 insertions(+), 6 deletions(-) create mode 100644 test/e2e/storage/subpath.go diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index 1f1073f8677ed..777d59cb1bb0b 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -1278,23 +1278,23 @@ func WaitForPodRunningInNamespace(c clientset.Interface, pod *v1.Pod) error { if pod.Status.Phase == v1.PodRunning { return nil } - return waitTimeoutForPodRunningInNamespace(c, pod.Name, pod.Namespace, PodStartTimeout) + return WaitTimeoutForPodRunningInNamespace(c, pod.Name, pod.Namespace, PodStartTimeout) } // Waits default amount of time (PodStartTimeout) for the specified pod to become running. // Returns an error if timeout occurs first, or pod goes in to failed state. func WaitForPodNameRunningInNamespace(c clientset.Interface, podName, namespace string) error { - return waitTimeoutForPodRunningInNamespace(c, podName, namespace, PodStartTimeout) + return WaitTimeoutForPodRunningInNamespace(c, podName, namespace, PodStartTimeout) } // Waits an extended amount of time (slowPodStartTimeout) for the specified pod to become running. // The resourceVersion is used when Watching object changes, it tells since when we care // about changes to the pod. Returns an error if timeout occurs first, or pod goes in to failed state. func waitForPodRunningInNamespaceSlow(c clientset.Interface, podName, namespace string) error { - return waitTimeoutForPodRunningInNamespace(c, podName, namespace, slowPodStartTimeout) + return WaitTimeoutForPodRunningInNamespace(c, podName, namespace, slowPodStartTimeout) } -func waitTimeoutForPodRunningInNamespace(c clientset.Interface, podName, namespace string, timeout time.Duration) error { +func WaitTimeoutForPodRunningInNamespace(c clientset.Interface, podName, namespace string, timeout time.Duration) error { return wait.PollImmediate(Poll, timeout, podRunning(c, podName, namespace)) } diff --git a/test/e2e/framework/volume_util.go b/test/e2e/framework/volume_util.go index f82555bcd43ab..ddbbd85d16491 100644 --- a/test/e2e/framework/volume_util.go +++ b/test/e2e/framework/volume_util.go @@ -126,8 +126,12 @@ func StartVolumeServer(client clientset.Interface, config VolumeTestConfig) *v1. for src, dst := range config.ServerVolumes { mountName := fmt.Sprintf("path%d", i) volumes[i].Name = mountName - volumes[i].VolumeSource.HostPath = &v1.HostPathVolumeSource{ - Path: src, + if src == "" { + volumes[i].VolumeSource.EmptyDir = &v1.EmptyDirVolumeSource{} + } else { + volumes[i].VolumeSource.HostPath = &v1.HostPathVolumeSource{ + Path: src, + } } mounts[i].Name = mountName @@ -379,3 +383,62 @@ func InjectHtml(client clientset.Interface, config VolumeTestConfig, volume v1.V err = WaitForPodSuccessInNamespace(client, injectPod.Name, injectPod.Namespace) Expect(err).NotTo(HaveOccurred()) } + +// NFS-specific wrapper for CreateStorageServer. +func NewNFSServer(cs clientset.Interface, namespace string, args []string) (config VolumeTestConfig, pod *v1.Pod, ip string) { + config = VolumeTestConfig{ + Namespace: namespace, + Prefix: "nfs", + ServerImage: NfsServerImage, + ServerPorts: []int{2049}, + ServerVolumes: map[string]string{"": "/exports"}, + } + if len(args) > 0 { + config.ServerArgs = args + } + pod = StartVolumeServer(cs, config) + return config, pod, pod.Status.PodIP +} + +// GlusterFS-specific wrapper for CreateStorageServer. Also creates the gluster endpoints object. +func NewGlusterfsServer(cs clientset.Interface, namespace string) (config VolumeTestConfig, pod *v1.Pod, ip string) { + config = VolumeTestConfig{ + Namespace: namespace, + Prefix: "gluster", + ServerImage: GlusterfsServerImage, + ServerPorts: []int{24007, 24008, 49152}, + } + pod = StartVolumeServer(cs, config) + ip = pod.Status.PodIP + + By("creating Gluster endpoints") + endpoints := &v1.Endpoints{ + TypeMeta: metav1.TypeMeta{ + Kind: "Endpoints", + APIVersion: "v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: config.Prefix + "-server", + }, + Subsets: []v1.EndpointSubset{ + { + Addresses: []v1.EndpointAddress{ + { + IP: ip, + }, + }, + Ports: []v1.EndpointPort{ + { + Name: "gluster", + Port: 24007, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + } + endpoints, err := cs.CoreV1().Endpoints(namespace).Create(endpoints) + Expect(err).NotTo(HaveOccurred(), "failed to create endpoints for Gluster server") + + return config, pod, ip +} diff --git a/test/e2e/storage/BUILD b/test/e2e/storage/BUILD index b26b7ad3167b7..b49727da19263 100644 --- a/test/e2e/storage/BUILD +++ b/test/e2e/storage/BUILD @@ -18,6 +18,7 @@ go_library( "persistent_volumes-vsphere.go", "pv_reclaimpolicy.go", "pvc_label_selector.go", + "subpath.go", "volume_provisioning.go", "volumes.go", "vsphere_utils.go", @@ -54,10 +55,12 @@ go_library( "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/fields:go_default_library", "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library", "//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library", diff --git a/test/e2e/storage/subpath.go b/test/e2e/storage/subpath.go new file mode 100644 index 0000000000000..6dd783cb7e58e --- /dev/null +++ b/test/e2e/storage/subpath.go @@ -0,0 +1,741 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package storage + +import ( + "fmt" + "path/filepath" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/kubernetes/pkg/api/v1" + "k8s.io/kubernetes/test/e2e/framework" + + "time" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +var ( + volumePath = "/test-volume" + volumeName = "test-volume" + fileName = "test-file" + retryDuration = 180 + mountImage = "gcr.io/google_containers/mounttest:0.8" +) + +type volInfo struct { + source *v1.VolumeSource + node string +} + +type volSource interface { + createVolume(f *framework.Framework) volInfo + cleanupVolume(f *framework.Framework) +} + +var initVolSources = map[string]func() volSource{ + "hostPath": initHostpath, + "hostPathSymlink": initHostpathSymlink, + "emptyDir": initEmptydir, + "gcePD": initGCEPD, + "gcePDPartitioned": initGCEPDPartition, + "nfs": initNFS, + "nfsPVC": initNFSPVC, + "gluster": initGluster, +} + +var _ = framework.KubeDescribe("Subpath", func() { + var ( + subPath string + subPathDir string + filePathInSubpath string + filePathInVolume string + pod *v1.Pod + vol volSource + ) + + f := framework.NewDefaultFramework("subpath") + + for volType, volInit := range initVolSources { + curVolType := volType + curVolInit := volInit + + Context(fmt.Sprintf("[Volume type: %v]", curVolType), func() { + BeforeEach(func() { + By(fmt.Sprintf("Initializing %s volume", curVolType)) + vol = curVolInit() + subPath = f.Namespace.Name + subPathDir = filepath.Join(volumePath, subPath) + filePathInSubpath = filepath.Join(volumePath, fileName) + filePathInVolume = filepath.Join(subPathDir, fileName) + volInfo := vol.createVolume(f) + pod = testPodSubpath(subPath, curVolType, volInfo.source) + pod.Namespace = f.Namespace.Name + pod.Spec.NodeName = volInfo.node + }) + + AfterEach(func() { + By("Deleting pod") + err := framework.DeletePodWithWait(f, f.ClientSet, pod) + Expect(err).ToNot(HaveOccurred()) + + By("Cleaning up volume") + vol.cleanupVolume(f) + }) + + It("should support non-existent path", func() { + // Write the file in the subPath from container 0 + setWriteCommand(filePathInSubpath, &pod.Spec.Containers[0]) + + // Read it from outside the subPath from container 1 + testReadFile(f, filePathInVolume, pod, 1) + }) + + It("should support existing directory", func() { + // Create the directory + setInitCommand(pod, fmt.Sprintf("mkdir -p %s", subPathDir)) + + // Write the file in the subPath from container 0 + setWriteCommand(filePathInSubpath, &pod.Spec.Containers[0]) + + // Read it from outside the subPath from container 1 + testReadFile(f, filePathInVolume, pod, 1) + }) + + It("should support existing single file", func() { + // Create the file in the init container + setInitCommand(pod, fmt.Sprintf("mkdir -p %s; echo \"mount-tester new file\" > %s", subPathDir, filePathInVolume)) + + // Read it from inside the subPath from container 0 + testReadFile(f, filePathInSubpath, pod, 0) + }) + + It("should fail if subpath directory is outside the volume [Slow]", func() { + // Create the subpath outside the volume + setInitCommand(pod, fmt.Sprintf("ln -s /bin %s", subPathDir)) + + // Pod should fail + testPodFailSupath(f, pod) + }) + + It("should fail if subpath file is outside the volume [Slow]", func() { + // Create the subpath outside the volume + setInitCommand(pod, fmt.Sprintf("ln -s /bin/sh %s", subPathDir)) + + // Pod should fail + testPodFailSupath(f, pod) + }) + + It("should fail if non-existent subpath is outside the volume [Slow]", func() { + // Create the subpath outside the volume + setInitCommand(pod, fmt.Sprintf("ln -s /bin/notanexistingpath %s", subPathDir)) + + // Pod should fail + testPodFailSupath(f, pod) + }) + + It("should fail if subpath with backstepping is outside the volume [Slow]", func() { + // Create the subpath outside the volume + setInitCommand(pod, fmt.Sprintf("ln -s ../ %s", subPathDir)) + + // Pod should fail + testPodFailSupath(f, pod) + }) + + It("should support creating multiple subpath from same volumes [Slow]", func() { + subpathDir1 := filepath.Join(volumePath, "subpath1") + subpathDir2 := filepath.Join(volumePath, "subpath2") + filepath1 := filepath.Join("/test-subpath1", fileName) + filepath2 := filepath.Join("/test-subpath2", fileName) + setInitCommand(pod, fmt.Sprintf("mkdir -p %s; mkdir -p %s", subpathDir1, subpathDir2)) + + addSubpathVolumeContainer(&pod.Spec.Containers[0], v1.VolumeMount{ + Name: volumeName, + MountPath: "/test-subpath1", + SubPath: "subpath1", + }) + addSubpathVolumeContainer(&pod.Spec.Containers[0], v1.VolumeMount{ + Name: volumeName, + MountPath: "/test-subpath2", + SubPath: "subpath2", + }) + + addMultipleWrites(&pod.Spec.Containers[0], filepath1, filepath2) + testMultipleReads(f, pod, 0, filepath1, filepath2) + }) + + It("should support restarting containers [Slow]", func() { + // Create the directory + setInitCommand(pod, fmt.Sprintf("mkdir -p %v", subPathDir)) + + testPodContainerRestart(f, pod, filePathInVolume, filePathInSubpath) + }) + }) + } + + // TODO: add a test case for the same disk with two partitions +}) + +func testPodSubpath(subpath, volumeType string, source *v1.VolumeSource) *v1.Pod { + suffix := strings.ToLower(fmt.Sprintf("%s-%s", volumeType, rand.String(4))) + privileged := true + gracePeriod := int64(1) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("pod-subpath-test-%s", suffix), + }, + Spec: v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: fmt.Sprintf("init-volume-%s", suffix), + Image: "busybox", + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: volumePath, + }, + }, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + }, + }, + }, + Containers: []v1.Container{ + { + Name: fmt.Sprintf("test-container-subpath-%s", suffix), + Image: mountImage, + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: volumePath, + SubPath: subpath, + }, + }, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + }, + }, + { + Name: fmt.Sprintf("test-container-volume-%s", suffix), + Image: mountImage, + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: volumePath, + }, + }, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + }, + }, + }, + RestartPolicy: v1.RestartPolicyNever, + TerminationGracePeriodSeconds: &gracePeriod, + Volumes: []v1.Volume{ + { + Name: volumeName, + VolumeSource: *source, + }, + }, + }, + } +} + +func setInitCommand(pod *v1.Pod, command string) { + pod.Spec.InitContainers[0].Command = []string{"/bin/sh", "-ec", command} +} + +func setWriteCommand(file string, container *v1.Container) { + container.Args = []string{ + fmt.Sprintf("--new_file_0644=%v", file), + fmt.Sprintf("--file_mode=%v", file), + } +} + +func addSubpathVolumeContainer(container *v1.Container, volumeMount v1.VolumeMount) { + existingMounts := container.VolumeMounts + container.VolumeMounts = append(existingMounts, volumeMount) +} + +func addMultipleWrites(container *v1.Container, file1 string, file2 string) { + container.Args = []string{ + fmt.Sprintf("--new_file_0644=%v", file1), + fmt.Sprintf("--new_file_0666=%v", file2), + } +} + +func testMultipleReads(f *framework.Framework, pod *v1.Pod, containerIndex int, file1 string, file2 string) { + By(fmt.Sprintf("Creating pod %s", pod.Name)) + f.TestContainerOutput("multi_subpath", pod, containerIndex, []string{ + "content of file \"" + file1 + "\": mount-tester new file", + "content of file \"" + file2 + "\": mount-tester new file", + }) +} + +func setReadCommand(file string, container *v1.Container) { + container.Args = []string{ + fmt.Sprintf("--file_content_in_loop=%v", file), + fmt.Sprintf("--retry_time=%d", retryDuration), + } +} + +func testReadFile(f *framework.Framework, file string, pod *v1.Pod, containerIndex int) { + setReadCommand(file, &pod.Spec.Containers[containerIndex]) + + By(fmt.Sprintf("Creating pod %s", pod.Name)) + f.TestContainerOutput("subpath", pod, containerIndex, []string{ + "content of file \"" + file + "\": mount-tester new file", + }) +} + +func testPodFailSupath(f *framework.Framework, pod *v1.Pod) { + By(fmt.Sprintf("Creating pod %s", pod.Name)) + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod) + Expect(err).ToNot(HaveOccurred()) + defer func() { + framework.DeletePodWithWait(f, f.ClientSet, pod) + }() + err = framework.WaitTimeoutForPodRunningInNamespace(f.ClientSet, pod.Name, pod.Namespace, time.Minute) + Expect(err).To(HaveOccurred()) + + By("Checking for subpath error event") + selector := fields.Set{ + "involvedObject.kind": "Pod", + "involvedObject.name": pod.Name, + "involvedObject.namespace": f.Namespace.Name, + "reason": "Failed", + }.AsSelector().String() + options := metav1.ListOptions{FieldSelector: selector} + events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(options) + Expect(err).NotTo(HaveOccurred()) + Expect(len(events.Items)).NotTo(Equal(0)) + Expect(events.Items[0].Message).To(ContainSubstring("subPath")) +} + +func testPodContainerRestart(f *framework.Framework, pod *v1.Pod, fileInVolume, fileInSubpath string) { + pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure + + // Add liveness probe to subpath container + pod.Spec.Containers[0].Image = "busybox" + pod.Spec.Containers[0].Command = []string{"/bin/sh", "-ec", "sleep 100000"} + pod.Spec.Containers[0].LivenessProbe = &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", fileInSubpath}, + }, + }, + InitialDelaySeconds: 15, + FailureThreshold: 1, + PeriodSeconds: 2, + } + + // Set volume container to write file + writeCmd := fmt.Sprintf("echo test > %v", fileInVolume) + pod.Spec.Containers[1].Image = "busybox" + pod.Spec.Containers[1].Command = []string{"/bin/sh", "-ec", fmt.Sprintf("%v; sleep 100000", writeCmd)} + + // Start pod + By(fmt.Sprintf("Creating pod %s", pod.Name)) + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod) + Expect(err).ToNot(HaveOccurred()) + + err = framework.WaitTimeoutForPodRunningInNamespace(f.ClientSet, pod.Name, pod.Namespace, time.Minute) + Expect(err).ToNot(HaveOccurred()) + + By("Failing liveness probe") + out, err := podContainerExec(pod, 1, fmt.Sprintf("rm %v", fileInVolume)) + framework.Logf("Pod exec output: %v", out) + Expect(err).ToNot(HaveOccurred()) + + // Check that container has restarted + By("Waiting for container to restart") + restarts := int32(0) + err = wait.PollImmediate(10*time.Second, 2*time.Minute, func() (bool, error) { + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(pod.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + for _, status := range pod.Status.ContainerStatuses { + if status.Name == pod.Spec.Containers[0].Name { + framework.Logf("Container %v, restarts: %v", status.Name, status.RestartCount) + restarts = status.RestartCount + if restarts > 0 { + framework.Logf("Container has restart count: %v", restarts) + return true, nil + } + } + } + return false, nil + }) + Expect(err).ToNot(HaveOccurred()) + + // Fix liveness probe + By("Rewriting the file") + writeCmd = fmt.Sprintf("echo test-after > %v", fileInVolume) + out, err = podContainerExec(pod, 1, writeCmd) + framework.Logf("Pod exec output: %v", out) + Expect(err).ToNot(HaveOccurred()) + + // Wait for container restarts to stabilize + By("Waiting for container to stop restarting") + stableCount := int(0) + stableThreshold := int(time.Minute / framework.Poll) + err = wait.PollImmediate(framework.Poll, 2*time.Minute, func() (bool, error) { + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(pod.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + for _, status := range pod.Status.ContainerStatuses { + if status.Name == pod.Spec.Containers[0].Name { + if status.RestartCount == restarts { + stableCount++ + if stableCount > stableThreshold { + framework.Logf("Container restart has stabilized") + return true, nil + } + } else { + restarts = status.RestartCount + stableCount = 0 + framework.Logf("Container has restart count: %v", restarts) + } + break + } + } + return false, nil + }) + Expect(err).ToNot(HaveOccurred()) + + // Verify content of file in subpath + out, err = podContainerExec(pod, 0, fmt.Sprintf("cat %v", fileInSubpath)) + framework.Logf("Pod exec output: %v", out) + Expect(err).ToNot(HaveOccurred()) + Expect(strings.TrimSpace(out)).To(BeEquivalentTo("test-after")) +} + +func podContainerExec(pod *v1.Pod, containerIndex int, bashExec string) (string, error) { + return framework.RunKubectl("exec", fmt.Sprintf("--namespace=%s", pod.Namespace), pod.Name, "--container", pod.Spec.Containers[containerIndex].Name, "--", "/bin/sh", "-c", bashExec) +} + +type hostpathSource struct { +} + +func initHostpath() volSource { + return &hostpathSource{} +} + +func (s *hostpathSource) createVolume(f *framework.Framework) volInfo { + return volInfo{ + source: &v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/tmp", + }, + }, + } +} + +func (s *hostpathSource) cleanupVolume(f *framework.Framework) { +} + +type hostpathSymlinkSource struct { +} + +func initHostpathSymlink() volSource { + return &hostpathSymlinkSource{} +} + +func (s *hostpathSymlinkSource) createVolume(f *framework.Framework) volInfo { + nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet) + Expect(len(nodes.Items)).NotTo(BeZero(), "No available nodes for scheduling") + + node0 := &nodes.Items[0] + sourcePath := fmt.Sprintf("/tmp/%v", f.Namespace.Name) + targetPath := fmt.Sprintf("/tmp/%v-link", f.Namespace.Name) + cmd := fmt.Sprintf("mkdir %v -m 777 && ln -s %v %v", sourcePath, sourcePath, targetPath) + privileged := true + + // Launch pod to initialize hostpath directory and symlink + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("hostpath-symlink-prep-%s", f.Namespace.Name), + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: fmt.Sprintf("init-volume-%s", f.Namespace.Name), + Image: "busybox", + Command: []string{"/bin/sh", "-ec", cmd}, + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: "/tmp", + }, + }, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + }, + }, + }, + RestartPolicy: v1.RestartPolicyNever, + Volumes: []v1.Volume{ + { + Name: volumeName, + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/tmp", + }, + }, + }, + }, + NodeName: node0.Name, + }, + } + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod) + Expect(err).ToNot(HaveOccurred()) + + err = framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, pod.Namespace) + Expect(err).ToNot(HaveOccurred()) + + err = framework.DeletePodWithWait(f, f.ClientSet, pod) + Expect(err).ToNot(HaveOccurred()) + + return volInfo{ + source: &v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: targetPath, + }, + }, + node: node0.Name, + } +} + +func (s *hostpathSymlinkSource) cleanupVolume(f *framework.Framework) { +} + +type emptydirSource struct { +} + +func initEmptydir() volSource { + return &emptydirSource{} +} + +func (s *emptydirSource) createVolume(f *framework.Framework) volInfo { + return volInfo{ + source: &v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + } +} + +func (s *emptydirSource) cleanupVolume(f *framework.Framework) { +} + +type gcepdSource struct { + diskName string +} + +func initGCEPD() volSource { + framework.SkipUnlessProviderIs("gce", "gke") + return &gcepdSource{} +} + +func (s *gcepdSource) createVolume(f *framework.Framework) volInfo { + var err error + + framework.Logf("Creating GCE PD volume") + s.diskName, err = framework.CreatePDWithRetry() + framework.ExpectNoError(err, "Error creating PD") + + return volInfo{ + source: &v1.VolumeSource{ + GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{PDName: s.diskName}, + }, + } +} + +func (s *gcepdSource) cleanupVolume(f *framework.Framework) { + if s.diskName != "" { + err := framework.DeletePDWithRetry(s.diskName) + framework.ExpectNoError(err, "Error deleting PD") + } +} + +type gcepdPartitionSource struct { + diskName string +} + +func initGCEPDPartition() volSource { + // Need to manually create, attach, partition, detach the GCE PD + // with disk name "subpath-partitioned-disk" before running this test + manual := true + if manual { + framework.Skipf("Skipping manual GCE PD partition test") + } + framework.SkipUnlessProviderIs("gce", "gke") + return &gcepdPartitionSource{diskName: "subpath-partitioned-disk"} +} + +func (s *gcepdPartitionSource) createVolume(f *framework.Framework) volInfo { + // TODO: automate partitioned of GCE PD once it supports raw block volumes + // framework.Logf("Creating GCE PD volume") + // s.diskName, err = framework.CreatePDWithRetry() + // framework.ExpectNoError(err, "Error creating PD") + + return volInfo{ + source: &v1.VolumeSource{ + GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{ + PDName: s.diskName, + Partition: 1, + }, + }, + } +} + +func (s *gcepdPartitionSource) cleanupVolume(f *framework.Framework) { + if s.diskName != "" { + // err := framework.DeletePDWithRetry(s.diskName) + // framework.ExpectNoError(err, "Error deleting PD") + } +} + +type nfsSource struct { + serverPod *v1.Pod +} + +func initNFS() volSource { + return &nfsSource{} +} + +func (s *nfsSource) createVolume(f *framework.Framework) volInfo { + var serverIP string + + framework.Logf("Creating NFS server") + _, s.serverPod, serverIP = framework.NewNFSServer(f.ClientSet, f.Namespace.Name, []string{"-G", "777", "/exports"}) + + return volInfo{ + source: &v1.VolumeSource{ + NFS: &v1.NFSVolumeSource{ + Server: serverIP, + Path: "/exports", + }, + }, + } +} + +func (s *nfsSource) cleanupVolume(f *framework.Framework) { + if s.serverPod != nil { + framework.DeletePodWithWait(f, f.ClientSet, s.serverPod) + } +} + +type glusterSource struct { + serverPod *v1.Pod +} + +func initGluster() volSource { + framework.SkipUnlessNodeOSDistroIs("gci", "ubuntu") + return &glusterSource{} +} + +func (s *glusterSource) createVolume(f *framework.Framework) volInfo { + framework.Logf("Creating GlusterFS server") + _, s.serverPod, _ = framework.NewGlusterfsServer(f.ClientSet, f.Namespace.Name) + + return volInfo{ + source: &v1.VolumeSource{ + Glusterfs: &v1.GlusterfsVolumeSource{ + EndpointsName: "gluster-server", + Path: "test_vol", + }, + }, + } +} + +func (s *glusterSource) cleanupVolume(f *framework.Framework) { + if s.serverPod != nil { + framework.DeletePodWithWait(f, f.ClientSet, s.serverPod) + err := f.ClientSet.CoreV1().Endpoints(f.Namespace.Name).Delete("gluster-server", nil) + Expect(err).NotTo(HaveOccurred(), "Gluster delete endpoints failed") + } +} + +// TODO: need a better way to wrap PVC. A generic framework should support both static and dynamic PV. +// For static PV, can reuse createVolume methods for inline volumes +type nfsPVCSource struct { + serverPod *v1.Pod + pvc *v1.PersistentVolumeClaim + pv *v1.PersistentVolume +} + +func initNFSPVC() volSource { + return &nfsPVCSource{} +} + +func (s *nfsPVCSource) createVolume(f *framework.Framework) volInfo { + var serverIP string + + framework.Logf("Creating NFS server") + _, s.serverPod, serverIP = framework.NewNFSServer(f.ClientSet, f.Namespace.Name, []string{"-G", "777", "/exports"}) + + pvConfig := framework.PersistentVolumeConfig{ + NamePrefix: "nfs-", + StorageClassName: f.Namespace.Name, + PVSource: v1.PersistentVolumeSource{ + NFS: &v1.NFSVolumeSource{ + Server: serverIP, + Path: "/exports", + }, + }, + } + pvcConfig := framework.PersistentVolumeClaimConfig{ + StorageClassName: &f.Namespace.Name, + } + + framework.Logf("Creating PVC and PV") + pv, pvc, err := framework.CreatePVCPV(f.ClientSet, pvConfig, pvcConfig, f.Namespace.Name, false) + Expect(err).NotTo(HaveOccurred(), "PVC, PV creation failed") + + err = framework.WaitOnPVandPVC(f.ClientSet, f.Namespace.Name, pv, pvc) + Expect(err).NotTo(HaveOccurred(), "PVC, PV failed to bind") + + s.pvc = pvc + s.pv = pv + + return volInfo{ + source: &v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvc.Name, + }, + }, + } +} + +func (s *nfsPVCSource) cleanupVolume(f *framework.Framework) { + if s.pvc != nil || s.pv != nil { + if errs := framework.PVPVCCleanup(f.ClientSet, f.Namespace.Name, s.pv, s.pvc); len(errs) != 0 { + framework.Failf("Failed to delete PVC or PV: %v", utilerrors.NewAggregate(errs)) + } + } + if s.serverPod != nil { + framework.DeletePodWithWait(f, f.ClientSet, s.serverPod) + } +} From a3a1714fbad5a473409a1071e917aec3963ee8bf Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Mon, 5 Mar 2018 09:18:49 +0100 Subject: [PATCH 3/3] Add feature gate for subpath --- pkg/api/validation/validation.go | 6 ++- pkg/api/validation/validation_test.go | 53 +++++++++++++++++++++++++ pkg/features/kube_features.go | 8 ++++ pkg/kubelet/BUILD | 1 + pkg/kubelet/kubelet_pods.go | 4 ++ pkg/kubelet/kubelet_pods_test.go | 57 +++++++++++++++++++++++++++ 6 files changed, 128 insertions(+), 1 deletion(-) diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go index 785a059cde535..82a1d363c5965 100644 --- a/pkg/api/validation/validation.go +++ b/pkg/api/validation/validation.go @@ -1780,7 +1780,11 @@ func ValidateVolumeMounts(mounts []api.VolumeMount, volumes sets.String, fldPath } mountpoints.Insert(mnt.MountPath) if len(mnt.SubPath) > 0 { - allErrs = append(allErrs, validateLocalDescendingPath(mnt.SubPath, fldPath.Child("subPath"))...) + if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpath) { + allErrs = append(allErrs, field.Forbidden(fldPath.Child("subPath"), "subPath is disabled by feature-gate")) + } else { + allErrs = append(allErrs, validateLocalDescendingPath(mnt.SubPath, fldPath.Child("subPath"))...) + } } } return allErrs diff --git a/pkg/api/validation/validation_test.go b/pkg/api/validation/validation_test.go index a9cb2cb496c46..949e0bbbba774 100644 --- a/pkg/api/validation/validation_test.go +++ b/pkg/api/validation/validation_test.go @@ -10144,3 +10144,56 @@ func TestValidateFlexVolumeSource(t *testing.T) { } } } + +func TestValidateDisabledSubpath(t *testing.T) { + utilfeature.DefaultFeatureGate.Set("VolumeSubpath=false") + defer utilfeature.DefaultFeatureGate.Set("VolumeSubpath=true") + + volumes := []api.Volume{ + {Name: "abc", VolumeSource: api.VolumeSource{PersistentVolumeClaim: &api.PersistentVolumeClaimVolumeSource{ClaimName: "testclaim1"}}}, + {Name: "abc-123", VolumeSource: api.VolumeSource{PersistentVolumeClaim: &api.PersistentVolumeClaimVolumeSource{ClaimName: "testclaim2"}}}, + {Name: "123", VolumeSource: api.VolumeSource{HostPath: &api.HostPathVolumeSource{Path: "/foo/baz"}}}, + } + vols, v1err := ValidateVolumes(volumes, field.NewPath("field")) + if len(v1err) > 0 { + t.Errorf("Invalid test volume - expected success %v", v1err) + return + } + + cases := map[string]struct { + mounts []api.VolumeMount + expectError bool + }{ + "subpath not specified": { + []api.VolumeMount{ + { + Name: "abc-123", + MountPath: "/bab", + }, + }, + false, + }, + "subpath specified": { + []api.VolumeMount{ + { + Name: "abc-123", + MountPath: "/bab", + SubPath: "baz", + }, + }, + true, + }, + } + + for name, test := range cases { + errs := ValidateVolumeMounts(test.mounts, vols, field.NewPath("field")) + + if len(errs) != 0 && !test.expectError { + t.Errorf("test %v failed: %+v", name, errs) + } + + if len(errs) == 0 && test.expectError { + t.Errorf("test %v failed, expected error", name) + } + } +} diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 7643f93c2bb8a..464667e3ed78a 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -129,6 +129,13 @@ const ( // Mount secret, configMap, downwardAPI and projected volumes ReadOnly. Note: this feature // gate is present only for backward compatability, it will be removed in the 1.11 release. ReadOnlyAPIDataVolumes utilfeature.Feature = "ReadOnlyAPIDataVolumes" + + // owner: @saad-ali + // ga + // + // Allow mounting a subpath of a volume in a container + // Do not remove this feature gate even though it's GA + VolumeSubpath utilfeature.Feature = "VolumeSubpath" ) func init() { @@ -152,6 +159,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS RotateKubeletClientCertificate: {Default: false, PreRelease: utilfeature.Alpha}, PersistentLocalVolumes: {Default: false, PreRelease: utilfeature.Alpha}, LocalStorageCapacityIsolation: {Default: false, PreRelease: utilfeature.Alpha}, + VolumeSubpath: {Default: true, PreRelease: utilfeature.GA}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/kubelet/BUILD b/pkg/kubelet/BUILD index 6b6f104514f40..09695944df3a3 100644 --- a/pkg/kubelet/BUILD +++ b/pkg/kubelet/BUILD @@ -231,6 +231,7 @@ go_test( "//vendor/k8s.io/apimachinery/pkg/util/strategicpatch:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library", + "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library", "//vendor/k8s.io/client-go/pkg/api/v1:go_default_library", "//vendor/k8s.io/client-go/rest:go_default_library", "//vendor/k8s.io/client-go/testing:go_default_library", diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index ca44368312887..e20d43fb7ff13 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -143,6 +143,10 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h return nil, cleanupAction, err } if mount.SubPath != "" { + if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpath) { + return nil, cleanupAction, fmt.Errorf("volume subpaths are disabled") + } + if filepath.IsAbs(mount.SubPath) { return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath) } diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index a970315440460..ab47825b108c5 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + utilfeature "k8s.io/apiserver/pkg/util/feature" core "k8s.io/client-go/testing" "k8s.io/client-go/tools/record" "k8s.io/kubernetes/pkg/api" @@ -2145,3 +2146,59 @@ func TestTruncatePodHostname(t *testing.T) { assert.Equal(t, test.output, output) } } + +func TestDisabledSubpath(t *testing.T) { + fm := &mount.FakeMounter{} + pod := v1.Pod{ + Spec: v1.PodSpec{ + HostNetwork: true, + }, + } + podVolumes := kubecontainer.VolumeMap{ + "disk": kubecontainer.VolumeInfo{Mounter: &stubVolume{path: "/mnt/disk"}}, + } + + cases := map[string]struct { + container v1.Container + expectError bool + }{ + "subpath not specified": { + v1.Container{ + VolumeMounts: []v1.VolumeMount{ + { + MountPath: "/mnt/path3", + Name: "disk", + ReadOnly: true, + }, + }, + }, + false, + }, + "subpath specified": { + v1.Container{ + VolumeMounts: []v1.VolumeMount{ + { + MountPath: "/mnt/path3", + SubPath: "/must/not/be/absolute", + Name: "disk", + ReadOnly: true, + }, + }, + }, + true, + }, + } + + utilfeature.DefaultFeatureGate.Set("VolumeSubpath=false") + defer utilfeature.DefaultFeatureGate.Set("VolumeSubpath=true") + + for name, test := range cases { + _, _, err := makeMounts(&pod, "/pod", &test.container, "fakepodname", "", "", podVolumes, fm) + if err != nil && !test.expectError { + t.Errorf("test %v failed: %v", name, err) + } + if err == nil && test.expectError { + t.Errorf("test %v failed: expected error", name) + } + } +}