Skip to content

Commit

Permalink
Lock subPath volumes
Browse files Browse the repository at this point in the history
Users must not be allowed to step outside the volume with subPath.
Therefore the final subPath directory must be "locked" somehow
and checked if it's inside volume.

On Windows, we lock the directories. On Linux, we bind-mount the final
subPath into /var/lib/kubelet/pods/<uid>/volume-subpaths/<container name>/<subPathName>,
it can't be changed to symlink user once it's bind-mounted.
  • Loading branch information
jsafrane committed Mar 5, 2018
1 parent 8495e84 commit 975d0a4
Show file tree
Hide file tree
Showing 28 changed files with 2,798 additions and 56 deletions.
12 changes: 12 additions & 0 deletions pkg/kubelet/cm/container_manager_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@ func (mi *fakeMountInterface) PathIsDevice(pathname string) (bool, error) {
return true, nil
}

func (mi *fakeMountInterface) PrepareSafeSubpath(subPath mount.Subpath) (newHostPath string, cleanupAction func(), err error) {
return "", nil, nil
}

func (mi *fakeMountInterface) CleanSubPaths(_, _ string) error {
return nil
}

func (mi *fakeMountInterface) SafeMakeDir(_, _ string, _ os.FileMode) error {
return nil
}

func fakeContainerMgrMountInt() mount.Interface {
return &fakeMountInterface{
[]mount.MountPoint{
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubelet/container/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ type HandlerRunner interface {
// RuntimeHelper wraps kubelet to make container runtime
// able to get necessary informations like the RunContainerOptions, DNS settings, Host IP.
type RuntimeHelper interface {
GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, useClusterFirstPolicy bool, err error)
GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, useClusterFirstPolicy bool, cleanupAction func(), err error)
GetClusterDNS(pod *v1.Pod) (dnsServers []string, dnsSearches []string, useClusterFirstPolicy bool, err error)
// GetPodCgroupParent returns the the CgroupName identifer, and its literal cgroupfs form on the host
// of a pod.
Expand Down
4 changes: 2 additions & 2 deletions pkg/kubelet/container/testing/fake_runtime_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ type FakeRuntimeHelper struct {
Err error
}

func (f *FakeRuntimeHelper) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, bool, error) {
func (f *FakeRuntimeHelper) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, bool, func(), error) {
var opts kubecontainer.RunContainerOptions
if len(container.TerminationMessagePath) != 0 {
opts.PodContainerDir = f.PodContainerDir
}
return &opts, false, nil
return &opts, false, nil, nil
}

func (f *FakeRuntimeHelper) GetPodCgroupParent(pod *v1.Pod) string {
Expand Down
67 changes: 41 additions & 26 deletions pkg/kubelet/kubelet_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import (
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/util"
mountutil "k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/util/volumehelper"
volumevalidation "k8s.io/kubernetes/pkg/volume/validation"
Expand Down Expand Up @@ -110,7 +111,7 @@ func (kl *Kubelet) makeDevices(pod *v1.Pod, container *v1.Container) ([]kubecont
}

// makeMounts determines the mount points for the given container.
func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap) ([]kubecontainer.Mount, error) {
func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap, mounter mountutil.Interface) ([]kubecontainer.Mount, func(), error) {
// Kubernetes only mounts on /etc/hosts if :
// - container does not use hostNetwork and
// - container is not an infrastructure(pause) container
Expand All @@ -120,7 +121,8 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
mountEtcHostsFile := !pod.Spec.HostNetwork && len(podIP) > 0 && runtime.GOOS != "windows"
glog.V(3).Infof("container: %v/%v/%v podIP: %q creating hosts mount: %v", pod.Namespace, pod.Name, container.Name, podIP, mountEtcHostsFile)
mounts := []kubecontainer.Mount{}
for _, mount := range container.VolumeMounts {
var cleanupAction func() = nil
for i, mount := range container.VolumeMounts {
mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath)
vol, ok := podVolumes[mount.Name]
if !ok || vol.Mounter == nil {
Expand All @@ -138,25 +140,29 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
}
hostPath, err := volume.GetPath(vol.Mounter)
if err != nil {
return nil, err
return nil, cleanupAction, err
}
if mount.SubPath != "" {
if filepath.IsAbs(mount.SubPath) {
return nil, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath)
return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath)
}

err = volumevalidation.ValidatePathNoBacksteps(mount.SubPath)
if err != nil {
return nil, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err)
return nil, cleanupAction, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err)
}

fileinfo, err := os.Lstat(hostPath)
if err != nil {
return nil, err
return nil, cleanupAction, err
}
perm := fileinfo.Mode()

hostPath = filepath.Join(hostPath, mount.SubPath)
volumePath, err := filepath.EvalSymlinks(hostPath)
if err != nil {
return nil, cleanupAction, err
}
hostPath = filepath.Join(volumePath, mount.SubPath)

if subPathExists, err := util.FileOrSymlinkExists(hostPath); err != nil {
glog.Errorf("Could not determine if subPath %s exists; will not attempt to change its permissions", hostPath)
Expand All @@ -165,17 +171,25 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
// incorrect ownership and mode. For example, the sub path directory must have at least g+rwx
// when the pod specifies an fsGroup, and if the directory is not created here, Docker will
// later auto-create it with the incorrect mode 0750
if err := os.MkdirAll(hostPath, perm); err != nil {
glog.Errorf("failed to mkdir:%s", hostPath)
return nil, err
}

// chmod the sub path because umask may have prevented us from making the sub path with the same
// permissions as the mounter path
if err := os.Chmod(hostPath, perm); err != nil {
return nil, err
// Make extra care not to escape the volume!
if err := mounter.SafeMakeDir(hostPath, volumePath, perm); err != nil {
glog.Errorf("failed to mkdir %q: %v", hostPath, err)
return nil, cleanupAction, err
}
}
hostPath, cleanupAction, err = mounter.PrepareSafeSubpath(mountutil.Subpath{
VolumeMountIndex: i,
Path: hostPath,
VolumeName: mount.Name,
VolumePath: volumePath,
PodDir: podDir,
ContainerName: container.Name,
})
if err != nil {
// Don't pass detailed error back to the user because it could give information about host filesystem
glog.Errorf("failed to prepare subPath for volumeMount %q of container %q: %v", mount.Name, container.Name, err)
return nil, cleanupAction, fmt.Errorf("failed to prepare subPath for volumeMount %q of container %q", mount.Name, container.Name)
}
}

// Docker Volume Mounts fail on Windows if it is not of the form C:/
Expand Down Expand Up @@ -203,11 +217,11 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
hostAliases := pod.Spec.HostAliases
hostsMount, err := makeHostsMount(podDir, podIP, hostName, hostDomain, hostAliases)
if err != nil {
return nil, err
return nil, cleanupAction, err
}
mounts = append(mounts, *hostsMount)
}
return mounts, nil
return mounts, cleanupAction, nil
}

// makeHostsMount makes the mountpoint for the hosts file that the containers
Expand Down Expand Up @@ -314,14 +328,14 @@ func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string {

// GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
// the container runtime to set parameters for launching a container.
func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, bool, error) {
func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, bool, func(), error) {
var err error
useClusterFirstPolicy := false
cgroupParent := kl.GetPodCgroupParent(pod)
opts := &kubecontainer.RunContainerOptions{CgroupParent: cgroupParent}
hostname, hostDomainName, err := kl.GeneratePodHostNameAndDomain(pod)
if err != nil {
return nil, false, err
return nil, false, nil, err
}
opts.Hostname = hostname
podName := volumehelper.GetUniquePodName(pod)
Expand All @@ -331,16 +345,17 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai
// TODO(random-liu): Move following convert functions into pkg/kubelet/container
opts.Devices, err = kl.makeDevices(pod, container)
if err != nil {
return nil, false, err
return nil, false, nil, err
}

opts.Mounts, err = makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes)
var cleanupAction func()
opts.Mounts, cleanupAction, err = makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes, kl.mounter)
if err != nil {
return nil, false, err
return nil, false, cleanupAction, err
}
opts.Envs, err = kl.makeEnvironmentVariables(pod, container, podIP)
if err != nil {
return nil, false, err
return nil, false, cleanupAction, err
}

// Disabling adding TerminationMessagePath on Windows as these files would be mounted as docker volume and
Expand All @@ -356,15 +371,15 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai

opts.DNS, opts.DNSSearch, useClusterFirstPolicy, err = kl.GetClusterDNS(pod)
if err != nil {
return nil, false, err
return nil, false, cleanupAction, err
}

// only do this check if the experimental behavior is enabled, otherwise allow it to default to false
if kl.experimentalHostUserNamespaceDefaulting {
opts.EnableHostUserNamespace = kl.enableHostUserNamespace(pod)
}

return opts, useClusterFirstPolicy, nil
return opts, useClusterFirstPolicy, cleanupAction, nil
}

var masterServices = sets.NewString("kubernetes")
Expand Down
8 changes: 5 additions & 3 deletions pkg/kubelet/kubelet_pods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/server/portforward"
"k8s.io/kubernetes/pkg/kubelet/server/remotecommand"
"k8s.io/kubernetes/pkg/util/mount"
)

func TestMakeMounts(t *testing.T) {
Expand Down Expand Up @@ -149,13 +150,14 @@ func TestMakeMounts(t *testing.T) {

for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
fm := &mount.FakeMounter{}
pod := v1.Pod{
Spec: v1.PodSpec{
HostNetwork: true,
},
}

mounts, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", "", tc.podVolumes)
mounts, _, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", "", tc.podVolumes, fm)

// validate only the error if we expect an error
if tc.expectErr {
Expand Down Expand Up @@ -338,7 +340,7 @@ func TestGenerateRunContainerOptions_DNSConfigurationParams(t *testing.T) {
options := make([]*kubecontainer.RunContainerOptions, 4)
for i, pod := range pods {
var err error
options[i], _, err = kubelet.GenerateRunContainerOptions(pod, &v1.Container{}, "")
options[i], _, _, err = kubelet.GenerateRunContainerOptions(pod, &v1.Container{}, "")
if err != nil {
t.Fatalf("failed to generate container options: %v", err)
}
Expand Down Expand Up @@ -371,7 +373,7 @@ func TestGenerateRunContainerOptions_DNSConfigurationParams(t *testing.T) {
kubelet.resolverConfig = "/etc/resolv.conf"
for i, pod := range pods {
var err error
options[i], _, err = kubelet.GenerateRunContainerOptions(pod, &v1.Container{}, "")
options[i], _, _, err = kubelet.GenerateRunContainerOptions(pod, &v1.Container{}, "")
if err != nil {
t.Fatalf("failed to generate container options: %v", err)
}
Expand Down
17 changes: 10 additions & 7 deletions pkg/kubelet/kuberuntime/kuberuntime_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,10 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
restartCount = containerStatus.RestartCount + 1
}

containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef)
containerConfig, cleanupAction, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef)
if cleanupAction != nil {
defer cleanupAction()
}
if err != nil {
m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
return "Generate Container Config Failed", err
Expand Down Expand Up @@ -157,20 +160,20 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
}

// generateContainerConfig generates container config for kubelet runtime v1.
func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string) (*runtimeapi.ContainerConfig, error) {
opts, _, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string) (*runtimeapi.ContainerConfig, func(), error) {
opts, _, cleanupAction, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
if err != nil {
return nil, err
return nil, nil, err
}

uid, username, err := m.getImageUser(container.Image)
if err != nil {
return nil, err
return nil, cleanupAction, err
}

// Verify RunAsNonRoot. Non-root verification only supports numeric user.
if err := verifyRunAsNonRoot(pod, container, uid, username); err != nil {
return nil, err
return nil, cleanupAction, err
}

command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)
Expand Down Expand Up @@ -207,7 +210,7 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai
}
config.Envs = envs

return config, nil
return config, cleanupAction, nil
}

// generateLinuxContainerConfig generates linux container config for kubelet runtime v1.
Expand Down
8 changes: 4 additions & 4 deletions pkg/kubelet/kuberuntime/kuberuntime_container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func makeExpetectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIn
container := &pod.Spec.Containers[containerIndex]
podIP := ""
restartCount := 0
opts, _, _ := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
opts, _, _, _ := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
restartCountUint32 := uint32(restartCount)
envs := make([]*runtimeapi.KeyValue, len(opts.Envs))
Expand Down Expand Up @@ -222,7 +222,7 @@ func TestGenerateContainerConfig(t *testing.T) {
}

expectedConfig := makeExpetectedConfig(m, pod, 0)
containerConfig, err := m.generateContainerConfig(&pod.Spec.Containers[0], pod, 0, "", pod.Spec.Containers[0].Image)
containerConfig, _, err := m.generateContainerConfig(&pod.Spec.Containers[0], pod, 0, "", pod.Spec.Containers[0].Image)
assert.NoError(t, err)
assert.Equal(t, expectedConfig, containerConfig, "generate container config for kubelet runtime v1.")

Expand Down Expand Up @@ -252,7 +252,7 @@ func TestGenerateContainerConfig(t *testing.T) {
}

expectedConfig = makeExpetectedConfig(m, podWithContainerSecurityContext, 0)
containerConfig, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image)
containerConfig, _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image)
assert.Error(t, err)

imageId, _ := imageService.PullImage(&runtimeapi.ImageSpec{Image: "busybox"}, nil)
Expand All @@ -264,6 +264,6 @@ func TestGenerateContainerConfig(t *testing.T) {
podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsUser = nil
podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsNonRoot = &runAsNonRootTrue

_, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image)
_, _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image)
assert.Error(t, err, "RunAsNonRoot should fail for non-numeric username")
}
2 changes: 1 addition & 1 deletion pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ func makeFakeContainer(t *testing.T, m *kubeGenericRuntimeManager, template cont
sandboxConfig, err := m.generatePodSandboxConfig(template.pod, template.sandboxAttempt)
assert.NoError(t, err, "generatePodSandboxConfig for container template %+v", template)

containerConfig, err := m.generateContainerConfig(template.container, template.pod, template.attempt, "", template.container.Image)
containerConfig, _, err := m.generateContainerConfig(template.container, template.pod, template.attempt, "", template.container.Image)
assert.NoError(t, err, "generateContainerConfig for container template %+v", template)

podSandboxID := apitest.BuildSandboxName(sandboxConfig.Metadata)
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubelet/rkt/rkt.go
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,7 @@ func (r *Runtime) newAppcRuntimeApp(pod *v1.Pod, podIP string, c v1.Container, r
}

// TODO: determine how this should be handled for rkt
opts, _, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, podIP)
opts, _, _, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, podIP)
if err != nil {
return err
}
Expand Down
13 changes: 13 additions & 0 deletions pkg/kubelet/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package util
import (
"fmt"
"net/url"
"path/filepath"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
Expand All @@ -45,3 +47,14 @@ func parseEndpoint(endpoint string) (string, string, error) {
return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme)
}
}

func pathWithinBase(fullPath, basePath string) bool {
rel, err := filepath.Rel(basePath, fullPath)
if err != nil {
return false
}
if strings.HasPrefix(rel, "..") {
return false
}
return true
}
9 changes: 9 additions & 0 deletions pkg/kubelet/util/util_unsupported.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,12 @@ func CreateListener(endpoint string) (net.Listener, error) {
func GetAddressAndDialer(endpoint string) (string, func(addr string, timeout time.Duration) (net.Conn, error), error) {
return "", nil, fmt.Errorf("GetAddressAndDialer is unsupported in this build")
}

// LockAndCheckSubPath empty implementation
func LockAndCheckSubPath(volumePath, subPath string) ([]uintptr, error) {
return []uintptr{}, nil
}

// UnlockPath empty implementation
func UnlockPath(fileHandles []uintptr) {
}
2 changes: 1 addition & 1 deletion pkg/kubelet/volumemanager/reconciler/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ func (rc *reconciler) reconcile() {
// Volume is mounted, unmount it
glog.V(12).Infof(mountedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmountVolume", ""))
err := rc.operationExecutor.UnmountVolume(
mountedVolume.MountedVolume, rc.actualStateOfWorld)
mountedVolume.MountedVolume, rc.actualStateOfWorld, rc.kubeletPodsDir)
if err != nil &&
!nestedpendingoperations.IsAlreadyExists(err) &&
!exponentialbackoff.IsExponentialBackoff(err) {
Expand Down
Loading

0 comments on commit 975d0a4

Please sign in to comment.