Skip to content

Commit

Permalink
gpu: support strict gpu share with hami (#2272)
Browse files Browse the repository at this point in the history
Signed-off-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
Co-authored-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
  • Loading branch information
ZiMengSheng and wangjianyu.wjy authored Dec 19, 2024
1 parent 9bb39a2 commit a62dd49
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 62 deletions.
8 changes: 8 additions & 0 deletions apis/extension/device_share.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ const (
LabelGPUModel string = NodeDomainPrefix + "/gpu-model"
LabelGPUDriverVersion string = NodeDomainPrefix + "/gpu-driver-version"
LabelSecondaryDeviceWellPlanned string = NodeDomainPrefix + "/secondary-device-well-planned"

LabelGPUIsolationProvider = DomainPrefix + "gpu-isolation-provider"
)

// DeviceAllocations would be injected into Pod as form of annotation during Pre-bind stage.
Expand Down Expand Up @@ -220,6 +222,12 @@ const (
GPUPartitionPolicyPrefer GPUPartitionPolicy = "Prefer"
)

// GPUIsolationProvider names a GPU isolation implementation.
// The koordlet GPU hook compares the pod label LabelGPUIsolationProvider
// against values of this type to decide whether to inject extra settings.
type GPUIsolationProvider string

const (
// GPUIsolationProviderHAMICore selects HAMi-core as the isolation provider.
GPUIsolationProviderHAMICore GPUIsolationProvider = "HAMi-core"
)

func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations, error) {
deviceAllocations := DeviceAllocations{}
data, ok := podAnnotations[AnnotationDeviceAllocated]
Expand Down
36 changes: 36 additions & 0 deletions pkg/koordlet/runtimehooks/hooks/gpu/gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
schedulingv1alpha1 "github.com/koordinator-sh/koordinator/apis/scheduling/v1alpha1"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/protocol"
"github.com/koordinator-sh/koordinator/pkg/koordlet/util/system"
rmconfig "github.com/koordinator-sh/koordinator/pkg/runtimeproxy/config"
)

Expand Down Expand Up @@ -70,5 +71,40 @@ func (p *gpuPlugin) InjectContainerGPUEnv(proto protocol.HooksProtocol) error {
containerCtx.Response.AddContainerEnvs = make(map[string]string)
}
containerCtx.Response.AddContainerEnvs[GpuAllocEnv] = strings.Join(gpuIDs, ",")
if containerReq.PodLabels[ext.LabelGPUIsolationProvider] == string(ext.GPUIsolationProviderHAMICore) {
gpuResources := devices[0].Resources
gpuMemoryRatio, ok := gpuResources[ext.ResourceGPUMemoryRatio]
if !ok {
return fmt.Errorf("gpu memory ratio not found in gpu resource")
}
if gpuMemoryRatio.Value() < 100 {
gpuMemory, ok := gpuResources[ext.ResourceGPUMemory]
if !ok {
return fmt.Errorf("gpu memory not found in gpu resource")
}
containerCtx.Response.AddContainerEnvs["CUDA_DEVICE_MEMORY_LIMIT"] = fmt.Sprintf("%d", gpuMemory.Value())
gpuCore, ok := gpuResources[ext.ResourceGPUCore]
if ok {
containerCtx.Response.AddContainerEnvs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprintf("%d", gpuCore.Value())
}
containerCtx.Response.AddContainerEnvs["LD_PRELOAD"] = system.Conf.HAMICoreLibraryDirectoryPath
containerCtx.Response.AddContainerMounts = append(containerCtx.Response.AddContainerMounts,
&protocol.Mount{
Destination: system.Conf.HAMICoreLibraryDirectoryPath,
Type: "bind",
Source: system.Conf.HAMICoreLibraryDirectoryPath,
Options: []string{"rbind"},
},
// Because of https://github.com/Project-HAMi/HAMi/issues/696, we provide the /tmp/vgpulock directory in the pod via a bind mount.
&protocol.Mount{
Destination: "/tmp/vgpulock",
Type: "bind",
Source: "/tmp/vgpulock",
Options: []string{"rbind"},
},
)
}
}

return nil
}
86 changes: 64 additions & 22 deletions pkg/koordlet/runtimehooks/hooks/gpu/gpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,49 +31,91 @@ func Test_InjectContainerGPUEnv(t *testing.T) {
expectedAllocStr string
expectedError bool
proto protocol.HooksProtocol
expectedMounts []*protocol.Mount
expectedEnvs map[string]string
}{
{
"test empty proto",
"",
true,
nil,
name: "test empty proto",
expectedAllocStr: "",
expectedError: true,
proto: nil,
},
{
"test normal gpu alloc",
"0,1",
false,
&protocol.ContainerContext{
name: "test normal gpu alloc",
expectedAllocStr: "0,1",
expectedError: false,
proto: &protocol.ContainerContext{
Request: protocol.ContainerRequest{
PodAnnotations: map[string]string{
ext.AnnotationDeviceAllocated: "{\"gpu\": [{\"minor\": 0},{\"minor\": 1}]}",
},
},
},
expectedEnvs: map[string]string{GpuAllocEnv: "0,1"},
},
{
"test empty gpu alloc",
"",
false,
&protocol.ContainerContext{
name: "test empty gpu alloc",
expectedAllocStr: "",
expectedError: false,
proto: &protocol.ContainerContext{
Request: protocol.ContainerRequest{
PodAnnotations: map[string]string{
ext.AnnotationDeviceAllocated: "{\"fpga\": [{\"minor\": 0},{\"minor\": 1}]}",
},
},
},
},
{
name: "gpu share with HAMi",
expectedAllocStr: "1",
expectedError: false,
proto: &protocol.ContainerContext{
Request: protocol.ContainerRequest{
PodLabels: map[string]string{
ext.LabelGPUIsolationProvider: string(ext.GPUIsolationProviderHAMICore),
},
PodAnnotations: map[string]string{
ext.AnnotationDeviceAllocated: `{"gpu":[{"minor":1,"resources":{"koordinator.sh/gpu-core":"50","koordinator.sh/gpu-memory":"16Gi","koordinator.sh/gpu-memory-ratio":"50"}}]}`,
},
},
},
expectedEnvs: map[string]string{
GpuAllocEnv: "1",
"CUDA_DEVICE_MEMORY_LIMIT": "17179869184",
"CUDA_DEVICE_SM_LIMIT": "50",
"LD_PRELOAD": "/usr/local/vgpu/libvgpu.so",
},
expectedMounts: []*protocol.Mount{
{
Destination: "/usr/local/vgpu/libvgpu.so",
Type: "bind",
Source: "/usr/local/vgpu/libvgpu.so",
Options: []string{"rbind"},
},
{
Destination: "/tmp/vgpulock",
Type: "bind",
Source: "/tmp/vgpulock",
Options: []string{"rbind"},
},
},
},
}
plugin := gpuPlugin{}
for _, tt := range tests {
var containerCtx *protocol.ContainerContext
if tt.proto != nil {
containerCtx = tt.proto.(*protocol.ContainerContext)
}
err := plugin.InjectContainerGPUEnv(containerCtx)
assert.Equal(t, tt.expectedError, err != nil, tt.name)
if tt.proto != nil {
containerCtx := tt.proto.(*protocol.ContainerContext)
assert.Equal(t, containerCtx.Response.AddContainerEnvs[GpuAllocEnv], tt.expectedAllocStr, tt.name)
}
t.Run(tt.name, func(t *testing.T) {
var containerCtx *protocol.ContainerContext
if tt.proto != nil {
containerCtx = tt.proto.(*protocol.ContainerContext)
}
err := plugin.InjectContainerGPUEnv(containerCtx)
assert.Equal(t, tt.expectedError, err != nil, tt.name)
if tt.proto != nil {
containerCtx := tt.proto.(*protocol.ContainerContext)
assert.Equal(t, containerCtx.Response.AddContainerEnvs[GpuAllocEnv], tt.expectedAllocStr, tt.name)
assert.Equal(t, containerCtx.Response.AddContainerEnvs, tt.expectedEnvs, tt.name)
assert.Equal(t, containerCtx.Response.AddContainerMounts, tt.expectedMounts, tt.name)
}
})
}
}
10 changes: 10 additions & 0 deletions pkg/koordlet/runtimehooks/protocol/container_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ func (c *ContainerRequest) FromReconciler(podMeta *statesinformer.PodMeta, conta
// ContainerResponse collects the adjustments that hooks want applied to a
// container.
type ContainerResponse struct {
// Resources holds resource settings for the container.
// NOTE(review): exact semantics are defined by the Resources type — confirm there.
Resources Resources
// AddContainerEnvs maps environment variable names to the values to add.
AddContainerEnvs map[string]string
// AddContainerMounts lists mounts to add; NriDone passes each entry to
// adjust.AddMount.
AddContainerMounts []*Mount
// AddContainerDevices lists devices to add; NriDone passes each entry to
// adjust.AddDevice.
AddContainerDevices []*LinuxDevice
}

Expand Down Expand Up @@ -288,6 +289,15 @@ func (c *ContainerContext) NriDone(executor resourceexecutor.ResourceUpdateExecu
}
}

for _, m := range c.Response.AddContainerMounts {
adjust.AddMount(&api.Mount{
Destination: m.Destination,
Type: m.Type,
Source: m.Source,
Options: m.Options,
})
}

if len(c.Response.AddContainerDevices) != 0 {
for i := range c.Response.AddContainerDevices {
adjust.AddDevice(&api.LinuxDevice{
Expand Down
7 changes: 7 additions & 0 deletions pkg/koordlet/runtimehooks/protocol/protocol.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,13 @@ func (r *Resources) FromContainer(container *corev1.Container) {
}
}

// Mount describes a mount that a hook asks to add to a container.
// NriDone copies the four fields one-to-one into an api.Mount.
// NOTE(review): the protobuf struct tags look copied from a generated
// message type — confirm whether anything relies on them.
type Mount struct {
// Destination is the mount point path (presumably inside the container — confirm).
Destination string `protobuf:"bytes,1,opt,name=destination,proto3" json:"destination,omitempty"`
// Type is the mount type, e.g. "bind".
Type string `protobuf:"bytes,2,opt,name=type,proto3" json:"type,omitempty"`
// Source is the path being mounted (presumably on the host — confirm).
Source string `protobuf:"bytes,3,opt,name=source,proto3" json:"source,omitempty"`
// Options are the mount options, e.g. "rbind".
Options []string `protobuf:"bytes,4,rep,name=options,proto3" json:"options,omitempty"`
}

func injectCPUShares(cgroupParent string, cpuShares int64, a *audit.EventHelper, e resourceexecutor.ResourceUpdateExecutor) (resourceexecutor.ResourceUpdater, error) {
cpuShareStr := strconv.FormatInt(cpuShares, 10)
updater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUSharesName, cgroupParent, cpuShareStr, a)
Expand Down
48 changes: 26 additions & 22 deletions pkg/koordlet/util/system/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,12 @@ type Config struct {
RunRootDir string
RuntimeHooksConfigDir string

ContainerdEndPoint string
PouchEndpoint string
DockerEndPoint string
CrioEndPoint string
DefaultRuntimeType string
ContainerdEndPoint string
PouchEndpoint string
DockerEndPoint string
CrioEndPoint string
DefaultRuntimeType string
HAMICoreLibraryDirectoryPath string
}

func init() {
Expand Down Expand Up @@ -77,30 +78,32 @@ func InitSupportConfigs() {

// NewHostModeConfig returns a Config whose root directories are the regular
// system locations (/sys/fs/cgroup/, /proc/, /sys/, /var/run/, /run/, ...),
// with "containerd" as the default runtime type.
// NOTE(review): despite its name, HAMICoreLibraryDirectoryPath defaults to
// what looks like the libvgpu.so file path rather than a directory — confirm
// the intended meaning.
func NewHostModeConfig() *Config {
return &Config{
CgroupRootDir: "/sys/fs/cgroup/",
ProcRootDir: "/proc/",
SysRootDir: "/sys/",
SysFSRootDir: "/sys/fs/",
VarRunRootDir: "/var/run/",
VarLibKubeletRootDir: "/var/lib/kubelet/",
RunRootDir: "/run/",
RuntimeHooksConfigDir: "/etc/runtime/hookserver.d",
DefaultRuntimeType: "containerd",
CgroupRootDir: "/sys/fs/cgroup/",
ProcRootDir: "/proc/",
SysRootDir: "/sys/",
SysFSRootDir: "/sys/fs/",
VarRunRootDir: "/var/run/",
VarLibKubeletRootDir: "/var/lib/kubelet/",
RunRootDir: "/run/",
RuntimeHooksConfigDir: "/etc/runtime/hookserver.d",
DefaultRuntimeType: "containerd",
HAMICoreLibraryDirectoryPath: "/usr/local/vgpu/libvgpu.so",
}
}

// NewDsModeConfig returns a Config whose cgroup, sys, var-run, run and
// hook-config directories point at /host-* paths, with "containerd" as the
// default runtime type.
// NOTE(review): presumably the /host-* paths are host directories mounted
// into the koordlet DaemonSet pod — confirm against the deployment manifest.
// NOTE(review): despite its name, HAMICoreLibraryDirectoryPath defaults to
// what looks like the libvgpu.so file path rather than a directory — confirm
// the intended meaning.
func NewDsModeConfig() *Config {
return &Config{
CgroupRootDir: "/host-cgroup/",
// some dirs are not covered by ns, or are unused when `hostPID` is on
ProcRootDir: "/proc/",
SysRootDir: "/host-sys/",
SysFSRootDir: "/host-sys-fs/",
VarRunRootDir: "/host-var-run/",
VarLibKubeletRootDir: "/var/lib/kubelet/",
RunRootDir: "/host-run/",
RuntimeHooksConfigDir: "/host-etc-hookserver/",
DefaultRuntimeType: "containerd",
ProcRootDir: "/proc/",
SysRootDir: "/host-sys/",
SysFSRootDir: "/host-sys-fs/",
VarRunRootDir: "/host-var-run/",
VarLibKubeletRootDir: "/var/lib/kubelet/",
RunRootDir: "/host-run/",
RuntimeHooksConfigDir: "/host-etc-hookserver/",
DefaultRuntimeType: "containerd",
HAMICoreLibraryDirectoryPath: "/usr/local/vgpu/libvgpu.so",
}
}

Expand All @@ -122,4 +125,5 @@ func (c *Config) InitFlags(fs *flag.FlagSet) {
fs.StringVar(&c.PouchEndpoint, "pouch-endpoint", c.PouchEndpoint, "pouch endPoint")

fs.StringVar(&c.DefaultRuntimeType, "default-runtime-type", c.DefaultRuntimeType, "default runtime type during runtime hooks handle request, candidates are containerd/docker/pouch.")
fs.StringVar(&c.HAMICoreLibraryDirectoryPath, "hami-core-library-directory-path", c.HAMICoreLibraryDirectoryPath, "path of hami core library")
}
38 changes: 20 additions & 18 deletions pkg/koordlet/util/system/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,33 @@ import (

// Test_NewDsModeConfig checks that NewDsModeConfig returns exactly the
// expected default Config, field by field.
func Test_NewDsModeConfig(t *testing.T) {
expectConfig := &Config{
CgroupRootDir: "/host-cgroup/",
ProcRootDir: "/proc/",
SysRootDir: "/host-sys/",
SysFSRootDir: "/host-sys-fs/",
VarRunRootDir: "/host-var-run/",
VarLibKubeletRootDir: "/var/lib/kubelet/",
RunRootDir: "/host-run/",
RuntimeHooksConfigDir: "/host-etc-hookserver/",
DefaultRuntimeType: "containerd",
CgroupRootDir: "/host-cgroup/",
ProcRootDir: "/proc/",
SysRootDir: "/host-sys/",
SysFSRootDir: "/host-sys-fs/",
VarRunRootDir: "/host-var-run/",
VarLibKubeletRootDir: "/var/lib/kubelet/",
RunRootDir: "/host-run/",
RuntimeHooksConfigDir: "/host-etc-hookserver/",
DefaultRuntimeType: "containerd",
HAMICoreLibraryDirectoryPath: "/usr/local/vgpu/libvgpu.so",
}
// Compare the whole struct so any changed default is caught.
defaultConfig := NewDsModeConfig()
assert.Equal(t, expectConfig, defaultConfig)
}

func Test_NewHostModeConfig(t *testing.T) {
expectConfig := &Config{
CgroupRootDir: "/sys/fs/cgroup/",
ProcRootDir: "/proc/",
SysRootDir: "/sys/",
SysFSRootDir: "/sys/fs/",
VarRunRootDir: "/var/run/",
VarLibKubeletRootDir: "/var/lib/kubelet/",
RunRootDir: "/run/",
RuntimeHooksConfigDir: "/etc/runtime/hookserver.d",
DefaultRuntimeType: "containerd",
CgroupRootDir: "/sys/fs/cgroup/",
ProcRootDir: "/proc/",
SysRootDir: "/sys/",
SysFSRootDir: "/sys/fs/",
VarRunRootDir: "/var/run/",
VarLibKubeletRootDir: "/var/lib/kubelet/",
RunRootDir: "/run/",
RuntimeHooksConfigDir: "/etc/runtime/hookserver.d",
DefaultRuntimeType: "containerd",
HAMICoreLibraryDirectoryPath: "/usr/local/vgpu/libvgpu.so",
}
defaultConfig := NewHostModeConfig()
assert.Equal(t, expectConfig, defaultConfig)
Expand Down

0 comments on commit a62dd49

Please sign in to comment.