Skip to content
This repository has been archived by the owner on Jan 11, 2023. It is now read-only.

ensure N series clusters get aks-docker-engine #4221

Merged
merged 7 commits into from
Nov 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 0 additions & 38 deletions pkg/acsengine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,44 +363,6 @@ func getDCOSDefaultRepositoryURL(orchestratorType string, orchestratorVersion st
return ""
}

// isNSeriesSKU reports whether the VMSize of the given agent pool profile is
// one of the NVIDIA GPU VM SKUs enumerated below. The comparison is an exact,
// case-sensitive string match; any other value (including "") yields false.
func isNSeriesSKU(profile *api.AgentPoolProfile) bool {
	/* If a new GPU sku becomes available, add it to the matching case below, but only if you have a confirmation
	   that we have an agreement with NVIDIA for this specific gpu.
	*/
	// A switch over constant strings avoids allocating a map on every call
	// and the redundant second lookup the map-based version performed.
	switch profile.VMSize {
	case "Standard_NC6", "Standard_NC12", "Standard_NC24", "Standard_NC24r": // K80
		return true
	case "Standard_NV6", "Standard_NV12", "Standard_NV24", "Standard_NV24r": // M60
		return true
	case "Standard_ND6s", "Standard_ND12s", "Standard_ND24s", "Standard_ND24rs": // P40
		return true
	case "Standard_NC6s_v2", "Standard_NC12s_v2", "Standard_NC24s_v2", "Standard_NC24rs_v2": // P100
		return true
	case "Standard_NC6s_v3", "Standard_NC12s_v3", "Standard_NC24s_v3", "Standard_NC24rs_v3": // V100
		return true
	}

	return false
}

func getDCOSCustomDataPublicIPStr(orchestratorType string, masterCount int) string {
if orchestratorType == api.DCOS {
var buf bytes.Buffer
Expand Down
9 changes: 5 additions & 4 deletions pkg/acsengine/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/Azure/acs-engine/pkg/acsengine/transform"
"github.com/Azure/acs-engine/pkg/api"
"github.com/Azure/acs-engine/pkg/api/common"
"github.com/Azure/acs-engine/pkg/api/v20160330"
"github.com/Azure/acs-engine/pkg/api/vlabs"
"github.com/Azure/acs-engine/pkg/i18n"
Expand Down Expand Up @@ -497,14 +498,14 @@ func TestIsNSeriesSKU(t *testing.T) {
}

for _, sku := range validSkus {
if !isNSeriesSKU(&api.AgentPoolProfile{VMSize: sku}) {
t.Fatalf("Expected isNSeriesSKU(%s) to be true", sku)
if !common.IsNvidiaEnabledSKU(sku) {
t.Fatalf("Expected common.IsNvidiaEnabledSKU(%s) to be true", sku)
}
}

for _, sku := range invalidSkus {
if isNSeriesSKU(&api.AgentPoolProfile{VMSize: sku}) {
t.Fatalf("Expected isNSeriesSKU(%s) to be false", sku)
if common.IsNvidiaEnabledSKU(sku) {
t.Fatalf("Expected common.IsNvidiaEnabledSKU(%s) to be false", sku)
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/acsengine/template_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat
storagetier, _ := getStorageAccountType(profile.VMSize)
buf.WriteString(fmt.Sprintf(",storageprofile=managed,storagetier=%s", storagetier))
}
if isNSeriesSKU(profile) {
if common.IsNvidiaEnabledSKU(profile.VMSize) {
accelerator := "nvidia"
buf.WriteString(fmt.Sprintf(",accelerator=%s", accelerator))
}
Expand Down Expand Up @@ -786,7 +786,7 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat
return cs.Properties.IsNVIDIADevicePluginEnabled()
},
"IsNSeriesSKU": func(profile *api.AgentPoolProfile) bool {
return isNSeriesSKU(profile)
return common.IsNvidiaEnabledSKU(profile.VMSize)
},
"UseSinglePlacementGroup": func(profile *api.AgentPoolProfile) bool {
return *profile.SinglePlacementGroup
Expand Down
2 changes: 1 addition & 1 deletion pkg/api/addons.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ func (cs *ContainerService) setAddonsConfig() {

defaultNVIDIADevicePluginAddonsConfig := KubernetesAddon{
Name: NVIDIADevicePluginAddonName,
Enabled: helpers.PointerToBool(IsNSeriesSKU(cs.Properties) && common.IsKubernetesVersionGe(o.OrchestratorVersion, "1.10.0")),
Enabled: helpers.PointerToBool(cs.Properties.HasNSeriesSKU() && common.IsKubernetesVersionGe(o.OrchestratorVersion, "1.10.0")),
Containers: []KubernetesContainerSpec{
{
Name: NVIDIADevicePluginAddonName,
Expand Down
149 changes: 149 additions & 0 deletions pkg/api/common/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,152 @@ func ValidateDNSPrefix(dnsName string) error {
}
return nil
}

// IsNvidiaEnabledSKU determines if a VM SKU has nvidia driver support.
//
// vmSize is compared exactly (case-sensitive) against the list of GPU SKUs
// below; any other value, including the empty string, returns false.
func IsNvidiaEnabledSKU(vmSize string) bool {
	/* If a new GPU sku becomes available, add it to the matching case below, but only if you have a confirmation
	   that we have an agreement with NVIDIA for this specific gpu.
	*/
	// A switch over constant strings avoids allocating a map on every call
	// and the redundant second lookup the map-based version performed.
	switch vmSize {
	case "Standard_NC6", "Standard_NC12", "Standard_NC24", "Standard_NC24r": // K80
		return true
	case "Standard_NV6", "Standard_NV12", "Standard_NV24", "Standard_NV24r": // M60
		return true
	case "Standard_ND6s", "Standard_ND12s", "Standard_ND24s", "Standard_ND24rs": // P40
		return true
	case "Standard_NC6s_v2", "Standard_NC12s_v2", "Standard_NC24s_v2", "Standard_NC24rs_v2": // P100
		return true
	case "Standard_NC6s_v3", "Standard_NC12s_v3", "Standard_NC24s_v3", "Standard_NC24rs_v3": // V100
		return true
	}

	return false
}

// GetNSeriesVMCasesForTesting returns a slice of test cases, each pairing a VM
// SKU name (VMSKU) with whether we expect it to be nvidia-enabled (Expected).
// Every SKU appears exactly once.
func GetNSeriesVMCasesForTesting() []struct {
	VMSKU    string
	Expected bool
} {
	return []struct {
		VMSKU    string
		Expected bool
	}{
		// SKUs expected to be nvidia-enabled.
		{VMSKU: "Standard_NC6", Expected: true},
		{VMSKU: "Standard_NC12", Expected: true},
		{VMSKU: "Standard_NC24", Expected: true},
		{VMSKU: "Standard_NC24r", Expected: true},
		{VMSKU: "Standard_NV6", Expected: true},
		{VMSKU: "Standard_NV12", Expected: true},
		{VMSKU: "Standard_NV24", Expected: true},
		{VMSKU: "Standard_NV24r", Expected: true},
		{VMSKU: "Standard_ND6s", Expected: true},
		{VMSKU: "Standard_ND12s", Expected: true},
		{VMSKU: "Standard_ND24s", Expected: true},
		{VMSKU: "Standard_ND24rs", Expected: true},
		{VMSKU: "Standard_NC6s_v2", Expected: true},
		{VMSKU: "Standard_NC12s_v2", Expected: true},
		{VMSKU: "Standard_NC24s_v2", Expected: true},
		{VMSKU: "Standard_NC24rs_v2", Expected: true},
		{VMSKU: "Standard_NC6s_v3", Expected: true},
		{VMSKU: "Standard_NC12s_v3", Expected: true},
		{VMSKU: "Standard_NC24s_v3", Expected: true},
		{VMSKU: "Standard_NC24rs_v3", Expected: true},
		// Negative cases: a non-GPU SKU, an unknown name, and the empty string.
		{VMSKU: "Standard_D2_v2", Expected: false},
		{VMSKU: "gobledygook", Expected: false},
		{VMSKU: "", Expected: false},
	}
}
11 changes: 11 additions & 0 deletions pkg/api/common/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,14 @@ func TestValidateDNSPrefix(t *testing.T) {
}
}
}

// TestIsNvidiaEnabledSKU checks IsNvidiaEnabledSKU against every case returned
// by GetNSeriesVMCasesForTesting.
func TestIsNvidiaEnabledSKU(t *testing.T) {
	for _, c := range GetNSeriesVMCasesForTesting() {
		got := IsNvidiaEnabledSKU(c.VMSKU)
		if got != c.Expected {
			// Errorf (not Fatalf) so every mismatching SKU is reported in one run;
			// %q keeps the empty-string SKU case visible in the message.
			t.Errorf("expected IsNvidiaEnabledSKU(%q) to return %t, but instead got %t", c.VMSKU, c.Expected, got)
		}
	}
}
30 changes: 20 additions & 10 deletions pkg/api/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,19 +438,29 @@ func (p *Properties) setAgentProfileDefaults(isUpgrade, isScale bool) {
profile.AcceleratedNetworkingEnabledWindows = helpers.PointerToBool(DefaultAcceleratedNetworkingWindowsEnabled)
}

if profile.Distro == "" && profile.OSType != Windows {
if p.OrchestratorProfile.IsKubernetes() {
if profile.OSDiskSizeGB != 0 && profile.OSDiskSizeGB < VHDDiskSizeAKS {
profile.Distro = Ubuntu
} else {
if IsNSeriesSKU(p) {
profile.Distro = AKSDockerEngine
if profile.OSType != Windows {
if profile.Distro == "" {
if p.OrchestratorProfile.IsKubernetes() {
if profile.OSDiskSizeGB != 0 && profile.OSDiskSizeGB < VHDDiskSizeAKS {
profile.Distro = Ubuntu
} else {
profile.Distro = AKS
if profile.IsNSeriesSKU() {
profile.Distro = AKSDockerEngine
} else {
profile.Distro = AKS
}
}
} else if !p.OrchestratorProfile.IsOpenShift() {
profile.Distro = Ubuntu
}
// Ensure distro is set properly for N Series SKUs, because
// (1) At present, "aks-docker-engine" and "ubuntu" are the only working distro bases for running GPU workloads on N Series SKUs
// (2) Previous versions of acs-engine had working implementations using the "aks" distro value,
// so we need to hard override it in order to produce a working cluster in upgrade/scale contexts
} else if p.OrchestratorProfile.IsKubernetes() && (isUpgrade || isScale) && profile.IsNSeriesSKU() {
if profile.Distro == AKS {
profile.Distro = AKSDockerEngine
}
} else if !p.OrchestratorProfile.IsOpenShift() {
profile.Distro = Ubuntu
}
}

Expand Down
Loading