Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Kubernetes arbitrary node selector #747

Merged
merged 7 commits into from
May 3, 2023
30 changes: 21 additions & 9 deletions iterative/kubernetes/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ func ResourceMachineCreate(ctx context.Context, d *terraform_schema.ResourceData
}

// Define the accelerator settings (e.g. GPU type, model, count)
jobNodeSelector := map[string]string{}
jobAccelerator := instanceType["accelerator"]["model"]
jobGPUType := instanceType["accelerator"]["type"]
jobGPUCount := instanceType["accelerator"]["count"]
Expand All @@ -63,6 +62,27 @@ func ResourceMachineCreate(ctx context.Context, d *terraform_schema.ResourceData
if diskAmount := d.Get("instance_hdd_size").(int); diskAmount > 0 {
jobLimits[kubernetes_core.ResourceName("ephemeral-storage")] = kubernetes_resource.MustParse(strconv.Itoa(diskAmount) + "G")
}
if jobGPUCount > "0" && jobGPUType != "" {
jobLimits[kubernetes_core.ResourceName(jobGPUType)] = kubernetes_resource.MustParse(jobGPUCount)
}

// Get the node selector defined by the user
kubernetesNodeSelector := d.Get("kubernetes_node_selector").(map[string]interface{})

// If the user defined no selector, default to an "accelerator" key whose value is inferred from the instance type
if len(kubernetesNodeSelector) == 0 {
kubernetesNodeSelector["accelerator"] = "infer"
}

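// Build the job's node selector: explicit values are copied as-is, while "infer" is replaced by the
// accelerator model when GPUs are requested (and the entry is omitted otherwise)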
jobNodeSelector := map[string]string{}
for selector, value := range kubernetesNodeSelector {
if value.(string) != "infer" {
jobNodeSelector[selector] = value.(string)
} else if jobGPUCount > "0" && jobAccelerator != "" {
jobNodeSelector[selector] = jobAccelerator
}
}

// Use the default CML Docker image unless specified otherwise.
jobImageName := jobName
Expand All @@ -77,14 +97,6 @@ func ResourceMachineCreate(ctx context.Context, d *terraform_schema.ResourceData
return err
}

// If the resource requires GPU provisioning, determine how many GPUs and the kind of GPU it needs.
if jobGPUCount > "0" {
jobLimits[kubernetes_core.ResourceName(jobGPUType)] = kubernetes_resource.MustParse(jobGPUCount)
if jobAccelerator != "" {
jobNodeSelector = map[string]string{"accelerator": jobAccelerator}
}
}

// Lookup service account if set.
svcAccount, svcTokenAutomount, err := getServiceAccount(ctx, conn, namespace, d.Get("instance_permission_set").(string))
if err != nil {
Expand Down
8 changes: 8 additions & 0 deletions iterative/resource_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,14 @@ func machineSchema() *map[string]*schema.Schema {
Optional: true,
Default: "",
},
"kubernetes_node_selector": &schema.Schema{
Type: schema.TypeMap,
ForceNew: true,
Optional: true,
Elem: &schema.Schema{
Type: schema.TypeString,
},
},
"metadata": &schema.Schema{
Type: schema.TypeMap,
ForceNew: true,
Expand Down
8 changes: 8 additions & 0 deletions iterative/resource_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,14 @@ func resourceRunner() *schema.Resource {
Optional: true,
Default: "",
},
"kubernetes_node_selector": &schema.Schema{
Type: schema.TypeMap,
ForceNew: true,
Optional: true,
Elem: &schema.Schema{
Type: schema.TypeString,
},
},
"metadata": &schema.Schema{
Type: schema.TypeMap,
ForceNew: true,
Expand Down