Skip to content

Commit

Permalink
Upgrade to apiextensions.k8s.io/v1
Browse files Browse the repository at this point in the history
Define all fields but the PodTemplateSpec. Remove unused API fields.

Ensure Pod created condition is applied to the Job copy.
  • Loading branch information
alculquicondor committed Jul 19, 2021
1 parent 70a866e commit e679e1b
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 152 deletions.
197 changes: 107 additions & 90 deletions manifests/base/crd.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: apiextensions.k8s.io/v1beta1
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: mpijobs.kubeflow.org
Expand All @@ -12,164 +12,181 @@ spec:
shortNames:
- mj
- mpij
subresources:
status: {}
versions:
- name: v1alpha1
served: false
storage: false
schema:
openAPIV3Schema:
properties:
spec:
title: The MPIJob spec
description: Only one of gpus, processingUnits, or replicas should be specified
oneOf:
- properties:
gpus:
title: Total number of GPUs
description: Valid values are 1, 2, 4, or any multiple of 8
oneOf:
- type: integer
enum:
- 1
- 2
- 4
- type: integer
multipleOf: 8
minimum: 8
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
gpusPerNode:
title: The maximum number of GPUs available per node
description: Defaults to the number of GPUs per worker
type: integer
minimum: 1
required:
- gpus
- properties:
processingUnits:
title: Total number of processing units
description: Valid values are 1, 2, 4, or any multiple of 8
oneOf:
- type: integer
enum:
- 1
- 2
- 4
- type: integer
multipleOf: 8
minimum: 8
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingUnitsPerNode:
title: The maximum number of processing units available per node
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingResourceType:
title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'
description: Defaults to 'nvidia.com/gpu'
type: string
enum:
- nvidia.com/gpu
- cpu
required:
- processingUnits
- properties:
replicas:
title: Total number of replicas
description: The processing resource limit should be specified for each replica
type: integer
minimum: 1
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingResourceType:
title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'
description: Defaults to 'nvidia.com/gpu'
type: string
enum:
- nvidia.com/gpu
- cpu
required:
- replicas
- name: v1alpha2
served: true
storage: false
schema:
openAPIV3Schema:
type: object
properties:
spec:
x-kubernetes-preserve-unknown-fields: true
type: object
properties:
slotsPerWorker:
type: integer
minimum: 1
mpiReplicaSpecs:
type: object
properties:
Launcher:
x-kubernetes-preserve-unknown-fields: true
type: object
properties:
replicas:
type: integer
minimum: 1
maximum: 1
Worker:
x-kubernetes-preserve-unknown-fields: true
type: object
properties:
replicas:
type: integer
minimum: 1
status:
x-kubernetes-preserve-unknown-fields: true
type: object
subresources:
status: {}
- name: v1
served: true
storage: false
schema:
openAPIV3Schema:
type: object
properties:
spec:
x-kubernetes-preserve-unknown-fields: true
type: object
properties:
slotsPerWorker:
type: integer
minimum: 1
mpiReplicaSpecs:
type: object
properties:
Launcher:
x-kubernetes-preserve-unknown-fields: true
type: object
properties:
replicas:
type: integer
minimum: 1
maximum: 1
Worker:
x-kubernetes-preserve-unknown-fields: true
type: object
properties:
replicas:
type: integer
minimum: 1
status:
x-kubernetes-preserve-unknown-fields: true
type: object
subresources:
status: {}
- name: v2beta1
served: true
storage: true
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
properties:
slotsPerWorker:
type: integer
minimum: 1
cleanPodPolicy:
type: string
enum: ["None", "Running", "All"]
description: "Defines which Pods must be deleted after the Job completes"
mpiReplicaSpecs:
type: object
properties:
Launcher:
type: object
properties:
replicas:
type: integer
minimum: 1
maximum: 1
template:
x-kubernetes-preserve-unknown-fields: true
type: object
restartPolicy:
type: string
enum: ["Never", "OnFailure", "Always"]
Worker:
type: object
properties:
replicas:
type: integer
minimum: 1
template:
x-kubernetes-preserve-unknown-fields: true
type: object
restartPolicy:
type: string
enum: ["Never", "OnFailure", "Always"]
required:
- Launcher
status:
type: object
properties:
conditions:
type: array
items:
type: object
properties:
type:
type: string
enum: ["Created", "Running", "Restarting", "Succeeded", "Failed"]
status:
type: string
enum: ["True", "False", "Unknown"]
reason:
type: string
message:
type: string
lastUpdateTime:
type: string
format: date-time
lastTransitionTime:
type: string
format: date-time
replicaStatuses:
type: object
properties:
Launcher:
type: object
properties:
active:
type: integer
succeeded:
type: integer
failed:
type: integer
Worker:
type: object
properties:
active:
type: integer
succeeded:
type: integer
failed:
type: integer
startTime:
type: string
format: date-time
completionTime:
type: string
format: date-time
lastReconcileTime:
type: string
format: date-time
subresources:
status: {}
9 changes: 0 additions & 9 deletions v2/pkg/apis/kubeflow/v2beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,6 @@ type MPIJobSpec struct {
// `MPIReplicaSpecs` maps each `MPIReplicaType` to the `ReplicaSpec` that
// specifies the MPI replicas to run.
MPIReplicaSpecs map[MPIReplicaType]*common.ReplicaSpec `json:"mpiReplicaSpecs"`

// MainContainer specifies the name of the main container that
// executes the MPI code.
MainContainer string `json:"mainContainer,omitempty"`

// `RunPolicy` encapsulates various runtime policies of the distributed training
// job, for example how to clean up resources and how long the job can stay
// active.
RunPolicy *common.RunPolicy `json:"runPolicy,omitempty"`
}

// MPIReplicaType is the type for MPIReplica.
Expand Down
5 changes: 0 additions & 5 deletions v2/pkg/apis/kubeflow/v2beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions v2/pkg/apis/kubeflow/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ func validateWorkerReplicaSpec(spec *common.ReplicaSpec, path *field.Path) field
return errs
}
errs = append(errs, validateReplicaSpec(spec, path)...)
if spec.Replicas != nil {
errs = append(errs, apivalidation.ValidateNonnegativeField(int64(*spec.Replicas), path.Child("replicas"))...)
if spec.Replicas != nil && *spec.Replicas <= 0 {
errs = append(errs, field.Invalid(path.Child("replicas"), *spec.Replicas, "must be greater than or equal to 1"))
}
return errs
}
Expand Down
36 changes: 36 additions & 0 deletions v2/pkg/apis/kubeflow/validation/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,42 @@ func TestValidateMPIJob(t *testing.T) {
},
},
},
"invalid replica counts": {
job: v2beta1.MPIJob{
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {
Replicas: newInt32(2),
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
},
v2beta1.MPIReplicaTypeWorker: {
Replicas: newInt32(0),
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
},
},
},
},
wantErrs: field.ErrorList{
{
Type: field.ErrorTypeInvalid,
Field: "spec.mpiReplicaSpecs[Launcher].replicas",
},
{
Type: field.ErrorTypeInvalid,
Field: "spec.mpiReplicaSpecs[Worker].replicas",
},
},
},
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
Expand Down
Loading

0 comments on commit e679e1b

Please sign in to comment.