This repository has been archived by the owner on Sep 19, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 143
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e6cd641
commit 2ef5ae0
Showing
45 changed files
with
12,243 additions
and
1,031 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Copyright 2018 The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package v1beta1 | ||
|
||
import ( | ||
common "github.com/kubeflow/tf-operator/pkg/apis/common/v1beta1" | ||
) | ||
|
||
const ( | ||
// EnvKubeflowNamespace is the name of the environment variable that holds | ||
// the kubeflow namespace specified by the user. | ||
EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE" | ||
|
||
// DefaultPortName is the name of the port used to communicate between the | ||
// Master and the workers. | ||
DefaultPortName = "pytorchjob-port" | ||
// DefaultContainerName is the name of the PyTorchJob container. | ||
DefaultContainerName = "pytorch" | ||
// DefaultPort is the default value of the port. | ||
DefaultPort = 23456 | ||
// DefaultRestartPolicy is the default RestartPolicy for PyTorchReplicaSpec. | ||
DefaultRestartPolicy = common.RestartPolicyOnFailure | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
// Copyright 2018 The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package v1beta1 | ||
|
||
import ( | ||
"strings" | ||
|
||
common "github.com/kubeflow/tf-operator/pkg/apis/common/v1beta1" | ||
"k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
) | ||
|
||
// Int32 returns a pointer to a freshly allocated int32 that holds v.
// It is a convenience for populating optional *int32 fields.
func Int32(v int32) *int32 {
	p := new(int32)
	*p = v
	return p
}
|
||
func addDefaultingFuncs(scheme *runtime.Scheme) error { | ||
return RegisterDefaults(scheme) | ||
} | ||
|
||
// setDefaultPort sets the default ports for pytorch container. | ||
func setDefaultPort(spec *v1.PodSpec) { | ||
index := 0 | ||
for i, container := range spec.Containers { | ||
if container.Name == DefaultContainerName { | ||
index = i | ||
break | ||
} | ||
} | ||
|
||
hasPyTorchJobPort := false | ||
for _, port := range spec.Containers[index].Ports { | ||
if port.Name == DefaultPortName { | ||
hasPyTorchJobPort = true | ||
break | ||
} | ||
} | ||
if !hasPyTorchJobPort { | ||
spec.Containers[index].Ports = append(spec.Containers[index].Ports, v1.ContainerPort{ | ||
Name: DefaultPortName, | ||
ContainerPort: DefaultPort, | ||
}) | ||
} | ||
} | ||
|
||
func setDefaultReplicas(spec *common.ReplicaSpec) { | ||
if spec.Replicas == nil { | ||
spec.Replicas = Int32(1) | ||
} | ||
if spec.RestartPolicy == "" { | ||
spec.RestartPolicy = DefaultRestartPolicy | ||
} | ||
} | ||
|
||
// setTypeNamesToCamelCase sets the name of all replica types from any case to correct case. | ||
func setTypeNamesToCamelCase(job *PyTorchJob) { | ||
setTypeNameToCamelCase(job, PyTorchReplicaTypeMaster) | ||
setTypeNameToCamelCase(job, PyTorchReplicaTypeWorker) | ||
} | ||
|
||
// setTypeNameToCamelCase sets the name of the replica type from any case to correct case. | ||
func setTypeNameToCamelCase(job *PyTorchJob, typ PyTorchReplicaType) { | ||
for t := range job.Spec.PyTorchReplicaSpecs { | ||
if strings.ToLower(string(t)) == strings.ToLower(string(typ)) && t != typ { | ||
spec := job.Spec.PyTorchReplicaSpecs[t] | ||
delete(job.Spec.PyTorchReplicaSpecs, t) | ||
job.Spec.PyTorchReplicaSpecs[typ] = spec | ||
return | ||
} | ||
} | ||
} | ||
|
||
// SetDefaults_PyTorchJob sets any unspecified values to defaults. | ||
func SetDefaults_PyTorchJob(job *PyTorchJob) { | ||
// Set default cleanpod policy to Running. | ||
if job.Spec.CleanPodPolicy == nil { | ||
policy := common.CleanPodPolicyNone | ||
job.Spec.CleanPodPolicy = &policy | ||
} | ||
|
||
// Update the key of PyTorchReplicaSpecs to camel case. | ||
setTypeNamesToCamelCase(job) | ||
|
||
for _, spec := range job.Spec.PyTorchReplicaSpecs { | ||
// Set default replicas to 1. | ||
setDefaultReplicas(spec) | ||
// Set default port to pytorch container. | ||
setDefaultPort(&spec.Template.Spec) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Copyright 2018 The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// +k8s:deepcopy-gen=package,register | ||
// +k8s:defaulter-gen=TypeMeta | ||
|
||
// Package v1beta1 is the v1beta1 version of the API. | ||
// +groupName=kubeflow.org | ||
package v1beta1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
// Copyright 2018 The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package v1beta1 | ||
|
||
import ( | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
"k8s.io/apimachinery/pkg/runtime/schema" | ||
) | ||
|
||
var ( | ||
// TODO: move SchemeBuilder with zz_generated.deepcopy.go to k8s.io/api. | ||
// localSchemeBuilder and AddToScheme will stay in k8s.io/kubernetes. | ||
// | ||
// SchemeBuilder collects the scheme registration functions of this package; | ||
// init adds addKnownTypes and addDefaultingFuncs to it via localSchemeBuilder. | ||
SchemeBuilder runtime.SchemeBuilder | ||
// localSchemeBuilder points at SchemeBuilder, so registrations made through | ||
// it are visible on SchemeBuilder. | ||
localSchemeBuilder = &SchemeBuilder | ||
// AddToScheme is the AddToScheme method of localSchemeBuilder. | ||
AddToScheme = localSchemeBuilder.AddToScheme | ||
) | ||
|
||
const ( | ||
// GroupName is the group name used in this package. | ||
GroupName = "kubeflow.org" | ||
// Kind is the kind name. | ||
Kind = "PyTorchJob" | ||
// GroupVersion is the API version of this package. | ||
GroupVersion = "v1beta1" | ||
// Plural is the plural name for PyTorchJob. | ||
Plural = "pytorchjobs" | ||
// Singular is the singular name for PyTorchJob. | ||
Singular = "pytorchjob" | ||
) | ||
|
||
var ( | ||
// SchemeGroupVersion is the group version (GroupName, GroupVersion) used to | ||
// register these objects. | ||
SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion} | ||
// SchemeGroupVersionKind is the GroupVersionKind of the resource, i.e. | ||
// SchemeGroupVersion combined with Kind. | ||
SchemeGroupVersionKind = SchemeGroupVersion.WithKind(Kind) | ||
) | ||
|
||
func init() { | ||
// We only register manually written functions here. The registration of the | ||
// generated functions takes place in the generated files. The separation | ||
// makes the code compile even when the generated files are missing. | ||
localSchemeBuilder.Register(addKnownTypes) | ||
localSchemeBuilder.Register(addDefaultingFuncs) | ||
} | ||
|
||
// Resource takes an unqualified resource and returns a Group-qualified GroupResource. | ||
func Resource(resource string) schema.GroupResource { | ||
return SchemeGroupVersion.WithResource(resource).GroupResource() | ||
} | ||
|
||
// addKnownTypes adds the set of types defined in this package to the supplied scheme. | ||
func addKnownTypes(scheme *runtime.Scheme) error { | ||
scheme.AddKnownTypes(SchemeGroupVersion, | ||
&PyTorchJob{}, | ||
&PyTorchJobList{}, | ||
) | ||
metav1.AddToGroupVersion(scheme, SchemeGroupVersion) | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// Copyright 2018 The Kubeflow Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package v1beta1 | ||
|
||
import ( | ||
common "github.com/kubeflow/tf-operator/pkg/apis/common/v1beta1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
) | ||
|
||
// +genclient | ||
// +genclient:noStatus | ||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object | ||
// +resource:path=pytorchjob | ||
|
||
// PyTorchJob represents the configuration of a PyTorchJob. | ||
type PyTorchJob struct { | ||
metav1.TypeMeta `json:",inline"` | ||
|
||
// Standard object's metadata. | ||
metav1.ObjectMeta `json:"metadata,omitempty"` | ||
|
||
// Specification of the desired behavior of the PyTorchJob. | ||
Spec PyTorchJobSpec `json:"spec,omitempty"` | ||
|
||
// Most recently observed status of the PyTorchJob. | ||
// This data may not be up to date. | ||
// Populated by the system. | ||
// Read-only. | ||
Status common.JobStatus `json:"status,omitempty"` | ||
} | ||
|
||
// PyTorchJobSpec is a desired state description of the PyTorchJob. | ||
type PyTorchJobSpec struct { | ||
// CleanPodPolicy defines the policy to kill pods after PyTorchJob is | ||
// succeeded. | ||
// Defaults to None: SetDefaults_PyTorchJob sets common.CleanPodPolicyNone | ||
// when this field is nil. | ||
CleanPodPolicy *common.CleanPodPolicy `json:"cleanPodPolicy,omitempty"` | ||
|
||
// TTLSecondsAfterFinished is the TTL to clean up pytorch-jobs (temporary | ||
// before kubernetes adds the cleanup controller). | ||
// It may take extra ReconcilePeriod seconds for the cleanup, since | ||
// reconcile gets called periodically. | ||
// Default to infinite. | ||
TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"` | ||
|
||
// PyTorchReplicaSpecs is a map from PyTorchReplicaType to the replica spec | ||
// that specifies the PyTorch replicas to run. | ||
// For example, | ||
// { | ||
// "Master": PyTorchReplicaSpec, | ||
// "Worker": PyTorchReplicaSpec, | ||
// } | ||
PyTorchReplicaSpecs map[PyTorchReplicaType]*common.ReplicaSpec `json:"pytorchReplicaSpecs"` | ||
} | ||
|
||
// PyTorchReplicaType is the type for PyTorchReplica. It is used as the key of | ||
// PyTorchJobSpec.PyTorchReplicaSpecs. | ||
type PyTorchReplicaType common.ReplicaType | ||
|
||
const ( | ||
// PyTorchReplicaTypeMaster is the type for the master of distributed PyTorch. | ||
PyTorchReplicaTypeMaster PyTorchReplicaType = "Master" | ||
|
||
// PyTorchReplicaTypeWorker is the type for workers of distributed PyTorch. | ||
PyTorchReplicaTypeWorker PyTorchReplicaType = "Worker" | ||
) | ||
|
||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object | ||
// +resource:path=pytorchjobs | ||
|
||
// PyTorchJobList is a list of PyTorchJobs. | ||
type PyTorchJobList struct { | ||
metav1.TypeMeta `json:",inline"` | ||
|
||
// Standard list metadata. | ||
metav1.ListMeta `json:"metadata,omitempty"` | ||
|
||
// Items holds the PyTorchJobs contained in the list. | ||
Items []PyTorchJob `json:"items"` | ||
} |
Oops, something went wrong.