Skip to content
This repository has been archived by the owner on Sep 19, 2022. It is now read-only.

Commit

Permalink
Pytorch operator v1beta1 APIs (#93)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnugeorge authored and k8s-ci-robot committed Nov 7, 2018
1 parent e6cd641 commit 2ef5ae0
Show file tree
Hide file tree
Showing 45 changed files with 12,243 additions and 1,031 deletions.
217 changes: 185 additions & 32 deletions Gopkg.lock

Large diffs are not rendered by default.

4 changes: 0 additions & 4 deletions Gopkg.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ required = [
branch = "master"
name = "github.com/onrik/logrus"

[[constraint]]
name = "github.com/stretchr/testify"
version = "1.2.2"

[[constraint]]
name = "github.com/sirupsen/logrus"
version = "~1.0.4"
Expand Down
8 changes: 7 additions & 1 deletion hack/update-codegen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ CODEGEN_PKG=${CODEGEN_PKG:-$(cd ${SCRIPT_ROOT}; ls -d -1 ./vendor/k8s.io/code-ge
# instead of the $GOPATH directly. For normal projects this can be dropped.
${CODEGEN_PKG}/generate-groups.sh "defaulter,deepcopy,client,informer,lister" \
github.com/kubeflow/pytorch-operator/pkg/client github.com/kubeflow/pytorch-operator/pkg/apis \
pytorch:v1alpha1,v1alpha2 \
pytorch:v1alpha1,v1alpha2,v1beta1 \
--go-header-file ${SCRIPT_ROOT}/hack/boilerplate/boilerplate.go.txt

echo "Generating defaulters for pytorch v1alpha1"
Expand All @@ -46,3 +46,9 @@ echo "Generating defaulters for pytorch v1alpha2"
--go-header-file ./hack/../hack/boilerplate/boilerplate.go.txt \
--output-package github.com/kubeflow/pytorch-operator/pkg/apis/pytorch/v1alpha2


echo "Generating defaulters for pytorch v1beta1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/pytorch-operator/pkg/apis/pytorch/v1beta1 \
-O zz_generated.defaults \
--go-header-file ./hack/../hack/boilerplate/boilerplate.go.txt \
--output-package github.com/kubeflow/pytorch-operator/pkg/apis/pytorch/v1beta1
34 changes: 34 additions & 0 deletions pkg/apis/pytorch/v1beta1/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1beta1

import (
common "github.com/kubeflow/tf-operator/pkg/apis/common/v1beta1"
)

const (
// EnvKubeflowNamespace is the name of the environment variable that carries
// the kubeflow namespace specified by the user.
EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE"

// DefaultPortName is the name of the port used to communicate between Master
// and workers. Defaulting adds a port with this name to the PyTorch container
// when the container does not already declare one.
DefaultPortName = "pytorchjob-port"
// DefaultContainerName is the name of the PyTorchJob container; defaulting
// looks for a container with this name when adding the default port.
DefaultContainerName = "pytorch"
// DefaultPort is the container port number used for the port named
// DefaultPortName.
DefaultPort = 23456
// DefaultRestartPolicy is the RestartPolicy applied to a replica spec whose
// RestartPolicy is left empty.
DefaultRestartPolicy = common.RestartPolicyOnFailure
)
104 changes: 104 additions & 0 deletions pkg/apis/pytorch/v1beta1/defaults.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1beta1

import (
"strings"

common "github.com/kubeflow/tf-operator/pkg/apis/common/v1beta1"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// Int32 copies v into newly allocated storage and returns the address of that
// copy, so each call yields an independent *int32.
func Int32(v int32) *int32 {
	p := new(int32)
	*p = v
	return p
}

// addDefaultingFuncs registers this package's defaulting functions with scheme
// by delegating to RegisterDefaults and returning its error unchanged.
// NOTE(review): RegisterDefaults is not defined in this file; presumably it
// lives in the defaulter-gen output (zz_generated.defaults) — confirm.
func addDefaultingFuncs(scheme *runtime.Scheme) error {
return RegisterDefaults(scheme)
}

// setDefaultPort ensures the PyTorch container in spec exposes the default
// PyTorchJob port.
//
// The target container is the first one named DefaultContainerName; when no
// container carries that name, the first container in the pod is used. A port
// named DefaultPortName with number DefaultPort is appended only if the target
// container does not already declare a port with that name.
//
// A pod spec without any containers is left untouched: indexing into an empty
// Containers slice would otherwise panic.
func setDefaultPort(spec *v1.PodSpec) {
	if len(spec.Containers) == 0 {
		return
	}

	// Fall back to the first container when none is named DefaultContainerName.
	index := 0
	for i := range spec.Containers {
		if spec.Containers[i].Name == DefaultContainerName {
			index = i
			break
		}
	}

	// Work through a pointer so the append below updates the pod spec itself.
	target := &spec.Containers[index]
	for _, port := range target.Ports {
		if port.Name == DefaultPortName {
			// The port is already declared; nothing to add.
			return
		}
	}
	target.Ports = append(target.Ports, v1.ContainerPort{
		Name:          DefaultPortName,
		ContainerPort: DefaultPort,
	})
}

// setDefaultReplicas fills in the fields of spec that were left unset: an
// empty RestartPolicy becomes DefaultRestartPolicy and a nil Replicas becomes
// a pointer to 1. Values that are already set are not modified.
func setDefaultReplicas(spec *common.ReplicaSpec) {
	if len(spec.RestartPolicy) == 0 {
		spec.RestartPolicy = DefaultRestartPolicy
	}
	if spec.Replicas != nil {
		return
	}
	one := int32(1)
	spec.Replicas = &one
}

// setTypeNamesToCamelCase rewrites the replica type keys of job so that each
// known replica type (Master, then Worker) uses its canonical spelling,
// whatever case the key was originally written in.
func setTypeNamesToCamelCase(job *PyTorchJob) {
	canonical := []PyTorchReplicaType{
		PyTorchReplicaTypeMaster,
		PyTorchReplicaTypeWorker,
	}
	for _, typ := range canonical {
		setTypeNameToCamelCase(job, typ)
	}
}

// setTypeNameToCamelCase renames at most one replica spec key of job to typ.
//
// It looks for a key that equals typ when both are lowercased but is not
// already spelled exactly like typ. The first such key found is removed and
// its spec is stored under typ, replacing any spec already stored there.
func setTypeNameToCamelCase(job *PyTorchJob, typ PyTorchReplicaType) {
	want := strings.ToLower(string(typ))
	for key, spec := range job.Spec.PyTorchReplicaSpecs {
		if key == typ || strings.ToLower(string(key)) != want {
			continue
		}
		// Returning right after the mutation means iteration never continues
		// over the modified map.
		delete(job.Spec.PyTorchReplicaSpecs, key)
		job.Spec.PyTorchReplicaSpecs[typ] = spec
		return
	}
}

// SetDefaults_PyTorchJob sets any unspecified values of job to their defaults.
//
// It performs three steps:
//   - a nil CleanPodPolicy is set to CleanPodPolicyNone;
//   - replica type keys are normalized to their canonical spelling;
//   - every non-nil replica spec gets default replicas, restart policy and the
//     default PyTorchJob port.
//
// Nil replica specs (for example a JSON null map value) are skipped rather
// than dereferenced, so defaulting never panics on them.
func SetDefaults_PyTorchJob(job *PyTorchJob) {
	// Set default cleanpod policy to None.
	if job.Spec.CleanPodPolicy == nil {
		policy := common.CleanPodPolicyNone
		job.Spec.CleanPodPolicy = &policy
	}

	// Update the key of PyTorchReplicaSpecs to camel case.
	setTypeNamesToCamelCase(job)

	for _, spec := range job.Spec.PyTorchReplicaSpecs {
		if spec == nil {
			// There is nothing to default on a missing spec.
			continue
		}
		// Set default replicas to 1 and the default restart policy.
		setDefaultReplicas(spec)
		// Set default port to pytorch container.
		setDefaultPort(&spec.Template.Spec)
	}
}
20 changes: 20 additions & 0 deletions pkg/apis/pytorch/v1beta1/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +k8s:deepcopy-gen=package,register
// +k8s:defaulter-gen=TypeMeta

// Package v1beta1 is the v1beta1 version of the API.
// +groupName=kubeflow.org
package v1beta1
72 changes: 72 additions & 0 deletions pkg/apis/pytorch/v1beta1/register.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1beta1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)

var (
// TODO: move SchemeBuilder with zz_generated.deepcopy.go to k8s.io/api.
// localSchemeBuilder and AddToScheme will stay in k8s.io/kubernetes.
// NOTE(review): the TODO above names k8s.io packages rather than this
// repository; it looks inherited from upstream — confirm it still applies.

// SchemeBuilder collects the scheme functions registered by this package.
SchemeBuilder runtime.SchemeBuilder
// localSchemeBuilder points at SchemeBuilder; init registers addKnownTypes
// and addDefaultingFuncs through it.
localSchemeBuilder = &SchemeBuilder
// AddToScheme is the AddToScheme method of localSchemeBuilder.
AddToScheme = localSchemeBuilder.AddToScheme
)

const (
// GroupName is the API group name used in this package.
GroupName = "kubeflow.org"
// Kind is the kind name of the PyTorchJob resource.
Kind = "PyTorchJob"
// GroupVersion is the API version served by this package.
GroupVersion = "v1beta1"
// Plural is the plural resource name for PyTorchJob.
Plural = "pytorchjobs"
// Singular is the singular resource name for PyTorchJob.
Singular = "pytorchjob"
)

var (
// SchemeGroupVersion is the group version used to register these objects;
// it combines GroupName and GroupVersion.
SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion}
// SchemeGroupVersionKind is the GroupVersionKind of the PyTorchJob resource,
// i.e. SchemeGroupVersion qualified with Kind.
SchemeGroupVersionKind = SchemeGroupVersion.WithKind(Kind)
)

// init adds the manually written scheme functions to localSchemeBuilder, in
// order: addKnownTypes first, then addDefaultingFuncs.
//
// Only hand-written functions are registered here. Generated functions are
// registered from the generated files; the separation is intended to keep the
// package compiling even when those files are missing.
func init() {
	localSchemeBuilder.Register(addKnownTypes, addDefaultingFuncs)
}

// Resource qualifies an unqualified resource name with the API group of
// SchemeGroupVersion and returns the resulting GroupResource.
func Resource(resource string) schema.GroupResource {
	return schema.GroupResource{
		Group:    SchemeGroupVersion.Group,
		Resource: resource,
	}
}

// addKnownTypes adds the set of types defined in this package to the supplied
// scheme: PyTorchJob and PyTorchJobList are registered under
// SchemeGroupVersion, and metav1.AddToGroupVersion is then called for the same
// group version. It always returns nil.
func addKnownTypes(scheme *runtime.Scheme) error {
scheme.AddKnownTypes(SchemeGroupVersion,
&PyTorchJob{},
&PyTorchJobList{},
)
metav1.AddToGroupVersion(scheme, SchemeGroupVersion)
return nil
}
91 changes: 91 additions & 0 deletions pkg/apis/pytorch/v1beta1/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1beta1

import (
common "github.com/kubeflow/tf-operator/pkg/apis/common/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// +genclient
// +genclient:noStatus
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +resource:path=pytorchjob

// PyTorchJob represents the configuration of a PyTorchJob: its type and object
// metadata, the desired specification, and the most recently observed status.
type PyTorchJob struct {
// Kind and API version of the object, inlined into the JSON representation.
metav1.TypeMeta `json:",inline"`

// Standard object's metadata.
metav1.ObjectMeta `json:"metadata,omitempty"`

// Specification of the desired behavior of the PyTorchJob.
Spec PyTorchJobSpec `json:"spec,omitempty"`

// Most recently observed status of the PyTorchJob.
// This data may not be up to date.
// Populated by the system.
// Read-only.
Status common.JobStatus `json:"status,omitempty"`
}

// PyTorchJobSpec is a desired state description of the PyTorchJob.
type PyTorchJobSpec struct {
// CleanPodPolicy defines the policy to kill pods after PyTorchJob is
// succeeded.
// Defaults to None: SetDefaults_PyTorchJob sets CleanPodPolicyNone when this
// field is nil.
CleanPodPolicy *common.CleanPodPolicy `json:"cleanPodPolicy,omitempty"`

// TTLSecondsAfterFinished is the TTL to clean up pytorch-jobs (temporary
// before kubernetes adds the cleanup controller).
// It may take extra ReconcilePeriod seconds for the cleanup, since
// reconcile gets called periodically.
// Default to infinite.
TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"`

// PyTorchReplicaSpecs maps each PyTorchReplicaType to the common.ReplicaSpec
// that specifies the PyTorch replicas to run.
// For example,
// {
// "Master": ReplicaSpec,
// "Worker": ReplicaSpec,
// }
// Defaulting renames keys that differ from a known replica type only by
// letter case to the canonical spelling.
PyTorchReplicaSpecs map[PyTorchReplicaType]*common.ReplicaSpec `json:"pytorchReplicaSpecs"`
}

// PyTorchReplicaType is the type for PyTorchReplica. It is used as the key of
// PyTorchJobSpec.PyTorchReplicaSpecs.
type PyTorchReplicaType common.ReplicaType

const (
// PyTorchReplicaTypeMaster is the type for the master of distributed PyTorch.
PyTorchReplicaTypeMaster PyTorchReplicaType = "Master"

// PyTorchReplicaTypeWorker is the type for workers of distributed PyTorch.
PyTorchReplicaTypeWorker PyTorchReplicaType = "Worker"
)

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +resource:path=pytorchjobs

// PyTorchJobList is a list of PyTorchJobs.
type PyTorchJobList struct {
// Kind and API version of the list, inlined into the JSON representation.
metav1.TypeMeta `json:",inline"`

// Standard list metadata.
metav1.ListMeta `json:"metadata,omitempty"`

// List of PyTorchJobs.
Items []PyTorchJob `json:"items"`
}
Loading

0 comments on commit 2ef5ae0

Please sign in to comment.