From 1f24f717eeb3ba03d118fc9c4b26b1bc5b4d10f9 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 19 Jun 2017 19:25:11 +0800 Subject: [PATCH 1/5] Add secret resource --- go/cmd/paddlecloud/paddlecloud.go | 10 ++- go/paddlecloud/delete.go | 53 +++++++++++++ go/paddlecloud/get.go | 34 ++++++++- go/paddlecloud/registry.go | 120 ++++++++++++++++++++++++++++++ go/paddlecloud/submit.go | 4 + paddlecloud/paddlecloud/urls.py | 1 + paddlecloud/paddlejob/__init__.py | 3 +- paddlecloud/paddlejob/registry.py | 98 ++++++++++++++++++++++++ paddlecloud/paddlejob/views.py | 2 +- 9 files changed, 317 insertions(+), 8 deletions(-) create mode 100644 go/paddlecloud/delete.go create mode 100644 go/paddlecloud/registry.go create mode 100644 paddlecloud/paddlejob/registry.py diff --git a/go/cmd/paddlecloud/paddlecloud.go b/go/cmd/paddlecloud/paddlecloud.go index ed5ece22..5bc48fc0 100644 --- a/go/cmd/paddlecloud/paddlecloud.go +++ b/go/cmd/paddlecloud/paddlecloud.go @@ -18,10 +18,12 @@ func main() { subcommands.Register(&paddlecloud.GetCommand{}, "") subcommands.Register(&paddlecloud.KillCommand{}, "") subcommands.Register(&paddlecloud.SimpleFileCmd{}, "") - subcommands.Register(&pfsmod.LsCmd{}, "") - subcommands.Register(&pfsmod.CpCmd{}, "") - subcommands.Register(&pfsmod.RmCmd{}, "") - subcommands.Register(&pfsmod.MkdirCmd{}, "") + subcommands.Register(&paddlecloud.RegistryCmd{}, "") + subcommands.Register(&paddlecloud.DeleteCommand{}, "") + subcommands.Register(&pfsmod.LsCmd{}, "PFS") + subcommands.Register(&pfsmod.CpCmd{}, "PFS") + subcommands.Register(&pfsmod.RmCmd{}, "PFS") + subcommands.Register(&pfsmod.MkdirCmd{}, "PFS") flag.Parse() ctx := context.Background() diff --git a/go/paddlecloud/delete.go b/go/paddlecloud/delete.go new file mode 100644 index 00000000..05a1a50f --- /dev/null +++ b/go/paddlecloud/delete.go @@ -0,0 +1,53 @@ +package paddlecloud + +import ( + "context" + "flag" + "fmt" + "os" + + "github.com/google/subcommands" +) + +// DeleteCommand do job killings +type 
DeleteCommand struct { + rm bool +} + +// Name is subcommands name +func (*DeleteCommand) Name() string { return "delete" } + +// Synopsis is subcommands synopsis +func (*DeleteCommand) Synopsis() string { return "Delete the specify resource." } + +// Usage is subcommands usage +func (*DeleteCommand) Usage() string { + return `delete registry [registry-name] +` +} + +// SetFlags registers subcommands flags +func (p *DeleteCommand) SetFlags(f *flag.FlagSet) { +} + +// Execute kill command +func (p *DeleteCommand) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus { + if f.NArg() != 2 { + f.Usage() + return subcommands.ExitFailure + } + if f.Arg(0) == RegistryCmdName { + name := f.Arg(1) + r := RegistryCmd{SecretName: KubeRegistryName(name)} + err := r.Delete() + if err != nil { + fmt.Fprintf(os.Stderr, "error delete registry: %v\n", err) + return subcommands.ExitFailure + } + fmt.Fprintf(os.Stdout, "registry: [%s] is deleted\n", name) + } else { + f.Usage() + return subcommands.ExitFailure + } + return subcommands.ExitSuccess +} diff --git a/go/paddlecloud/get.go b/go/paddlecloud/get.go index c0a1ab89..5b8616db 100644 --- a/go/paddlecloud/get.go +++ b/go/paddlecloud/get.go @@ -27,7 +27,7 @@ func (*GetCommand) Synopsis() string { return "Print resources" } // Usage is subcommands usage func (*GetCommand) Usage() string { - return `get [jobs|workers [jobname]|quota]: + return `get [jobs|workers|registry [jobname]|quota]: Print resources. 
` } @@ -47,6 +47,8 @@ func (p *GetCommand) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{ jobs() } else if f.Arg(0) == "quota" { quota() + } else if f.Arg(0) == "registry" { + registry() } else if f.Arg(0) == "workers" { if f.NArg() != 2 { f.Usage() @@ -91,7 +93,35 @@ func workers(jobname string) error { w.Flush() return nil } - +func registry() error { + respBody, err := utils.GetCall(utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/", nil) + if err != nil { + fmt.Fprintf(os.Stderr, "err getting registry secret: %v\n", err) + return err + } + var respObj interface{} + err = json.Unmarshal(respBody, &respObj) + if err != nil { + return err + } + items := respObj.(map[string]interface{})["msg"].(map[string]interface{})["items"].([]interface{}) + w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0) + if len(items) >= 0 { + fmt.Fprintf(w, "ID\tNAME\tDATA\n") + } + idx := 0 + for _, r := range items { + metadata := r.(map[string]interface{})["metadata"].(map[string]interface{}) + name := RegistryName(metadata["name"].(string)) + if len(name) != 0 { + cTime := metadata["creation_timestamp"].(string) + fmt.Fprintf(w, "%d\t%s\t%s\n", idx, name, cTime) + idx++ + } + } + w.Flush() + return err +} func jobs() error { respBody, err := utils.GetCall(utils.Config.ActiveConfig.Endpoint+"/api/v1/jobs/", nil) if err != nil { diff --git a/go/paddlecloud/registry.go b/go/paddlecloud/registry.go new file mode 100644 index 00000000..0a65bd52 --- /dev/null +++ b/go/paddlecloud/registry.go @@ -0,0 +1,120 @@ +package paddlecloud + +import ( + "context" + "encoding/json" + "errors" + "flag" + "fmt" + "os" + "strings" + + "github.com/PaddlePaddle/cloud/go/utils" + "github.com/golang/glog" + "github.com/google/subcommands" +) + +const ( + // RegistryCmdName is subcommand name + RegistryCmdName = "registry" + RegistryPrefix = "pcloud-registry" +) + +// RegistryCmd is Docker registry secret information +type RegistryCmd struct { + SecretName string `json:"name"` + Username 
string `json:"username"` + Password string `json:"password"` + Server string `json:"server"` +} + +// Name is the subcommand name +func (r *RegistryCmd) Name() string { return RegistryCmdName } + +// Synopsis is the subcommand's synopsis +func (r *RegistryCmd) Synopsis() string { return "Add registry secret on paddlecloud." } + +// Usage is the subcommand's usage +func (r *RegistryCmd) Usage() string { + return `registry [add|del]: +` +} + +// SetFlags registers subcommands flags. +func (r *RegistryCmd) SetFlags(f *flag.FlagSet) { + f.StringVar(&r.SecretName, "name", "", "registry secret name") + f.StringVar(&r.Username, "username", "", "your Docker registry username") + f.StringVar(&r.Password, "password", "", "your Docker registry password") + f.StringVar(&r.Server, "server", "", "your Docker registry Server") +} +func (r *RegistryCmd) addRegistrySecret() error { + jsonString, err := json.Marshal(r) + if err != nil { + return err + } + glog.V(10).Infof("Add registry secret: %s to %s\n", jsonString, utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/") + respBody, err := utils.PostCall(utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/", jsonString) + if err != nil { + return err + } + var respObj interface{} + if err = json.Unmarshal(respBody, &respObj); err != nil { + return err + } + // FIXME: Return an error if error message is not empty. 
Use response code instead + errMsg := respObj.(map[string]interface{})["msg"].(string) + if len(errMsg) > 0 { + return errors.New(errMsg) + } + return nil +} + +// Delete the specify registry +func (r *RegistryCmd) Delete() error { + jsonString, err := json.Marshal(r) + if err != nil { + return err + } + glog.V(10).Infof("Delete registry secret: %s to %s\n", jsonString, utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/") + respBody, err := utils.DeleteCall(utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/", jsonString) + if err != nil { + return err + } + + var respObj interface{} + if err = json.Unmarshal(respBody, &respObj); err != nil { + return err + } + // FIXME: Return an error if error message is not empty. Use response code instead + errMsg := respObj.(map[string]interface{})["msg"].(string) + if len(errMsg) > 0 { + return errors.New(errMsg) + } + return nil +} +func (r *RegistryCmd) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus { + if r.SecretName == "" || r.Username == "" || r.Password == "" || r.Server == "" { + f.Usage() + return subcommands.ExitFailure + } + r.SecretName = strings.Join([]string{RegistryPrefix, r.SecretName}, "-") + err := r.addRegistrySecret() + if err != nil { + fmt.Fprintf(os.Stderr, "add registry secret failed: %s\n", err) + return subcommands.ExitFailure + } + return subcommands.ExitSuccess +} + +// KubeRegistryName add a prefix for the name +func KubeRegistryName(name string) string { + return RegistryPrefix + "-" + name +} + +// RegistryName is registry secret name for PaddleCloud +func RegistryName(name string) string { + if strings.HasPrefix(name, RegistryPrefix) { + return name[len(RegistryPrefix)+1 : len(name)] + } + return "" +} diff --git a/go/paddlecloud/submit.go b/go/paddlecloud/submit.go index 1aa2cc03..e4cd5677 100644 --- a/go/paddlecloud/submit.go +++ b/go/paddlecloud/submit.go @@ -30,6 +30,8 @@ type SubmitCmd struct { Topology string `json:"topology"` Datacenter string 
`json:"datacenter"` Passes int `json:"passes"` + Image string `json:"image"` + Registry string `json:"registry"` } // Name is subcommands name. @@ -59,6 +61,8 @@ func (p *SubmitCmd) SetFlags(f *flag.FlagSet) { f.StringVar(&p.Entry, "entry", "", "Command of starting trainer process. Defaults to paddle train") f.StringVar(&p.Topology, "topology", "", "Will Be Deprecated .py file contains paddle v1 job configs") f.IntVar(&p.Passes, "passes", 1, "Pass count for training job") + f.StringVar(&p.Image, "image", "", "Runtime Docker image for the job") + f.StringVar(&p.Registry, "registry", "", "Registry secret name for the runtime Docker image") } // Execute submit command. diff --git a/paddlecloud/paddlecloud/urls.py b/paddlecloud/paddlecloud/urls.py index 746dd451..9cf5fbae 100644 --- a/paddlecloud/paddlecloud/urls.py +++ b/paddlecloud/paddlecloud/urls.py @@ -32,6 +32,7 @@ url(r"^api/v1/workers/", paddlejob.views.WorkersView.as_view()), url(r"^api/v1/quota/", paddlejob.views.QuotaView.as_view()), url(r"^api/v1/file/", paddlejob.views.SimpleFileView.as_view()), + url(r"^api/v1/registry/", paddlejob.registry.RegistryView.as_view()), ] urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/paddlecloud/paddlejob/__init__.py b/paddlecloud/paddlejob/__init__.py index 25f1fa63..7351d24c 100644 --- a/paddlecloud/paddlejob/__init__.py +++ b/paddlecloud/paddlejob/__init__.py @@ -1,2 +1,3 @@ from paddle_job import PaddleJob -__all__ = ["PaddleJob"] +import registry +__all__ = ["PaddleJob", "registry"] diff --git a/paddlecloud/paddlejob/registry.py b/paddlecloud/paddlejob/registry.py new file mode 100644 index 00000000..ba92bdfb --- /dev/null +++ b/paddlecloud/paddlejob/registry.py @@ -0,0 +1,98 @@ +from django.http import HttpResponseRedirect, HttpResponse, JsonResponse +from django.contrib import messages +from django.conf import settings +from kubernetes import client, config +from kubernetes.client.rest import ApiException +from . 
import PaddleJob +from rest_framework.authtoken.models import Token +from rest_framework import viewsets, generics, permissions +from rest_framework.response import Response +from rest_framework.views import APIView +from rest_framework.parsers import MultiPartParser, FormParser, FileUploadParser +import json +import utils +import notebook.utils +import logging +import volume +import os +import base64 + +def docker_cfg(username, password, email, server): + auth = "%s:%s" % (username, password) + auth_encode = base64.b64encode(auth) + return json.dumps({server: + {"username": username, + "password": password, + "email": email, + "auth": auth_encode}}) + +class RegistryView(APIView): + permission_classes = (permissions.IsAuthenticated,) + def post(self, request): + """ + Cretea a registry secret + """ + username = request.user.username + user_namespace = notebook.utils.email_escape(username) + api_client = notebook.utils.get_user_api_client(username) + obj = json.loads(request.body) + name = obj.get("name") + docker_username = obj.get("username") + docker_password = obj.get("password") + docker_server = obj.get("server") + cfg = docker_cfg(docker_username, + docker_password, + username, + docker_server) + try: + ret = client.CoreV1Api( + api_client=api_client).create_namespaced_secret( + namespace = user_namespace, + body = { + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": name + }, + "data": { + ".dockerconfigjson": base64.b64encode(cfg) + }, + "type": "kubernetes.io/dockerconfigjson"}) + except ApiException, e: + logging.error("Failed when create secret.") + return utils.simple_response(500, str(e)) + return utils.simple_response(200, "") + + def delete(self, request): + """ + Delete a registry secret + """ + username = username = request.user.username + user_namespace = notebook.utils.email_escape(username) + api_client = notebook.utils.get_user_api_client(username) + obj = json.loads(request.body) + name = obj.get("name") + try: + ret = 
client.CoreV1Api(api_client=api_client).delete_namespaced_secret( + name = name, + namespace = user_namespace, + body = client.V1DeleteOptions()) + except ApiException, e: + logging.error("Failed when delete secret.") + return utils.simple_response(500, str(e)) + return utils.simple_response(200, "") + + def get(self, request): + """ + Get registrys + """ + username = username = request.user.username + user_namespace = notebook.utils.email_escape(username) + api_client = notebook.utils.get_user_api_client(username) + try: + secretes_list = client.CoreV1Api(api_client=api_client).list_namespaced_secret( + namespace=user_namespace) + return utils.simple_response(200, secretes_list.to_dict()) + except ApiException, e: + logging.error("Failed when list secrets.") + return utils.simple_response(500, str(e)) diff --git a/paddlecloud/paddlejob/views.py b/paddlecloud/paddlejob/views.py index 96a38244..720944b1 100644 --- a/paddlecloud/paddlejob/views.py +++ b/paddlecloud/paddlejob/views.py @@ -288,7 +288,7 @@ def get(self, request, format=None): username = request.user.username namespace = notebook.utils.email_escape(username) api_client = notebook.utils.get_user_api_client(username) - quota_list = api_client.CoreV1Api(api_cilent=api_client)\ + quota_list = client.CoreV1Api(api_client=api_client)\ .list_namespaced_resource_quota(namespace) return Response(quota_list.to_dict()) From d4038253c0858f8e5eb77d366f22b7ebb32c2946 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 19 Jun 2017 19:33:53 +0800 Subject: [PATCH 2/5] paddle job support registry secret --- paddlecloud/paddlejob/views.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddlecloud/paddlejob/views.py b/paddlecloud/paddlejob/views.py index 720944b1..fe1308e7 100644 --- a/paddlecloud/paddlejob/views.py +++ b/paddlecloud/paddlejob/views.py @@ -86,8 +86,9 @@ def post(self, request, format=None): )) else: pass - - registry_secret = settings.JOB_DOCKER_IMAGE.get("registry_secret", None) + 
registry_secret = obj.get("registry", None) + if not registry_secret: + registry_secret = settings.JOB_DOCKER_IMAGE.get("registry_secret", None) # get user specified image job_image = obj.get("image", None) gpu_count = obj.get("gpu", 0) From af2f2c06b026051d577ae43dde1cfa926460d5bc Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Wed, 21 Jun 2017 16:36:13 +0800 Subject: [PATCH 3/5] update --- doc/usage_cn.md | 59 ++++++++++++++++++++++++++++++++++++-- go/paddlecloud/delete.go | 1 - go/paddlecloud/registry.go | 3 +- 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/doc/usage_cn.md b/doc/usage_cn.md index 509d38d9..73566afc 100644 --- a/doc/usage_cn.md +++ b/doc/usage_cn.md @@ -38,6 +38,7 @@ Subcommands: kill Stop the job. -rm will remove the job from history. logs Print logs of the job. submit Submit job to PaddlePaddle Cloud. + delete Delete the specify resource. Use "paddlecloud flags" for a list of top-level flags @@ -123,13 +124,30 @@ scp -r my_training_package/ user@tunnel-server:/mnt/hdfs_mulan/idl/idl-dl/mypack - 提交基于V1 API的训练任务 ```bash -paddlecloud submit -jobname my-paddlecloud-job -cpu 1 -gpu 0 -memory 1Gi -parallelism 10 -pscpu 1 -pservers 3 -psmemory 1Gi -passes 1 -topology trainer_config.py /pfs/[datacenter_name]/home/[username]/ctr_demo_package +paddlecloud submit -jobname my-paddlecloud-job \ + -cpu 1 \ + -gpu 0 \ + -memory 1Gi \ + -parallelism 10 \ + -pscpu 1 \ + -pservers 3 \ + -psmemory 1Gi \ + -passes 1 \ + -topology trainer_config.py /pfs/[datacenter_name]/home/[username]/ctr_demo_package ``` - 提交基于V2 API的训练任务 ```bash -paddlecloud submit -jobname my-paddlecloud-job -cpu 1 -gpu 0 -memory 1Gi -parallelism 10 -pscpu 1 -pservers 3 -psmemory 1Gi -passes 1 -entry "python trainer_config.py" /pfs/[datacenter_name]/home/[username]/ctr_demo_package +paddlecloud submit -jobname my-paddlecloud-job \ + -cpu 1 \ + -gpu 0 \ + -memory 1Gi \ + -parallelism 10 \ + -pscpu 1 \ + -pservers 3 \ + -psmemory 1Gi \ + -entry "python trainer_config.py" 
/pfs/[datacenter_name]/home/[username]/ctr_demo_package ``` 参数说明: @@ -146,6 +164,43 @@ paddlecloud submit -jobname my-paddlecloud-job -cpu 1 -gpu 0 -memory 1Gi -parall - `-passes`:执行训练的pass个数 - `package`:HDFS 训练任务package的路径 +### 使用自定义的Runtime Docker Image +runtime Docker Image是实际被Kubernetes调度的Docker Image,如果在某些情况下需要自定义属于某个任务的Docker Image可以通过以下方式 +- 自定义Runtime Docker Image + ```bash + git clone https://github.com/PaddlePaddle/cloud.git && cd cloud/docker + ./build_docker.sh {PaddlePaddle production image} {runtime Docker image} + docker push {runtime Docker image} + ``` +- 使用自定义的runtime Docker Image来运行Job + ```bash + paddlecloud submit -image {runtime Docker image} -jobname ... + ``` + +- 使用私有registry的runtime Docker image + - 在PaddleCloud上添加registry认证信息 + ```bash + paddlecloud registry \ + -username {your username} \ + -password {your password} \ + -server {your registry server} \ + -name {your registry name} + ``` + - 使用私有registry提交任务 + ```bash + paddlecloud submit \ + -image {runtime Docker image} \ + -registry {your registry name} + ``` + - 查看所有的registry + ```bash + paddlecloud get registry + ``` + - 删除指定的registry + ```bash + paddlecloud delete registry {your registry name} + ``` + ## 查看任务状态 用户可以查看任务、任务节点、用户空间配额的当前状态。 diff --git a/go/paddlecloud/delete.go b/go/paddlecloud/delete.go index 05a1a50f..7f0d1a77 100644 --- a/go/paddlecloud/delete.go +++ b/go/paddlecloud/delete.go @@ -11,7 +11,6 @@ import ( // DeleteCommand do job killings type DeleteCommand struct { - rm bool } // Name is subcommands name diff --git a/go/paddlecloud/registry.go b/go/paddlecloud/registry.go index 0a65bd52..ef64ee2a 100644 --- a/go/paddlecloud/registry.go +++ b/go/paddlecloud/registry.go @@ -17,7 +17,8 @@ import ( const ( // RegistryCmdName is subcommand name RegistryCmdName = "registry" - RegistryPrefix = "pcloud-registry" + // RegistryPrefix is the prefix for Kubernetes secret name + RegistryPrefix = "pcloud-registry" ) // RegistryCmd is Docker registry secret information From 
2c7b94b4421835997d316a8d0aa17d2f85dcf278 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Thu, 22 Jun 2017 11:10:22 +0800 Subject: [PATCH 4/5] update doc --- doc/usage_cn.md | 2 +- docker/build_docker.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/usage_cn.md b/doc/usage_cn.md index 73566afc..4a3c8b1b 100644 --- a/doc/usage_cn.md +++ b/doc/usage_cn.md @@ -37,7 +37,7 @@ Subcommands: help describe subcommands and their syntax kill Stop the job. -rm will remove the job from history. logs Print logs of the job. - submit Submit job to PaddlePaddle Cloud. + submit Submit job to PaddlePaddle Cloud. delete Delete the specify resource. diff --git a/docker/build_docker.sh b/docker/build_docker.sh index ba825c29..e6d3d400 100755 --- a/docker/build_docker.sh +++ b/docker/build_docker.sh @@ -25,7 +25,7 @@ docker run --rm -it -v $PWD:/cloud $base_image \ #Build Docker Image cat > Dockerfile < Date: Thu, 22 Jun 2017 11:11:55 +0800 Subject: [PATCH 5/5] update doc --- doc/usage_cn.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/usage_cn.md b/doc/usage_cn.md index 4a3c8b1b..4aa50314 100644 --- a/doc/usage_cn.md +++ b/doc/usage_cn.md @@ -33,12 +33,20 @@ Usage: paddlecloud Subcommands: commands list all command names + delete Delete the specify resource. + file Simple file operations. get Print resources help describe subcommands and their syntax kill Stop the job. -rm will remove the job from history. logs Print logs of the job. - submit Submit job to PaddlePaddle Cloud. - delete Delete the specify resource. + registry Add registry secret on paddlecloud. + submit Submit job to PaddlePaddle Cloud. + +Subcommands for PFS: + cp uoload or download files + ls List files on PaddlePaddle Cloud + mkdir mkdir directoies on PaddlePaddle Cloud + rm rm files on PaddlePaddle Cloud Use "paddlecloud flags" for a list of top-level flags