diff --git a/config/_default/config.toml b/config/_default/config.toml index 08a8909..df6ec1d 100644 --- a/config/_default/config.toml +++ b/config/_default/config.toml @@ -67,6 +67,10 @@ preserveTaxonomyNames = true name = "latest" url = "/en/docs" +[[params.versions]] + name = "v1.9.0" + url = "/en/docs/v1-9-0" + [[params.versions]] name = "v1.8.2" url = "/en/docs/v1-8-2" diff --git a/config/_default/languages.toml b/config/_default/languages.toml index 21ded40..454b3b7 100644 --- a/config/_default/languages.toml +++ b/config/_default/languages.toml @@ -44,6 +44,10 @@ name = "latest" url = "/zh/docs" +[[zh.params.versions]] + name = "v1.9.0" + url = "/zh/docs/v1-9-0" + [[zh.params.versions]] name = "v1.8.2" url = "/zh/docs/v1-8-2" @@ -108,6 +112,43 @@ weight = 7 identifier = "contribution" +# Documentation version v1.9.0 + +[[zh.menu.v1-9-0]] + name = "主页" + weight = 1 + identifier = "home" + +[[zh.menu.v1-9-0]] + name = "开始" + weight = 2 + identifier = "getting-started" + +[[zh.menu.v1-9-0]] + name = "概念" + weight = 3 + identifier = "concepts" + +[[zh.menu.v1-9-0]] + name = "生态" + weight = 4 + identifier = "zoology" + +[[zh.menu.v1-9-0]] + name = "Scheduler" + weight = 5 + identifier = "scheduler" + +[[zh.menu.v1-9-0]] + name = "CLI" + weight = 6 + identifier = "cli" + +[[zh.menu.v1-9-0]] + name = "贡献" + weight = 7 + identifier = "contribution" + # Documentation version v1.8.2 [[zh.menu.v1-8-2]] diff --git a/config/_default/menus.toml b/config/_default/menus.toml index 937cbe8..a2e645a 100644 --- a/config/_default/menus.toml +++ b/config/_default/menus.toml @@ -92,6 +92,43 @@ weight = 7 identifier = "contribution" +# Documentation version v1.9.0 + +[[v1-9-0]] + name = "Home" + weight = 1 + identifier = "home" + +[[v1-9-0]] + name = "Getting Started" + weight = 2 + identifier = "getting-started" + +[[v1-9-0]] + name = "Concepts" + weight = 3 + identifier = "concepts" + +[[v1-9-0]] + name = "Ecosystem" + weight = 4 + identifier = "ecosystem" + +[[v1-9-0]] + name = "Scheduler" + weight = 5 + identifier = "scheduler" + +[[v1-9-0]] + name = "CLI" + weight = 6 + identifier = "cli" + +[[v1-9-0]] + name = "Contribution" + weight = 7 + identifier = "contribution" + # Documentation version v1.8.2 [[v1-8-2]] diff --git a/content/en/blog/Volcano-1.9.0-release.md b/content/en/blog/Volcano-1.9.0-release.md new file mode 100644 index 0000000..0a6db99 --- /dev/null +++ b/content/en/blog/Volcano-1.9.0-release.md @@ -0,0 +1,214 @@ ++++ +title = "Volcano v1.9.0 Available Now" +description = "New features: Support elastic queue capacity scheduling, Supports affinity scheduling between queues and nodes, GPU sharing feature supports node scoring scheduling, Volcano Support for Kubernetes v1.29, Enhance scheduler metrics, Add license compliance check, Improve scheduling stability, etc." +subtitle = "" + +date = 2024-05-21 +lastmod = 2024-05-21 +datemonth = "May" +dateyear = "2024" +dateday = 21 + +draft = false # Is this a draft? true/false +toc = true # Show table of contents? true/false +type = "posts" # Do not modify. +authors = ["volcano"] + +tags = ["Practice"] +summary = "New features: Support elastic queue capacity scheduling, Supports affinity scheduling between queues and nodes, GPU sharing feature supports node scoring scheduling, Volcano Support for Kubernetes v1.29, Enhance scheduler metrics, Add license compliance check, Improve scheduling stability, etc." + +# Add menu entry to sidebar. 
+linktitle = "Volcano v1.9.0 Available Now"
+[menu.posts]
+parent = "tutorials"
+weight = 6
++++
+
+On May 21, 2024, UTC+8, Volcano version v1.9.0 was officially released. This version added the following new features:
+
+- **Support elastic queue capacity scheduling**
+
+- **Supports affinity scheduling between queues and nodes**
+
+- **GPU sharing feature supports node scoring scheduling**
+
+- **Volcano Support for Kubernetes v1.29**
+
+- **Enhance scheduler metrics**
+
+- **Add license compliance check**
+
+- **Improve scheduling stability**
+
+Volcano is the industry's first cloud native batch computing project. Open-sourced at KubeCon Shanghai in June 2019, it became an official CNCF project in April 2020 and was promoted to a CNCF incubating project in April 2022. By now, more than 600 developers worldwide have committed code to the project, and the community keeps gaining popularity among developers, partners, and users.
+
+### Key Features
+
+#### Support elastic queue capacity scheduling
+
+Volcano currently uses the proportion plugin for queue management. Users can set the `guarantee`, `capability`, and other fields of a queue to configure its reserved resources and capacity limit, and share cluster resources by setting the queue's weight: each queue receives a share of the cluster proportional to its weight. This queue management approach has the following problems:
+
+- The capacity of a queue is expressed through its weight, which is not intuitive.
+- All resource dimensions of a queue are divided using the same ratio, so the capacity cannot be set separately for each resource dimension.
+
+Based on these considerations, Volcano implements a new elastic queue capacity management capability, which:
+
+- Allows users to directly set the capacity of each resource dimension for a queue instead of setting a weight value.
+- Supports elastic capacity scheduling based on deserved resources, so that a queue's resources can be shared with other queues and reclaimed back when needed.
+
+For example, in an AI large model training scenario, you can set different capacities for different GPU models in a queue, such as A100 and V100. When cluster resources are idle, a queue can borrow the resources of other idle queues, and when needed it can reclaim the amount of resources configured for it, that is, its deserved resources, thereby realizing elastic capacity scheduling.
+
+To use this feature, set the deserved field of the queue with the amount of resources deserved for each dimension. You also need to enable the capacity plugin and disable the proportion plugin in the scheduling configuration.
+
+```yaml
+apiVersion: scheduling.volcano.sh/v1beta1
+kind: Queue
+metadata:
+  name: demo-queue
+spec:
+  reclaimable: true
+  deserved: # set the deserved field.
+    cpu: 64
+    memory: 128Gi
+    nvidia.com/a100: 40
+    nvidia.com/v100: 80
+```
+
+For a complete usage example of elastic queue capacity scheduling, please refer to:
+[How to use capacity plugin](https://github.com/volcano-sh/volcano/blob/master/docs/user-guide/how_to_use_capacity_plugin.md).
+
+For the elastic queue capacity design document, please refer to:
+[Capacity scheduling Design](https://github.com/volcano-sh/volcano/blob/master/docs/design/capacity-scheduling.md).
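+
+As a minimal sketch of the scheduling configuration change mentioned above (it assumes the default volcano-scheduler ConfigMap format; the surrounding actions and plugins are only illustrative), enabling the capacity plugin in place of proportion looks roughly like this:
+
+```yaml
+actions: "enqueue, allocate, preempt, reclaim, backfill"
+tiers:
+- plugins:
+  - name: priority
+  - name: gang
+  - name: conformance
+- plugins:
+  - name: drf
+  - name: predicates
+  - name: capacity   # enabled in place of the proportion plugin
+  - name: nodeorder
+  - name: binpack
+```
+
+Keeping the reclaim action enabled is what allows resources lent to other queues to be reclaimed back up to the deserved amount.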
+
+#### Supports affinity scheduling between queues and nodes
+
+Queues are usually associated with departments within a company, and different departments usually need different types of heterogeneous resources. For example, the large model training team needs NVIDIA Tesla GPUs, while the recommendation team needs AMD GPUs. When users submit jobs to a queue, the jobs need to be automatically scheduled to nodes with the corresponding resource type according to the attributes of the queue.
+
+Volcano has implemented affinity scheduling between queues and nodes. Users only need to set the node label values that require affinity in the affinity field of the queue, and Volcano will automatically schedule jobs submitted to that queue to the nodes associated with it. Users do not need to set the affinity of each job separately; they only need to set the affinity of the queue once, and jobs submitted to the queue will be scheduled to the corresponding nodes based on the affinity between the queue and the nodes.
+
+This feature supports hard affinity, soft affinity, and anti-affinity scheduling at the same time. To use it, set a label with the key `volcano.sh/nodegroup-name` on the nodes, and then set the affinity field of the queue to specify the label values for hard and soft affinity.
+
+For example, the following queue setting means that jobs submitted to the queue must be scheduled to nodes with the label values groupname1 and groupname2, preferring nodes with the label value groupname2. At the same time, jobs must not be scheduled to nodes with the label values groupname3 and groupname4, except that when resources are insufficient they can still be scheduled to nodes with the label value groupname3.
+
+```yaml
+apiVersion: scheduling.volcano.sh/v1beta1
+kind: Queue
+metadata:
+  name: default
+spec:
+  reclaimable: true
+  weight: 1
+  affinity:            # added field
+    nodeGroupAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+      - groupname1
+      - groupname2
+      preferredDuringSchedulingIgnoredDuringExecution:
+      - groupname2
+    nodeGroupAntiAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+      - groupname3
+      - groupname4
+      preferredDuringSchedulingIgnoredDuringExecution:
+      - groupname3
+```
+
+The scheduling plugin for this feature is called nodegroup. For a complete usage example, see: [How to use nodegroup plugin](https://github.com/volcano-sh/volcano/blob/master/docs/user-guide/how_to_use_nodegroup_plugin.md).
+
+For the detailed design document, see [The nodegroup design](https://github.com/volcano-sh/volcano/blob/master/docs/design/node-group.md).
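+
+As a quick illustration of the node-side setup described above (the node names here are placeholders), the node group label can be applied with kubectl before jobs are submitted to the queue:
+
+```shell
+# Put nodes into node groups by setting the volcano.sh/nodegroup-name label
+# that the queue's affinity field refers to.
+kubectl label nodes node-a volcano.sh/nodegroup-name=groupname1
+kubectl label nodes node-b volcano.sh/nodegroup-name=groupname2
+```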
+
+#### GPU sharing feature supports node scoring scheduling
+
+GPU Sharing is a GPU sharing and isolation solution introduced in Volcano v1.8, which provides GPU sharing and device memory control capabilities to improve GPU utilization in AI training and inference scenarios. v1.9 adds a new scoring strategy for GPU nodes on top of this feature, so that the optimal node can be selected during job assignment to further improve resource utilization. Users can choose between the following two strategies:
+
+- Binpack: Provides a binpack algorithm at GPU card granularity, preferring to fill up GPU cards that already have resources allocated, so as to avoid resource fragmentation and waste.
+
+- Spread: Prefers idle GPU cards over shared cards that already have resources allocated.
+
+For detailed usage documentation, please refer to: [How to use gpu sharing](https://github.com/volcano-sh/volcano/blob/master/docs/user-guide/how_to_use_gpu_sharing.md).
+
+#### Volcano Support for Kubernetes v1.29
+
+Volcano follows the Kubernetes community release cadence and supports every major Kubernetes version. The latest supported version is v1.29, and the full UT and E2E test suites were run against it to ensure functionality and reliability. If you would like to help adapt Volcano to new Kubernetes versions, please refer to https://github.com/volcano-sh/volcano/pull/3459 to make community contributions.
+
+#### Enhance scheduler metrics
+
+Volcano uses client-go to talk to Kubernetes. Although the client can set a QPS limit to avoid requests being throttled, it is difficult to observe how much of that QPS the client actually uses. In order to observe the request frequency of the client in real time, Volcano has added client-go metrics. Users can query these metrics to see the number of GET, POST, and other requests per second, obtain the actual QPS used, and then decide whether the client QPS needs to be adjusted. The client-go metrics also include client certificate rotation cycle statistics, response size statistics per request, and more.
+
+Users can run curl http://$volcano_scheduler_pod_ip:8080/metrics to get all the detailed metrics of the Volcano scheduler.
+
+Related PR: [#3274](https://github.com/volcano-sh/volcano/pull/3274). ([@Monokaix](https://github.com/Monokaix))
+
+#### Add license compliance check
+
+To strengthen open source license compliance governance in the Volcano community and avoid the risks of introducing copyleft ("infectious") open source licenses, the community has introduced an open source license compliance checking tool. A copyleft license requires that derivative works produced by modifying, using, or copying the software must be open sourced under the same license. If a third-party library introduced by a developer's PR uses a copyleft license such as GPL or LGPL, the CI gate will block the PR. The developer then needs to replace the third-party library with one under a permissive license such as MIT, Apache 2.0, or BSD to pass the license compliance check.
+
+#### Improve scheduling stability
+
+Volcano v1.9.0 also brings optimizations in preemption, retries after scheduling failures, memory leak avoidance, security hardening, and more. The details include:
+
+- Fix pods failing to be scheduled when a Deployment is frequently scaled up and down in extreme cases, see PR for details: [#3376](https://github.com/volcano-sh/volcano/pull/3376). ([@guoqinwill](https://github.com/guoqinwill))
+
+- Fix Pod preemption, see PR for details: [#3458](https://github.com/volcano-sh/volcano/pull/3458). ([@LivingCcj](https://github.com/LivingCcj))
+
+- Optimize the Pod scheduling failure retry mechanism, see PR for details: [#3435](https://github.com/volcano-sh/volcano/pull/3435). ([@bibibox](https://github.com/bibibox))
+
+- Metrics optimization: [#3463](https://github.com/volcano-sh/volcano/pull/3463). ([@Monokaix](https://github.com/Monokaix))
+
+- Security enhancements: [#3449](https://github.com/volcano-sh/volcano/pull/3449). ([@lekaf974](https://github.com/lekaf974))
+
+### Contributors
+
+Volcano v1.9.0 was brought into being by hundreds of code commits from many contributors. Thanks for your contributions.
+
+**Contributors on GitHub:**
+<table>
+  <tr><td>@daniel-hutao</td><td>@wuyueandrew</td><td>@googs1025</td></tr>
+  <tr><td>@7sunarni</td><td>@flyingfang</td><td>@LivingCcj</td></tr>
+  <tr><td>@guoqinwill</td><td>@panoswoo</td><td>@william-wang</td></tr>
+  <tr><td>@lekaf974</td><td>@yangqz</td><td>@lowang-bh</td></tr>
+  <tr><td>@loheagn</td><td>@hwdef</td><td>@archlitchi</td></tr>
+  <tr><td>@Lily922</td><td>@bibibox</td><td>@Monokaix</td></tr>
+  <tr><td>@belo4ya</td></tr>
+</table>
+ +**Reference** + +Release note: v1.9.0 + +https://github.com/volcano-sh/volcano/releases/tag/v1.9.0 + +Branch:release-1.9 + +https://github.com/volcano-sh/volcano/tree/release-1.9 + +### About Volcano + +Volcano is designed for high-performance computing applications such as AI, big data, gene sequencing, and rendering, and supports mainstream general computing frameworks. More than 58,000 global developers joined us, among whom the in-house ones come from companies such as Huawei, AWS, Baidu, Tencent, JD, and Xiaohongshu. There are 3.8k+ Stars and 800+ Forks for the project. Volcano has been proven feasible for mass data computing and analytics, such as AI, big data, and gene sequencing. Supported frameworks include Spark, Flink, TensorFlow, PyTorch, Argo, MindSpore, Paddlepaddle, Kubeflow, MPI, Horovod, MXNet, KubeGene, and Ray. The ecosystem is thriving with more developers and use cases coming up. \ No newline at end of file diff --git a/content/en/docs/installation.md b/content/en/docs/installation.md index 9bf59f4..091c64e 100644 --- a/content/en/docs/installation.md +++ b/content/en/docs/installation.md @@ -3,7 +3,7 @@ title = "Installation" date = 2019-01-28 -lastmod =2024-01-16 +lastmod =2024-05-21 draft = false # Is this a draft? true/false toc = true # Show table of contents? true/false @@ -36,7 +36,7 @@ Install Volcano on an existing Kubernetes cluster. This way is both available fo kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/master/installer/volcano-development.yaml ``` -You can also replace `master` of above url with specific tag/branch (such as `release-1.8` branch for latest v1.8.x version, `v1.8.2` tag for v1.8.2 version) to install Volcano with specific version. +You can also replace `master` of above url with specific tag/branch (such as `release-1.9` branch for latest v1.9.x version, `v1.9.0` tag for v1.9.0 version) to install Volcano with specific version. ### Install from code @@ -69,7 +69,7 @@ helm install volcano volcano-sh/volcano -n volcano-system --create-namespace The output is as follows after executing the above command. ``` NAME: volcano -LAST DEPLOYED: Tue Jan 16 17:30:34 2024 +LAST DEPLOYED: Tue May 21 15:16:30 2024 NAMESPACE: volcano-system STATUS: deployed REVISION: 1 diff --git a/content/en/docs/v1-9-0/_index.md b/content/en/docs/v1-9-0/_index.md new file mode 100644 index 0000000..c1d2ee5 --- /dev/null +++ b/content/en/docs/v1-9-0/_index.md @@ -0,0 +1,79 @@ ++++ +title = "Introduction" + +date = 2024-05-21 +lastmod = 2024-05-21 + +draft = false # Is this a draft? true/false +toc = true # Show table of contents? true/false +type = "docs" # Do not modify. + +# Add menu entry to sidebar. +[menu.v1-9-0] + parent = "home" + weight = 1 ++++ + +## What is Volcano +Volcano is a cloud native system for high-performance workloads, which has been accepted by [Cloud Native Computing Foundation (CNCF)](https://www.cncf.io/) as its first and only official container batch scheduling project. Volcano supports popular computing frameworks such as [Spark](https://spark.apache.org/), [TensorFlow](https://www.tensorflow.org/), [PyTorch](https://pytorch.org/), [Flink](https://flink.apache.org/), [Argo](https://argoproj.github.io/), [MindSpore](https://www.mindspore.cn/en), and [PaddlePaddle](https://www.paddlepaddle.org.cn/). Volcano also supports scheduling of computing resources on different architecture, such as x86, Arm, and Kunpeng. 
+ +## Why Volcano +Job scheduling and management become increasingly complex and critical for high-performance batch computing. Common requirements are as follows: + +* Support for diverse scheduling algorithms +* More efficient scheduling +* Non-intrusive support for mainstream computing frameworks +* Support for multi-architecture computing + +Volcano is designed to cater to these requirements. In addition, Volcano inherits the design of Kubernetes APIs, allowing you to easily run applications that require high-performance computing on Kubernetes. +## Features +### Rich scheduling policies +Volcano supports a variety of scheduling policies: + +* Gang scheduling +* Fair-share scheduling +* Queue scheduling +* Preemption scheduling +* Topology-based scheduling +* Reclaim +* Backfill +* Resource reservation + +You can also configure plug-ins and actions to use custom scheduling policies. +### Enhanced job management +You can use enhanced job features of Volcano for high-performance computing: + +* Multi-pod jobs +* Improved error handling +* Indexed jobs + +### Multi-architecture computing +Volcano can schedule computing resources from multiple architectures: + +* x86 +* Arm +* Kunpeng +* Ascend +* GPU + +### Faster scheduling +Compared with existing queue schedulers, Volcano shortens the average scheduling delay through a series of optimizations. + +## Ecosystem +Volcano allows you to use mainstream computing frameworks: + +* [Spark](https://spark.apache.org/) +* [TensorFlow](https://www.tensorflow.org/) +* [PyTorch](https://pytorch.org/) +* [Flink](https://flink.apache.org/) +* [Argo](https://argoproj.github.io/) +* [MindSpore](https://www.mindspore.cn/en) +* [PaddlePaddle](https://www.paddlepaddle.org.cn/) +* [Open MPI](https://www.open-mpi.org/) +* [Horovod](https://horovod.readthedocs.io/) +* [MXNet](https://mxnet.apache.org/) +* [Kubeflow](https://www.kubeflow.org/) +* [KubeGene](https://github.com/volcano-sh/kubegene) +* [Cromwell](https://cromwell.readthedocs.io/) + +Volcano has been commercially used as the infrastructure scheduling engine by companies and organizations. \ No newline at end of file diff --git a/content/en/docs/v1-9-0/actions.md b/content/en/docs/v1-9-0/actions.md new file mode 100644 index 0000000..5531dab --- /dev/null +++ b/content/en/docs/v1-9-0/actions.md @@ -0,0 +1,86 @@ ++++ +title = "Actions" + +date = 2024-05-21 +lastmod = 2024-05-21 + +draft = false # Is this a draft? true/false +toc = true # Show table of contents? true/false +type = "docs" # Do not modify. + +# Add menu entry to sidebar. +linktitle = "Actions" +[menu.v1-9-0] + parent = "scheduler" + weight = 2 ++++ + + + +### Enqueue + +#### Overview + +The Enqueue action filters qualified jobs into the queue to be scheduled. When the minimum number of resource requests under a Job cannot be met, even if the scheduling action is performed for a pod under a Job, pod will not be able to schedule because the "Gang" constraint is not reached. A state refresh from "Pending" to "Inqueue" can only happen if the minimum resource size of the job is met. In general, the Enqueue action is an essential action for the scheduler configuration. + +#### Scenario + +Enqueue action is the preparatory stage in the scheduling process. Only when the cluster resources meet the minimum resource request for the job scheduling, the job state can be changed from "pending" to "Enqueue". 
In this way, the Enqueue action prevents a large number of unschedulable pods from piling up in the cluster and improves scheduler performance in high-load scenarios where cluster resources may be insufficient, such as AI/MPI/HPC workloads.
+
+
+
+### Allocate
+
+#### Overview
+
+The Allocate action binds pods to nodes and consists of pre-selection and further selection. PredicateFn is used to filter out nodes to which a pod cannot be allocated, and NodeOrderFn is used to score nodes to find the one that fits best. The Allocate action is an essential step in the scheduling process; it handles the pods in the to-be-scheduled list that have resource requests.
+
+The Allocate action follows the commit mechanism. When a pod's scheduling request is satisfied, a bind action is not necessarily performed for that pod immediately; it also depends on whether the gang constraint of the job the pod belongs to is satisfied. Only if that gang constraint is satisfied can the pod be scheduled; otherwise, it cannot.
+
+#### Scenario
+
+In a cluster that mixes different kinds of workloads, the pre-selection phase of Allocate enables specific workloads (AI, big data, HPC, scientific computing) to be filtered, sorted, and scheduled by namespace quickly and centrally. In complex computing scenarios such as TensorFlow or MPI, where a single job contains multiple tasks, the Allocate action traverses the allocation options of the tasks under the job to find the most appropriate node for each task.
+
+
+
+### Preempt
+
+#### Overview
+
+The Preempt action is used for resource preemption between jobs in the same queue, or between tasks under the same job. It is the preemption step in the scheduling process and is used to satisfy high-priority scheduling requests.
+
+#### Scenario
+
+- Preemption between jobs in the same queue: Multiple departments in a company share a cluster, and each department can be mapped to a queue. Resources of different departments cannot be preempted from each other, which guarantees resource isolation between departments. In complex scheduling scenarios, basic resources (CPUs, disks, GPUs, memory, network bandwidth) are allocated based on the type of service: compute-intensive scenarios, such as AI and high-performance scientific computing, require more computing resources such as CPUs, GPUs, and memory, while big data scenarios, such as the Spark framework, have high requirements on disks. Different queues share resources, and if AI jobs preempted all CPU resources, jobs in queues of other scenarios would starve. Therefore, queue-based resource allocation is used to keep services running.
+- Preemption between tasks in the same job: Usually, there can be multiple tasks in the same job. For example, in complex AI scenarios, a parameter server and multiple workers need to be set up inside a TF job, and preemption between workers in such scenarios is supported by preemption within the job.
+
+### Reserve
+
+#### Overview
+
+This action has been deprecated since v1.2 and replaced by the SLA plugin.
+
+The Reserve action completes resource reservation and binds the selected target job to the node. The Reserve action, the elect action, and the reservation plugin make up the resource reservation mechanism. The Reserve action must be configured after the Allocate action.
+
+#### Scenario
+
+In practice, there are two common scenarios:
+
+- When cluster resources are insufficient, suppose jobs A and B are both waiting to be scheduled, and A requests fewer resources than B or A has a higher priority than B. Under the default scheduling policy, A is scheduled ahead of B. In the worst case, if more jobs with higher priority or smaller resource requests keep arriving in the scheduling queue, B will starve for a long time or even wait forever.
+
+- When cluster resources are insufficient, suppose jobs A and B are both waiting to be scheduled. A has a lower priority than B but requests fewer resources. Under a scheduling policy centered on cluster throughput and resource utilization, A is scheduled first. In the worst case, B remains starved.
+
+
+Therefore, we need a fair scheduling mechanism that ensures a job that has been starving for whatever reason is scheduled once its starvation reaches a critical state. Job reservation is such a fair scheduling mechanism.
+
+A resource reservation mechanism needs to consider node selection, the number of nodes, and how to lock nodes. Volcano's resource reservation mechanism reserves resources for target jobs by locking a node group: it selects a group of nodes that meet certain constraints and includes them in the node group. From that moment on, nodes in the node group no longer accept new jobs, and the total capacity of these nodes meets the requirements of the target job. Note that target jobs can be scheduled across the whole cluster, while non-target jobs can only be scheduled onto nodes outside the node group.
+
+### Backfill
+
+#### Overview
+
+The Backfill action is the backfill step in the scheduling process. It handles the pods in the to-be-scheduled list that do not specify resource requests. When scheduling a single pod, it traverses all nodes and schedules the pod to any node that meets the pod's scheduling request.
+
+#### Scenario
+
+In a cluster, the main resources are often occupied by "fat jobs", such as AI model training. The Backfill action allows the cluster to quickly schedule "small jobs" such as one-off AI model identification or jobs with small amounts of data communication. Backfill improves cluster throughput and resource utilization.
\ No newline at end of file
diff --git a/content/en/docs/v1-9-0/architecture.md b/content/en/docs/v1-9-0/architecture.md
new file mode 100644
index 0000000..e16fd12
--- /dev/null
+++ b/content/en/docs/v1-9-0/architecture.md
@@ -0,0 +1,42 @@
++++
+title = "Architecture"
+
+date = 2024-05-21
+lastmod = 2024-05-21
+
+draft = false # Is this a draft? true/false
+toc = true # Show table of contents? true/false
+type = "docs" # Do not modify.
+
+# Add menu entry to sidebar.
+linktitle = "Architecture"
+[menu.v1-9-0]
+  parent = "home"
+  weight = 2
++++
+
+## Overall Architecture
+
+Volcano is designed for high-performance workloads running on Kubernetes. It follows the design and mechanisms of Kubernetes.
+
+Volcano consists of **scheduler** / **controllermanager** / **admission** / **vcctl**:
+
+##### Scheduler
+Volcano Scheduler schedules jobs to the most suitable node based on actions and plug-ins. Volcano supplements Kubernetes to support multiple scheduling algorithms for jobs.
+
+##### ControllerManager (CM)
+Volcano CMs manage the lifecycle of Custom Resource Definitions (CRDs). You can use the **Queue CM**, **PodGroup CM**, and **VCJob CM**.
+
+##### Admission
+Volcano Admission is responsible for the CRD API validation.
+
+##### vcctl
+Volcano vcctl is the command line client for Volcano.
diff --git a/content/en/docs/v1-9-0/cli.md b/content/en/docs/v1-9-0/cli.md
new file mode 100644
index 0000000..f3d208b
--- /dev/null
+++ b/content/en/docs/v1-9-0/cli.md
@@ -0,0 +1,73 @@
++++
+title = "CLI"
+
+date = 2024-05-21
+lastmod = 2024-05-21
+
+draft = false # Is this a draft? true/false
+toc = true # Show table of contents? true/false
+type = "docs" # Do not modify.
+
+# Add menu entry to sidebar.
+linktitle = "Commandline"
+[menu.v1-9-0]
+  parent = "cli"
+  weight = 1
++++
+
+## Introduction
+A Command Line Interface (CLI) is provided for you to manage resources.
+## Configuration
+
+1. You can obtain the latest executable file by cloning the code from GitHub and running the following command in the root directory of the project:
+```
+# make vcctl
+```
+2. Copy the executable file to $PATH. You can then execute it anywhere.
+
+## Command Line List
+### Listing all jobs
+vcctl job list
+
+```html
+# vcctl job list
+Name    Creation    Phase    JobType    Replicas    Min    Pending    Running    Succeeded    Failed    Unknown    RetryCount
+job-1   2020-09-01  Running  Batch      1           1      0          1          0            0         0          0
+```
+
+### Deleting a specific job
+vcctl job delete --name job-name [--namespace job-namespace]
+
+```html
+# vcctl job delete --name job-1 --namespace default
+delete job job-1 successfully
+```
+
+### Suspending a job
+vcctl job suspend --name job-name [--namespace job-namespace]
+
+```html
+# vcctl job suspend --name job-1 --namespace default
+```
+
+### Resuming a job (opposite to "vcctl job suspend")
+vcctl job resume --name job-name [--namespace job-namespace]
+
+```html
+# vcctl job resume --name job-1 --namespace default
+```
+
+### Running a job
+vcctl job run --name job-name [--namespace job-namespace]
+
+```html
+# vcctl job run --name job-1 --namespace default
+```
+
+## Note
+For more information about Volcano command lines, run the following commands:
+
+```html
+# vcctl -h
+# vcctl [command] -h
+```
diff --git a/content/en/docs/v1-9-0/contribution.md b/content/en/docs/v1-9-0/contribution.md
new file mode 100644
index 0000000..71b3ce8
--- /dev/null
+++ b/content/en/docs/v1-9-0/contribution.md
@@ -0,0 +1,164 @@
++++
+title = "Contribution"
+
+date = 2024-05-21
+lastmod = 2024-05-21
+
+draft = false # Is this a draft? true/false
+toc = true # Show table of contents? true/false
+type = "docs" # Do not modify.
+
+# Add menu entry to sidebar.
+linktitle = "Volcano Contribution"
+[menu.v1-9-0]
+  parent = "contribution"
+  weight = 1
++++
+
+# Welcome
+
+Welcome to Volcano!
+
+- [Before You Start](#before-you-start)
+  - [Code of Conduct](#code-of-conduct)
+  - [Community discussions](#community-discussions)
+  - [Community Expectations](#community-expectations)
+- [Getting Started](#getting-started)
+- [Your First Contribution](#your-first-contribution)
+  - [Find Something to Work On](#find-something-to-work-on)
+    - [Find a Good Topic](#find-a-good-topic)
+    - [Work on an Issue](#work-on-an-issue)
+    - [File an Issue](#file-an-issue)
+- [Contribution Workflow](#contribution-workflow)
+  - [Open a Pull Request](#open-a-pull-request)
+- [Code Review](#code-review)
+- [Commit Message Format](#commit-message-format)
+  - [Testing](#testing)
+
+## Before You Start
+
+### Code of Conduct
+
+All Volcano contributors must read and observe the [Code of Conduct](https://github.com/volcano-sh/website/blob/master/CODE_OF_CONDUCT.md).
+
+### Community discussions
+
+To better communicate with the developers in the Volcano community, please join the Volcano channel as follows.
+
+- Sign up at `slack.cncf.io` to join the CNCF Slack workspace.
+- Search for the `volcano` channel and join it to participate in community discussions.
+
+### Community Expectations
+
+Volcano is an open-source project driven by the Volcano community, which strives to promote a healthy, friendly, and productive environment.
+The community is committed to developing a system that helps run high-performance workloads, such as AI, ML, and deep learning applications, on Kubernetes. Building such a system would be impossible without the support of community contributors with similar aspirations.
+
+- For details about the community roles, see [Community Membership](https://github.com/volcano-sh/website/blob/master/content/en/docs/membership.md). If you make significant contributions, you will have a more advanced role in the community.
+
+
+## Getting Started
+
+- For more information on building and deployment, see [setup](https://github.com/volcano-sh/website/blob/master/content/en/docs/installation.md).
+
+
+## Your First Contribution
+
+You can contribute in different areas, including filing issues, developing features, fixing critical bugs, and getting your work reviewed and merged.
+
+If you have any questions about the development process, visit the [Slack Channel](https://volcano-sh.slack.com) ([sign up](https://join.slack.com/t/volcano-sh/shared_invite/enQtNTU5NTU3NDU0MTc4LTgzZTQ2MzViNTFmNDg1ZGUyMzcwNjgxZGQ1ZDdhOGE3Mzg1Y2NkZjk1MDJlZTZhZWU5MDg2MWJhMzI3Mjg3ZTk))
+or join our [mailing list](https://groups.google.com/forum/#!forum/volcano-sh).
+
+#### Find Something to Work On
+
+You are welcome to open issues concerning documentation, report bugs, and push changes to the repositories.
+Feel free to optimize code that does not follow the best coding practices, perform code refactoring, or compile test cases.
+The following steps will help you get started.
+
+#### Find a Good Topic
+
+There are [multiple repositories](https://github.com/volcano-sh/) within the Volcano organization, and each repository contains beginner-friendly issues that do not require a deep understanding of the Volcano project.
+For example, in [Volcano-Issues](https://github.com/volcano-sh/volcano), you can choose issues labeled with [help wanted](https://github.com/volcano-sh/volcano/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) or [good first issue](https://github.com/volcano-sh/volcano/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22).
+New contributors are welcome to work on these issues.
+
+Another good way to start is to find a document that needs improvement, for example, a document that is missing a link or contains a broken link. For details on the workflow, see [Contribution Workflow](#contribution-workflow).
+
+#### Work on an Issue
+
+When you are ready to work on an issue, reply with `/assign` or `/assign @yourself` on the issue.
+The bot will then assign the issue to you, and your name will be displayed in the `Assignees` list.
+
+#### File an Issue
+
+You are welcome to file issues to Volcano sub-repositories.
+
+*Example:* You can file an issue for [Volcano](https://github.com/volcano-sh/volcano/issues).
+
+Follow the submission guidelines when you open an issue.
+
+## Contribution Workflow
+
+All contributors are welcome to open issues and create pull requests.
+
+The contribution workflow is as follows:
+
+- Create a topic branch from the existing branch (usually the master branch).
+- Edit and commit the code.
+- Make sure the [commit message format](#commit-message-format) is followed.
+- Push changes in the topic branch to your remote personal fork of the repository.
+- Submit a pull request (PR) to [Volcano](https://github.com/volcano-sh/volcano). The PR must receive approval from at least two community maintainers before it can be merged.
+
+### Open a Pull Request
+
+Volcano follows the standard [GitHub pull request](https://help.github.com/articles/about-pull-requests/) process.
+
+Volcano bot will apply structured labels to your PRs.
+
+It also provides suggestions on commands in your PRs to facilitate review.
+These `/command` options can be annotated to trigger auto-labeling and notifications. For more information, see the [command reference documentation](https://go.k8s.io/bot-commands).
+
+### Code Review
+
+To make it easier for your PRs to receive reviews:
+
+* Follow [good coding guidelines](https://github.com/golang/go/wiki/CodeReviewComments).
+* Write [good commit messages](https://chris.beams.io/posts/git-commit/).
+* Break down large chunks of modification into smaller units that are logically independent and easy to understand.
+* Label your PRs properly so that they can be sent to appropriate reviewers. The bot will help you through the entire PR submission process.
+
+
+
+### Commit Message Format
+
+In the subject line mention the changes you have made, and in the message body provide the reasons for making these changes.
+
+```shell
+scripts: add test code for metamanager
+
+Unit test code is added to improve code coverage for metamanager.
+
+Fixes #12
+```
+
+A more formal format is as follows:
+
+```shell
+<subsystem>: <what changed>
+<BLANK LINE>
+<why this change was made>
+<BLANK LINE>
+<footer>
+```