Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kubecon 2019 china demo #259

Merged
merged 2 commits into from
Jun 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions docs/samples/kubecon-2019-china/drf/nginx-1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
# DRF sample workload "nginx-1": eight nginx pods, one CPU each,
# placed by the Volcano scheduler.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx-1
  labels:
    app: nginx-1
spec:
  # Adjust the replica count to suit your cluster.
  replicas: 8
  selector:
    matchLabels:
      app: nginx-1
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: nginx-1
    spec:
      # Pod placement is delegated to the scheduler named "volcano".
      schedulerName: volcano
      containers:
        - name: nginx-1
          image: nginx
          resources:
            # Requests equal limits: one full CPU per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
26 changes: 26 additions & 0 deletions docs/samples/kubecon-2019-china/drf/nginx-2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
# DRF sample workload "nginx-2": eight nginx pods, one CPU each,
# placed by the Volcano scheduler.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx-2
  labels:
    app: nginx-2
spec:
  # Adjust the replica count to suit your cluster.
  replicas: 8
  selector:
    matchLabels:
      app: nginx-2
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: nginx-2
    spec:
      # Pod placement is delegated to the scheduler named "volcano".
      schedulerName: volcano
      containers:
        - name: nginx-2
          image: nginx
          resources:
            # Requests equal limits: one full CPU per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
25 changes: 25 additions & 0 deletions docs/samples/kubecon-2019-china/drf/nginx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
# DRF sample workload "nginx": eight nginx pods, one CPU each.
# No schedulerName is set on the pod template, so these pods are not
# explicitly assigned to the Volcano scheduler.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  # Adjust the replica count to suit your cluster.
  replicas: 8
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: nginx
    spec:
      containers:
        - name: nginx
          image: nginx
          resources:
            # Requests equal limits: one full CPU per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
77 changes: 77 additions & 0 deletions docs/samples/kubecon-2019-china/gang/mpi-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
---
# Gang-scheduling sample: a Volcano Job that runs mpi_hello_world on
# one master pod and three worker pods.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to your workload.
    volcano.sh/job-type: "MPI"
spec:
  # Minimum number of pods the job needs (must not exceed the total
  # replica count). Here it equals the total: 1 master + 3 workers.
  minAvailable: 4
  schedulerName: volcano
  plugins:
    # Provides password-less SSH authentication between the pods.
    ssh: []
    # Provides the network information the job needs: hosts files,
    # a headless service, and so on.
    svc: []
  # If any pod is evicted, restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - name: mpimaster
      replicas: 1
      # When mpiexec finishes, treat the whole MPI job as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          # Volcano places its job information under /etc/volcano.
          containers:
            - name: mpimaster
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              workingDir: /home
              # Build a comma-separated host list from the worker host
              # file, start sshd, then launch 3 processes with mpiexec.
              command:
                - /bin/sh
                - -c
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 3 mpi_hello_world;
              ports:
                - name: mpijob-port
                  containerPort: 22
              resources:
                requests:
                  cpu: "500m"
                limits:
                  cpu: "500m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - name: mpiworker
      replicas: 3
      template:
        spec:
          containers:
            - name: mpiworker
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              workingDir: /home
              # Run sshd in the foreground (-D) as the container's
              # main process.
              command:
                - /bin/sh
                - -c
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              ports:
                - name: mpijob-port
                  containerPort: 22
              resources:
                requests:
                  cpu: "1000m"
                limits:
                  cpu: "1000m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret

25 changes: 25 additions & 0 deletions docs/samples/kubecon-2019-china/gang/nginx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
# Gang-scheduling sample workload "nginx": six nginx pods, one CPU
# each. No schedulerName is set on the pod template, so these pods are
# not explicitly assigned to the Volcano scheduler.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  # Adjust the replica count to suit your cluster.
  replicas: 6
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: nginx
    spec:
      containers:
        - name: nginx
          image: nginx
          resources:
            # Requests equal limits: one full CPU per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
---
# Volcano Job that runs the Horovod TensorFlow MNIST script through
# mpiexec on one master pod and three worker pods.
#
# The document-start marker is placed at the top of the file; a
# trailing "---" would open an empty extra document for
# multi-document loaders.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-horovod-job
  labels:
    volcano.sh/job-type: Horovod
spec:
  # Equals the total replica count: 1 master + 3 workers.
  minAvailable: 4
  schedulerName: volcano
  plugins:
    ssh: []
    svc: []
  # If any pod is evicted, restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - name: master
      replicas: 1
      # Completion of the master task completes the whole job.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          containers:
            - name: master
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5
              # Build a comma-separated host list from the worker host
              # file, start sshd, then launch 3 processes with mpiexec.
              command:
                - /bin/sh
                - -c
                - |
                  WORKER_HOST=`cat /etc/volcano/worker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${WORKER_HOST} -np 3 python tensorflow_mnist_lm.py;
              ports:
                - name: job-port
                  containerPort: 22
              resources:
                requests:
                  cpu: "500m"
                  memory: "1024Mi"
                limits:
                  cpu: "500m"
                  memory: "1024Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - name: worker
      replicas: 3
      template:
        spec:
          containers:
            - name: worker
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5
              # Run sshd in the foreground (-D) as the container's
              # main process.
              command:
                - /bin/sh
                - -c
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              ports:
                - name: job-port
                  containerPort: 22
              resources:
                requests:
                  cpu: "1000m"
                  memory: "2048Mi"
                limits:
                  cpu: "1000m"
                  memory: "2048Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
81 changes: 81 additions & 0 deletions docs/samples/kubecon-2019-china/mpi-sample/mpi-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
---
# MPI sample: a Volcano Job that runs mpi_hello_world on one master
# pod and two worker pods.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to your workload.
    volcano.sh/job-type: "MPI"
spec:
  # Minimum number of pods the job needs (must not exceed the total
  # replica count). Here it equals the total: 1 master + 2 workers.
  minAvailable: 3
  schedulerName: volcano
  plugins:
    # Provides password-less SSH authentication between the pods.
    ssh: []
    # Provides the network information the job needs: hosts files,
    # a headless service, and so on.
    svc: []
  # If any pod is evicted, restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - name: mpimaster
      replicas: 1
      # When mpiexec finishes, treat the whole MPI job as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          # Volcano places its job information under /etc/volcano.
          containers:
            - name: mpimaster
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              workingDir: /home
              # Build a comma-separated host list from the worker host
              # file, start sshd, then launch 2 processes with mpiexec.
              command:
                - /bin/sh
                - -c
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world;
              ports:
                - name: mpijob-port
                  containerPort: 22
              resources:
                requests:
                  cpu: "500m"
                  memory: "1024Mi"
                limits:
                  cpu: "500m"
                  memory: "1024Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - name: mpiworker
      replicas: 2
      template:
        spec:
          containers:
            - name: mpiworker
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              workingDir: /home
              # Run sshd in the foreground (-D) as the container's
              # main process.
              command:
                - /bin/sh
                - -c
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              ports:
                - name: mpijob-port
                  containerPort: 22
              resources:
                # NOTE(review): 1024m is slightly more than one CPU
                # (1000m); confirm this is intended and not a mix-up
                # with the binary "Mi" memory units below.
                requests:
                  cpu: "1024m"
                  memory: "2048Mi"
                limits:
                  cpu: "1024m"
                  memory: "2048Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret

28 changes: 28 additions & 0 deletions docs/samples/kubecon-2019-china/node-selector/nginx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
# Node-selector sample: eight nginx pods, one CPU each, placed by the
# Volcano scheduler and restricted to a single node via nodeSelector.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  replicas: 8
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: nginx
    spec:
      # The Volcano scheduler has already passed the conformance tests.
      schedulerName: volcano
      # Only nodes labelled with this hostname are eligible; replace the
      # value with the hostname label of a node in your own cluster.
      nodeSelector:
        kubernetes.io/hostname: "192.168.23.24"
      containers:
        - name: nginx
          image: nginx
          resources:
            # Requests equal limits: one full CPU per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
Loading