Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Trial Template API controller implementation #1202

Merged
merged 10 commits into from
Jun 10, 2020
52 changes: 28 additions & 24 deletions examples/v1beta1/bayesianoptimization-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,44 +21,48 @@ spec:
maxTrialCount: 12
maxFailedTrialCount: 3
parameters:
- name: --lr
- name: lr
parameterType: double
feasibleSpace:
min: "0.01"
max: "0.03"
- name: --num-layers
- name: num-layers
parameterType: int
feasibleSpace:
min: "2"
max: "5"
- name: --optimizer
- name: optimizer
parameterType: categorical
feasibleSpace:
list:
- sgd
- adam
- ftrl
trialTemplate:
goTemplate:
rawTemplate: |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.Trial}}
namespace: {{.NameSpace}}
spec:
template:
spec:
containers:
- name: {{.Trial}}
trialParameters:
- name: learningRate
description: Learning rate for the training model
reference: lr
- name: numberLayers
description: Number of training model layers
reference: num-layers
- name: optimizer
description: Training model optimizer (sgd, adam or ftrl)
reference: optimizer
trialSpec:
apiVersion: batch/v1
kind: Job
spec:
template:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
restartPolicy: Never
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
- "--lr=${trialParameters.learningRate}"
- "--num-layers=${trialParameters.numberLayers}"
- "--optimizer=${trialParameters.optimizer}"
restartPolicy: Never
52 changes: 28 additions & 24 deletions examples/v1beta1/cmaes-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,44 +18,48 @@ spec:
maxTrialCount: 12
maxFailedTrialCount: 3
parameters:
- name: --lr
- name: lr
parameterType: double
feasibleSpace:
min: "0.01"
max: "0.03"
- name: --num-layers
- name: num-layers
parameterType: int
feasibleSpace:
min: "2"
max: "5"
- name: --optimizer
- name: optimizer
parameterType: categorical
feasibleSpace:
list:
- sgd
- adam
- ftrl
trialTemplate:
goTemplate:
rawTemplate: |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.Trial}}
namespace: {{.NameSpace}}
spec:
template:
spec:
containers:
- name: {{.Trial}}
trialParameters:
- name: learningRate
description: Learning rate for the training model
reference: lr
- name: numberLayers
description: Number of training model layers
reference: num-layers
- name: optimizer
description: Training model optimizer (sgd, adam or ftrl)
reference: optimizer
trialSpec:
apiVersion: batch/v1
kind: Job
spec:
template:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
restartPolicy: Never
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
- "--lr=${trialParameters.learningRate}"
- "--num-layers=${trialParameters.numberLayers}"
- "--optimizer=${trialParameters.optimizer}"
restartPolicy: Never
46 changes: 23 additions & 23 deletions examples/v1beta1/custom-metricscollector-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,38 +41,38 @@ spec:
maxTrialCount: 12
maxFailedTrialCount: 3
parameters:
- name: --lr
- name: lr
parameterType: double
feasibleSpace:
min: "0.01"
max: "0.03"
- name: --momentum
- name: momentum
parameterType: double
feasibleSpace:
min: "0.3"
max: "0.7"
trialTemplate:
goTemplate:
rawTemplate: |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.Trial}}
namespace: {{.NameSpace}}
spec:
template:
spec:
containers:
- name: {{.Trial}}
trialParameters:
- name: learningRate
description: Learning rate for the training model
reference: lr
- name: momentum
description: Momentum for the training model
reference: momentum
trialSpec:
apiVersion: batch/v1
kind: Job
spec:
template:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist
imagePullPolicy: Always
command:
- "python"
- "/var/mnist.py"
- "--epochs=1"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
restartPolicy: Never
- "python"
- "/var/mnist.py"
- "--epochs=1"
- "--lr=${trialParameters.learningRate}"
- "--momentum=${trialParameters.momentum}"
restartPolicy: Never
46 changes: 23 additions & 23 deletions examples/v1beta1/file-metricscollector-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,38 +28,38 @@ spec:
maxTrialCount: 12
maxFailedTrialCount: 3
parameters:
- name: --lr
- name: lr
parameterType: double
feasibleSpace:
min: "0.01"
max: "0.03"
- name: --momentum
- name: momentum
parameterType: double
feasibleSpace:
min: "0.3"
max: "0.7"
trialTemplate:
goTemplate:
rawTemplate: |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.Trial}}
namespace: {{.NameSpace}}
spec:
template:
spec:
containers:
- name: {{.Trial}}
trialParameters:
- name: learningRate
description: Learning rate for the training model
reference: lr
- name: momentum
description: Momentum for the training model
reference: momentum
trialSpec:
apiVersion: batch/v1
kind: Job
spec:
template:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/pytorch-mnist:1.0
imagePullPolicy: Always
command:
- "python"
- "/var/mnist.py"
- "--epochs=1"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
restartPolicy: Never
- "python"
- "/var/mnist.py"
- "--epochs=1"
- "--lr=${trialParameters.learningRate}"
- "--momentum=${trialParameters.momentum}"
restartPolicy: Never
57 changes: 28 additions & 29 deletions examples/v1beta1/grid-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,50 +18,49 @@ spec:
maxTrialCount: 12
maxFailedTrialCount: 3
parameters:
- name: --lr
- name: lr
parameterType: double
feasibleSpace:
min: "0.001"
max: "0.01"
step: "0.001"
- name: --num-layers
- name: num-layers
parameterType: int
feasibleSpace:
min: "2"
max: "5"
- name: --num-epochs
parameterType: int
feasibleSpace:
min: "10"
max: "15"
- name: --optimizer
- name: optimizer
parameterType: categorical
feasibleSpace:
list:
- sgd
- adam
- ftrl
trialTemplate:
goTemplate:
rawTemplate: |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.Trial}}
namespace: {{.NameSpace}}
spec:
template:
spec:
containers:
- name: {{.Trial}}
trialParameters:
- name: learningRate
description: Learning rate for the training model
reference: lr
- name: numberLayers
description: Number of training model layers
reference: num-layers
- name: optimizer
description: Training model optimizer (sgd, adam or ftrl)
reference: optimizer
trialSpec:
apiVersion: batch/v1
kind: Job
spec:
template:
spec:
containers:
- name: training-container
image: docker.io/kubeflowkatib/mxnet-mnist
command:
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
restartPolicy: Never
- "python3"
- "/opt/mxnet-mnist/mnist.py"
- "--batch-size=64"
- "--lr=${trialParameters.learningRate}"
- "--num-layers=${trialParameters.numberLayers}"
- "--optimizer=${trialParameters.optimizer}"
restartPolicy: Never
Loading