Skip to content

Commit

Permalink
feat: Add HyperBand (#787)
Browse files Browse the repository at this point in the history
* feat: Add HyperBand

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* chore: Add test in CI

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Fix

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Fix name

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Fix name

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Fix script

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Fix r_l

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Add parallel trial count

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Add output

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* feat: Append algorithm settings

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Add output

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Fix useless variable

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Use resource_name instead of ResourceName

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Update

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Avoid nil pointer exception

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* feat: Move algorithm to status

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Add max

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Use algorithm settings

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Remove updateSpec

Signed-off-by: Ce Gao <gaoce@caicloud.io>

* fix: Fix test

Signed-off-by: Ce Gao <gaoce@caicloud.io>
  • Loading branch information
gaocegege authored and k8s-ci-robot committed Sep 25, 2019
1 parent e9e0768 commit cc76656
Show file tree
Hide file tree
Showing 24 changed files with 613 additions and 226 deletions.
11 changes: 8 additions & 3 deletions cmd/suggestion/hyperband/v1alpha3/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
FROM python:3

ADD . /usr/src/app/github.com/kubeflow/katib
WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/hyperband/v1alpha3
RUN if [ "$(uname -m)" = "ppc64le" ]; then \
apt-get -y update && \
apt-get -y install gfortran libopenblas-dev liblapack-dev && \
pip install cython; \
fi
RUN GRPC_HEALTH_PROBE_VERSION=v0.3.0 && \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \
chmod +x /bin/grpc_health_probe

ADD . /usr/src/app/github.com/kubeflow/katib
WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/hyperband/v1alpha3
RUN pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/v1alpha3/python

ENV PYTHONPATH /usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/v1alpha3/python:/usr/src/app/github.com/kubeflow/katib/pkg/apis/manager/health/python

ENTRYPOINT ["python", "main.py"]
8 changes: 7 additions & 1 deletion cmd/suggestion/hyperband/v1alpha3/main.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import grpc
import time
from pkg.apis.manager.v1alpha3.python import api_pb2_grpc
from pkg.apis.manager.health.python import health_pb2_grpc
from pkg.suggestion.v1alpha3.hyperband_service import HyperbandService
from concurrent import futures

_ONE_DAY_IN_SECONDS = 60 * 60 * 24
DEFAULT_PORT = "0.0.0.0:6789"


def serve():
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
api_pb2_grpc.add_SuggestionServicer_to_server(HyperbandService(), server)
service = HyperbandService()
api_pb2_grpc.add_SuggestionServicer_to_server(service, server)
health_pb2_grpc.add_HealthServicer_to_server(service, server)

server.add_insecure_port(DEFAULT_PORT)
print("Listening...")
server.start()
Expand All @@ -19,5 +24,6 @@ def serve():
except KeyboardInterrupt:
server.stop(0)


if __name__ == "__main__":
serve()
10 changes: 5 additions & 5 deletions cmd/suggestion/hyperband/v1alpha3/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
grpcio
duecredit
grpcio==1.23.0
duecredit===0.7.0
cloudpickle==0.5.6
numpy>=1.13.3
scikit-learn>=0.19.0
scipy>=0.19.1
forestci
protobuf
googleapis-common-protos
forestci==0.3
protobuf==3.9.1
googleapis-common-protos==1.6.0
5 changes: 3 additions & 2 deletions examples/v1alpha3/hyperband-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ metadata:
name: hyperband-example
spec:
parallelTrialCount: 9
maxTrialCount: 9
objective:
type: maximize
goal: 0.99
Expand All @@ -14,7 +15,7 @@ spec:
algorithm:
algorithmName: hyperband
algorithmSettings:
- name: "resourceName"
- name: "resource_name"
value: "--num-epochs"
- name: "eta"
value: "3"
Expand All @@ -41,7 +42,7 @@ spec:
- ftrl
- name: --num-epochs
parametertype: int
feasible:
feasibleSpace:
min: "20"
max: "20"
trialTemplate:
Expand Down
1 change: 1 addition & 0 deletions manifests/v1alpha3/katib-controller/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ rules:
- serviceaccounts
- services
- secrets
- events
verbs:
- "*"
- apiGroups:
Expand Down
7 changes: 4 additions & 3 deletions pkg/apis/controller/suggestions/v1alpha3/suggestion_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@ import (

// SuggestionSpec defines the desired state of Suggestion
type SuggestionSpec struct {
AlgorithmName string `json:"algorithmName"`
// Number of suggestions requested
Requests int32 `json:"requests,omitempty"`

//Algorithm settings set by the user in the experiment config
AlgorithmSpec *common.AlgorithmSpec `json:"algorithmSpec,omitempty"`
}

// SuggestionStatus defines the observed state of Suggestion
type SuggestionStatus struct {
// AlgorithmSettings set by the algorithm services.
AlgorithmSettings []common.AlgorithmSetting `json:"algorithmSettings,omitempty"`

// Suggestion results
Suggestions []TrialAssignment `json:"suggestions,omitempty"`

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func (g *General) createSuggestion(instance *experimentsv1alpha3.Experiment, sug
Namespace: instance.Namespace,
},
Spec: suggestionsv1alpha3.SuggestionSpec{
AlgorithmSpec: instance.Spec.Algorithm,
AlgorithmName: instance.Spec.Algorithm.AlgorithmName,
Requests: suggestionRequests,
},
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller.v1alpha3/suggestion/composer/composer.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func (g *General) DesiredService(s *suggestionsv1alpha3.Suggestion) (*corev1.Ser
}

func (g *General) desiredContainer(s *suggestionsv1alpha3.Suggestion) (*corev1.Container, error) {
suggestionContainerImage, err := g.getSuggestionContainerImage(s.Spec.AlgorithmSpec.AlgorithmName)
suggestionContainerImage, err := g.getSuggestionContainerImage(s.Spec.AlgorithmName)
if err != nil {
return nil, err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"
logf "sigs.k8s.io/controller-runtime/pkg/runtime/log"

commonv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/common/v1alpha3"
suggestionsv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/suggestions/v1alpha3"
)

Expand All @@ -56,10 +55,8 @@ func TestReconcile(t *testing.T) {
Namespace: "default",
},
Spec: suggestionsv1alpha3.SuggestionSpec{
Requests: 1,
AlgorithmSpec: &commonv1alpha3.AlgorithmSpec{
AlgorithmName: "random",
},
Requests: 1,
AlgorithmName: "random",
},
}
configMap := newKatibConfigMapInstance()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package suggestionclient

import (
common "github.com/kubeflow/katib/pkg/apis/controller/common/v1alpha3"
experimentsv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/experiments/v1alpha3"
suggestionsv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/suggestions/v1alpha3"
suggestionapi "github.com/kubeflow/katib/pkg/apis/manager/v1alpha3"
)

// appendAlgorithmSettingsFromSuggestion merges the algorithm settings carried
// by a suggestion into experiment.Spec.Algorithm.AlgorithmSettings.
//
// For every incoming setting, an entry with the same name that already exists
// in the experiment has its value overwritten; a setting with an unknown name
// is appended to the end of the list. All writes go through
// experiment.Spec.Algorithm, which is dereferenced without a nil check.
// NOTE(review): callers presumably guarantee Spec.Algorithm is set — confirm.
func appendAlgorithmSettingsFromSuggestion(experiment *experimentsv1alpha3.Experiment, algoSettingsInSuggestion []common.AlgorithmSetting) {
	spec := experiment.Spec.Algorithm
	for _, incoming := range algoSettingsInSuggestion {
		pos, exists := contains(spec.AlgorithmSettings, incoming.Name)
		if !exists {
			// Not known to the experiment yet: add it as a new entry.
			spec.AlgorithmSettings = append(spec.AlgorithmSettings, incoming)
			continue
		}
		// Already present: the value from the suggestion wins.
		spec.AlgorithmSettings[pos].Value = incoming.Value
	}
}

// updateAlgorithmSettings folds the settings listed in algorithm.AlgorithmSetting
// into suggestion.Status.AlgorithmSettings.
//
// Nil entries are skipped. A setting whose name is already recorded in the
// suggestion status has its value replaced; any other setting is appended as
// a new common.AlgorithmSetting built from its name and value.
// algorithm itself is dereferenced without a nil check.
func updateAlgorithmSettings(suggestion *suggestionsv1alpha3.Suggestion, algorithm *suggestionapi.AlgorithmSpec) {
	for _, reported := range algorithm.AlgorithmSetting {
		if reported == nil {
			continue
		}
		if pos, exists := contains(suggestion.Status.AlgorithmSettings, reported.Name); exists {
			// Known setting: overwrite the stored value.
			suggestion.Status.AlgorithmSettings[pos].Value = reported.Value
			continue
		}
		// New setting: record it in the status.
		suggestion.Status.AlgorithmSettings = append(
			suggestion.Status.AlgorithmSettings,
			common.AlgorithmSetting{
				Name:  reported.Name,
				Value: reported.Value,
			},
		)
	}
}

// contains looks up the first setting in algorithmSettings whose Name equals
// name. It returns that setting's index and true on a match, or -1 and false
// when no setting has that name (including when the slice is empty or nil).
func contains(algorithmSettings []common.AlgorithmSetting,
	name string) (int, bool) {
	for idx := 0; idx < len(algorithmSettings); idx++ {
		if algorithmSettings[idx].Name != name {
			continue
		}
		return idx, true
	}
	return -1, false
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,24 @@ var (
timeout = 60 * time.Second
)

// SuggestionClient is the interface to communicate with algorithm services.
type SuggestionClient interface {
// SyncAssignments syncs assignments from the algorithm service for the
// given suggestion, using the experiment and its trials as input.
// It returns a non-nil error when the sync fails.
SyncAssignments(instance *suggestionsv1alpha3.Suggestion, e *experimentsv1alpha3.Experiment,
ts []trialsv1alpha3.Trial) error

// ValidateAlgorithmSettings validates if the algorithm specific
// configurations of the experiment are valid, returning a non-nil error
// when they are not.
ValidateAlgorithmSettings(instance *suggestionsv1alpha3.Suggestion, e *experimentsv1alpha3.Experiment) error
}

// General is the implementation for SuggestionClient.
// It declares no fields, so a value carries no per-instance state.
type General struct {
}

// New returns a SuggestionClient backed by a freshly allocated General.
func New() SuggestionClient {
	return new(General)
}

// SyncAssignments syncs assignments from algorithm services.
func (g *General) SyncAssignments(
instance *suggestionsv1alpha3.Suggestion,
e *experimentsv1alpha3.Experiment,
Expand All @@ -60,8 +64,13 @@ func (g *General) SyncAssignments(
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

// Algorithm settings in suggestion will overwrite the settings in experiment.
filledE := e.DeepCopy()
appendAlgorithmSettingsFromSuggestion(filledE,
instance.Status.AlgorithmSettings)

request := &suggestionapi.GetSuggestionsRequest{
Experiment: g.ConvertExperiment(e),
Experiment: g.ConvertExperiment(filledE),
Trials: g.ConvertTrials(ts),
RequestNumber: int32(requestNum),
}
Expand All @@ -83,10 +92,13 @@ func (g *General) SyncAssignments(
})
}

// TODO(gaocegege): Set algorithm settings
if response.Algorithm != nil {
updateAlgorithmSettings(instance, response.Algorithm)
}
return nil
}

// ValidateAlgorithmSettings validates if the algorithm specific configurations are valid.
func (g *General) ValidateAlgorithmSettings(instance *suggestionsv1alpha3.Suggestion, e *experimentsv1alpha3.Experiment) error {
logger := log.WithValues("Suggestion", types.NamespacedName{Name: instance.GetName(), Namespace: instance.GetNamespace()})
endpoint := fmt.Sprintf("%s:%d", instance.Name, consts.DefaultSuggestionPort)
Expand Down Expand Up @@ -151,6 +163,12 @@ func (g *General) ConvertExperiment(e *experimentsv1alpha3.Experiment) *suggesti
if e.Spec.NasConfig != nil {
res.Spec.NasConfig = convertNasConfig(e.Spec.NasConfig)
}
if e.Spec.ParallelTrialCount != nil {
res.Spec.ParallelTrialCount = *e.Spec.ParallelTrialCount
}
if e.Spec.MaxTrialCount != nil {
res.Spec.MaxTrialCount = *e.Spec.MaxTrialCount
}
return res
}

Expand Down
Loading

0 comments on commit cc76656

Please sign in to comment.