diff --git a/pkg/suggestion/v1alpha2/bayesian_service.py b/pkg/suggestion/v1alpha2/bayesian_service.py
index a168f2feb9a..04aaec4ca05 100644
--- a/pkg/suggestion/v1alpha2/bayesian_service.py
+++ b/pkg/suggestion/v1alpha2/bayesian_service.py
@@ -10,6 +10,7 @@ from pkg.suggestion.v1alpha2.bayesianoptimization.src.algorithm_manager import AlgorithmManager
 import logging
 from logging import getLogger, StreamHandler, INFO, DEBUG
+from . import parsing_util
 
 timeout = 10
 
@@ -22,8 +23,8 @@ def __init__(self, logger=None):
             FORMAT = '%(asctime)-15s Experiment %(experiment_name)s %(message)s'
             logging.basicConfig(format=FORMAT)
             handler = StreamHandler()
-            handler.setLevel(INFO)
-            self.logger.setLevel(INFO)
+            handler.setLevel(DEBUG)
+            self.logger.setLevel(DEBUG)
             self.logger.addHandler(handler)
             self.logger.propagate = False
         else:
@@ -34,7 +35,7 @@ def _get_experiment(self, name):
             self.manager_addr, self.manager_port)
         with api_pb2.beta_create_Manager_stub(channel) as client:
             exp = client.GetExperiment(
-                api_pb2.GetExperimentRequest(experiment_name=name), 10)
+                api_pb2.GetExperimentRequest(experiment_name=name), timeout)
             return exp.experiment
 
     def ValidateAlgorithmSettings(self, request, context):
@@ -49,9 +50,12 @@ def GetSuggestions(self, request, context):
         X_train, y_train = self.getEvalHistory(
             request.experiment_name,
             experiment.spec.objective.objective_metric_name, service_params["burn_in"])
+        parameter_config = parsing_util.parse_parameter_configs(
+            experiment.spec.parameter_specs.parameters)
         algo_manager = AlgorithmManager(
             experiment_name=request.experiment_name,
             experiment=experiment,
+            parameter_config=parameter_config,
             X_train=X_train,
             y_train=y_train,
             logger=self.logger,
@@ -64,6 +68,7 @@ def GetSuggestions(self, request, context):
         self.logger.debug("upperbound: %r", upperbound,
                           extra={"experiment_name": request.experiment_name})
         alg = BOAlgorithm(
+            experiment_name=request.experiment_name,
             dim=algo_manager.dim,
             N=int(service_params["N"]),
             lowerbound=lowerbound,
@@ -82,8 +87,12 @@ def GetSuggestions(self, request, context):
             model_type=service_params["model_type"],
             logger=self.logger,
         )
+        self.logger.debug("alg: %r", alg,
+                          extra={"experiment_name": request.experiment_name})
         trials = []
         x_next_list = alg.get_suggestion(request.request_number)
+        self.logger.debug("x_next_list: %r", x_next_list,
+                          extra={"experiment_name": request.experiment_name})
         for x_next in x_next_list:
             x_next = x_next.squeeze()
             self.logger.debug("xnext: %r ", x_next, extra={
@@ -116,7 +125,7 @@ def getEvalHistory(self, experiment_name, obj_name, burn_in):
         with api_pb2.beta_create_Manager_stub(channel) as client:
             trialsrep = client.GetTrialList(api_pb2.GetTrialListRequest(
                 experiment_name=experiment_name
-            ))
+            ), timeout)
             for t in trialsrep.trials:
                 if t.status.condition == api_pb2.TrialStatus.TrialConditionType.SUCCEEDED:
                     gwfrep = client.GetObservationLog(
diff --git a/pkg/suggestion/v1alpha2/bayesianoptimization/src/algorithm_manager.py b/pkg/suggestion/v1alpha2/bayesianoptimization/src/algorithm_manager.py
index 1fd7318813b..4a09bb400ce 100644
--- a/pkg/suggestion/v1alpha2/bayesianoptimization/src/algorithm_manager.py
+++ b/pkg/suggestion/v1alpha2/bayesianoptimization/src/algorithm_manager.py
@@ -25,22 +25,20 @@ class AlgorithmManager:
     provide some helper functions
     """
 
-    def __init__(self, experiment_name, experiment, X_train, y_train, logger=None):
+    def __init__(self, experiment_name, experiment, parameter_config, X_train, y_train, logger=None):
         self.logger = logger if (logger is not None) else get_logger()
         self._experiment_name = experiment_name
         self._experiment = experiment
         self._goal = self._experiment.spec.objective.type
-        self._dim = 0
-        self._lowerbound = []
-        self._upperbound = []
-        self._types = []
-        self._names = []
+        self._dim = parameter_config.dim
+        self._lowerbound = parameter_config.lower_bounds
+        self._upperbound = parameter_config.upper_bounds
+        self._types = parameter_config.parameter_types
+        self._names = parameter_config.names
         # record all the feasible values of discrete type variables
-        self._discrete_info = []
-        self._categorical_info = []
-        self._name_id = {}
-
-        self._parse_config()
+        self._discrete_info = parameter_config.discrete_info
+        self._categorical_info = parameter_config.categorical_info
+        self._name_id = parameter_config.name_ids
 
         self._X_train = self._mapping_params(X_train)
         self.parse_X()
@@ -108,40 +106,6 @@ def y_train(self):
         """ return the target of the training data"""
         return self._y_train
 
-    def _parse_config(self):
-        """ extract info from the study configuration """
-        for i, param in enumerate(self._experiment.spec.parameter_specs.parameters):
-            self._name_id[param.name] = i
-            self._types.append(param.parameter_type)
-            self._names.append(param.name)
-            if param.parameter_type in [api_pb2.DOUBLE, api_pb2.INT]:
-                self._dim = self._dim + 1
-                self._lowerbound.append(float(param.feasible_space.min))
-                self._upperbound.append(float(param.feasible_space.max))
-            elif param.parameter_type == api_pb2.DISCRETE:
-                self._dim = self._dim + 1
-                discrete_values = [int(x) for x in param.feasible_space.list]
-                min_value = min(discrete_values)
-                max_value = max(discrete_values)
-                self._lowerbound.append(min_value)
-                self._upperbound.append(max_value)
-                self._discrete_info.append(dict({
-                    "name": param.name,
-                    "values": discrete_values,
-                }))
-            # one hot encoding for categorical type
-            elif param.parameter_type == api_pb2.CATEGORICAL:
-                num_feasible = len(param.feasible.list)
-                for i in range(num_feasible):
-                    self._lowerbound.append(0)
-                    self._upperbound.append(1)
-                self._categorical_info.append(dict({
-                    "name": param.name,
-                    "values": param.feasible.list,
-                    "number": num_feasible,
-                }))
-                self._dim += num_feasible
-
     def _mapping_params(self, parameters_list):
         if len(parameters_list) == 0:
             return None
@@ -169,6 +133,8 @@ def _parse_metric(self):
         """ parse the metric to the dictionary """
+        self.logger.info("Ytrain: %r", self._y_train, extra={
+            "Experiment": self._experiment_name})
         if not self._y_train:
             self._y_train = None
             return
diff --git a/pkg/suggestion/v1alpha2/bayesianoptimization/src/bayesian_optimization_algorithm.py b/pkg/suggestion/v1alpha2/bayesianoptimization/src/bayesian_optimization_algorithm.py
index 6b2e57c3f9d..ce207e91765 100644
--- a/pkg/suggestion/v1alpha2/bayesianoptimization/src/bayesian_optimization_algorithm.py
+++ b/pkg/suggestion/v1alpha2/bayesianoptimization/src/bayesian_optimization_algorithm.py
@@ -7,15 +7,18 @@ class BOAlgorithm:
     """ class for bayesian optimization """
-    def __init__(self, dim, N, lowerbound, upperbound, X_train, y_train, mode, trade_off,
+
+    def __init__(self, experiment_name, dim, N, lowerbound, upperbound, X_train, y_train, mode, trade_off,
                  length_scale, noise, nu, kernel_type, n_estimators, max_features, model_type, logger=None):
         # np.random.seed(0)
+        self._experiment_name = experiment_name
         self.dim = dim
         self.N = N or 100
         self.l = np.zeros((1, dim))
         self.u = np.ones((1, dim))
         self.lowerbound = lowerbound.reshape(1, dim)
         self.upperbound = upperbound.reshape(1, dim)
+        self.logger = logger
 
         # normalize the upperbound and lowerbound to [0, 1]
         self.scaler = MinMaxScaler()
@@ -27,27 +30,31 @@ def __init__(self, dim, N, lowerbound, upperbound, X_train, y_train, mode, trade
             self.current_optimal = None
         else:
             self.current_optimal = max(self.y_train)
-
-        # initialize the global optimizer
-        self.optimizer = GlobalOptimizer(
-            N,
-            self.l,
-            self.u,
-            self.scaler,
-            self.X_train,
-            self.y_train,
-            self.current_optimal,
-            mode=mode,
-            trade_off=trade_off,
-            length_scale=length_scale,
-            noise=noise,
-            nu=nu,
-            kernel_type=kernel_type,
-            n_estimators=n_estimators,
-            max_features=max_features,
-            model_type=model_type,
-            logger=logger,
-        )
+        self.logger.debug("create optimizer", extra={
+            "Experiment": self._experiment_name})
+        # initialize the global optimizer
+        self.optimizer = GlobalOptimizer(
+            N,
+            self.l,
+            self.u,
+            self.scaler,
+            self.X_train,
+            self.y_train,
+            self.current_optimal,
+            experiment_name=self._experiment_name,
+            mode=mode,
+            trade_off=trade_off,
+            length_scale=length_scale,
+            noise=noise,
+            nu=nu,
+            kernel_type=kernel_type,
+            n_estimators=n_estimators,
+            max_features=max_features,
+            model_type=model_type,
+            logger=logger,
+        )
+        self.logger.debug("optimizer created", extra={
+            "Experiment": self._experiment_name})
 
     def get_suggestion(self, request_num):
         """ main function to provide suggestion """
@@ -55,7 +62,8 @@ def get_suggestion(self, request_num):
         x_next_list = []
         if self.X_train is None and self.y_train is None and self.current_optimal is None:
             # randomly pick a point as the first trial
             for _ in range(request_num):
-                x_next_list.append(np.random.uniform(self.lowerbound, self.upperbound, size=(1, self.dim)))
+                x_next_list.append(np.random.uniform(
+                    self.lowerbound, self.upperbound, size=(1, self.dim)))
         else:
             _, x_next_list_que = self.optimizer.direct(request_num)
             for xn in x_next_list_que:
diff --git a/pkg/suggestion/v1alpha2/bayesianoptimization/src/global_optimizer.py b/pkg/suggestion/v1alpha2/bayesianoptimization/src/global_optimizer.py
index 7a307c86af9..acd9138b610 100644
--- a/pkg/suggestion/v1alpha2/bayesianoptimization/src/global_optimizer.py
+++ b/pkg/suggestion/v1alpha2/bayesianoptimization/src/global_optimizer.py
@@ -22,7 +22,8 @@ def __init__(self, l, u, division_num, dim, scaler, aq_func):
         self.center = (l + u) / 2
         j = np.mod(division_num, dim)
         k = (division_num - j) / dim
-        self.d = np.sqrt(j * np.power(3, float(-2 * (k + 1))) + (dim - j) * np.power(3, float(-2 * k))) / 2
+        self.d = np.sqrt(j * np.power(3, float(-2 * (k + 1))) +
+                         (dim - j) * np.power(3, float(-2 * k))) / 2
         self.division_num = division_num
         self.fc, _, _ = aq_func.compute(scaler.inverse_transform(self.center))
         self.fc = -self.fc
@@ -73,7 +74,8 @@ def __init__(self, dim, fc):
 
 class GlobalOptimizer:
     """ class for the global optimizer """
-    def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, mode, trade_off, length_scale,
+    def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal,
+                 experiment_name, mode, trade_off, length_scale,
                  noise, nu, kernel_type, n_estimators, max_features, model_type, logger=None):
         self.logger = logger if (logger is not None) else get_logger()
         self.N = N
@@ -82,6 +84,7 @@ def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, mode, tra
         self.scaler = scaler
         self.buckets = []
         self.dim = None
+        self._experiment_name = experiment_name
         if model_type == "gp":
             model = GaussianProcessModel(
                 length_scale=length_scale,
@@ -94,7 +97,11 @@ def __init__(self, N, l, u, scaler, X_train, y_train, current_optimal, mode, tra
                 n_estimators=n_estimators,
                 max_features=max_features,
             )
+        self.logger.debug("before model fit", extra={
+            "Experiment": self._experiment_name})
         model.fit(X_train, y_train)
+        self.logger.debug("after model fit", extra={
+            "Experiment": self._experiment_name})
         self.aq_func = AcquisitionFunc(
             model=model,
             current_optimal=current_optimal,
@@ -120,7 +127,8 @@ def potential_opt(self, f_min):
             prev = len(opt_list) - 1
             diff1 = b[i].d
             diff2 = opt_list[prev].point.d
-            current_slope = (b[i].fc - opt_list[prev].point.fc) / (diff1 - diff2)
+            current_slope = (
+                b[i].fc - opt_list[prev].point.fc) / (diff1 - diff2)
             prev_slope = opt_list[prev].slope
 
             while prev >= 0 and current_slope < prev_slope:
@@ -130,7 +138,8 @@ def potential_opt(self, f_min):
                 prev_slope = opt_list[prev].slope
                 diff1 = b[i].d
                 diff2 = opt_list[prev].point.d
-                current_slope = (b[i].fc - opt_list[prev].point.fc) / (diff1 - diff2)
+                current_slope = (
+                    b[i].fc - opt_list[prev].point.fc) / (diff1 - diff2)
 
             opt_list.append(OptimalPoint(b[i], prev, current_slope))
 
@@ -238,8 +247,10 @@ def divide_rect(self, opt_rect, f_min, x_next, aq_func, scaler):
             e = np.zeros((1, self.dim))
             e[0, i] = 1
             function_value = min(
-                aq_func.compute(scaler.inverse_transform(rect.center + delta * e)),
-                aq_func.compute(scaler.inverse_transform(rect.center - delta * e))
+                aq_func.compute(scaler.inverse_transform(
+                    rect.center + delta * e)),
+                aq_func.compute(scaler.inverse_transform(
+                    rect.center - delta * e))
             )
             dim_list.append(DimPack(i, function_value))
         dim_list.sort(key=lambda x: x.fc)
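The new parsing_util.parse_parameter_configs helper that bayesian_service.py now imports and calls is not part of the hunks shown above. As a reading aid only, here is a minimal sketch of what such a helper might look like, reconstructed from the _parse_config method removed from algorithm_manager.py. The ParameterConfig namedtuple and its field names (dim, lower_bounds, upper_bounds, parameter_types, names, discrete_info, categorical_info, name_ids) are inferred from how the new AlgorithmManager.__init__ consumes parameter_config; the api_pb2 import path is an assumption matching the other v1alpha2 suggestion modules. This is an illustration, not the actual parsing_util.py added by this change.

from collections import namedtuple

# Assumed import path, mirroring the rest of the v1alpha2 suggestion code.
from pkg.apis.manager.v1alpha2.python import api_pb2

# Hypothetical container exposing exactly the fields that the new
# AlgorithmManager.__init__ reads from parameter_config.
ParameterConfig = namedtuple(
    "ParameterConfig",
    ["dim", "lower_bounds", "upper_bounds", "parameter_types", "names",
     "discrete_info", "categorical_info", "name_ids"])


def parse_parameter_configs(parameters):
    """Collect dimension, bounds and type info from experiment parameter specs."""
    dim = 0
    lower_bounds, upper_bounds, parameter_types, names = [], [], [], []
    discrete_info, categorical_info, name_ids = [], [], {}
    for i, param in enumerate(parameters):
        name_ids[param.name] = i
        parameter_types.append(param.parameter_type)
        names.append(param.name)
        if param.parameter_type in [api_pb2.DOUBLE, api_pb2.INT]:
            # one continuous dimension bounded by the feasible min/max
            dim += 1
            lower_bounds.append(float(param.feasible_space.min))
            upper_bounds.append(float(param.feasible_space.max))
        elif param.parameter_type == api_pb2.DISCRETE:
            # one dimension bounded by the smallest/largest feasible value
            dim += 1
            values = [int(x) for x in param.feasible_space.list]
            lower_bounds.append(min(values))
            upper_bounds.append(max(values))
            discrete_info.append({"name": param.name, "values": values})
        elif param.parameter_type == api_pb2.CATEGORICAL:
            # one-hot encoding: one [0, 1] dimension per category
            num_feasible = len(param.feasible_space.list)
            lower_bounds.extend([0] * num_feasible)
            upper_bounds.extend([1] * num_feasible)
            categorical_info.append({
                "name": param.name,
                "values": list(param.feasible_space.list),
                "number": num_feasible,
            })
            dim += num_feasible
    return ParameterConfig(dim, lower_bounds, upper_bounds, parameter_types,
                           names, discrete_info, categorical_info, name_ids)

One detail worth flagging: the removed _parse_config read param.feasible.list in the CATEGORICAL branch but param.feasible_space.list everywhere else; the sketch assumes feasible_space is the intended field in both places.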