
Merge branch 'AUTOML-20' into 'master'
[AUTOML-20] rm DLOptunaTuner

See merge request ai-lab-pmo/mltools/automl/LightAutoML!36
dev-rinchin committed Dec 11, 2024
2 parents 5dbd34c + fe4ab62 commit c092d5c
Showing 6 changed files with 45 additions and 276 deletions.
10 changes: 7 additions & 3 deletions examples/optimization/conditional_parameters.py
@@ -20,12 +20,16 @@
 
 def sample(optimization_search_space, trial, suggested_params):
     trial_values = copy.copy(suggested_params)
-    trial_values["feature_fraction"] = trial.suggest_uniform("feature_fraction", low=0.5, high=1.0)
+    trial_values["feature_fraction"] = trial.suggest_float("feature_fraction", low=0.5, high=1.0)
 
     if trial_values["feature_fraction"] > 0.7:
-        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_uniform("min_sum_hessian_in_leaf", low=0.5, high=1)
+        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_float(
+            "min_sum_hessian_in_leaf", low=0.5, high=1, log=True
+        )
     else:
-        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_uniform("min_sum_hessian_in_leaf", low=0, high=0.5)
+        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_float(
+            "min_sum_hessian_in_leaf", low=0, high=0.5, log=True
+        )
 
     return trial_values
 
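The change in this example file is the standard Optuna 3.x migration: suggest_uniform and suggest_loguniform are deprecated in favor of suggest_float, with log=True for log-scaled ranges. Below is a minimal, self-contained sketch of that mapping; the toy objective and study are illustrative only and are not LightAutoML code.

import optuna


def objective(trial: optuna.trial.Trial) -> float:
    # Previously: trial.suggest_uniform("feature_fraction", 0.5, 1.0)
    feature_fraction = trial.suggest_float("feature_fraction", 0.5, 1.0)
    # Previously: trial.suggest_loguniform("lr", 1e-5, 1e-1)
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    return feature_fraction * lr  # toy score, illustration only


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)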
2 changes: 1 addition & 1 deletion examples/optimization/sequential_parameter_search.py
@@ -24,7 +24,7 @@ def sample(optimization_search_space, trial, suggested_params):
     for feature_fraction in range(10):
         feature_fraction = feature_fraction / 10
         trial_values["feature_fraction"] = feature_fraction
-        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_uniform("min_sum_hessian_in_leaf", low=0.5, high=1)
+        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_float("min_sum_hessian_in_leaf", low=0.5, high=1)
         yield trial_values
 
 
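This example uses a generator-style search space: each yielded dict is one candidate configuration. The snippet below is an illustrative way to walk such a generator outside the tuner, using an Optuna FixedTrial to pin the suggested value; the consuming loop is a stand-in, not LightAutoML internals.

import copy

import optuna


def sample(optimization_search_space, trial, suggested_params):
    trial_values = copy.copy(suggested_params)
    for feature_fraction in range(10):
        trial_values["feature_fraction"] = feature_fraction / 10
        trial_values["min_sum_hessian_in_leaf"] = trial.suggest_float("min_sum_hessian_in_leaf", low=0.5, high=1)
        yield dict(trial_values)


# FixedTrial pins "min_sum_hessian_in_leaf" so the candidates can be printed directly.
fixed_trial = optuna.trial.FixedTrial({"min_sum_hessian_in_leaf": 0.7})
for candidate in sample(None, fixed_trial, {"learning_rate": 0.05}):
    print(candidate)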
3 changes: 1 addition & 2 deletions lightautoml/automl/presets/tabular_presets.py
@@ -30,7 +30,6 @@
 from ...ml_algo.dl_model import TorchModel
 from ...ml_algo.linear_sklearn import LinearLBFGS
 from ...ml_algo.random_forest import RandomForestSklearn
-from ...ml_algo.tuning.optuna import DLOptunaTuner
 from ...ml_algo.tuning.optuna import OptunaTuner
 from ...pipelines.features.lgb_pipeline import LGBAdvancedPipeline
 from ...pipelines.features.lgb_pipeline import LGBSeqSimpleFeatures
@@ -444,7 +443,7 @@ def get_nn(
 
         if tuned:
             nn_model.set_prefix("Tuned")
-            nn_tuner = DLOptunaTuner(
+            nn_tuner = OptunaTuner(
                 n_trials=model_params["tuning_params"]["max_tuning_iter"],
                 timeout=model_params["tuning_params"]["max_tuning_time"],
                 fit_on_holdout=model_params["tuning_params"]["fit_on_holdout"],
3 changes: 1 addition & 2 deletions lightautoml/automl/presets/text_presets.py
@@ -22,7 +22,6 @@
 from ...ml_algo.boost_lgbm import BoostLGBM
 from ...ml_algo.dl_model import TorchModel
 from ...ml_algo.linear_sklearn import LinearLBFGS
-from ...ml_algo.tuning.optuna import DLOptunaTuner
 from ...ml_algo.tuning.optuna import OptunaTuner
 from ...pipelines.features.base import FeaturesPipeline
 from ...pipelines.features.lgb_pipeline import LGBAdvancedPipeline
@@ -307,7 +306,7 @@ def get_nn(
 
         if tuned:
             nn_model.set_prefix("Tuned")
-            nn_tuner = DLOptunaTuner(
+            nn_tuner = OptunaTuner(
                 n_trials=model_params["tuning_params"]["max_tuning_iter"],
                 timeout=model_params["tuning_params"]["max_tuning_time"],
                 fit_on_holdout=model_params["tuning_params"]["fit_on_holdout"],
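In both presets the only behavioural change is which tuner class wraps the tuned NN model. A hedged sketch of the equivalent standalone construction is shown below, with illustrative values in place of the presets' model_params lookups, and assuming the (algo, tuner) pairing accepted by the library's custom MLPipeline API.

from lightautoml.ml_algo.tuning.optuna import OptunaTuner

nn_tuner = OptunaTuner(
    n_trials=25,          # illustrative stand-in for model_params["tuning_params"]["max_tuning_iter"]
    timeout=3600,         # illustrative stand-in for model_params["tuning_params"]["max_tuning_time"]
    fit_on_holdout=True,  # illustrative stand-in for model_params["tuning_params"]["fit_on_holdout"]
)
# In a custom pipeline the tuner is paired with the model, e.g. MLPipeline([(nn_model, nn_tuner)]).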
59 changes: 31 additions & 28 deletions lightautoml/ml_algo/dl_model.py
@@ -618,32 +618,35 @@ def predict_single_fold(self, model: any, dataset: TabularDataset) -> np.ndarray
 
         return pred
 
-    def _default_sample(self, trial: optuna.trial.Trial, estimated_n_trials: int, suggested_params: Dict) -> Dict:
-        """Implements simple tuning sampling strategy.
-        Args:
-            trial: Current optuna Trial.
-            estimated_n_trials: Estimated trials based on time spent on previous ones.
-            suggested_params: Suggested params
-        Returns:
-            Dict with Sampled params.
-        """
-        # optionally
-        trial_values = copy(suggested_params)
-
-        trial_values["bs"] = trial.suggest_categorical("bs", [2 ** i for i in range(6, 11)])
-
-        weight_decay_bin = trial.suggest_categorical("weight_decay_bin", [0, 1])
-        if weight_decay_bin == 0:
-            weight_decay = 0
-        else:
-            weight_decay = trial.suggest_loguniform("weight_decay", low=1e-6, high=1e-2)
-
-        lr = trial.suggest_loguniform("lr", low=1e-5, high=1e-1)
-        trial_values["opt_params"] = {
-            "lr": lr,
-            "weight_decay": weight_decay,
-        }
-        return trial_values
+    def _get_default_search_spaces(self, suggested_params: Dict, estimated_n_trials: int) -> Dict:
+        def sample(optimization_search_space, trial: optuna.trial.Trial, suggested_params: Dict) -> Dict:
+            """Implements simple tuning sampling strategy.
+            Args:
+                trial: Current optuna Trial.
+                estimated_n_trials: Estimated trials based on time spent on previous ones.
+                suggested_params: Suggested params
+            Returns:
+                Dict with Sampled params.
+            """
+            # optionally
+            trial_values = copy(suggested_params)
+
+            trial_values["bs"] = trial.suggest_categorical("bs", [2 ** i for i in range(6, 11)])
+
+            weight_decay_bin = trial.suggest_categorical("weight_decay_bin", [0, 1])
+            if weight_decay_bin == 0:
+                weight_decay = 0
+            else:
+                weight_decay = trial.suggest_float("weight_decay", low=1e-6, high=1e-2, log=True)
+
+            lr = trial.suggest_float("lr", low=1e-5, high=1e-1, log=True)
+            trial_values["opt_params"] = {
+                "lr": lr,
+                "weight_decay": weight_decay,
+            }
+            return trial_values
+
+        return sample
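The refactor above replaces the model's _default_sample method with a factory that returns a sampling closure. The closure has the same (optimization_search_space, trial, suggested_params) signature as the user-defined search-space functions in the example files earlier in this diff, which is presumably what lets the generic OptunaTuner treat the model's default space like any user-supplied one. A stripped-down sketch of the pattern (illustrative names, not the TorchModel method itself):

from copy import copy
from typing import Callable, Dict

import optuna


def get_default_search_spaces(suggested_params: Dict, estimated_n_trials: int) -> Callable:
    """Return a sampling closure instead of sampling in-place."""

    def sample(optimization_search_space, trial: optuna.trial.Trial, suggested_params: Dict) -> Dict:
        trial_values = copy(suggested_params)
        trial_values["bs"] = trial.suggest_categorical("bs", [2 ** i for i in range(6, 11)])
        trial_values["opt_params"] = {"lr": trial.suggest_float("lr", 1e-5, 1e-1, log=True)}
        return trial_values

    return sample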
244 changes: 4 additions & 240 deletions lightautoml/ml_algo/tuning/optuna.py
@@ -90,6 +90,7 @@ class OptunaTuner(ParamsTuner):
             and ``maximize`` for maximization.
         fit_on_holdout: Will be used holdout cv-iterator.
         random_state: Seed for optuna sampler.
+        fail_tolerance: the maximum allowed percentage of failed tuner trials. Exception will be thrown after crossing the threshold value.
     """
 
@@ -178,7 +179,9 @@ def update_trial_time(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
         def check_fail_tolerance(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
             df = study.trials_dataframe()
 
-            if df[df["state"] == "FAIL"].shape[0] / self.estimated_n_trials > self.fail_tolerance:
+            if (self.estimated_n_trials > 0) and (
+                df[df["state"] == "FAIL"].shape[0] / self.estimated_n_trials > self.fail_tolerance
+            ):
                 raise Exception(
                     f"Too much trials was failed ({df[df['state'] == 'FAIL'].shape[0]} of {df.shape[0]}). Check the model or search space for it."
                 )
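The added guard matters when estimated_n_trials is still zero: the old ratio test would then divide by zero, while the short-circuiting "and" now skips the ratio until at least one trial is expected. A toy illustration of the predicate (names mirror the diff, but this is not the tuner class itself):

def too_many_failures(n_failed: int, estimated_n_trials: float, fail_tolerance: float) -> bool:
    # Short-circuit: the ratio is only computed once estimated_n_trials is positive,
    # so a zero estimate can no longer raise ZeroDivisionError or trip the check.
    return estimated_n_trials > 0 and n_failed / estimated_n_trials > fail_tolerance


assert too_many_failures(3, 4, 0.5) is True
assert too_many_failures(3, 0, 0.5) is False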
@@ -314,242 +317,3 @@ def _sample(
     def plot(self):
         """Plot optimization history of all trials in a study."""
         return optuna.visualization.plot_optimization_history(self.study)
-
-
-class DLOptunaTuner(ParamsTuner):
-    """Wrapper for optuna tuner.
-    Args:
-        timeout: Maximum learning time.
-        n_trials: Maximum number of trials.
-        direction: Direction of optimization.
-            Set ``minimize`` for minimization
-            and ``maximize`` for maximization.
-        fit_on_holdout: Will be used holdout cv-iterator.
-        random_state: Seed for optuna sampler.
-    """
-
-    _name: str = "OptunaTuner"
-
-    study: optuna.study.Study = None
-    estimated_n_trials: int = None
-    mean_trial_time: Optional[int] = None
-
-    def __init__(
-        # TODO: For now, metric is designed to be greater is better. Change maximize param after metric refactor if needed
-        self,
-        timeout: Optional[int] = 1000,
-        n_trials: Optional[int] = 100,
-        direction: Optional[str] = "maximize",
-        fit_on_holdout: bool = True,
-        random_state: int = 42,
-        fail_tolerance=0.5,
-    ):
-        self.timeout = timeout
-        self.n_trials = n_trials
-        self.estimated_n_trials = n_trials
-        self.direction = direction
-        self._fit_on_holdout = fit_on_holdout
-        self.random_state = random_state
-        self.fail_tolerance = fail_tolerance
-
-    def _upd_timeout(self, timeout):
-        self.timeout = min(self.timeout, timeout)
-
-    def fit(
-        self,
-        ml_algo: TunableAlgo,
-        train_valid_iterator: Optional[TrainValidIterator] = None,
-    ) -> Tuple[Optional[TunableAlgo], Optional[LAMLDataset]]:
-        """Tune model.
-        Args:
-            ml_algo: Algo that is tuned.
-            train_valid_iterator: Classic cv-iterator.
-        Returns:
-            Tuple (None, None) if an optuna exception raised
-            or ``fit_on_holdout=True`` and ``train_valid_iterator`` is
-            not :class:`~lightautoml.validation.base.HoldoutIterator`.
-            Tuple (MlALgo, preds_ds) otherwise.
-        """
-        assert not ml_algo.is_fitted, "Fitted algo cannot be tuned."
-        self._params_scores = []
-
-        # optuna.logging.set_verbosity(get_stdout_level())
-        # upd timeout according to ml_algo timer
-        estimated_tuning_time = ml_algo.timer.estimate_tuner_time(len(train_valid_iterator))
-        if estimated_tuning_time:
-            # TODO: Check for minimal runtime!
-            estimated_tuning_time = max(estimated_tuning_time, 1)
-            self._upd_timeout(estimated_tuning_time)
-
-        logger.info(
-            f"Start hyperparameters optimization for \x1b[1m{ml_algo._name}\x1b[0m ... Time budget is {self.timeout:.2f} secs"
-        )
-
-        metric_name = train_valid_iterator.train.task.get_dataset_metric().name
-        ml_algo = deepcopy(ml_algo)
-
-        flg_new_iterator = False
-        if self._fit_on_holdout and not isinstance(train_valid_iterator, HoldoutIterator):
-            train_valid_iterator = train_valid_iterator.convert_to_holdout_iterator()
-            flg_new_iterator = True
-
-        # TODO: Check if time estimation will be ok with multiprocessing
-        def update_trial_time(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
-            """Callback for number of iteration with time cut-off.
-            Args:
-                study: Optuna study object.
-                trial: Optuna trial object.
-            """
-            ml_algo.mean_trial_time = study.trials_dataframe()["duration"].mean().total_seconds()
-            self.estimated_n_trials = min(self.n_trials, self.timeout // ml_algo.mean_trial_time)
-
-            logger.info3(
-                f"\x1b[1mTrial {len(study.trials)}\x1b[0m with hyperparameters {trial.params} scored {trial.value} in {trial.duration}"
-            )
-
-        def check_fail_tolerance(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
-            df = study.trials_dataframe()
-
-            if df[df["state"] == "FAIL"].shape[0] / self.estimated_n_trials > self.fail_tolerance:
-                raise Exception(
-                    f"Too much trials was failed ({df[df['state'] == 'FAIL'].shape[0]} of {df.shape[0]}). Check the model or search space for it."
-                )
-
-        try:
-            # Custom progress bar
-            def custom_progress_bar(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
-                best_trial = study.best_trial
-                progress_bar.set_postfix(best_trial=best_trial.number, best_value=best_trial.value)
-                progress_bar.update(1)
-
-            # Initialize progress bar
-            if get_stdout_level() in [logging.INFO, logging.INFO2]:
-                progress_bar = tqdm(total=self.n_trials, desc="Optimization Progress")
-
-            sampler = optuna.samplers.TPESampler(seed=self.random_state)
-            self.study = optuna.create_study(direction=self.direction, sampler=sampler)
-
-            self.study.optimize(
-                func=self._get_objective(
-                    ml_algo=ml_algo,
-                    estimated_n_trials=self.estimated_n_trials,
-                    train_valid_iterator=train_valid_iterator,
-                ),
-                n_trials=self.n_trials,
-                timeout=self.timeout,
-                callbacks=(
-                    [update_trial_time, check_fail_tolerance, custom_progress_bar]
-                    if get_stdout_level() in [logging.INFO, logging.INFO2]
-                    else [update_trial_time, check_fail_tolerance]
-                ),
-                catch=[Exception],
-            )
-
-            # Close the progress bar if it was initialized
-            if get_stdout_level() in [logging.INFO, logging.INFO2]:
-                progress_bar.close()
-
-            # need to update best params here
-            if self.direction == "maximize":
-                self._best_params = max(self._params_scores, key=lambda x: x[1])[0]
-            else:
-                self._best_params = min(self._params_scores, key=lambda x: x[1])[0]
-
-            ml_algo.params = self._best_params
-            del self._params_scores
-
-            logger.info(f"Hyperparameters optimization for \x1b[1m{ml_algo._name}\x1b[0m completed")
-            logger.info2(
-                f"The set of hyperparameters \x1b[1m{self._best_params}\x1b[0m\n achieve {self.study.best_value:.4f} {metric_name}"
-            )
-
-            if flg_new_iterator:
-                # if tuner was fitted on holdout set we dont need to save train results
-                return None, None
-
-            preds_ds = ml_algo.fit_predict(train_valid_iterator)
-
-            return ml_algo, preds_ds
-        except optuna.exceptions.OptunaError:
-            del self._params_scores
-            return None, None
-
-    def _get_objective(
-        self,
-        ml_algo: TunableAlgo,
-        estimated_n_trials: int,
-        train_valid_iterator: TrainValidIterator,
-    ) -> Callable[[optuna.trial.Trial], Union[float, int]]:
-        """Get objective.
-        Args:
-            ml_algo: Tunable algorithm.
-            estimated_n_trials: Maximum number of hyperparameter estimations.
-            train_valid_iterator: Used for getting parameters
-                depending on dataset.
-        Returns:
-            Callable objective.
-        """
-        assert isinstance(ml_algo, MLAlgo)
-
-        def objective(trial: optuna.trial.Trial) -> float:
-            _ml_algo = deepcopy(ml_algo)
-
-            optimization_search_space = _ml_algo.optimization_search_space
-            if not optimization_search_space:
-                optimization_search_space = _ml_algo._default_sample
-
-            if callable(optimization_search_space):
-                sampled_params = optimization_search_space(
-                    trial=trial,
-                    estimated_n_trials=estimated_n_trials,
-                    suggested_params=_ml_algo.init_params_on_input(train_valid_iterator),
-                )
-            else:
-                sampled_params = self._sample(
-                    trial=trial,
-                    optimization_search_space=optimization_search_space,
-                    suggested_params=_ml_algo.init_params_on_input(train_valid_iterator),
-                )
-
-            _ml_algo.params = sampled_params
-            output_dataset = _ml_algo.fit_predict(train_valid_iterator=train_valid_iterator)
-            score = _ml_algo.score(output_dataset)
-            self._params_scores.append((sampled_params, score))
-            return score
-
-        return objective
-
-    def _sample(
-        self,
-        optimization_search_space,
-        trial: optuna.trial.Trial,
-        suggested_params: dict,
-    ) -> dict:
-        # logger.info3(f'Suggested parameters: {suggested_params}')
-        trial_values = copy(suggested_params)
-        for parameter_name, search_space in optimization_search_space.items():
-            not_supported = True
-            for key_class in OPTUNA_DISTRIBUTIONS_MAP:
-                if isinstance(search_space, key_class):
-                    wrapped_search_space = OPTUNA_DISTRIBUTIONS_MAP[key_class](search_space)
-                    trial_values[parameter_name] = wrapped_search_space(
-                        name=parameter_name,
-                        trial=trial,
-                    )
-                    not_supported = False
-            if not_supported:
-                raise ValueError(f"Optuna does not support distribution {search_space}")
-
-    def plot(self):
-        """Plot optimization history of all trials in a study."""
-        return optuna.visualization.plot_optimization_history(self.study)
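For downstream code that imported the removed class directly, here is a hedged migration sketch: the try/except import shim is illustrative, and the constructor arguments are the ones the presets pass in this same commit.

try:
    # Releases before this commit still expose the dedicated DL tuner.
    from lightautoml.ml_algo.tuning.optuna import DLOptunaTuner as Tuner
except ImportError:
    # From this commit on, the generic OptunaTuner is used for the DL models as well.
    from lightautoml.ml_algo.tuning.optuna import OptunaTuner as Tuner

tuner = Tuner(n_trials=50, timeout=600, fit_on_holdout=True)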
