From ddf692502f51332edcfaf126a4c925b5d967a343 Mon Sep 17 00:00:00 2001 From: Xavier Bouthillier Date: Thu, 9 Sep 2021 15:35:22 -0400 Subject: [PATCH 1/6] Move pool-size to worker conf, cancel deprecation Why: The new default behavior is confusing for users. It is also difficult to determine a good default max_trials, so having not enough or too many trials sampled by default at the start of HPO can be annoying for many users. Using inf by default and iterating with pool-size may be the best alternative. Now that we have support for n-workers, the argument pool-size we previously deprecated actually makes sense. By default, pool-size should be equal to the number of workers. We have n-workers set to 1 by default, so by default we are back to the previous behavior: sampling 1 trial at a time, until max_trials. How: The producer now takes a pool size as argument when producing. The same applies to ExperimentClient.suggest() and ExperimentClient.workon(). The pool size is used to sample multiple trials at a time and increase I/O efficiency. The producer now keeps track of the number of new trials so that if multiple workers are producing new trials with a non-seeded algorithm (hence they produce different trials and there are no conflicts leading to backoff), they will stop once they have together generated up to `pool_size` trials. Note: Pool-size is moved to the worker configuration instead. Since pool-size relates to n-workers, which is part of the worker configuration, having pool-size in the worker configuration makes more sense. 
--- docs/src/tutorials/pytorch-mnist.rst | 73 ----------- docs/src/user/config.rst | 34 +++-- src/orion/client/experiment.py | 42 +++++- src/orion/core/__init__.py | 22 ++-- src/orion/core/cli/hunt.py | 2 + src/orion/core/io/experiment_builder.py | 5 - src/orion/core/utils/format_terminal.py | 1 - src/orion/core/worker/experiment.py | 6 +- src/orion/core/worker/producer.py | 24 ++-- src/orion/serving/responses.py | 1 - src/orion/testing/evc.py | 6 + .../configuration/test_all_options.py | 6 +- tests/functional/demo/test_demo.py | 13 -- .../serving/test_experiments_resource.py | 2 - .../client/test_experiment_client.py | 5 +- tests/unittests/core/cli/test_info.py | 4 - .../core/io/test_experiment_builder.py | 8 -- .../unittests/core/io/test_resolve_config.py | 5 +- .../unittests/core/worker/test_experiment.py | 2 - tests/unittests/core/worker/test_producer.py | 121 +++++++++++------- 20 files changed, 173 insertions(+), 209 deletions(-) diff --git a/docs/src/tutorials/pytorch-mnist.rst b/docs/src/tutorials/pytorch-mnist.rst index 0b9d0e4bd..5e9429653 100644 --- a/docs/src/tutorials/pytorch-mnist.rst +++ b/docs/src/tutorials/pytorch-mnist.rst @@ -162,76 +162,3 @@ don't use ``--debug`` you will likely quickly fill your database with broken exp .. code-block:: bash $ orion --debug hunt -n orion-tutorial python main.py --lr~'loguniform(1e-5, 1.0)' - -Hunting Options ---------------- - -.. code-block:: console - - $ orion hunt --help - - Oríon arguments (optional): - These arguments determine orion's behaviour - - -n stringID, --name stringID - experiment's unique name; (default: None - specified - either here or in a config) - -u USER, --user USER user associated to experiment's unique name; (default: - $USER - can be overriden either here or in a config) - -c path-to-config, --config path-to-config - user provided orion configuration file - --max-trials # number of trials to be completed for the experiment. 
- This value will be saved within the experiment - configuration and reused across all workers to - determine experiment's completion. (default: inf/until - preempted) - --worker-trials # number of trials to be completed for this worker. If - the experiment is completed, the worker will die even - if it did not reach its maximum number of trials - (default: inf/until preempted) - --working-dir WORKING_DIR - Set working directory for running experiment. - --pool-size # number of simultaneous trials the algorithm should - suggest. This is useful if many workers are executed - in parallel and the algorithm has a strategy to sample - non-independant trials simultaneously. Otherwise, it - is better to leave `pool_size` to 1 and set a Strategy - for Oríon's producer. Note that this option is not usefull useless you - know the algorithm have a strategy to produce multiple trials - simultaneously. If you have any doubt, leave it to 1. - (default: 1) - -``name`` - -The unique name of the experiment. - -``user`` - -Username used to identify the experiments of a user. The default value is the system's username -$USER. - -``config`` - -Configuration file for Oríon which may define the database, the algorithm and all options of the -command hunt, including ``name``, ``pool-size`` and ``max-trials``. - -``max-trials`` - -The maximum number of trials tried during an experiment. - -``worker-trials`` - -The maximum number of trials to be executed by a worker (a single call to ``orion hunt [...]``). - -``working-dir`` - -The directory where configuration files are created. If not specified, Oríon will create a -temporary directory that will be removed at end of execution of the trial. - -``pool-size`` - -The number of trials which are generated by the algorithm each time it is interrogated. This is -useful if many workers are executed in parallel and the algorithm has a strategy to sample -non-independant trials simultaneously. 
Otherwise, it is better to leave ``pool_size`` to its default -value 1. Note that this option is not usefull useless you know the algorithm have a strategy -to produce multiple trials simultaneously. If you have any doubt, leave it to 1. :) diff --git a/docs/src/user/config.rst b/docs/src/user/config.rst index 74cf5c846..8f7dc9477 100644 --- a/docs/src/user/config.rst +++ b/docs/src/user/config.rst @@ -97,7 +97,6 @@ Full Example of Global Configuration seed: None max_broken: 3 max_trials: 1000000000 - pool_size: 1 strategy: MaxParallelStrategy worker_trials: 1000000000 @@ -105,6 +104,7 @@ Full Example of Global Configuration worker: n_workers: 1 + pool_size: 0 executor: joblib executor_configuration: {} heartbeat: 120 @@ -211,7 +211,6 @@ Experiment seed: None max_broken: 3 max_trials: 1000000000 - pool_size: 1 strategy: MaxParallelStrategy worker_trials: 1000000000 @@ -322,22 +321,6 @@ working_dir -.. _config_experiment_pool_size: - -pool_size -~~~~~~~~~ - -.. warning:: - - **DEPRECATED.** This argument will be removed in v0.3. - -:Type: int -:Default: 1 -:Env var: -:Description: - (DEPRECATED) This argument will be removed in v0.3. - - .. _config_experiment_algorithms: algorithms @@ -376,6 +359,7 @@ Worker worker: n_workers: 1 + pool_size: 0 executor: joblib executor_configuration: {} heartbeat: 120 @@ -400,6 +384,20 @@ n_workers It is possible to run many `orion hunt` in parallel, and each will spawn ``n_workers``. +.. _config_worker_pool_size: + +pool_size +~~~~~~~~~ + +:Type: int +:Default: 0 +:Env var: +:Description: + Number of trials to sample at a time. If 0, default to number of workers. + Increase it to improve the sampling speed if workers spend too much time + waiting for algorithms to sample points. An algorithm will try sampling `pool-size` + trials but may return less. + .. 
_config_worker_executor: diff --git a/src/orion/client/experiment.py b/src/orion/client/experiment.py index 03a6ac9d2..976364901 100644 --- a/src/orion/client/experiment.py +++ b/src/orion/client/experiment.py @@ -32,7 +32,7 @@ log = logging.getLogger(__name__) -def reserve_trial(experiment, producer, _depth=1): +def reserve_trial(experiment, producer, pool_size, _depth=1): """Reserve a new trial, or produce and reserve a trial if none are available.""" log.debug("Trying to reserve a new trial to evaluate.") trial = experiment.reserve_trial() @@ -51,9 +51,9 @@ def reserve_trial(experiment, producer, _depth=1): producer.update() log.debug("#### Produce new trials.") - producer.produce() + producer.produce(pool_size) - return reserve_trial(experiment, producer, _depth=_depth + 1) + return reserve_trial(experiment, producer, pool_size, _depth=_depth + 1) return trial @@ -500,7 +500,7 @@ def release(self, trial, status="interrupted"): finally: self._release_reservation(trial, raise_if_unreserved=raise_if_unreserved) - def suggest(self): + def suggest(self, pool_size=0): """Suggest a trial to execute. Experiment must be in executable ('x') mode. @@ -509,6 +509,16 @@ def suggest(self): Otherwise, the algorithm is used to generate a new trial that is registered in storage and reserved. + Parameters + ---------- + pool_size: int, optional + Number of trials to sample at a time. If 0, default to global config if defined, + else 1. Increase it to improve the sampling speed if workers spend too much time + waiting for algorithms to sample points. An algorithm will try sampling `pool-size` + trials but may return less. Note: The method will still return only 1 trial even though + if the pool size is larger than 1. This is because atomic reservation of trials + can only be done one at a time. 
+ Returns ------- `orior.core.worker.trial.Trial` @@ -535,6 +545,10 @@ def suggest(self): """ self._check_if_executable() + if not pool_size: + pool_size = orion.core.config.worker.pool_size + if not pool_size: + pool_size = 1 if self.is_broken: raise BrokenExperiment("Trials failed too many times") @@ -543,7 +557,7 @@ def suggest(self): raise CompletedExperiment("Experiment is done, cannot sample more trials.") try: - trial = reserve_trial(self._experiment, self._producer) + trial = reserve_trial(self._experiment, self._producer, pool_size) except (WaitingForTrials, SampleTimeout) as e: if self.is_broken: @@ -633,6 +647,7 @@ def workon( self, fct, n_workers=None, + pool_size=0, max_trials=None, max_trials_per_worker=None, max_broken=None, @@ -652,6 +667,11 @@ def workon( objective. n_workers: int, optional Number of workers to run in parallel. Defaults to value of global config. + pool_size: int, optional + Number of trials to sample at a time. If 0, defaults to `n_workers` or value of global + config if defined. Increase it to improve the sampling speed if workers spend too much + time waiting for algorithms to sample points. An algorithm will try sampling + `pool-size` trials but may return less. max_trials: int, optional Maximum number of trials to execute within ``workon``. If the experiment or algorithm reach status is_done before, the execution of ``workon`` terminates. 
@@ -712,6 +732,11 @@ def workon( str(self.executor.n_workers), ) + if not pool_size: + pool_size = orion.core.config.worker.pool_size + if not pool_size: + pool_size = n_workers + if max_trials is None: max_trials = self.max_trials @@ -731,6 +756,7 @@ def workon( self.executor.submit( self._optimize, fct, + pool_size, max_trials_per_worker, max_broken, trial_arg, @@ -742,14 +768,16 @@ def workon( return sum(trials) - def _optimize(self, fct, max_trials, max_broken, trial_arg, on_error, **kwargs): + def _optimize( + self, fct, pool_size, max_trials, max_broken, trial_arg, on_error, **kwargs + ): worker_broken_trials = 0 trials = 0 kwargs = flatten(kwargs) max_trials = min(max_trials, self.max_trials) while not self.is_done and trials - worker_broken_trials < max_trials: try: - with self.suggest() as trial: + with self.suggest(pool_size=pool_size) as trial: kwargs.update(flatten(trial.params)) diff --git a/src/orion/core/__init__.py b/src/orion/core/__init__.py index 1da4d4248..543367575 100644 --- a/src/orion/core/__init__.py +++ b/src/orion/core/__init__.py @@ -169,14 +169,6 @@ def define_experiment_config(config): help="Set working directory for running experiment.", ) - experiment_config.add_option( - "pool_size", - option_type=int, - default=1, - deprecate=dict(version="v0.3", alternative=None, name="experiment.pool_size"), - help="This argument will be removed in v0.3.", - ) - experiment_config.add_option( "algorithms", option_type=dict, @@ -210,6 +202,20 @@ def define_worker_config(config): ), ) + worker_config.add_option( + "pool_size", + option_type=int, + default=0, + env_var="ORION_POOL_SIZE", + help=( + "Number of trials to sample at a time. " + "If 0, will default to number of executor workers. " + "Increase it to improve the sampling speed if workers spend too much time " + "waiting for algorithms to sample points. An algorithm will try sampling " + "`pool-size` trials but may return less." 
+ ), + ) + worker_config.add_option( "executor", option_type=str, diff --git a/src/orion/core/cli/hunt.py b/src/orion/core/cli/hunt.py index 4f4c1b8a3..3b724d453 100644 --- a/src/orion/core/cli/hunt.py +++ b/src/orion/core/cli/hunt.py @@ -130,6 +130,7 @@ def on_error(client, trial, error, worker_broken_trials): def workon( experiment, n_workers=None, + pool_size=None, max_trials=None, max_broken=None, max_idle_time=None, @@ -163,6 +164,7 @@ def workon( client.workon( consumer, n_workers=n_workers, + pool_size=pool_size, max_trials_per_worker=max_trials, max_broken=max_broken, trial_arg="trial", diff --git a/src/orion/core/io/experiment_builder.py b/src/orion/core/io/experiment_builder.py index 7ebb9d8ba..e4ffcb9db 100644 --- a/src/orion/core/io/experiment_builder.py +++ b/src/orion/core/io/experiment_builder.py @@ -379,11 +379,6 @@ def create_experiment(name, version, mode, space, **kwargs): """ experiment = Experiment(name=name, version=version, mode=mode) experiment._id = kwargs.get("_id", None) # pylint:disable=protected-access - experiment.pool_size = kwargs.get("pool_size") - if experiment.pool_size is None: - experiment.pool_size = orion.core.config.experiment.get( - "pool_size", deprecated="ignore" - ) experiment.max_trials = kwargs.get( "max_trials", orion.core.config.experiment.max_trials ) diff --git a/src/orion/core/utils/format_terminal.py b/src/orion/core/utils/format_terminal.py index ea1d0b208..9d254ffbb 100644 --- a/src/orion/core/utils/format_terminal.py +++ b/src/orion/core/utils/format_terminal.py @@ -257,7 +257,6 @@ def format_commandline(experiment): CONFIG_TEMPLATE = """\ {title} -pool size: {experiment.pool_size} max trials: {experiment.max_trials} max broken: {experiment.max_broken} working dir: {experiment.working_dir} diff --git a/src/orion/core/worker/experiment.py b/src/orion/core/worker/experiment.py index 2109b7ab5..1dff1852b 100644 --- a/src/orion/core/worker/experiment.py +++ b/src/orion/core/worker/experiment.py @@ -45,8 +45,6 @@ 
class Experiment: Current version of this experiment. metadata : dict Contains managerial information about this `Experiment`. - pool_size : int - How many workers can participate asynchronously in this `Experiment`. max_trials : int How many trials must be evaluated, before considering this `Experiment` done. This attribute can be updated if the rest of the experiment configuration @@ -90,7 +88,6 @@ class Experiment: "name", "refers", "metadata", - "pool_size", "max_trials", "max_broken", "version", @@ -103,7 +100,7 @@ class Experiment: "_node", "_mode", ) - non_branching_attrs = ("pool_size", "max_trials", "max_broken") + non_branching_attrs = ("max_trials", "max_broken") def __init__(self, name, version=None, mode="r"): self._id = None @@ -113,7 +110,6 @@ def __init__(self, name, version=None, mode="r"): self._node = None self.refers = {} self.metadata = {} - self.pool_size = None self.max_trials = None self.max_broken = None self.space = None diff --git a/src/orion/core/worker/producer.py b/src/orion/core/worker/producer.py index f825523f8..b03c9940a 100644 --- a/src/orion/core/worker/producer.py +++ b/src/orion/core/worker/producer.py @@ -59,11 +59,6 @@ def __init__(self, experiment, max_idle_time=None): self.num_trials = 0 self.num_broken = 0 - @property - def pool_size(self): - """Pool-size of the experiment""" - return self.experiment.pool_size - def backoff(self): """Wait some time and update algorithm.""" waiting_time = max(0, random.gauss(1, 0.2)) @@ -90,25 +85,32 @@ def is_done(self): self.naive_algorithm is not None and self.naive_algorithm.is_done ) - def suggest(self): + def suggest(self, pool_size): """Try suggesting new points with the naive algorithm""" num_pending = self.num_trials - self.num_broken num = max(self.experiment.max_trials - num_pending, 1) - return self.naive_algorithm.suggest(num) + return self.naive_algorithm.suggest(min(num, pool_size)) - def produce(self): + def produce(self, pool_size): """Create and register new trials.""" 
sampled_points = 0 - # reset the number of time we failed to sample points self.failure_count = 0 start = time.time() - while sampled_points < self.pool_size and not self.is_done: + # This number (self.num_trials) is based on most recent algo state update so it is not + # sensitive to race-conditions. If another worker started suggesting points in between, the + # value of self.num_trials will not count it and thus the current producer will count new + # points of other producer as part of the current pool samples. + while ( + len(self.experiment.fetch_trials(with_evc_tree=True)) - self.num_trials + < pool_size + and not self.is_done + ): self._sample_guard(start) log.debug("### Algorithm suggests new points.") - new_points = self.suggest() + new_points = self.suggest(pool_size) # Sync state of original algo so that state continues evolving. self.algorithm.set_state(self.naive_algorithm.state_dict) diff --git a/src/orion/serving/responses.py b/src/orion/serving/responses.py index d8087a2c2..49152fc69 100644 --- a/src/orion/serving/responses.py +++ b/src/orion/serving/responses.py @@ -77,7 +77,6 @@ def build_experiment_response( "config": { "maxTrials": experiment.max_trials, "maxBroken": experiment.max_broken, - "poolSize": experiment.pool_size, "algorithm": algorithm, "space": experiment.configuration["space"], }, diff --git a/src/orion/testing/evc.py b/src/orion/testing/evc.py index 7b3520d64..da543497a 100644 --- a/src/orion/testing/evc.py +++ b/src/orion/testing/evc.py @@ -44,6 +44,12 @@ def generate_trials(exp, trials): status, heartbeat=trial.submit_time if status == "reserved" else None, ) + else: + exp._experiment._storage.set_trial_status( + trial, + "reserved", + heartbeat=trial.submit_time, + ) def build_root_experiment(space=None, trials=None): diff --git a/tests/functional/configuration/test_all_options.py b/tests/functional/configuration/test_all_options.py index 56d64195b..4ee3d8037 100644 --- a/tests/functional/configuration/test_all_options.py +++ 
b/tests/functional/configuration/test_all_options.py @@ -369,7 +369,6 @@ class TestExperimentConfig(ConfigurationTestSuite): "max_trials": 10, "max_broken": 5, "working_dir": "here", - "pool_size": 2, "worker_trials": 5, "algorithms": {"aa": {"b": "c", "d": {"e": "f"}}}, "strategy": {"sa": {"c": "d", "e": {"f": "g"}}}, @@ -563,6 +562,7 @@ class TestWorkerConfig(ConfigurationTestSuite): config = { "worker": { "n_workers": 2, + "pool_size": 2, "executor": "dask", "executor_configuration": {"threads_per_worker": 1}, "heartbeat": 30, @@ -576,6 +576,7 @@ class TestWorkerConfig(ConfigurationTestSuite): env_vars = { "ORION_N_WORKERS": 3, + "ORION_POOL_SIZE": 1, "ORION_EXECUTOR": "joblib", "ORION_HEARTBEAT": 40, "ORION_WORKER_MAX_TRIALS": 20, @@ -588,6 +589,7 @@ class TestWorkerConfig(ConfigurationTestSuite): local = { "worker": { "n_workers": 4, + "pool_size": 5, "executor": "dask", "executor_configuration": {"threads_per_worker": 2}, "heartbeat": 50, @@ -601,6 +603,7 @@ class TestWorkerConfig(ConfigurationTestSuite): cmdargs = { "n-workers": 1, + "pool-size": 6, "executor": "dask", "heartbeat": 70, "worker-max-trials": 1, @@ -704,6 +707,7 @@ def check_env_var_config(self, tmp_path, monkeypatch): """Check that env vars overrides global configuration""" env_var_config = { "n_workers": self.env_vars["ORION_N_WORKERS"], + "pool_size": self.env_vars["ORION_POOL_SIZE"], "executor": self.env_vars["ORION_EXECUTOR"], "executor_configuration": self.config["worker"]["executor_configuration"], "heartbeat": self.env_vars["ORION_HEARTBEAT"], diff --git a/tests/functional/demo/test_demo.py b/tests/functional/demo/test_demo.py index f9ffed13e..c95fac11d 100644 --- a/tests/functional/demo/test_demo.py +++ b/tests/functional/demo/test_demo.py @@ -41,7 +41,6 @@ def test_demo_with_default_algo_cli_config_only(storage, monkeypatch): exp = exp[0] assert "_id" in exp assert exp["name"] == "default_algo" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 5 assert exp["max_broken"] == 3 
assert exp["algorithms"] == {"random": {"seed": None}} @@ -84,7 +83,6 @@ def test_demo(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "voila_voici" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 20 assert exp["max_broken"] == 5 assert exp["algorithms"] == { @@ -135,7 +133,6 @@ def test_demo_with_script_config(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "voila_voici" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 20 assert exp["max_broken"] == 5 assert exp["algorithms"] == { @@ -192,7 +189,6 @@ def test_demo_with_python_and_script(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "voila_voici" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 20 assert exp["max_broken"] == 5 assert exp["algorithms"] == { @@ -279,7 +275,6 @@ def test_demo_four_workers(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "four_workers_demo" - assert exp["pool_size"] == 2 assert exp["max_trials"] == 20 assert exp["max_broken"] == 5 assert exp["algorithms"] == {"random": {"seed": 2}} @@ -306,7 +301,6 @@ def test_workon(): name = "voici_voila" config = {"name": name} config["algorithms"] = {"random": {"seed": 1}} - config["pool_size"] = 1 config["max_trials"] = 50 config["exp_max_broken"] = 5 config["user_args"] = [ @@ -338,7 +332,6 @@ def test_workon(): exp = exp[0] assert "_id" in exp assert exp["name"] == name - assert exp["pool_size"] == 1 assert exp["max_trials"] == 50 assert exp["max_broken"] == 5 assert exp["algorithms"] == {"random": {"seed": 1}} @@ -370,7 +363,6 @@ def test_stress_unique_folder_creation(storage, monkeypatch, tmpdir, capfd): [ "hunt", "--max-trials={}".format(how_many), - "--pool-size=1", "--name=lalala", "--config", "./stress_gradient.yaml", @@ -570,8 +562,6 @@ def test_run_with_parallel_strategy(storage, monkeypatch, strategy): "hunt", "--max-trials", "20", - "--pool-size", - "1", 
"--config", config_file, "./black_box.py", @@ -602,8 +592,6 @@ def test_worker_trials(storage, monkeypatch): "hunt", "--config", "./orion_config_random.yaml", - "--pool-size", - "1", "--worker-trials", "0", "./black_box.py", @@ -720,7 +708,6 @@ def test_demo_with_nondefault_config_keyword(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "voila_voici" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 20 assert exp["algorithms"] == { "gradient_descent": {"learning_rate": 0.1, "dx_tolerance": 1e-5} diff --git a/tests/functional/serving/test_experiments_resource.py b/tests/functional/serving/test_experiments_resource.py index 93bef6611..06e43ea7c 100644 --- a/tests/functional/serving/test_experiments_resource.py +++ b/tests/functional/serving/test_experiments_resource.py @@ -24,7 +24,6 @@ }, }, version=1, - pool_size=1, max_trials=10, max_broken=7, working_dir="", @@ -194,7 +193,6 @@ def _add_trial(**kwargs): def _assert_config(config): """Asserts properties of the ``config`` dictionary""" - assert config["poolSize"] == 1 assert config["maxTrials"] == 10 assert config["maxBroken"] == 7 diff --git a/tests/unittests/client/test_experiment_client.py b/tests/unittests/client/test_experiment_client.py index 93374ff4b..f1e690c53 100644 --- a/tests/unittests/client/test_experiment_client.py +++ b/tests/unittests/client/test_experiment_client.py @@ -37,7 +37,6 @@ }, }, version=1, - pool_size=1, max_trials=10, max_broken=5, working_dir="", @@ -674,7 +673,7 @@ def is_done(self): """Experiment is done""" return True - def set_is_done(): + def set_is_done(pool_size): """Set is_done while algo is trying to suggest""" monkeypatch.setattr(experiment.__class__, "is_done", property(is_done)) @@ -703,7 +702,7 @@ def is_broken(self): """Experiment is broken""" return True - def set_is_broken(): + def set_is_broken(pool_size): """Set is_broken while algo is trying to suggest""" monkeypatch.setattr( experiment.__class__, "is_broken", 
property(is_broken) diff --git a/tests/unittests/core/cli/test_info.py b/tests/unittests/core/cli/test_info.py index 4e627bf65..c79764d88 100755 --- a/tests/unittests/core/cli/test_info.py +++ b/tests/unittests/core/cli/test_info.py @@ -398,7 +398,6 @@ def test_format_commandline(): def test_format_config(monkeypatch): """Test config section formatting""" experiment = DummyExperiment() - experiment.pool_size = 10 experiment.max_trials = 100 experiment.max_broken = 5 experiment.working_dir = "working_dir" @@ -407,7 +406,6 @@ def test_format_config(monkeypatch): == """\ Config ====== -pool size: 10 max trials: 100 max broken: 5 working dir: working_dir @@ -608,7 +606,6 @@ def test_format_info(algorithm_dict, dummy_trial): experiment.name = "test" experiment.version = 1 experiment.metadata = {"user_args": commandline} - experiment.pool_size = 10 experiment.max_trials = 100 experiment.max_broken = 5 experiment.working_dir = "working_dir" @@ -678,7 +675,6 @@ def test_format_info(algorithm_dict, dummy_trial): Config ====== -pool size: 10 max trials: 100 max broken: 5 working dir: working_dir diff --git a/tests/unittests/core/io/test_experiment_builder.py b/tests/unittests/core/io/test_experiment_builder.py index 3af8ddcc3..5747ae79f 100644 --- a/tests/unittests/core/io/test_experiment_builder.py +++ b/tests/unittests/core/io/test_experiment_builder.py @@ -112,7 +112,6 @@ def new_config(random_dt, script_path): }, }, version=1, - pool_size=10, max_trials=1000, max_broken=5, working_dir="", @@ -192,7 +191,6 @@ def test_get_cmd_config(config_file): assert local_config["max_trials"] == 100 assert local_config["max_broken"] == 5 assert local_config["name"] == "voila_voici" - assert local_config["pool_size"] == 1 assert local_config["storage"] == { "database": { "host": "mongodb://user:pass@localhost", @@ -216,7 +214,6 @@ def test_get_cmd_config_from_incomplete_config(incomplete_config_file): assert "algorithms" not in local_config assert "max_trials" not in local_config 
assert "max_broken" not in local_config - assert "pool_size" not in local_config assert "name" not in local_config["storage"]["database"] assert ( local_config["storage"]["database"]["host"] == "mongodb://user:pass@localhost" @@ -243,7 +240,6 @@ def test_fetch_config_from_db_hit(new_config): assert db_config["name"] == new_config["name"] assert db_config["refers"] == new_config["refers"] assert db_config["metadata"] == new_config["metadata"] - assert db_config["pool_size"] == new_config["pool_size"] assert db_config["max_trials"] == new_config["max_trials"] assert db_config["max_broken"] == new_config["max_broken"] assert db_config["algorithms"] == new_config["algorithms"] @@ -275,7 +271,6 @@ def test_get_from_args_hit(config_file, random_dt, new_config): assert exp_view.name == new_config["name"] assert exp_view.configuration["refers"] == new_config["refers"] assert exp_view.metadata == new_config["metadata"] - assert exp_view.pool_size == new_config["pool_size"] assert exp_view.max_trials == new_config["max_trials"] assert exp_view.max_broken == new_config["max_broken"] assert exp_view.algorithms.configuration == new_config["algorithms"] @@ -299,7 +294,6 @@ def test_get_from_args_hit_no_conf_file(config_file, random_dt, new_config): assert exp_view.name == new_config["name"] assert exp_view.configuration["refers"] == new_config["refers"] assert exp_view.metadata == new_config["metadata"] - assert exp_view.pool_size == new_config["pool_size"] assert exp_view.max_trials == new_config["max_trials"] assert exp_view.max_broken == new_config["max_broken"] assert exp_view.algorithms.configuration == new_config["algorithms"] @@ -333,7 +327,6 @@ def test_build_from_args_no_hit(config_file, random_dt, script_path, new_config) assert exp.metadata["user"] == "dendi" assert exp.metadata["user_script"] == cmdargs["user_args"][0] assert exp.metadata["user_args"] == cmdargs["user_args"] - assert exp.pool_size == 1 assert exp.max_trials == 100 assert exp.max_broken == 5 assert 
exp.algorithms.configuration == {"random": {"seed": None}} @@ -715,7 +708,6 @@ def test_good_set_before_init_no_hit(self, random_dt, new_config): assert exp.name == new_config["name"] assert exp.configuration["refers"] == new_config["refers"] assert exp.metadata == new_config["metadata"] - assert exp.pool_size == new_config["pool_size"] assert exp.max_trials == new_config["max_trials"] assert exp.max_broken == new_config["max_broken"] assert exp.working_dir == new_config["working_dir"] diff --git a/tests/unittests/core/io/test_resolve_config.py b/tests/unittests/core/io/test_resolve_config.py index 1aae08ecf..cb8f65630 100644 --- a/tests/unittests/core/io/test_resolve_config.py +++ b/tests/unittests/core/io/test_resolve_config.py @@ -113,7 +113,6 @@ def test_fetch_config_from_cmdargs(): "worker_trials": "worker_trials", "exp_max_broken": "exp_max_broken", "working_dir": "working_dir", - "pool_size": "pool_size", "max_trials": "max_trials", "heartbeat": "heartbeat", "worker_max_trials": "worker_max_trials", @@ -144,7 +143,6 @@ def test_fetch_config_from_cmdargs(): assert exp_config.pop("max_trials") == "exp_max_trials" assert exp_config.pop("max_broken") == "exp_max_broken" assert exp_config.pop("working_dir") == "working_dir" - assert exp_config.pop("pool_size") == "pool_size" assert exp_config == {} @@ -222,7 +220,6 @@ def test_fetch_config(config_file): "max_trials": 100, "max_broken": 5, "name": "voila_voici", - "pool_size": 1, "algorithms": "random", "strategy": "NoParallelStrategy", } @@ -258,7 +255,6 @@ def mocked_config(file_object): assert exp_config.pop("max_trials") == orion.core.config.experiment.max_trials assert exp_config.pop("max_broken") == orion.core.config.experiment.max_broken assert exp_config.pop("working_dir") == orion.core.config.experiment.working_dir - assert exp_config.pop("pool_size") == orion.core.config.experiment.pool_size assert exp_config.pop("algorithms") == orion.core.config.experiment.algorithms assert exp_config.pop("strategy") 
== orion.core.config.experiment.strategy @@ -267,6 +263,7 @@ def mocked_config(file_object): # Test worker subconfig worker_config = config.pop("worker") assert worker_config.pop("n_workers") == orion.core.config.worker.n_workers + assert worker_config.pop("pool_size") == orion.core.config.worker.pool_size assert worker_config.pop("executor") == orion.core.config.worker.executor assert ( worker_config.pop("executor_configuration") diff --git a/tests/unittests/core/worker/test_experiment.py b/tests/unittests/core/worker/test_experiment.py index 3fc36e9e1..01d3dd0f6 100644 --- a/tests/unittests/core/worker/test_experiment.py +++ b/tests/unittests/core/worker/test_experiment.py @@ -46,7 +46,6 @@ def new_config(random_dt): }, }, version=1, - pool_size=10, max_trials=1000, max_broken=5, working_dir=None, @@ -600,7 +599,6 @@ def test_experiment_pickleable(): "max_trials", "metadata", "name", - "pool_size", "producer", "refers", "retrieve_result", diff --git a/tests/unittests/core/worker/test_producer.py b/tests/unittests/core/worker/test_producer.py index 0e2d349a4..2fd5a8122 100644 --- a/tests/unittests/core/worker/test_producer.py +++ b/tests/unittests/core/worker/test_producer.py @@ -58,7 +58,6 @@ def producer(monkeypatch, hacked_exp, random_dt, categorical_values): """Return a setup `Producer`.""" # make init done - assert hacked_exp.pool_size == 1 hacked_exp.algorithms.algorithm.possible_values = categorical_values hacked_exp.algorithms.seed_rng(0) hacked_exp.max_trials = 20 @@ -114,12 +113,11 @@ def test_strategist_observe_completed(producer): def test_naive_algorithm_is_producing(monkeypatch, producer, random_dt): """Verify naive algo is used to produce, not original algo""" - producer.experiment.pool_size = 1 producer.algorithm.algorithm.possible_values = [("gru", "rnn")] producer.update() monkeypatch.setattr(producer.algorithm.algorithm, "set_state", lambda value: None) producer.algorithm.algorithm.possible_values = [("gru", "gru")] - producer.produce() + 
producer.produce(1) assert producer.naive_algorithm.algorithm._num == 1 # pool size assert producer.algorithm.algorithm._num == 0 @@ -128,11 +126,10 @@ def test_naive_algorithm_is_producing(monkeypatch, producer, random_dt): def test_update_and_produce(producer, random_dt): """Test new trials are properly produced""" possible_values = [("gru", "rnn")] - producer.experiment.pool_size = 1 producer.experiment.algorithms.algorithm.possible_values = possible_values producer.update() - producer.produce() + producer.produce(1) # Algorithm was ordered to suggest some trials num_new_points = producer.naive_algorithm.algorithm._num @@ -146,11 +143,10 @@ def test_register_new_trials(producer, storage, random_dt): trials_in_db_before = len(storage._fetch_trials({})) new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) - producer.experiment.pool_size = 1 producer.experiment.algorithms.algorithm.possible_values = [("gru", "rnn")] producer.update() - producer.produce() + producer.produce(1) # Algorithm was ordered to suggest some trials num_new_points = producer.naive_algorithm.algorithm._num @@ -267,7 +263,6 @@ def test_register_duplicate_lies(producer, storage, random_dt): producer.strategy._value = 4 # Set specific output value for to algo to ensure successful creation of a new trial. 
- producer.experiment.pool_size = 1 producer.experiment.algorithms.algorithm.possible_values = [("gru", "rnn")] producer.update() @@ -278,7 +273,7 @@ def test_register_duplicate_lies(producer, storage, random_dt): # Create a new point to make sure additional non-completed trials increase number of lying # trials generated - producer.produce() + producer.produce(1) trials_non_completed = storage._fetch_trials(query) assert len(trials_non_completed) == 5 @@ -362,7 +357,6 @@ def test_naive_algo_trained_on_all_non_completed_trials(producer, storage, rando def test_naive_algo_is_discared(producer, monkeypatch): """Verify that naive algo is discarded and recopied from original algo""" # Set values for predictions - producer.experiment.pool_size = 1 producer.experiment.algorithms.algorithm.possible_values = [("gru", "rnn")] producer.update() @@ -373,7 +367,7 @@ def test_naive_algo_is_discared(producer, monkeypatch): assert len(producer.algorithm.algorithm._points) == 3 assert len(first_naive_algorithm.algorithm._points) == (3 + 4) - producer.produce() + producer.produce(1) # Only update the original algo, naive algo is still not discarded update_algorithm(producer) @@ -392,24 +386,25 @@ def test_concurent_producers(producer, storage, random_dt): trials_in_db_before = len(storage._fetch_trials({})) new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) + # Avoid limiting number of samples from the within the algorithm. + producer.algorithm.algorithm.pool_size = 1000 + # Set so that first producer's algorithm generate valid point on first time - # And second producer produce same point and thus must produce next one two. + # And second producer produce same point and thus must produce next one too. 
# Hence, we know that producer algo will have _num == 1 and # second producer algo will have _num == 2 producer.algorithm.algorithm.possible_values = [("gru", "rnn"), ("gru", "gru")] # Make sure it starts from index 0 producer.algorithm.seed_rng(0) - assert producer.experiment.pool_size == 1 - second_producer = Producer(producer.experiment) second_producer.algorithm = copy.deepcopy(producer.algorithm) producer.update() second_producer.update() - producer.produce() - second_producer.produce() + producer.produce(1) + second_producer.produce(2) # Algorithm was required to suggest some trials num_new_points = producer.algorithm.algorithm._num @@ -438,6 +433,52 @@ def test_concurent_producers(producer, storage, random_dt): } +def test_concurent_producers_shared_pool(producer, storage, random_dt): + """Test concurrent production of new trials share the same pool""" + trials_in_db_before = len(storage._fetch_trials({})) + new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) + + # Set so that first producer's algorithm generate valid point on first time + # And second producer produce same point and thus must backoff and then stop + # because first producer filled the pool. 
+ # Hence, we know that producer algo will have _num == 1 and + # second producer algo will have _num == 1 + producer.algorithm.algorithm.possible_values = [("gru", "rnn"), ("gru", "gru")] + # Make sure it starts from index 0 + producer.algorithm.seed_rng(0) + + second_producer = Producer(producer.experiment) + second_producer.algorithm = copy.deepcopy(producer.algorithm) + + producer.update() + second_producer.update() + + producer.produce(1) + second_producer.produce(1) + + # Algorithm was required to suggest some trials + num_new_points = producer.algorithm.algorithm._num + assert num_new_points == 1 # pool size + num_new_points = second_producer.algorithm.algorithm._num + assert num_new_points == 0 # pool size + + # `num_new_points` new trials were registered at database + assert len(storage._fetch_trials({})) == trials_in_db_before + 1 + assert len(storage._fetch_trials({"status": "new"})) == new_trials_in_db_before + 1 + new_trials = list( + storage._fetch_trials({"status": "new", "submit_time": random_dt}) + ) + assert len(new_trials) == 1 + assert new_trials[0].experiment == producer.experiment.id + assert new_trials[0].start_time is None + assert new_trials[0].end_time is None + assert new_trials[0].results == [] + assert new_trials[0].params == { + "/decoding_layer": "gru", + "/encoding_layer": "rnn", + } + + def test_duplicate_within_pool(producer, storage, random_dt): """Test that an algo suggesting multiple points can have a few registered even if one of them is a duplicate. @@ -445,8 +486,8 @@ def test_duplicate_within_pool(producer, storage, random_dt): trials_in_db_before = len(storage._fetch_trials({})) new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) - producer.experiment.pool_size = 2 - producer.algorithm.algorithm.pool_size = 2 + # Avoid limiting number of samples from the within the algorithm. 
+ producer.algorithm.algorithm.pool_size = 1000 producer.experiment.algorithms.algorithm.possible_values = [ ("gru", "rnn"), @@ -455,7 +496,7 @@ def test_duplicate_within_pool(producer, storage, random_dt): ] producer.update() - producer.produce() + producer.produce(2) # Algorithm was required to suggest some trials num_new_points = producer.algorithm.algorithm._num @@ -489,8 +530,8 @@ def test_duplicate_within_pool_and_db(producer, storage, random_dt): trials_in_db_before = len(storage._fetch_trials({})) new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) - producer.experiment.pool_size = 2 - producer.algorithm.algorithm.pool_size = 2 + # Avoid limiting number of samples from the within the algorithm. + producer.algorithm.algorithm.pool_size = 1000 producer.experiment.algorithms.algorithm.possible_values = [ ("gru", "rnn"), @@ -499,7 +540,7 @@ def test_duplicate_within_pool_and_db(producer, storage, random_dt): ] producer.update() - producer.produce() + producer.produce(2) # Algorithm was required to suggest some trials num_new_points = producer.algorithm.algorithm._num @@ -532,14 +573,12 @@ def test_exceed_max_idle_time_because_of_duplicates(producer, random_dt): producer.max_idle_time = timeout # to limit run-time, default would work as well. 
producer.experiment.algorithms.algorithm.possible_values = [("rnn", "rnn")] - assert producer.experiment.pool_size == 1 - producer.update() start = time.time() with pytest.raises(SampleTimeout): - producer.produce() + producer.produce(1) assert timeout <= time.time() - start < timeout + 1 @@ -558,12 +597,10 @@ def opt_out(self, num=1): producer.experiment.algorithms.algorithm.__class__, "suggest", opt_out ) - assert producer.experiment.pool_size == 1 - producer.update() with pytest.raises(WaitingForTrials): - producer.produce() + producer.produce(1) def test_stops_if_algo_done(producer, storage, random_dt, monkeypatch): @@ -582,12 +619,10 @@ def opt_out_and_complete(self, num=1): opt_out_and_complete, ) - assert producer.experiment.pool_size == 1 - trials_in_db_before = len(storage._fetch_trials({})) producer.update() - producer.produce() + producer.produce(1) assert len(storage._fetch_trials({})) == trials_in_db_before assert producer.experiment.algorithms.is_done @@ -595,14 +630,12 @@ def opt_out_and_complete(self, num=1): def test_original_seeding(producer): """Verify that rng state in original algo changes when duplicate trials is discarded""" - assert producer.experiment.pool_size == 1 - producer.algorithm.seed_rng(0) assert producer.algorithm.algorithm._index == 0 producer.update() - producer.produce() + producer.produce(1) prev_index = producer.algorithm.algorithm._index prev_suggested = producer.algorithm.algorithm._suggested @@ -616,7 +649,7 @@ def test_original_seeding(producer): producer.algorithm.seed_rng(0) producer.update() - producer.produce() + producer.produce(1) assert prev_suggested != producer.algorithm.algorithm._suggested assert prev_index < producer.algorithm.algorithm._index @@ -677,7 +710,7 @@ def suggest(pool_size=None): producer.update() with pytest.raises(SampleTimeout): - producer.produce() + producer.produce(1) assert len(new_experiment.fetch_trials(with_evc_tree=False)) == 0 @@ -686,7 +719,6 @@ def test_algorithm_is_done(monkeypatch, 
producer): """Verify that producer won't register new samples if algorithm is done meanwhile.""" producer.experiment.max_trials = 8 producer.experiment.algorithms.algorithm.max_trials = 8 - producer.experiment.pool_size = 10 producer = Producer(producer.experiment) def suggest_one_only(self, num=1): @@ -698,12 +730,11 @@ def suggest_one_only(self, num=1): producer.experiment.algorithms.algorithm.__class__, "suggest", suggest_one_only ) - assert producer.experiment.pool_size == 10 trials_in_exp_before = len(producer.experiment.fetch_trials()) assert trials_in_exp_before == producer.experiment.max_trials - 1 producer.update() - producer.produce() + producer.produce(10) assert len(producer.experiment.fetch_trials()) == producer.experiment.max_trials assert producer.naive_algorithm.is_done @@ -729,11 +760,13 @@ def suggest_n(self, num): # Setup naive algorithm producer.update() - assert len(producer.suggest()) == 3 + assert len(producer.suggest(50)) == 3 + # Test pool_size is the min selected + assert len(producer.suggest(2)) == 2 producer.experiment.max_trials = 7 - assert len(producer.suggest()) == 1 + assert len(producer.suggest(50)) == 1 producer.experiment.max_trials = 5 - assert len(producer.suggest()) == 1 + assert len(producer.suggest(50)) == 1 trials = producer.experiment.fetch_trials() for trial in trials[:4]: @@ -745,4 +778,6 @@ def suggest_n(self, num): producer.update() # There is now 3 completed and 4 broken. Max trials is 5. 
Producer should suggest 2 - assert len(producer.suggest()) == 2 + assert len(producer.suggest(50)) == 2 + # Test pool_size is the min selected + assert len(producer.suggest(1)) == 1 From 35ce5c49f29944614b20f0361fe6076336176d73 Mon Sep 17 00:00:00 2001 From: Xavier Bouthillier Date: Fri, 10 Sep 2021 11:31:16 -0400 Subject: [PATCH 2/6] Revert max trials to 10e8 (like inf) --- src/orion/core/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/orion/core/__init__.py b/src/orion/core/__init__.py index 543367575..d9a3a9422 100644 --- a/src/orion/core/__init__.py +++ b/src/orion/core/__init__.py @@ -134,7 +134,7 @@ def define_experiment_config(config): experiment_config.add_option( "max_trials", option_type=int, - default=1000, + default=int(10e8), env_var="ORION_EXP_MAX_TRIALS", help="number of trials to be completed for the experiment. This value " "will be saved within the experiment configuration and reused " @@ -144,7 +144,7 @@ def define_experiment_config(config): experiment_config.add_option( "worker_trials", option_type=int, - default=1000, + default=int(10e8), deprecate=dict( version="v0.3", alternative="worker.max_trials", From c458f131f0b92dbb2426ad852fbe0d2e562dd9bb Mon Sep 17 00:00:00 2001 From: Xavier Bouthillier Date: Fri, 10 Sep 2021 11:31:35 -0400 Subject: [PATCH 3/6] Fix experiment tree tests Why: There was a bug in the tests. The functions to generate trials would generate more than requested because of the new behavior of producer attempting to produce all trials at once, once the value of `max_trials` was conflicting with the number of trials requested to the trial generating function for tests (`orion.testing.evc.generate_trials`). Fortunately the bug in the tests did not seem to miss any bugs in the code they were testing. How: Adjust the expected numbers based on the corrected behiavor. The numbers make indeed more sense now. 
--- .../core/evc/test_experiment_tree.py | 58 ++++++++++++------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/tests/unittests/core/evc/test_experiment_tree.py b/tests/unittests/core/evc/test_experiment_tree.py index 4c2b11b69..cd5d10ba5 100644 --- a/tests/unittests/core/evc/test_experiment_tree.py +++ b/tests/unittests/core/evc/test_experiment_tree.py @@ -147,28 +147,44 @@ def generic_tree_test( assert len(list(exp_node.root)) == num_nodes + print("In node") + for trial in experiment.fetch_trials(): + print(trial) assert len(experiment.fetch_trials()) == node_trials if parent_name: + print("In parent") + for trial in exp_node.parent.item.fetch_trials(): + print(trial) assert len(exp_node.parent.item.fetch_trials()) == parent_trials if grand_parent_name: + print("In grand-parent") + for trial in exp_node.parent.parent.item.fetch_trials(): + print(trial) assert len(exp_node.parent.parent.item.fetch_trials()) == grand_parent_trials if children_names: + print("In children") + for trial in exp_node.children[0].item.fetch_trials(): + print(trial) assert [ len(child.item.fetch_trials()) for child in exp_node.children ] == children_trials if grand_children_names: grand_children = sum([child.children for child in exp_node.children], []) + all_trials = sum( + [child_node.item.fetch_trials() for child_node in grand_children], [] + ) + print("In grand-children") + for trial in all_trials: + print(trial) assert [ len(child.item.fetch_trials()) for child in grand_children ] == grand_children_trials + print("with evc") for trial in experiment.fetch_trials(with_evc_tree=True): - print( - trial, - trial.compute_trial_hash(trial, ignore_lie=True, ignore_experiment=True), - ) + print(trial) assert len(experiment.fetch_trials(with_evc_tree=True)) == total_trials @@ -328,9 +344,9 @@ def generic_tree_test( dict( experiment_name="child", parent_name="root", - node_trials=9, + node_trials=6, parent_trials=4, - total_trials=10, + total_trials=7, ), ), 
"deletion-with-default-backward": ( @@ -341,8 +357,8 @@ def generic_tree_test( experiment_name="root", children_names=["child"], node_trials=4, - children_trials=[9], - total_trials=13, + children_trials=[6], + total_trials=10, ), ), "deletion-without-default-forward": ( @@ -352,9 +368,9 @@ def generic_tree_test( dict( experiment_name="child", parent_name="root", - node_trials=10, + node_trials=6, parent_trials=4, - total_trials=10, + total_trials=6, ), ), "deletion-without-default-backward": ( @@ -365,7 +381,7 @@ def generic_tree_test( experiment_name="root", children_names=["child"], node_trials=4, - children_trials=[10], + children_trials=[6], total_trials=4, ), ), @@ -377,10 +393,10 @@ def generic_tree_test( experiment_name="grand-child", parent_name="child", grand_parent_name="root", - node_trials=15, + node_trials=5, parent_trials=6, grand_parent_trials=4, - total_trials=15, + total_trials=5, ), ), "deletion-with-default-forward-backward": ( @@ -393,8 +409,8 @@ def generic_tree_test( children_names=["grand-child"], node_trials=6, parent_trials=4, - children_trials=[15], - total_trials=6 + 1 + 15, + children_trials=[5], + total_trials=6 + 1 + 5, ), ), "deletion-with-default-backward-backward": ( @@ -407,8 +423,8 @@ def generic_tree_test( grand_children_names=["grand-child"], node_trials=4, children_trials=[6], - grand_children_trials=[15], - total_trials=4 + 6 + 15, + grand_children_trials=[5], + total_trials=4 + 6 + 5, ), ), "deletion-without-default-forward-forward": ( @@ -419,10 +435,10 @@ def generic_tree_test( experiment_name="grand-child", parent_name="child", grand_parent_name="root", - node_trials=15, + node_trials=5, parent_trials=6, grand_parent_trials=4, - total_trials=15, + total_trials=5, ), ), "deletion-without-default-forward-backward": ( @@ -435,7 +451,7 @@ def generic_tree_test( children_names=["grand-child"], node_trials=6, parent_trials=4, - children_trials=[15], + children_trials=[5], total_trials=6, ), ), @@ -449,7 +465,7 @@ def 
generic_tree_test( grand_children_names=["grand-child"], node_trials=4, children_trials=[6], - grand_children_trials=[15], + grand_children_trials=[5], total_trials=4, ), ), From c7d3e0c778a3fa93d5a422a68987986a9a44a391 Mon Sep 17 00:00:00 2001 From: Xavier Bouthillier Date: Fri, 10 Sep 2021 11:39:35 -0400 Subject: [PATCH 4/6] Accept many arguments for workon --- src/orion/client/experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/orion/client/experiment.py b/src/orion/client/experiment.py index 976364901..4b9505908 100644 --- a/src/orion/client/experiment.py +++ b/src/orion/client/experiment.py @@ -643,6 +643,7 @@ def tmp_executor(self, executor, **config): yield self self.executor = old_executor + # pylint:disable=too-many-arguments def workon( self, fct, From db385f27cd598126ec32a796033d3a021e965bcc Mon Sep 17 00:00:00 2001 From: Xavier Bouthillier Date: Tue, 14 Sep 2021 09:07:32 -0400 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Lin Dong --- docs/src/user/config.rst | 2 +- src/orion/client/experiment.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/user/config.rst b/docs/src/user/config.rst index 8f7dc9477..b63f1c8ba 100644 --- a/docs/src/user/config.rst +++ b/docs/src/user/config.rst @@ -395,7 +395,7 @@ pool_size :Description: Number of trials to sample at a time. If 0, default to number of workers. Increase it to improve the sampling speed if workers spend too much time - waiting for algorithms to sample points. An algorithm will try sampling `pool-size` + waiting for algorithms to sample points. An algorithm will try sampling `pool_size` trials but may return less. diff --git a/src/orion/client/experiment.py b/src/orion/client/experiment.py index 4b9505908..8e2180d11 100644 --- a/src/orion/client/experiment.py +++ b/src/orion/client/experiment.py @@ -514,7 +514,7 @@ def suggest(self, pool_size=0): pool_size: int, optional Number of trials to sample at a time. 
If 0, default to global config if defined, else 1. Increase it to improve the sampling speed if workers spend too much time - waiting for algorithms to sample points. An algorithm will try sampling `pool-size` + waiting for algorithms to sample points. An algorithm will try sampling `pool_size` trials but may return less. Note: The method will still return only 1 trial even though if the pool size is larger than 1. This is because atomic reservation of trials can only be done one at a time. @@ -672,7 +672,7 @@ def workon( Number of trials to sample at a time. If 0, defaults to `n_workers` or value of global config if defined. Increase it to improve the sampling speed if workers spend too much time waiting for algorithms to sample points. An algorithm will try sampling - `pool-size` trials but may return less. + `pool_size` trials but may return less. max_trials: int, optional Maximum number of trials to execute within ``workon``. If the experiment or algorithm reach status is_done before, the execution of ``workon`` terminates. From 72698c0562fc7ac133608e6031538365e67edffc Mon Sep 17 00:00:00 2001 From: Xavier Bouthillier Date: Tue, 14 Sep 2021 10:28:06 -0400 Subject: [PATCH 6/6] Increase threshold of algo test... 
--- tests/functional/algos/test_algos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functional/algos/test_algos.py b/tests/functional/algos/test_algos.py index 4538dc7f7..1b3840d53 100644 --- a/tests/functional/algos/test_algos.py +++ b/tests/functional/algos/test_algos.py @@ -159,7 +159,7 @@ def test_with_fidelity(algorithm): best_trial = next(iter(sorted(trials, key=lambda trial: trial.objective.value))) assert best_trial.objective.name == "objective" - assert abs(best_trial.objective.value - 23.4) < 5 + assert abs(best_trial.objective.value - 23.4) < 10 assert len(best_trial.params) == 2 fidelity = best_trial._params[0] assert fidelity.name == "noise" @@ -194,7 +194,7 @@ def test_with_multidim(algorithm): best_trial = next(iter(sorted(trials, key=lambda trial: trial.objective.value))) assert best_trial.objective.name == "objective" - assert abs(best_trial.objective.value - 23.4) < 5 + assert abs(best_trial.objective.value - 23.4) < 10 assert len(best_trial.params) == 2 fidelity = best_trial._params[0] assert fidelity.name == "noise"