diff --git a/docs/src/tutorials/pytorch-mnist.rst b/docs/src/tutorials/pytorch-mnist.rst index 0b9d0e4bd..5e9429653 100644 --- a/docs/src/tutorials/pytorch-mnist.rst +++ b/docs/src/tutorials/pytorch-mnist.rst @@ -162,76 +162,3 @@ don't use ``--debug`` you will likely quickly fill your database with broken exp .. code-block:: bash $ orion --debug hunt -n orion-tutorial python main.py --lr~'loguniform(1e-5, 1.0)' - -Hunting Options ---------------- - -.. code-block:: console - - $ orion hunt --help - - Oríon arguments (optional): - These arguments determine orion's behaviour - - -n stringID, --name stringID - experiment's unique name; (default: None - specified - either here or in a config) - -u USER, --user USER user associated to experiment's unique name; (default: - $USER - can be overriden either here or in a config) - -c path-to-config, --config path-to-config - user provided orion configuration file - --max-trials # number of trials to be completed for the experiment. - This value will be saved within the experiment - configuration and reused across all workers to - determine experiment's completion. (default: inf/until - preempted) - --worker-trials # number of trials to be completed for this worker. If - the experiment is completed, the worker will die even - if it did not reach its maximum number of trials - (default: inf/until preempted) - --working-dir WORKING_DIR - Set working directory for running experiment. - --pool-size # number of simultaneous trials the algorithm should - suggest. This is useful if many workers are executed - in parallel and the algorithm has a strategy to sample - non-independant trials simultaneously. Otherwise, it - is better to leave `pool_size` to 1 and set a Strategy - for Oríon's producer. Note that this option is not usefull useless you - know the algorithm have a strategy to produce multiple trials - simultaneously. If you have any doubt, leave it to 1. - (default: 1) - -``name`` - -The unique name of the experiment. 
- -``user`` - -Username used to identify the experiments of a user. The default value is the system's username -$USER. - -``config`` - -Configuration file for Oríon which may define the database, the algorithm and all options of the -command hunt, including ``name``, ``pool-size`` and ``max-trials``. - -``max-trials`` - -The maximum number of trials tried during an experiment. - -``worker-trials`` - -The maximum number of trials to be executed by a worker (a single call to ``orion hunt [...]``). - -``working-dir`` - -The directory where configuration files are created. If not specified, Oríon will create a -temporary directory that will be removed at end of execution of the trial. - -``pool-size`` - -The number of trials which are generated by the algorithm each time it is interrogated. This is -useful if many workers are executed in parallel and the algorithm has a strategy to sample -non-independant trials simultaneously. Otherwise, it is better to leave ``pool_size`` to its default -value 1. Note that this option is not usefull useless you know the algorithm have a strategy -to produce multiple trials simultaneously. If you have any doubt, leave it to 1. :) diff --git a/docs/src/user/config.rst b/docs/src/user/config.rst index 74cf5c846..b63f1c8ba 100644 --- a/docs/src/user/config.rst +++ b/docs/src/user/config.rst @@ -97,7 +97,6 @@ Full Example of Global Configuration seed: None max_broken: 3 max_trials: 1000000000 - pool_size: 1 strategy: MaxParallelStrategy worker_trials: 1000000000 @@ -105,6 +104,7 @@ Full Example of Global Configuration worker: n_workers: 1 + pool_size: 0 executor: joblib executor_configuration: {} heartbeat: 120 @@ -211,7 +211,6 @@ Experiment seed: None max_broken: 3 max_trials: 1000000000 - pool_size: 1 strategy: MaxParallelStrategy worker_trials: 1000000000 @@ -322,22 +321,6 @@ working_dir -.. _config_experiment_pool_size: - -pool_size -~~~~~~~~~ - -.. warning:: - - **DEPRECATED.** This argument will be removed in v0.3. 
- -:Type: int -:Default: 1 -:Env var: -:Description: - (DEPRECATED) This argument will be removed in v0.3. - - .. _config_experiment_algorithms: algorithms @@ -376,6 +359,7 @@ Worker worker: n_workers: 1 + pool_size: 0 executor: joblib executor_configuration: {} heartbeat: 120 @@ -400,6 +384,20 @@ n_workers It is possible to run many `orion hunt` in parallel, and each will spawn ``n_workers``. +.. _config_worker_pool_size: + +pool_size +~~~~~~~~~ + +:Type: int +:Default: 0 +:Env var: +:Description: + Number of trials to sample at a time. If 0, default to number of workers. + Increase it to improve the sampling speed if workers spend too much time + waiting for algorithms to sample points. An algorithm will try sampling `pool_size` + trials but may return less. + .. _config_worker_executor: diff --git a/src/orion/client/experiment.py b/src/orion/client/experiment.py index 03a6ac9d2..8e2180d11 100644 --- a/src/orion/client/experiment.py +++ b/src/orion/client/experiment.py @@ -32,7 +32,7 @@ log = logging.getLogger(__name__) -def reserve_trial(experiment, producer, _depth=1): +def reserve_trial(experiment, producer, pool_size, _depth=1): """Reserve a new trial, or produce and reserve a trial if none are available.""" log.debug("Trying to reserve a new trial to evaluate.") trial = experiment.reserve_trial() @@ -51,9 +51,9 @@ def reserve_trial(experiment, producer, _depth=1): producer.update() log.debug("#### Produce new trials.") - producer.produce() + producer.produce(pool_size) - return reserve_trial(experiment, producer, _depth=_depth + 1) + return reserve_trial(experiment, producer, pool_size, _depth=_depth + 1) return trial @@ -500,7 +500,7 @@ def release(self, trial, status="interrupted"): finally: self._release_reservation(trial, raise_if_unreserved=raise_if_unreserved) - def suggest(self): + def suggest(self, pool_size=0): """Suggest a trial to execute. Experiment must be in executable ('x') mode. 
@@ -509,6 +509,16 @@ def suggest(self): Otherwise, the algorithm is used to generate a new trial that is registered in storage and reserved. + Parameters + ---------- + pool_size: int, optional + Number of trials to sample at a time. If 0, default to global config if defined, + else 1. Increase it to improve the sampling speed if workers spend too much time + waiting for algorithms to sample points. An algorithm will try sampling `pool_size` + trials but may return less. Note: The method will still return only 1 trial even though + if the pool size is larger than 1. This is because atomic reservation of trials + can only be done one at a time. + Returns ------- `orior.core.worker.trial.Trial` @@ -535,6 +545,10 @@ def suggest(self): """ self._check_if_executable() + if not pool_size: + pool_size = orion.core.config.worker.pool_size + if not pool_size: + pool_size = 1 if self.is_broken: raise BrokenExperiment("Trials failed too many times") @@ -543,7 +557,7 @@ def suggest(self): raise CompletedExperiment("Experiment is done, cannot sample more trials.") try: - trial = reserve_trial(self._experiment, self._producer) + trial = reserve_trial(self._experiment, self._producer, pool_size) except (WaitingForTrials, SampleTimeout) as e: if self.is_broken: @@ -629,10 +643,12 @@ def tmp_executor(self, executor, **config): yield self self.executor = old_executor + # pylint:disable=too-many-arguments def workon( self, fct, n_workers=None, + pool_size=0, max_trials=None, max_trials_per_worker=None, max_broken=None, @@ -652,6 +668,11 @@ def workon( objective. n_workers: int, optional Number of workers to run in parallel. Defaults to value of global config. + pool_size: int, optional + Number of trials to sample at a time. If 0, defaults to `n_workers` or value of global + config if defined. Increase it to improve the sampling speed if workers spend too much + time waiting for algorithms to sample points. An algorithm will try sampling + `pool_size` trials but may return less. 
max_trials: int, optional Maximum number of trials to execute within ``workon``. If the experiment or algorithm reach status is_done before, the execution of ``workon`` terminates. @@ -712,6 +733,11 @@ def workon( str(self.executor.n_workers), ) + if not pool_size: + pool_size = orion.core.config.worker.pool_size + if not pool_size: + pool_size = n_workers + if max_trials is None: max_trials = self.max_trials @@ -731,6 +757,7 @@ def workon( self.executor.submit( self._optimize, fct, + pool_size, max_trials_per_worker, max_broken, trial_arg, @@ -742,14 +769,16 @@ def workon( return sum(trials) - def _optimize(self, fct, max_trials, max_broken, trial_arg, on_error, **kwargs): + def _optimize( + self, fct, pool_size, max_trials, max_broken, trial_arg, on_error, **kwargs + ): worker_broken_trials = 0 trials = 0 kwargs = flatten(kwargs) max_trials = min(max_trials, self.max_trials) while not self.is_done and trials - worker_broken_trials < max_trials: try: - with self.suggest() as trial: + with self.suggest(pool_size=pool_size) as trial: kwargs.update(flatten(trial.params)) diff --git a/src/orion/core/__init__.py b/src/orion/core/__init__.py index 1da4d4248..d9a3a9422 100644 --- a/src/orion/core/__init__.py +++ b/src/orion/core/__init__.py @@ -134,7 +134,7 @@ def define_experiment_config(config): experiment_config.add_option( "max_trials", option_type=int, - default=1000, + default=int(10e8), env_var="ORION_EXP_MAX_TRIALS", help="number of trials to be completed for the experiment. 
This value " "will be saved within the experiment configuration and reused " @@ -144,7 +144,7 @@ def define_experiment_config(config): experiment_config.add_option( "worker_trials", option_type=int, - default=1000, + default=int(10e8), deprecate=dict( version="v0.3", alternative="worker.max_trials", @@ -169,14 +169,6 @@ def define_experiment_config(config): help="Set working directory for running experiment.", ) - experiment_config.add_option( - "pool_size", - option_type=int, - default=1, - deprecate=dict(version="v0.3", alternative=None, name="experiment.pool_size"), - help="This argument will be removed in v0.3.", - ) - experiment_config.add_option( "algorithms", option_type=dict, @@ -210,6 +202,20 @@ def define_worker_config(config): ), ) + worker_config.add_option( + "pool_size", + option_type=int, + default=0, + env_var="ORION_POOL_SIZE", + help=( + "Number of trials to sample at a time. " + "If 0, will default to number of executor workers. " + "Increase it to improve the sampling speed if workers spend too much time " + "waiting for algorithms to sample points. An algorithm will try sampling " + "`pool-size` trials but may return less." 
+ ), + ) + worker_config.add_option( "executor", option_type=str, diff --git a/src/orion/core/cli/hunt.py b/src/orion/core/cli/hunt.py index 4f4c1b8a3..3b724d453 100644 --- a/src/orion/core/cli/hunt.py +++ b/src/orion/core/cli/hunt.py @@ -130,6 +130,7 @@ def on_error(client, trial, error, worker_broken_trials): def workon( experiment, n_workers=None, + pool_size=None, max_trials=None, max_broken=None, max_idle_time=None, @@ -163,6 +164,7 @@ def workon( client.workon( consumer, n_workers=n_workers, + pool_size=pool_size, max_trials_per_worker=max_trials, max_broken=max_broken, trial_arg="trial", diff --git a/src/orion/core/io/experiment_builder.py b/src/orion/core/io/experiment_builder.py index 7ebb9d8ba..e4ffcb9db 100644 --- a/src/orion/core/io/experiment_builder.py +++ b/src/orion/core/io/experiment_builder.py @@ -379,11 +379,6 @@ def create_experiment(name, version, mode, space, **kwargs): """ experiment = Experiment(name=name, version=version, mode=mode) experiment._id = kwargs.get("_id", None) # pylint:disable=protected-access - experiment.pool_size = kwargs.get("pool_size") - if experiment.pool_size is None: - experiment.pool_size = orion.core.config.experiment.get( - "pool_size", deprecated="ignore" - ) experiment.max_trials = kwargs.get( "max_trials", orion.core.config.experiment.max_trials ) diff --git a/src/orion/core/utils/format_terminal.py b/src/orion/core/utils/format_terminal.py index ea1d0b208..9d254ffbb 100644 --- a/src/orion/core/utils/format_terminal.py +++ b/src/orion/core/utils/format_terminal.py @@ -257,7 +257,6 @@ def format_commandline(experiment): CONFIG_TEMPLATE = """\ {title} -pool size: {experiment.pool_size} max trials: {experiment.max_trials} max broken: {experiment.max_broken} working dir: {experiment.working_dir} diff --git a/src/orion/core/worker/experiment.py b/src/orion/core/worker/experiment.py index 2109b7ab5..1dff1852b 100644 --- a/src/orion/core/worker/experiment.py +++ b/src/orion/core/worker/experiment.py @@ -45,8 +45,6 @@ 
class Experiment: Current version of this experiment. metadata : dict Contains managerial information about this `Experiment`. - pool_size : int - How many workers can participate asynchronously in this `Experiment`. max_trials : int How many trials must be evaluated, before considering this `Experiment` done. This attribute can be updated if the rest of the experiment configuration @@ -90,7 +88,6 @@ class Experiment: "name", "refers", "metadata", - "pool_size", "max_trials", "max_broken", "version", @@ -103,7 +100,7 @@ class Experiment: "_node", "_mode", ) - non_branching_attrs = ("pool_size", "max_trials", "max_broken") + non_branching_attrs = ("max_trials", "max_broken") def __init__(self, name, version=None, mode="r"): self._id = None @@ -113,7 +110,6 @@ def __init__(self, name, version=None, mode="r"): self._node = None self.refers = {} self.metadata = {} - self.pool_size = None self.max_trials = None self.max_broken = None self.space = None diff --git a/src/orion/core/worker/producer.py b/src/orion/core/worker/producer.py index f825523f8..b03c9940a 100644 --- a/src/orion/core/worker/producer.py +++ b/src/orion/core/worker/producer.py @@ -59,11 +59,6 @@ def __init__(self, experiment, max_idle_time=None): self.num_trials = 0 self.num_broken = 0 - @property - def pool_size(self): - """Pool-size of the experiment""" - return self.experiment.pool_size - def backoff(self): """Wait some time and update algorithm.""" waiting_time = max(0, random.gauss(1, 0.2)) @@ -90,25 +85,32 @@ def is_done(self): self.naive_algorithm is not None and self.naive_algorithm.is_done ) - def suggest(self): + def suggest(self, pool_size): """Try suggesting new points with the naive algorithm""" num_pending = self.num_trials - self.num_broken num = max(self.experiment.max_trials - num_pending, 1) - return self.naive_algorithm.suggest(num) + return self.naive_algorithm.suggest(min(num, pool_size)) - def produce(self): + def produce(self, pool_size): """Create and register new trials.""" 
sampled_points = 0 - # reset the number of time we failed to sample points self.failure_count = 0 start = time.time() - while sampled_points < self.pool_size and not self.is_done: + # This number (self.num_trials) is based on most recent algo state update so it is not + # sensitive to race-conditions. If another worker started suggesting points in between, the + # value of self.num_trials will not count it and thus the current producer will count new + # points of other producer as part of the current pool samples. + while ( + len(self.experiment.fetch_trials(with_evc_tree=True)) - self.num_trials + < pool_size + and not self.is_done + ): self._sample_guard(start) log.debug("### Algorithm suggests new points.") - new_points = self.suggest() + new_points = self.suggest(pool_size) # Sync state of original algo so that state continues evolving. self.algorithm.set_state(self.naive_algorithm.state_dict) diff --git a/src/orion/serving/responses.py b/src/orion/serving/responses.py index d8087a2c2..49152fc69 100644 --- a/src/orion/serving/responses.py +++ b/src/orion/serving/responses.py @@ -77,7 +77,6 @@ def build_experiment_response( "config": { "maxTrials": experiment.max_trials, "maxBroken": experiment.max_broken, - "poolSize": experiment.pool_size, "algorithm": algorithm, "space": experiment.configuration["space"], }, diff --git a/src/orion/testing/evc.py b/src/orion/testing/evc.py index 7b3520d64..da543497a 100644 --- a/src/orion/testing/evc.py +++ b/src/orion/testing/evc.py @@ -44,6 +44,12 @@ def generate_trials(exp, trials): status, heartbeat=trial.submit_time if status == "reserved" else None, ) + else: + exp._experiment._storage.set_trial_status( + trial, + "reserved", + heartbeat=trial.submit_time, + ) def build_root_experiment(space=None, trials=None): diff --git a/tests/functional/algos/test_algos.py b/tests/functional/algos/test_algos.py index 4538dc7f7..1b3840d53 100644 --- a/tests/functional/algos/test_algos.py +++ b/tests/functional/algos/test_algos.py @@ 
-159,7 +159,7 @@ def test_with_fidelity(algorithm): best_trial = next(iter(sorted(trials, key=lambda trial: trial.objective.value))) assert best_trial.objective.name == "objective" - assert abs(best_trial.objective.value - 23.4) < 5 + assert abs(best_trial.objective.value - 23.4) < 10 assert len(best_trial.params) == 2 fidelity = best_trial._params[0] assert fidelity.name == "noise" @@ -194,7 +194,7 @@ def test_with_multidim(algorithm): best_trial = next(iter(sorted(trials, key=lambda trial: trial.objective.value))) assert best_trial.objective.name == "objective" - assert abs(best_trial.objective.value - 23.4) < 5 + assert abs(best_trial.objective.value - 23.4) < 10 assert len(best_trial.params) == 2 fidelity = best_trial._params[0] assert fidelity.name == "noise" diff --git a/tests/functional/configuration/test_all_options.py b/tests/functional/configuration/test_all_options.py index 56d64195b..4ee3d8037 100644 --- a/tests/functional/configuration/test_all_options.py +++ b/tests/functional/configuration/test_all_options.py @@ -369,7 +369,6 @@ class TestExperimentConfig(ConfigurationTestSuite): "max_trials": 10, "max_broken": 5, "working_dir": "here", - "pool_size": 2, "worker_trials": 5, "algorithms": {"aa": {"b": "c", "d": {"e": "f"}}}, "strategy": {"sa": {"c": "d", "e": {"f": "g"}}}, @@ -563,6 +562,7 @@ class TestWorkerConfig(ConfigurationTestSuite): config = { "worker": { "n_workers": 2, + "pool_size": 2, "executor": "dask", "executor_configuration": {"threads_per_worker": 1}, "heartbeat": 30, @@ -576,6 +576,7 @@ class TestWorkerConfig(ConfigurationTestSuite): env_vars = { "ORION_N_WORKERS": 3, + "ORION_POOL_SIZE": 1, "ORION_EXECUTOR": "joblib", "ORION_HEARTBEAT": 40, "ORION_WORKER_MAX_TRIALS": 20, @@ -588,6 +589,7 @@ class TestWorkerConfig(ConfigurationTestSuite): local = { "worker": { "n_workers": 4, + "pool_size": 5, "executor": "dask", "executor_configuration": {"threads_per_worker": 2}, "heartbeat": 50, @@ -601,6 +603,7 @@ class 
TestWorkerConfig(ConfigurationTestSuite): cmdargs = { "n-workers": 1, + "pool-size": 6, "executor": "dask", "heartbeat": 70, "worker-max-trials": 1, @@ -704,6 +707,7 @@ def check_env_var_config(self, tmp_path, monkeypatch): """Check that env vars overrides global configuration""" env_var_config = { "n_workers": self.env_vars["ORION_N_WORKERS"], + "pool_size": self.env_vars["ORION_POOL_SIZE"], "executor": self.env_vars["ORION_EXECUTOR"], "executor_configuration": self.config["worker"]["executor_configuration"], "heartbeat": self.env_vars["ORION_HEARTBEAT"], diff --git a/tests/functional/demo/test_demo.py b/tests/functional/demo/test_demo.py index f9ffed13e..c95fac11d 100644 --- a/tests/functional/demo/test_demo.py +++ b/tests/functional/demo/test_demo.py @@ -41,7 +41,6 @@ def test_demo_with_default_algo_cli_config_only(storage, monkeypatch): exp = exp[0] assert "_id" in exp assert exp["name"] == "default_algo" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 5 assert exp["max_broken"] == 3 assert exp["algorithms"] == {"random": {"seed": None}} @@ -84,7 +83,6 @@ def test_demo(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "voila_voici" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 20 assert exp["max_broken"] == 5 assert exp["algorithms"] == { @@ -135,7 +133,6 @@ def test_demo_with_script_config(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "voila_voici" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 20 assert exp["max_broken"] == 5 assert exp["algorithms"] == { @@ -192,7 +189,6 @@ def test_demo_with_python_and_script(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "voila_voici" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 20 assert exp["max_broken"] == 5 assert exp["algorithms"] == { @@ -279,7 +275,6 @@ def test_demo_four_workers(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert 
exp["name"] == "four_workers_demo" - assert exp["pool_size"] == 2 assert exp["max_trials"] == 20 assert exp["max_broken"] == 5 assert exp["algorithms"] == {"random": {"seed": 2}} @@ -306,7 +301,6 @@ def test_workon(): name = "voici_voila" config = {"name": name} config["algorithms"] = {"random": {"seed": 1}} - config["pool_size"] = 1 config["max_trials"] = 50 config["exp_max_broken"] = 5 config["user_args"] = [ @@ -338,7 +332,6 @@ def test_workon(): exp = exp[0] assert "_id" in exp assert exp["name"] == name - assert exp["pool_size"] == 1 assert exp["max_trials"] == 50 assert exp["max_broken"] == 5 assert exp["algorithms"] == {"random": {"seed": 1}} @@ -370,7 +363,6 @@ def test_stress_unique_folder_creation(storage, monkeypatch, tmpdir, capfd): [ "hunt", "--max-trials={}".format(how_many), - "--pool-size=1", "--name=lalala", "--config", "./stress_gradient.yaml", @@ -570,8 +562,6 @@ def test_run_with_parallel_strategy(storage, monkeypatch, strategy): "hunt", "--max-trials", "20", - "--pool-size", - "1", "--config", config_file, "./black_box.py", @@ -602,8 +592,6 @@ def test_worker_trials(storage, monkeypatch): "hunt", "--config", "./orion_config_random.yaml", - "--pool-size", - "1", "--worker-trials", "0", "./black_box.py", @@ -720,7 +708,6 @@ def test_demo_with_nondefault_config_keyword(storage, monkeypatch): assert "_id" in exp exp_id = exp["_id"] assert exp["name"] == "voila_voici" - assert exp["pool_size"] == 1 assert exp["max_trials"] == 20 assert exp["algorithms"] == { "gradient_descent": {"learning_rate": 0.1, "dx_tolerance": 1e-5} diff --git a/tests/functional/serving/test_experiments_resource.py b/tests/functional/serving/test_experiments_resource.py index 93bef6611..06e43ea7c 100644 --- a/tests/functional/serving/test_experiments_resource.py +++ b/tests/functional/serving/test_experiments_resource.py @@ -24,7 +24,6 @@ }, }, version=1, - pool_size=1, max_trials=10, max_broken=7, working_dir="", @@ -194,7 +193,6 @@ def _add_trial(**kwargs): def 
_assert_config(config): """Asserts properties of the ``config`` dictionary""" - assert config["poolSize"] == 1 assert config["maxTrials"] == 10 assert config["maxBroken"] == 7 diff --git a/tests/unittests/client/test_experiment_client.py b/tests/unittests/client/test_experiment_client.py index 93374ff4b..f1e690c53 100644 --- a/tests/unittests/client/test_experiment_client.py +++ b/tests/unittests/client/test_experiment_client.py @@ -37,7 +37,6 @@ }, }, version=1, - pool_size=1, max_trials=10, max_broken=5, working_dir="", @@ -674,7 +673,7 @@ def is_done(self): """Experiment is done""" return True - def set_is_done(): + def set_is_done(pool_size): """Set is_done while algo is trying to suggest""" monkeypatch.setattr(experiment.__class__, "is_done", property(is_done)) @@ -703,7 +702,7 @@ def is_broken(self): """Experiment is broken""" return True - def set_is_broken(): + def set_is_broken(pool_size): """Set is_broken while algo is trying to suggest""" monkeypatch.setattr( experiment.__class__, "is_broken", property(is_broken) diff --git a/tests/unittests/core/cli/test_info.py b/tests/unittests/core/cli/test_info.py index 4e627bf65..c79764d88 100755 --- a/tests/unittests/core/cli/test_info.py +++ b/tests/unittests/core/cli/test_info.py @@ -398,7 +398,6 @@ def test_format_commandline(): def test_format_config(monkeypatch): """Test config section formatting""" experiment = DummyExperiment() - experiment.pool_size = 10 experiment.max_trials = 100 experiment.max_broken = 5 experiment.working_dir = "working_dir" @@ -407,7 +406,6 @@ def test_format_config(monkeypatch): == """\ Config ====== -pool size: 10 max trials: 100 max broken: 5 working dir: working_dir @@ -608,7 +606,6 @@ def test_format_info(algorithm_dict, dummy_trial): experiment.name = "test" experiment.version = 1 experiment.metadata = {"user_args": commandline} - experiment.pool_size = 10 experiment.max_trials = 100 experiment.max_broken = 5 experiment.working_dir = "working_dir" @@ -678,7 +675,6 @@ def 
test_format_info(algorithm_dict, dummy_trial): Config ====== -pool size: 10 max trials: 100 max broken: 5 working dir: working_dir diff --git a/tests/unittests/core/evc/test_experiment_tree.py b/tests/unittests/core/evc/test_experiment_tree.py index 4c2b11b69..cd5d10ba5 100644 --- a/tests/unittests/core/evc/test_experiment_tree.py +++ b/tests/unittests/core/evc/test_experiment_tree.py @@ -147,28 +147,44 @@ def generic_tree_test( assert len(list(exp_node.root)) == num_nodes + print("In node") + for trial in experiment.fetch_trials(): + print(trial) assert len(experiment.fetch_trials()) == node_trials if parent_name: + print("In parent") + for trial in exp_node.parent.item.fetch_trials(): + print(trial) assert len(exp_node.parent.item.fetch_trials()) == parent_trials if grand_parent_name: + print("In grand-parent") + for trial in exp_node.parent.parent.item.fetch_trials(): + print(trial) assert len(exp_node.parent.parent.item.fetch_trials()) == grand_parent_trials if children_names: + print("In children") + for trial in exp_node.children[0].item.fetch_trials(): + print(trial) assert [ len(child.item.fetch_trials()) for child in exp_node.children ] == children_trials if grand_children_names: grand_children = sum([child.children for child in exp_node.children], []) + all_trials = sum( + [child_node.item.fetch_trials() for child_node in grand_children], [] + ) + print("In grand-children") + for trial in all_trials: + print(trial) assert [ len(child.item.fetch_trials()) for child in grand_children ] == grand_children_trials + print("with evc") for trial in experiment.fetch_trials(with_evc_tree=True): - print( - trial, - trial.compute_trial_hash(trial, ignore_lie=True, ignore_experiment=True), - ) + print(trial) assert len(experiment.fetch_trials(with_evc_tree=True)) == total_trials @@ -328,9 +344,9 @@ def generic_tree_test( dict( experiment_name="child", parent_name="root", - node_trials=9, + node_trials=6, parent_trials=4, - total_trials=10, + total_trials=7, ), ), 
"deletion-with-default-backward": ( @@ -341,8 +357,8 @@ def generic_tree_test( experiment_name="root", children_names=["child"], node_trials=4, - children_trials=[9], - total_trials=13, + children_trials=[6], + total_trials=10, ), ), "deletion-without-default-forward": ( @@ -352,9 +368,9 @@ def generic_tree_test( dict( experiment_name="child", parent_name="root", - node_trials=10, + node_trials=6, parent_trials=4, - total_trials=10, + total_trials=6, ), ), "deletion-without-default-backward": ( @@ -365,7 +381,7 @@ def generic_tree_test( experiment_name="root", children_names=["child"], node_trials=4, - children_trials=[10], + children_trials=[6], total_trials=4, ), ), @@ -377,10 +393,10 @@ def generic_tree_test( experiment_name="grand-child", parent_name="child", grand_parent_name="root", - node_trials=15, + node_trials=5, parent_trials=6, grand_parent_trials=4, - total_trials=15, + total_trials=5, ), ), "deletion-with-default-forward-backward": ( @@ -393,8 +409,8 @@ def generic_tree_test( children_names=["grand-child"], node_trials=6, parent_trials=4, - children_trials=[15], - total_trials=6 + 1 + 15, + children_trials=[5], + total_trials=6 + 1 + 5, ), ), "deletion-with-default-backward-backward": ( @@ -407,8 +423,8 @@ def generic_tree_test( grand_children_names=["grand-child"], node_trials=4, children_trials=[6], - grand_children_trials=[15], - total_trials=4 + 6 + 15, + grand_children_trials=[5], + total_trials=4 + 6 + 5, ), ), "deletion-without-default-forward-forward": ( @@ -419,10 +435,10 @@ def generic_tree_test( experiment_name="grand-child", parent_name="child", grand_parent_name="root", - node_trials=15, + node_trials=5, parent_trials=6, grand_parent_trials=4, - total_trials=15, + total_trials=5, ), ), "deletion-without-default-forward-backward": ( @@ -435,7 +451,7 @@ def generic_tree_test( children_names=["grand-child"], node_trials=6, parent_trials=4, - children_trials=[15], + children_trials=[5], total_trials=6, ), ), @@ -449,7 +465,7 @@ def 
generic_tree_test( grand_children_names=["grand-child"], node_trials=4, children_trials=[6], - grand_children_trials=[15], + grand_children_trials=[5], total_trials=4, ), ), diff --git a/tests/unittests/core/io/test_experiment_builder.py b/tests/unittests/core/io/test_experiment_builder.py index 3af8ddcc3..5747ae79f 100644 --- a/tests/unittests/core/io/test_experiment_builder.py +++ b/tests/unittests/core/io/test_experiment_builder.py @@ -112,7 +112,6 @@ def new_config(random_dt, script_path): }, }, version=1, - pool_size=10, max_trials=1000, max_broken=5, working_dir="", @@ -192,7 +191,6 @@ def test_get_cmd_config(config_file): assert local_config["max_trials"] == 100 assert local_config["max_broken"] == 5 assert local_config["name"] == "voila_voici" - assert local_config["pool_size"] == 1 assert local_config["storage"] == { "database": { "host": "mongodb://user:pass@localhost", @@ -216,7 +214,6 @@ def test_get_cmd_config_from_incomplete_config(incomplete_config_file): assert "algorithms" not in local_config assert "max_trials" not in local_config assert "max_broken" not in local_config - assert "pool_size" not in local_config assert "name" not in local_config["storage"]["database"] assert ( local_config["storage"]["database"]["host"] == "mongodb://user:pass@localhost" @@ -243,7 +240,6 @@ def test_fetch_config_from_db_hit(new_config): assert db_config["name"] == new_config["name"] assert db_config["refers"] == new_config["refers"] assert db_config["metadata"] == new_config["metadata"] - assert db_config["pool_size"] == new_config["pool_size"] assert db_config["max_trials"] == new_config["max_trials"] assert db_config["max_broken"] == new_config["max_broken"] assert db_config["algorithms"] == new_config["algorithms"] @@ -275,7 +271,6 @@ def test_get_from_args_hit(config_file, random_dt, new_config): assert exp_view.name == new_config["name"] assert exp_view.configuration["refers"] == new_config["refers"] assert exp_view.metadata == new_config["metadata"] - assert 
exp_view.pool_size == new_config["pool_size"] assert exp_view.max_trials == new_config["max_trials"] assert exp_view.max_broken == new_config["max_broken"] assert exp_view.algorithms.configuration == new_config["algorithms"] @@ -299,7 +294,6 @@ def test_get_from_args_hit_no_conf_file(config_file, random_dt, new_config): assert exp_view.name == new_config["name"] assert exp_view.configuration["refers"] == new_config["refers"] assert exp_view.metadata == new_config["metadata"] - assert exp_view.pool_size == new_config["pool_size"] assert exp_view.max_trials == new_config["max_trials"] assert exp_view.max_broken == new_config["max_broken"] assert exp_view.algorithms.configuration == new_config["algorithms"] @@ -333,7 +327,6 @@ def test_build_from_args_no_hit(config_file, random_dt, script_path, new_config) assert exp.metadata["user"] == "dendi" assert exp.metadata["user_script"] == cmdargs["user_args"][0] assert exp.metadata["user_args"] == cmdargs["user_args"] - assert exp.pool_size == 1 assert exp.max_trials == 100 assert exp.max_broken == 5 assert exp.algorithms.configuration == {"random": {"seed": None}} @@ -715,7 +708,6 @@ def test_good_set_before_init_no_hit(self, random_dt, new_config): assert exp.name == new_config["name"] assert exp.configuration["refers"] == new_config["refers"] assert exp.metadata == new_config["metadata"] - assert exp.pool_size == new_config["pool_size"] assert exp.max_trials == new_config["max_trials"] assert exp.max_broken == new_config["max_broken"] assert exp.working_dir == new_config["working_dir"] diff --git a/tests/unittests/core/io/test_resolve_config.py b/tests/unittests/core/io/test_resolve_config.py index 1aae08ecf..cb8f65630 100644 --- a/tests/unittests/core/io/test_resolve_config.py +++ b/tests/unittests/core/io/test_resolve_config.py @@ -113,7 +113,6 @@ def test_fetch_config_from_cmdargs(): "worker_trials": "worker_trials", "exp_max_broken": "exp_max_broken", "working_dir": "working_dir", - "pool_size": "pool_size", 
"max_trials": "max_trials", "heartbeat": "heartbeat", "worker_max_trials": "worker_max_trials", @@ -144,7 +143,6 @@ def test_fetch_config_from_cmdargs(): assert exp_config.pop("max_trials") == "exp_max_trials" assert exp_config.pop("max_broken") == "exp_max_broken" assert exp_config.pop("working_dir") == "working_dir" - assert exp_config.pop("pool_size") == "pool_size" assert exp_config == {} @@ -222,7 +220,6 @@ def test_fetch_config(config_file): "max_trials": 100, "max_broken": 5, "name": "voila_voici", - "pool_size": 1, "algorithms": "random", "strategy": "NoParallelStrategy", } @@ -258,7 +255,6 @@ def mocked_config(file_object): assert exp_config.pop("max_trials") == orion.core.config.experiment.max_trials assert exp_config.pop("max_broken") == orion.core.config.experiment.max_broken assert exp_config.pop("working_dir") == orion.core.config.experiment.working_dir - assert exp_config.pop("pool_size") == orion.core.config.experiment.pool_size assert exp_config.pop("algorithms") == orion.core.config.experiment.algorithms assert exp_config.pop("strategy") == orion.core.config.experiment.strategy @@ -267,6 +263,7 @@ def mocked_config(file_object): # Test worker subconfig worker_config = config.pop("worker") assert worker_config.pop("n_workers") == orion.core.config.worker.n_workers + assert worker_config.pop("pool_size") == orion.core.config.worker.pool_size assert worker_config.pop("executor") == orion.core.config.worker.executor assert ( worker_config.pop("executor_configuration") diff --git a/tests/unittests/core/worker/test_experiment.py b/tests/unittests/core/worker/test_experiment.py index 3fc36e9e1..01d3dd0f6 100644 --- a/tests/unittests/core/worker/test_experiment.py +++ b/tests/unittests/core/worker/test_experiment.py @@ -46,7 +46,6 @@ def new_config(random_dt): }, }, version=1, - pool_size=10, max_trials=1000, max_broken=5, working_dir=None, @@ -600,7 +599,6 @@ def test_experiment_pickleable(): "max_trials", "metadata", "name", - "pool_size", "producer", 
"refers", "retrieve_result", diff --git a/tests/unittests/core/worker/test_producer.py b/tests/unittests/core/worker/test_producer.py index 0e2d349a4..2fd5a8122 100644 --- a/tests/unittests/core/worker/test_producer.py +++ b/tests/unittests/core/worker/test_producer.py @@ -58,7 +58,6 @@ def producer(monkeypatch, hacked_exp, random_dt, categorical_values): """Return a setup `Producer`.""" # make init done - assert hacked_exp.pool_size == 1 hacked_exp.algorithms.algorithm.possible_values = categorical_values hacked_exp.algorithms.seed_rng(0) hacked_exp.max_trials = 20 @@ -114,12 +113,11 @@ def test_strategist_observe_completed(producer): def test_naive_algorithm_is_producing(monkeypatch, producer, random_dt): """Verify naive algo is used to produce, not original algo""" - producer.experiment.pool_size = 1 producer.algorithm.algorithm.possible_values = [("gru", "rnn")] producer.update() monkeypatch.setattr(producer.algorithm.algorithm, "set_state", lambda value: None) producer.algorithm.algorithm.possible_values = [("gru", "gru")] - producer.produce() + producer.produce(1) assert producer.naive_algorithm.algorithm._num == 1 # pool size assert producer.algorithm.algorithm._num == 0 @@ -128,11 +126,10 @@ def test_naive_algorithm_is_producing(monkeypatch, producer, random_dt): def test_update_and_produce(producer, random_dt): """Test new trials are properly produced""" possible_values = [("gru", "rnn")] - producer.experiment.pool_size = 1 producer.experiment.algorithms.algorithm.possible_values = possible_values producer.update() - producer.produce() + producer.produce(1) # Algorithm was ordered to suggest some trials num_new_points = producer.naive_algorithm.algorithm._num @@ -146,11 +143,10 @@ def test_register_new_trials(producer, storage, random_dt): trials_in_db_before = len(storage._fetch_trials({})) new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) - producer.experiment.pool_size = 1 producer.experiment.algorithms.algorithm.possible_values = 
[("gru", "rnn")] producer.update() - producer.produce() + producer.produce(1) # Algorithm was ordered to suggest some trials num_new_points = producer.naive_algorithm.algorithm._num @@ -267,7 +263,6 @@ def test_register_duplicate_lies(producer, storage, random_dt): producer.strategy._value = 4 # Set specific output value for to algo to ensure successful creation of a new trial. - producer.experiment.pool_size = 1 producer.experiment.algorithms.algorithm.possible_values = [("gru", "rnn")] producer.update() @@ -278,7 +273,7 @@ # Create a new point to make sure additional non-completed trials increase number of lying # trials generated - producer.produce() + producer.produce(1) trials_non_completed = storage._fetch_trials(query) assert len(trials_non_completed) == 5 @@ -362,7 +357,6 @@ def test_naive_algo_trained_on_all_non_completed_trials(producer, storage, rando def test_naive_algo_is_discared(producer, monkeypatch): """Verify that naive algo is discarded and recopied from original algo""" # Set values for predictions - producer.experiment.pool_size = 1 producer.experiment.algorithms.algorithm.possible_values = [("gru", "rnn")] producer.update() @@ -373,7 +367,7 @@ assert len(producer.algorithm.algorithm._points) == 3 assert len(first_naive_algorithm.algorithm._points) == (3 + 4) - producer.produce() + producer.produce(1) # Only update the original algo, naive algo is still not discarded update_algorithm(producer) @@ -392,24 +386,25 @@ def test_concurent_producers(producer, storage, random_dt): trials_in_db_before = len(storage._fetch_trials({})) new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) + # Avoid limiting number of samples from within the algorithm.
+ producer.algorithm.algorithm.pool_size = 1000 + + # Set so that first producer's algorithm generate valid point on first time - # And second producer produce same point and thus must produce next one two. + # And second producer produce same point and thus must produce next one too. # Hence, we know that producer algo will have _num == 1 and # second producer algo will have _num == 2 producer.algorithm.algorithm.possible_values = [("gru", "rnn"), ("gru", "gru")] # Make sure it starts from index 0 producer.algorithm.seed_rng(0) - assert producer.experiment.pool_size == 1 - second_producer = Producer(producer.experiment) second_producer.algorithm = copy.deepcopy(producer.algorithm) producer.update() second_producer.update() - producer.produce() - second_producer.produce() + producer.produce(1) + second_producer.produce(2) # Algorithm was required to suggest some trials num_new_points = producer.algorithm.algorithm._num @@ -438,6 +433,52 @@ } +def test_concurent_producers_shared_pool(producer, storage, random_dt): + """Test concurrent production of new trials shares the same pool""" + trials_in_db_before = len(storage._fetch_trials({})) + new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) + + # Set so that first producer's algorithm generate valid point on first time + # And second producer produce same point and thus must back off and then stop + # because first producer filled the pool.
+ # Hence, we know that producer algo will have _num == 1 and + # second producer algo will have _num == 1 + producer.algorithm.algorithm.possible_values = [("gru", "rnn"), ("gru", "gru")] + # Make sure it starts from index 0 + producer.algorithm.seed_rng(0) + + second_producer = Producer(producer.experiment) + second_producer.algorithm = copy.deepcopy(producer.algorithm) + + producer.update() + second_producer.update() + + producer.produce(1) + second_producer.produce(1) + + # Algorithm was required to suggest some trials + num_new_points = producer.algorithm.algorithm._num + assert num_new_points == 1 # pool size + num_new_points = second_producer.algorithm.algorithm._num + assert num_new_points == 0 # pool size + + # `num_new_points` new trials were registered at database + assert len(storage._fetch_trials({})) == trials_in_db_before + 1 + assert len(storage._fetch_trials({"status": "new"})) == new_trials_in_db_before + 1 + new_trials = list( + storage._fetch_trials({"status": "new", "submit_time": random_dt}) + ) + assert len(new_trials) == 1 + assert new_trials[0].experiment == producer.experiment.id + assert new_trials[0].start_time is None + assert new_trials[0].end_time is None + assert new_trials[0].results == [] + assert new_trials[0].params == { + "/decoding_layer": "gru", + "/encoding_layer": "rnn", + } + + def test_duplicate_within_pool(producer, storage, random_dt): """Test that an algo suggesting multiple points can have a few registered even if one of them is a duplicate. @@ -445,8 +486,8 @@ trials_in_db_before = len(storage._fetch_trials({})) new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) - producer.experiment.pool_size = 2 - producer.algorithm.algorithm.pool_size = 2 + # Avoid limiting number of samples from within the algorithm.
+ producer.algorithm.algorithm.pool_size = 1000 producer.experiment.algorithms.algorithm.possible_values = [ ("gru", "rnn"), @@ -455,7 +496,7 @@ ] producer.update() - producer.produce() + producer.produce(2) # Algorithm was required to suggest some trials num_new_points = producer.algorithm.algorithm._num @@ -489,8 +530,8 @@ def test_duplicate_within_pool_and_db(producer, storage, random_dt): trials_in_db_before = len(storage._fetch_trials({})) new_trials_in_db_before = len(storage._fetch_trials({"status": "new"})) - producer.experiment.pool_size = 2 - producer.algorithm.algorithm.pool_size = 2 + # Avoid limiting number of samples from within the algorithm. + producer.algorithm.algorithm.pool_size = 1000 producer.experiment.algorithms.algorithm.possible_values = [ ("gru", "rnn"), @@ -499,7 +540,7 @@ ] producer.update() - producer.produce() + producer.produce(2) # Algorithm was required to suggest some trials num_new_points = producer.algorithm.algorithm._num @@ -532,14 +573,12 @@ producer.max_idle_time = timeout # to limit run-time, default would work as well.
producer.experiment.algorithms.algorithm.possible_values = [("rnn", "rnn")] - assert producer.experiment.pool_size == 1 - producer.update() start = time.time() with pytest.raises(SampleTimeout): - producer.produce() + producer.produce(1) assert timeout <= time.time() - start < timeout + 1 @@ -558,12 +597,10 @@ def opt_out(self, num=1): producer.experiment.algorithms.algorithm.__class__, "suggest", opt_out ) - assert producer.experiment.pool_size == 1 - producer.update() with pytest.raises(WaitingForTrials): - producer.produce() + producer.produce(1) def test_stops_if_algo_done(producer, storage, random_dt, monkeypatch): @@ -582,12 +619,10 @@ def opt_out_and_complete(self, num=1): opt_out_and_complete, ) - assert producer.experiment.pool_size == 1 - trials_in_db_before = len(storage._fetch_trials({})) producer.update() - producer.produce() + producer.produce(1) assert len(storage._fetch_trials({})) == trials_in_db_before assert producer.experiment.algorithms.is_done @@ -595,14 +630,12 @@ def opt_out_and_complete(self, num=1): def test_original_seeding(producer): """Verify that rng state in original algo changes when duplicate trials is discarded""" - assert producer.experiment.pool_size == 1 - producer.algorithm.seed_rng(0) assert producer.algorithm.algorithm._index == 0 producer.update() - producer.produce() + producer.produce(1) prev_index = producer.algorithm.algorithm._index prev_suggested = producer.algorithm.algorithm._suggested @@ -616,7 +649,7 @@ def test_original_seeding(producer): producer.algorithm.seed_rng(0) producer.update() - producer.produce() + producer.produce(1) assert prev_suggested != producer.algorithm.algorithm._suggested assert prev_index < producer.algorithm.algorithm._index @@ -677,7 +710,7 @@ def suggest(pool_size=None): producer.update() with pytest.raises(SampleTimeout): - producer.produce() + producer.produce(1) assert len(new_experiment.fetch_trials(with_evc_tree=False)) == 0 @@ -686,7 +719,6 @@ def test_algorithm_is_done(monkeypatch, 
producer): """Verify that producer won't register new samples if algorithm is done meanwhile.""" producer.experiment.max_trials = 8 producer.experiment.algorithms.algorithm.max_trials = 8 - producer.experiment.pool_size = 10 producer = Producer(producer.experiment) def suggest_one_only(self, num=1): @@ -698,12 +730,11 @@ def suggest_one_only(self, num=1): producer.experiment.algorithms.algorithm.__class__, "suggest", suggest_one_only ) - assert producer.experiment.pool_size == 10 trials_in_exp_before = len(producer.experiment.fetch_trials()) assert trials_in_exp_before == producer.experiment.max_trials - 1 producer.update() - producer.produce() + producer.produce(10) assert len(producer.experiment.fetch_trials()) == producer.experiment.max_trials assert producer.naive_algorithm.is_done @@ -729,11 +760,13 @@ def suggest_n(self, num): # Setup naive algorithm producer.update() - assert len(producer.suggest()) == 3 + assert len(producer.suggest(50)) == 3 + # Test pool_size is the min selected + assert len(producer.suggest(2)) == 2 producer.experiment.max_trials = 7 - assert len(producer.suggest()) == 1 + assert len(producer.suggest(50)) == 1 producer.experiment.max_trials = 5 - assert len(producer.suggest()) == 1 + assert len(producer.suggest(50)) == 1 trials = producer.experiment.fetch_trials() for trial in trials[:4]: @@ -745,4 +778,6 @@ def suggest_n(self, num): producer.update() # There is now 3 completed and 4 broken. Max trials is 5. Producer should suggest 2 - assert len(producer.suggest()) == 2 + assert len(producer.suggest(50)) == 2 + # Test pool_size is the min selected + assert len(producer.suggest(1)) == 1