diff --git a/HISTORY.md b/HISTORY.md index 3e13a7dd8..6ba087b76 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,9 +1,12 @@ ### ensmallen ?.??.?: "???" ###### ????-??-?? -* LBFGS: avoid generation of NaNs, and add checks for finite values + * Implementation of Active CMAES + ([#367](https://github.com/mlpack/ensmallen/pull/367)). + + * LBFGS: avoid generation of NaNs, and add checks for finite values ([#368](https://github.com/mlpack/ensmallen/pull/368)). -* Fix CNE test tolerances + * Fix CNE test tolerances ([#360](https://github.com/mlpack/ensmallen/pull/360)). ### ensmallen 2.19.1: "Eight Ball Deluxe" diff --git a/doc/optimizers.md b/doc/optimizers.md index e1b039676..2acb5a6ed 100644 --- a/doc/optimizers.md +++ b/doc/optimizers.md @@ -1,3 +1,95 @@ +## ActiveCMAES + +*An optimizer for [separable functions](#separable-functions).* + +Active CMA-ES is a variant of the stochastic search algorithm +CMA-ES - Covariance Matrix Adaptation Evolution Strategy. +Active CMA-ES actively reduces the uncertainty in unfavourable directions by +exploiting the information about bad mutations in the covariance matrix +update step. This isn't for the purpose of accelerating progress, but +instead for speeding up the adaptation of the covariance matrix (which, in +turn, will lead to faster progress). 
+ +#### Constructors + + * `ActiveCMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>()` + * `ActiveCMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy`_`)` + * `ActiveCMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize`_`)` + * `ActiveCMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy`_`)` + * `ActiveCMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize`_`)` + +The _`SelectionPolicyType`_ template parameter refers to the strategy used to +compute the (approximate) objective function. The `FullSelection` and +`RandomSelection` classes are available for use; custom behavior can be achieved +by implementing a class with the same method signatures. +The _`TransformationPolicyType`_ template parameter refers to the transformation +strategy used to map decision variables to the desired domain during fitness +evaluation and optimization termination. The `EmptyTransformation` and +`BoundaryBoxConstraint` classes are available for use; custom behavior can be +achieved by implementing a class with the same method signatures. + +For convenience the following types can be used: + + * **`ActiveCMAES<>`** (equivalent to `ActiveCMAES<FullSelection, EmptyTransformation<>>`): uses all separable functions to compute objective + * **`ApproxActiveCMAES<>`** (equivalent to `ActiveCMAES<RandomSelection, EmptyTransformation<>>`): uses a small amount of separable functions to compute approximate objective + +#### Attributes + +| **type** | **name** | **description** | **default** | +|----------|----------|-----------------|-------------| +| `size_t` | **`lambda`** | The population size (0 uses a default size). | `0` | +| `TransformationPolicyType` | **`transformationPolicy`** | Instantiated transformation policy used to map the coordinates to the desired domain.
| `TransformationPolicyType()` | +| `size_t` | **`batchSize`** | Batch size to use for the objective calculation. | `32` | +| `size_t` | **`maxIterations`** | Maximum number of iterations. | `1000` | +| `double` | **`tolerance`** | Maximum absolute tolerance to terminate algorithm. | `1e-5` | +| `SelectionPolicyType` | **`selectionPolicy`** | Instantiated selection policy used to calculate the objective. | `SelectionPolicyType()` | +| `double` | **`stepSize`** | Initial step size (0 uses the transformation policy's initial step size). | `0` | + +Attributes of the optimizer may also be changed via the member methods +`PopulationSize()`, `TransformationPolicy()`, `BatchSize()`, `MaxIterations()`, +`Tolerance()`, `SelectionPolicy()`, and `StepSize()`. + +The `selectionPolicy` attribute allows an instantiated `SelectionPolicyType` to +be given. The `FullSelection` policy has no need to be instantiated and thus +the option is not relevant when the `ActiveCMAES<>` optimizer type is being used; the +`RandomSelection` policy has the constructor `RandomSelection(`_`fraction`_`)` +where _`fraction`_ specifies the percentage of separable functions to use to +estimate the objective function. +The `transformationPolicy` attribute allows an instantiated +`TransformationPolicyType` to be given. The `EmptyTransformation<`_`MatType`_`>` +has no need to be instantiated. The `BoundaryBoxConstraint<`_`MatType`_`>` policy has +the constructor `BoundaryBoxConstraint(`_`lowerBound, upperBound`_`)` +where _`lowerBound`_ and _`upperBound`_ are the lower bound and upper bound of +the coordinates respectively. + +#### Examples: + +
+Click to collapse/expand example code. + + +```c++ +RosenbrockFunction f; +arma::mat coordinates = f.GetInitialPoint(); + +// ActiveCMAES with the FullSelection and BoundaryBoxConstraint policies. +BoundaryBoxConstraint<> b(-1, 1); +ActiveCMAES<FullSelection, BoundaryBoxConstraint<>> optimizer(0, b, 32, 200, 1e-4); +optimizer.Optimize(f, coordinates); + +// ActiveCMAES with the RandomSelection and BoundaryBoxConstraint policies. +ApproxActiveCMAES<BoundaryBoxConstraint<>> approxOptimizer(0, b, 32, 200, 1e-4); +approxOptimizer.Optimize(f, coordinates); +``` + +
+ +#### See also: + + * [CMAES](#cmaes) + * [Improving Evolution Strategies through Active Covariance Matrix Adaptation](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.114.4239&rep=rep1&type=pdf) + * [Evolution strategy in Wikipedia](https://en.wikipedia.org/wiki/Evolution_strategy) + ## AdaBelief *An optimizer for [differentiable separable functions](#differentiable-separable-functions).* @@ -696,35 +788,41 @@ matrix within an iterative procedure using the covariance matrix. #### Constructors - * `CMAES<`_`SelectionPolicyType`_`>()` - * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound`_`)` - * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound, batchSize`_`)` - * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound, batchSize, maxIterations, tolerance, selectionPolicy`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>()` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize`_`)` The _`SelectionPolicyType`_ template parameter refers to the strategy used to compute the (approximate) objective function. The `FullSelection` and `RandomSelection` classes are available for use; custom behavior can be achieved by implementing a class with the same method signatures. +The _`TransformationPolicyType`_ template parameter refers to the transformation +strategy used to map decision variables to the desired domain during fitness +evaluation and optimization termination.
The `EmptyTransformation` and +`BoundaryBoxConstraint` classes are available for use; custom behavior can be +achieved by implementing a class with the same method signatures. For convenience the following types can be used: - * **`CMAES<>`** (equivalent to `CMAES<FullSelection>`): uses all separable functions to compute objective - * **`ApproxCMAES`** (equivalent to `CMAES<RandomSelection>`): uses a small amount of separable functions to compute approximate objective + * **`CMAES<>`** (equivalent to `CMAES<FullSelection, EmptyTransformation<>>`): uses all separable functions to compute objective + * **`ApproxCMAES<>`** (equivalent to `CMAES<RandomSelection, EmptyTransformation<>>`): uses a small amount of separable functions to compute approximate objective #### Attributes | **type** | **name** | **description** | **default** | |----------|----------|-----------------|-------------| | `size_t` | **`lambda`** | The population size (0 uses a default size). | `0` | -| `double` | **`lowerBound`** | Lower bound of decision variables. | `-10.0` | -| `double` | **`upperBound`** | Upper bound of decision variables. | `10.0` | +| `TransformationPolicyType` | **`transformationPolicy`** | Instantiated transformation policy used to map the coordinates to the desired domain. | `TransformationPolicyType()` | | `size_t` | **`batchSize`** | Batch size to use for the objective calculation. | `32` | | `size_t` | **`maxIterations`** | Maximum number of iterations. | `1000` | | `double` | **`tolerance`** | Maximum absolute tolerance to terminate algorithm. | `1e-5` | | `SelectionPolicyType` | **`selectionPolicy`** | Instantiated selection policy used to calculate the objective. | `SelectionPolicyType()` | +| `double` | **`stepSize`** | Initial step size | `0` | Attributes of the optimizer may also be changed via the member methods -`Lambda()`, `LowerBound()`, `UpperBound()`, `BatchSize()`, `MaxIterations()`, +`PopulationSize()`, `TransformationPolicy()`, `BatchSize()`, `MaxIterations()`, `Tolerance()`, and `SelectionPolicy()`.
The `selectionPolicy` attribute allows an instantiated `SelectionPolicyType` to @@ -733,6 +831,12 @@ the option is not relevant when the `CMAES<>` optimizer type is being used; the `RandomSelection` policy has the constructor `RandomSelection(`_`fraction`_`)` where _`fraction`_ specifies the percentage of separable functions to use to estimate the objective function. +The `transformationPolicy` attribute allows an instantiated +`TransformationPolicyType` to be given. The `EmptyTransformation<`_`MatType`_`>` +has no need to be instantiated. The `BoundaryBoxConstraint<`_`MatType`_`>` policy has +the constructor `BoundaryBoxConstraint(`_`lowerBound, upperBound`_`)` +where _`lowerBound`_ and _`upperBound`_ are the lower bound and upper bound of +the coordinates respectively. #### Examples: @@ -744,12 +848,13 @@ estimate the objective function. RosenbrockFunction f; arma::mat coordinates = f.GetInitialPoint(); -// CMAES with the FullSelection policy. -CMAES<> optimizer(0, -1, 1, 32, 200, 1e-4); +// CMAES with the FullSelection and BoundaryBoxConstraint policies. +BoundaryBoxConstraint<> b(-1, 1); +CMAES<FullSelection, BoundaryBoxConstraint<>> optimizer(0, b, 32, 200, 1e-4); optimizer.Optimize(f, coordinates); -// CMAES with the RandomSelection policy. -ApproxCMAES<> approxOptimizer(0, -1, 1. 32, 200, 1e-4); +// CMAES with the RandomSelection and BoundaryBoxConstraint policies.
+ApproxCMAES<BoundaryBoxConstraint<>> approxOptimizer(0, b, 32, 200, 1e-4); approxOptimizer.Optimize(f, coordinates); ``` diff --git a/include/ensmallen.hpp b/include/ensmallen.hpp index a1338e819..be91cc23f 100644 --- a/include/ensmallen.hpp +++ b/include/ensmallen.hpp @@ -97,6 +97,7 @@ #include "ensmallen_bits/aug_lagrangian/aug_lagrangian.hpp" #include "ensmallen_bits/bigbatch_sgd/bigbatch_sgd.hpp" #include "ensmallen_bits/cmaes/cmaes.hpp" +#include "ensmallen_bits/cmaes/active_cmaes.hpp" #include "ensmallen_bits/cne/cne.hpp" #include "ensmallen_bits/de/de.hpp" #include "ensmallen_bits/eve/eve.hpp" diff --git a/include/ensmallen_bits/cmaes/active_cmaes.hpp b/include/ensmallen_bits/cmaes/active_cmaes.hpp new file mode 100644 index 000000000..1edec07c2 --- /dev/null +++ b/include/ensmallen_bits/cmaes/active_cmaes.hpp @@ -0,0 +1,219 @@ +/** + * @file active_cmaes.hpp + * @author Marcus Edel + * @author Suvarsha Chennareddy + * + * Definition of the Active Covariance Matrix Adaptation Evolution Strategy + * as proposed by G.A Jastrebski and D.V Arnold in "Improving Evolution + * Strategies through Active Covariance Matrix Adaptation". + * + * ensmallen is free software; you may redistribute it and/or modify it under + * the terms of the 3-clause BSD license. You should have received a copy of + * the 3-clause BSD license along with ensmallen. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef ENSMALLEN_CMAES_ACTIVE_CMAES_HPP +#define ENSMALLEN_CMAES_ACTIVE_CMAES_HPP + +#include "full_selection.hpp" +#include "random_selection.hpp" +#include "transformation_policies/empty_transformation.hpp" +#include "transformation_policies/boundary_box_constraint.hpp" + +namespace ens { + +/** + * Active CMA-ES is a variant of the stochastic search algorithm + * CMA-ES - Covariance Matrix Adaptation Evolution Strategy.
+ * Active CMA-ES actively reduces the uncertainty in unfavourable directions by + * exploiting the information about bad mutations in the covariance matrix + * update step. This isn't for the purpose of accelerating progress, but + * instead for speeding up the adaptation of the covariance matrix (which, in + * turn, will lead to faster progress). + * + * For more information, please refer to: + * + * @code + * @INPROCEEDINGS{1688662, + * author={Jastrebski, G.A. and Arnold, D.V.}, + * booktitle={2006 IEEE International Conference on Evolutionary + Computation}, + * title={Improving Evolution Strategies through Active Covariance + Matrix Adaptation}, + * year={2006}, + * volume={}, + * number={}, + * pages={2814-2821}, + * doi={10.1109/CEC.2006.1688662}} + * @endcode + * + * Active CMA-ES can optimize separable functions. For more details, see the + * documentation on function types included with this distribution or on the + * ensmallen website. + * + * @tparam SelectionPolicy The selection strategy used for the evaluation step. + * @tparam TransformationPolicy The transformation strategy used to + * map decision variables to the desired domain during fitness evaluation + * and termination. Use EmptyTransformation if the domain isn't bounded. + */ +template> +class ActiveCMAES +{ + public: + /** + * Construct the Active CMA-ES optimizer with the given function and parameters. The + * defaults here are not necessarily good for the given problem, so it is + * suggested that the values used be tailored to the task at hand. The + * maximum number of iterations refers to the maximum number of points that + * are processed (i.e., one iteration equals one point; one iteration does not + * equal one pass over the dataset). + * + * @param lambda The population size (0 use the default size). + * @param transformationPolicy Instantiated transformation policy used to + * map the coordinates to the desired domain. 
+ * @param batchSize Batch size to use for the objective calculation. + * @param maxIterations Maximum number of iterations allowed (0 means no + * limit). + * @param tolerance Maximum absolute tolerance to terminate algorithm. + * @param selectionPolicy Instantiated selection policy used to calculate the + * objective. + * @param stepSize Starting sigma/step size (will be modified). + */ + ActiveCMAES( + const size_t lambda = 0, + const TransformationPolicyType& + transformationPolicy = TransformationPolicyType(), + const size_t batchSize = 32, + const size_t maxIterations = 1000, + const double tolerance = 1e-5, + const SelectionPolicyType& selectionPolicy = SelectionPolicyType(), + double stepSize = 0); + + /** + * Construct the Active CMA-ES optimizer with the given function and parameters + * (including lower and upper bounds). The defaults here are not necessarily + * good for the given problem, so it is suggested that the values used be + * tailored to the task at hand. The maximum number of iterations refers to + * the maximum number of points that are processed (i.e., one iteration + * equals one point; one iteration does not equal one pass over the dataset). + * + * @param lambda The population size(0 use the default size). + * @param lowerBound Lower bound of decision variables. + * @param upperBound Upper bound of decision variables. + * @param batchSize Batch size to use for the objective calculation. + * @param maxIterations Maximum number of iterations allowed(0 means no + limit). + * @param tolerance Maximum absolute tolerance to terminate algorithm. + * @param selectionPolicy Instantiated selection policy used to calculate the + * objective. + * @param stepSize Starting sigma/step size (will be modified). 
+ */ + ActiveCMAES( + const size_t lambda = 0, + const double lowerBound = -10, + const double upperBound = 10, + const size_t batchSize = 32, + const size_t maxIterations = 1000, + const double tolerance = 1e-5, + const SelectionPolicyType& selectionPolicy = SelectionPolicyType(), + double stepSize = 0); + + /** + * Optimize the given function using Active CMA-ES. The given starting point will be + * modified to store the finishing point of the algorithm, and the final + * objective value is returned. + * + * @tparam SeparableFunctionType Type of the function to be optimized. + * @tparam MatType Type of matrix to optimize. + * @tparam CallbackTypes Types of callback functions. + * @param function Function to optimize. + * @param iterate Starting point (will be modified). + * @param callbacks Callback functions. + * @return Objective value of the final point. + */ + template + typename MatType::elem_type Optimize( + SeparableFunctionType& function, + MatType& iterate, + CallbackTypes&&... callbacks); + + //! Get the population size. + size_t PopulationSize() const { return lambda; } + //! Modify the population size. + size_t& PopulationSize() { return lambda; } + + //! Get the batch size. + size_t BatchSize() const { return batchSize; } + //! Modify the batch size. + size_t& BatchSize() { return batchSize; } + + //! Get the maximum number of iterations (0 indicates no limit). + size_t MaxIterations() const { return maxIterations; } + //! Modify the maximum number of iterations (0 indicates no limit). + size_t& MaxIterations() { return maxIterations; } + + //! Get the tolerance for termination. + double Tolerance() const { return tolerance; } + //! Modify the tolerance for termination. + double& Tolerance() { return tolerance; } + + //! Get the selection policy. + const SelectionPolicyType& SelectionPolicy() const { return selectionPolicy; } + //! Modify the selection policy. + SelectionPolicyType& SelectionPolicy() { return selectionPolicy; } + + //! 
Get the transformation policy. + const TransformationPolicyType& TransformationPolicy() const + { return transformationPolicy; } + //! Modify the transformation policy. + TransformationPolicyType& TransformationPolicy() + { return transformationPolicy; } + + //! Get the step size. + double StepSize() const + { return stepSize; } + //! Modify the step size. + double& StepSize() + { return stepSize; } + + private: + //! Population size. + size_t lambda; + + //! The batch size for processing. + size_t batchSize; + + //! The maximum number of allowed iterations. + size_t maxIterations; + + //! The tolerance for termination. + double tolerance; + + //! The selection policy used to calculate the objective. + SelectionPolicyType selectionPolicy; + + //! The transformationPolicy used to map coordinates to the suitable domain + //! while evaluating fitness. This mapping is also done after optimization + //! has completed. + TransformationPolicyType transformationPolicy; + + //! The step size. + double stepSize; +}; + +/** + * Convenient typedef for Active CMAES approximation. + */ +template, + typename SelectionPolicyType = RandomSelection> +using ApproxActiveCMAES = ActiveCMAES; + +} // namespace ens + +// Include implementation. +#include "active_cmaes_impl.hpp" + +#endif diff --git a/include/ensmallen_bits/cmaes/active_cmaes_impl.hpp b/include/ensmallen_bits/cmaes/active_cmaes_impl.hpp new file mode 100644 index 000000000..8975038dd --- /dev/null +++ b/include/ensmallen_bits/cmaes/active_cmaes_impl.hpp @@ -0,0 +1,367 @@ +/** + * @file active_cmaes_impl.hpp + * @author Marcus Edel + * @author Suvarsha Chennareddy + * + * Implementation of the Active Covariance Matrix Adaptation Evolution Strategy + * as proposed by G.A Jastrebski and D.V Arnold in "Improving Evolution + * Strategies through Active Covariance Matrix Adaptation". + * + * ensmallen is free software; you may redistribute it and/or modify it under + * the terms of the 3-clause BSD license. 
You should have received a copy of + * the 3-clause BSD license along with ensmallen. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef ENSMALLEN_CMAES_ACTIVE_CMAES_IMPL_HPP +#define ENSMALLEN_CMAES_ACTIVE_CMAES_IMPL_HPP + +// In case it hasn't been included yet. +#include "active_cmaes.hpp" + +#include "not_empty_transformation.hpp" +#include + +namespace ens { + +template +ActiveCMAES::ActiveCMAES( + const size_t lambda, + const TransformationPolicyType& + transformationPolicy, + const size_t batchSize, + const size_t maxIterations, + const double tolerance, + const SelectionPolicyType& selectionPolicy, + double stepSizeIn) : + lambda(lambda), + batchSize(batchSize), + maxIterations(maxIterations), + tolerance(tolerance), + selectionPolicy(selectionPolicy), + transformationPolicy(transformationPolicy), + stepSize(stepSizeIn) +{ /* Nothing to do. */ } + +template +ActiveCMAES::ActiveCMAES( + const size_t lambda, + const double lowerBound, + const double upperBound, + const size_t batchSize, + const size_t maxIterations, + const double tolerance, + const SelectionPolicyType& selectionPolicy, + double stepSizeIn) : + lambda(lambda), + batchSize(batchSize), + maxIterations(maxIterations), + tolerance(tolerance), + selectionPolicy(selectionPolicy), + stepSize(stepSizeIn) +{ + Warn << "This is a deprecated constructor and will be removed in a " + "future version of ensmallen" << std::endl; + NotEmptyTransformation> d; + d.Assign(transformationPolicy, lowerBound, upperBound); +} + +//! Optimize the function (minimize). +template +template +typename MatType::elem_type ActiveCMAES::Optimize( + SeparableFunctionType& function, + MatType& iterateIn, + CallbackTypes&&... callbacks) +{ + // Convenience typedefs. + typedef typename MatType::elem_type ElemType; + typedef typename MatTypeTraits::BaseMatType BaseMatType; + + // Make sure that we have the methods that we need. Long name... 
+ traits::CheckArbitrarySeparableFunctionTypeAPI< + SeparableFunctionType, BaseMatType>(); + RequireDenseFloatingPointType(); + + BaseMatType& iterate = (BaseMatType&) iterateIn; + + // Find the number of functions to use. + const size_t numFunctions = function.NumFunctions(); + + // Population size. + if (lambda == 0) + lambda = (4 + std::round(3 * std::log(iterate.n_elem))) * 10; + + // Parent number. + const size_t mu = std::round(lambda / 4); + + // Recombination weight (w = 1 / (parent number)). + const ElemType w = 1.0 / mu; + + // Number of effective solutions. + const ElemType muEffective = mu; + + // Step size control parameters. + BaseMatType sigma(2, 1); // sigma is vector-shaped. + if (stepSize == 0) + sigma(0) = transformationPolicy.InitialStepSize(); + else + sigma(0) = stepSize; + + const ElemType cs = 4.0 / (iterate.n_elem + 4); + const ElemType ds = 1 + cs; + const ElemType enn = std::sqrt(iterate.n_elem) * (1.0 - 1.0 / + (4.0 * iterate.n_elem) + 1.0 / (21 * std::pow(iterate.n_elem, 2))); + + // Covariance update parameters. Cumulation for distribution. + const ElemType cc = cs; + const ElemType ccov = 2.0 / std::pow((iterate.n_elem + std::sqrt(2)), 2); + const ElemType beta = (4.0 * mu - 2.0) / (std::pow((iterate.n_elem + 12), 2) + + 4 * mu); + + std::vector mPosition(2, BaseMatType(iterate.n_rows, + iterate.n_cols)); + mPosition[0] = iterate; + + BaseMatType step(iterate.n_rows, iterate.n_cols); + step.zeros(); + + BaseMatType transformedIterate = transformationPolicy.Transform(iterate); + + // Calculate the first objective function. 
+ ElemType currentObjective = 0; + for (size_t f = 0; f < numFunctions; f += batchSize) + { + const size_t effectiveBatchSize = std::min(batchSize, numFunctions - f); + const ElemType objective = function.Evaluate(transformedIterate, f, + effectiveBatchSize); + currentObjective += objective; + + Callback::Evaluate(*this, function, transformedIterate, objective, + callbacks...); + } + + ElemType overallObjective = currentObjective; + ElemType lastObjective = std::numeric_limits::max(); + + // Population parameters. + std::vector pStep(lambda, BaseMatType(iterate.n_rows, + iterate.n_cols)); + std::vector pPosition(lambda, BaseMatType(iterate.n_rows, + iterate.n_cols)); + BaseMatType pObjective(lambda, 1); // pObjective is vector-shaped. + std::vector ps(2, BaseMatType(iterate.n_rows, iterate.n_cols)); + ps[0].zeros(); + ps[1].zeros(); + std::vector pc = ps; + std::vector C(2, BaseMatType(iterate.n_elem, iterate.n_elem)); + C[0].eye(); + + // Covariance matrix parameters. + arma::Col eigval; + BaseMatType eigvec; + BaseMatType eigvalZero(iterate.n_elem, 1); // eigvalZero is vector-shaped. + eigvalZero.zeros(); + + // The current visitation order (sorted by population objectives). + arma::uvec idx = arma::linspace(0, lambda - 1, lambda); + + // Controls early termination of the optimization process. + bool terminate = false; + + // Now iterate! + terminate |= Callback::BeginOptimization(*this, function, + transformedIterate, callbacks...); + + size_t idx0, idx1; + + // The number of generations to wait after the minimum loss has + // been reached or no improvement has been made before terminating. + size_t patience = 10 + (30 * iterate.n_elem / lambda) + 1; + size_t steps = 0; + + for (size_t i = 1; (i != maxIterations) && !terminate; ++i) + { + // To keep track of where we are. + idx0 = (i - 1) % 2; + idx1 = i % 2; + + // Perform Cholesky decomposition. If the matrix is not positive definite, + // add a small value and try again. 
+ BaseMatType covLower; + while (!arma::chol(covLower, C[idx0], "lower")) + C[idx0].diag() += std::numeric_limits::epsilon(); + + arma::eig_sym(eigval, eigvec, C[idx0]); + + for (size_t j = 0; j < lambda; ++j) + { + if (iterate.n_rows > iterate.n_cols) + { + pStep[idx(j)] = covLower * + arma::randn(iterate.n_rows, iterate.n_cols); + } + else + { + pStep[idx(j)] = arma::randn(iterate.n_rows, iterate.n_cols) + * covLower.t(); + } + + pPosition[idx(j)] = mPosition[idx0] + sigma(idx0) * pStep[idx(j)]; + + // Calculate the objective function. + pObjective(idx(j)) = selectionPolicy.Select(function, batchSize, + transformationPolicy.Transform(pPosition[idx(j)]), callbacks...); + } + + // Sort population. + idx = arma::sort_index(pObjective); + + step = w * pStep[idx(0)]; + for (size_t j = 1; j < mu; ++j) + step += w * pStep[idx(j)]; + + mPosition[idx1] = mPosition[idx0] + sigma(idx0) * step; + + // Calculate the objective function. + currentObjective = selectionPolicy.Select(function, batchSize, + transformationPolicy.Transform(mPosition[idx1]), callbacks...); + + // Update best parameters. + if (currentObjective < overallObjective) + { + overallObjective = currentObjective; + iterate = mPosition[idx1]; + + transformedIterate = transformationPolicy.Transform(iterate); + terminate |= Callback::StepTaken(*this, function, + transformedIterate, callbacks...); + } + + // Update Step Size. + if (iterate.n_rows > iterate.n_cols) + { + ps[idx1] = (1 - cs) * ps[idx0] + std::sqrt( + cs * (2 - cs) * muEffective) * + eigvec * diagmat(1 / eigval) * eigvec.t() * step; + } + else + { + ps[idx1] = (1 - cs) * ps[idx0] + std::sqrt( + cs * (2 - cs) * muEffective) * step * + eigvec * diagmat(1 / eigval) * eigvec.t(); + } + + const ElemType psNorm = arma::norm(ps[idx1]); + sigma(idx1) = sigma(idx0) * std::exp(cs / ds * (psNorm / enn - 1)); + + if (std::isnan(sigma(idx1)) || sigma(idx1) > 1e14) + { + Warn << "The step size diverged to " << sigma(idx1) << "; " + << "terminating with failure. 
Try a smaller step size?" << std::endl; + + iterate = transformationPolicy.Transform(iterate); + + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + + pc[idx1] = (1 - cc) * pc[idx0] + std::sqrt(cc * (2 - cc) * + muEffective) * step; + + if (iterate.n_rows > iterate.n_cols) + { + C[idx1] = (1 - ccov) * C[idx0] + ccov * + (pc[idx1] * pc[idx1].t()); + + for (size_t j = 0; j < mu; ++j) + { + C[idx1] = C[idx1] + beta * w * + pStep[idx(j)] * pStep[idx(j)].t(); + } + + for (size_t j = lambda - mu; j < lambda; ++j) + { + C[idx1] = C[idx1] - beta * w * + pStep[idx(j)] * pStep[idx(j)].t(); + } + } + else + { + C[idx1] = (1 - ccov) * C[idx0] + ccov * + (pc[idx1].t() * pc[idx1]); + + for (size_t j = 0; j < mu; ++j) + { + C[idx1] = C[idx1] + beta * w * + pStep[idx(j)].t() * pStep[idx(j)]; + } + + for (size_t j = lambda - mu; j < lambda; ++j) + { + C[idx1] = C[idx1] - beta * w * + pStep[idx(j)].t() * pStep[idx(j)]; + } + } + + arma::eig_sym(eigval, eigvec, C[idx1]); + const arma::uvec negativeEigval = arma::find(eigval < 0, 1); + if (!negativeEigval.is_empty()) + { + if (negativeEigval(0) == 0) + { + C[idx1].zeros(); + } + else + { + C[idx1] = eigvec.cols(0, negativeEigval(0) - 1) * + arma::diagmat(eigval.subvec(0, negativeEigval(0) - 1)) * + eigvec.cols(0, negativeEigval(0) - 1).t(); + } + } + + // Output current objective function. + Info << "Active CMA-ES: iteration " << i << ", objective " << overallObjective + << "." << std::endl; + + if (std::isnan(overallObjective) || std::isinf(overallObjective)) + { + Warn << "Active CMA-ES: converged to " << overallObjective << "; " + << "terminating with failure. Try a smaller step size?" 
<< std::endl; + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + + if (std::abs(lastObjective - overallObjective) < tolerance) + { + if (steps > patience) + { + Info << "Active CMA-ES: minimized within tolerance " << tolerance << "; " + << "terminating optimization." << std::endl; + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + } + else + { + steps = 0; + } + + steps++; + lastObjective = overallObjective; + } + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; +} + +} // namespace ens + +#endif diff --git a/include/ensmallen_bits/cmaes/cmaes.hpp b/include/ensmallen_bits/cmaes/cmaes.hpp index 98500a44e..0ae323490 100644 --- a/include/ensmallen_bits/cmaes/cmaes.hpp +++ b/include/ensmallen_bits/cmaes/cmaes.hpp @@ -17,6 +17,8 @@ #include "full_selection.hpp" #include "random_selection.hpp" +#include "transformation_policies/empty_transformation.hpp" +#include "transformation_policies/boundary_box_constraint.hpp" namespace ens { @@ -46,8 +48,12 @@ namespace ens { * ensmallen website. * * @tparam SelectionPolicy The selection strategy used for the evaluation step. + * @tparam TransformationPolicy The transformation strategy used to + * map decision variables to the desired domain during fitness evaluation + * and termination. Use EmptyTransformation if the domain isn't bounded. */ -template +template> class CMAES { public: @@ -60,14 +66,43 @@ class CMAES * equal one pass over the dataset). * * @param lambda The population size (0 use the default size). - * @param lowerBound Lower bound of decision variables. - * @param upperBound Upper bound of decision variables. 
+ * @param transformationPolicy Instantiated transformation policy used to + * map the coordinates to the desired domain. * @param batchSize Batch size to use for the objective calculation. * @param maxIterations Maximum number of iterations allowed (0 means no * limit). * @param tolerance Maximum absolute tolerance to terminate algorithm. * @param selectionPolicy Instantiated selection policy used to calculate the * objective. + * @param stepSize Starting sigma/step size (will be modified). + */ + CMAES(const size_t lambda = 0, + const TransformationPolicyType& + transformationPolicy = TransformationPolicyType(), + const size_t batchSize = 32, + const size_t maxIterations = 1000, + const double tolerance = 1e-5, + const SelectionPolicyType& selectionPolicy = SelectionPolicyType(), + double stepSize = 0); + + /** + * Construct the CMA-ES optimizer with the given function and parameters + * (including lower and upper bounds). The defaults here are not necessarily + * good for the given problem, so it is suggested that the values used be + * tailored to the task at hand. The maximum number of iterations refers to + * the maximum number of points that are processed (i.e., one iteration + * equals one point; one iteration does not equal one pass over the dataset). + * + * @param lambda The population size(0 use the default size). + * @param lowerBound Lower bound of decision variables. + * @param upperBound Upper bound of decision variables. + * @param batchSize Batch size to use for the objective calculation. + * @param maxIterations Maximum number of iterations allowed(0 means no + limit). + * @param tolerance Maximum absolute tolerance to terminate algorithm. + * @param selectionPolicy Instantiated selection policy used to calculate the + * objective. + * @param stepSize Starting sigma/step size (will be modified). 
*/ CMAES(const size_t lambda = 0, const double lowerBound = -10, @@ -75,7 +110,8 @@ class CMAES const size_t batchSize = 32, const size_t maxIterations = 1000, const double tolerance = 1e-5, - const SelectionPolicyType& selectionPolicy = SelectionPolicyType()); + const SelectionPolicyType& selectionPolicy = SelectionPolicyType(), + double stepSize = 0); /** * Optimize the given function using CMA-ES. The given starting point will be @@ -91,27 +127,17 @@ class CMAES * @return Objective value of the final point. */ template - typename MatType::elem_type Optimize(SeparableFunctionType& function, - MatType& iterate, - CallbackTypes&&... callbacks); + typename MatType, + typename... CallbackTypes> + typename MatType::elem_type Optimize(SeparableFunctionType& function, + MatType& iterate, + CallbackTypes&&... callbacks); //! Get the population size. size_t PopulationSize() const { return lambda; } //! Modify the population size. size_t& PopulationSize() { return lambda; } - //! Get the lower bound of decision variables. - double LowerBound() const { return lowerBound; } - //! Modify the lower bound of decision variables. - double& LowerBound() { return lowerBound; } - - //! Get the upper bound of decision variables - double UpperBound() const { return upperBound; } - //! Modify the upper bound of decision variables - double& UpperBound() { return upperBound; } - //! Get the batch size. size_t BatchSize() const { return batchSize; } //! Modify the batch size. @@ -132,16 +158,24 @@ class CMAES //! Modify the selection policy. SelectionPolicyType& SelectionPolicy() { return selectionPolicy; } + //! Get the transformation policy. + const TransformationPolicyType& TransformationPolicy() const + { return transformationPolicy; } + //! Modify the transformation policy. + TransformationPolicyType& TransformationPolicy() + { return transformationPolicy; } + + //! Get the step size. + double StepSize() const + { return stepSize; } + //! Modify the step size. 
+ double& StepSize() + { return stepSize; } + private: //! Population size. size_t lambda; - //! Lower bound of decision variables. - double lowerBound; - - //! Upper bound of decision variables - double upperBound; - //! The batch size for processing. size_t batchSize; @@ -153,13 +187,22 @@ class CMAES //! The selection policy used to calculate the objective. SelectionPolicyType selectionPolicy; + + //! The transformationPolicy used to map coordinates to the suitable domain + //! while evaluating fitness. This mapping is also done after optimization + //! has completed. + TransformationPolicyType transformationPolicy; + + //! The step size. + double stepSize; }; /** * Convenient typedef for CMAES approximation. */ -template -using ApproxCMAES = CMAES; +template, + typename SelectionPolicyType = RandomSelection> +using ApproxCMAES = CMAES; } // namespace ens diff --git a/include/ensmallen_bits/cmaes/cmaes_impl.hpp b/include/ensmallen_bits/cmaes/cmaes_impl.hpp index 90e047ae4..4d875ee38 100644 --- a/include/ensmallen_bits/cmaes/cmaes_impl.hpp +++ b/include/ensmallen_bits/cmaes/cmaes_impl.hpp @@ -18,33 +18,59 @@ // In case it hasn't been included yet. #include "cmaes.hpp" +#include "not_empty_transformation.hpp" #include namespace ens { -template -CMAES::CMAES(const size_t lambda, +template +CMAES::CMAES(const size_t lambda, + const TransformationPolicyType& + transformationPolicy, + const size_t batchSize, + const size_t maxIterations, + const double tolerance, + const SelectionPolicyType& selectionPolicy, + double stepSizeIn) : + lambda(lambda), + batchSize(batchSize), + maxIterations(maxIterations), + tolerance(tolerance), + selectionPolicy(selectionPolicy), + transformationPolicy(transformationPolicy), + stepSize(stepSizeIn) +{ /* Nothing to do. 
*/ } + +template +CMAES::CMAES(const size_t lambda, const double lowerBound, const double upperBound, const size_t batchSize, const size_t maxIterations, const double tolerance, - const SelectionPolicyType& selectionPolicy) : + const SelectionPolicyType& selectionPolicy, + double stepSizeIn) : lambda(lambda), - lowerBound(lowerBound), - upperBound(upperBound), batchSize(batchSize), maxIterations(maxIterations), tolerance(tolerance), - selectionPolicy(selectionPolicy) -{ /* Nothing to do. */ } + selectionPolicy(selectionPolicy), + stepSize(stepSizeIn) +{ + Warn << "This is a deprecated constructor and will be removed in a " + "future version of ensmallen" << std::endl; + NotEmptyTransformation> d; + d.Assign(transformationPolicy, lowerBound, upperBound); +} + //! Optimize the function (minimize). -template +template template -typename MatType::elem_type CMAES::Optimize( +typename MatType::elem_type CMAES::Optimize( SeparableFunctionType& function, MatType& iterateIn, CallbackTypes&&... callbacks) @@ -78,7 +104,11 @@ typename MatType::elem_type CMAES::Optimize( // Step size control parameters. BaseMatType sigma(2, 1); // sigma is vector-shaped. - sigma(0) = 0.3 * (upperBound - lowerBound); + if (stepSize == 0) + sigma(0) = transformationPolicy.InitialStepSize(); + else + sigma(0) = stepSize; + const double cs = (muEffective + 2) / (iterate.n_elem + muEffective + 5); const double ds = 1 + cs + 2 * std::max(std::sqrt((muEffective - 1) / (iterate.n_elem + 1)) - 1, 0.0); @@ -99,22 +129,23 @@ typename MatType::elem_type CMAES::Optimize( std::vector mPosition(2, BaseMatType(iterate.n_rows, iterate.n_cols)); - mPosition[0] = lowerBound + arma::randu( - iterate.n_rows, iterate.n_cols) * (upperBound - lowerBound); + mPosition[0] = iterate; BaseMatType step(iterate.n_rows, iterate.n_cols); step.zeros(); + BaseMatType transformedIterate = transformationPolicy.Transform(iterate); + // Calculate the first objective function. 
ElemType currentObjective = 0; for (size_t f = 0; f < numFunctions; f += batchSize) { const size_t effectiveBatchSize = std::min(batchSize, numFunctions - f); - const ElemType objective = function.Evaluate(mPosition[0], f, + const ElemType objective = function.Evaluate(transformedIterate, f, effectiveBatchSize); currentObjective += objective; - Callback::Evaluate(*this, function, mPosition[0], objective, + Callback::Evaluate(*this, function, transformedIterate, objective, callbacks...); } @@ -147,9 +178,15 @@ typename MatType::elem_type CMAES::Optimize( bool terminate = false; // Now iterate! - terminate |= Callback::BeginOptimization(*this, function, iterate, - callbacks...); - for (size_t i = 1; i < maxIterations && !terminate; ++i) + terminate |= Callback::BeginOptimization(*this, function, + transformedIterate, callbacks...); + + // The number of generations to wait after the minimum loss has + // been reached or no improvement has been made before terminating. + size_t patience = 10 + (30 * iterate.n_elem / lambda) + 1; + size_t steps = 0; + + for (size_t i = 1; (i != maxIterations) && !terminate; ++i) { // To keep track of where we are. const size_t idx0 = (i - 1) % 2; @@ -161,24 +198,26 @@ typename MatType::elem_type CMAES::Optimize( while (!arma::chol(covLower, C[idx0], "lower")) C[idx0].diag() += std::numeric_limits::epsilon(); + arma::eig_sym(eigval, eigvec, C[idx0]); + for (size_t j = 0; j < lambda; ++j) { if (iterate.n_rows > iterate.n_cols) { pStep[idx(j)] = covLower * - arma::randn(iterate.n_rows, iterate.n_cols); + arma::randn(iterate.n_rows, iterate.n_cols); } else { pStep[idx(j)] = arma::randn(iterate.n_rows, iterate.n_cols) - * covLower; + * covLower.t(); } pPosition[idx(j)] = mPosition[idx0] + sigma(idx0) * pStep[idx(j)]; // Calculate the objective function. pObjective(idx(j)) = selectionPolicy.Select(function, batchSize, - pPosition[idx(j)], callbacks...); + transformationPolicy.Transform(pPosition[idx(j)]), callbacks...); } // Sort population. 
@@ -192,7 +231,7 @@ typename MatType::elem_type CMAES::Optimize( // Calculate the objective function. currentObjective = selectionPolicy.Select(function, batchSize, - mPosition[idx1], callbacks...); + transformationPolicy.Transform(mPosition[idx1]), callbacks...); // Update best parameters. if (currentObjective < overallObjective) @@ -200,23 +239,38 @@ typename MatType::elem_type CMAES::Optimize( overallObjective = currentObjective; iterate = mPosition[idx1]; - terminate |= Callback::StepTaken(*this, function, iterate, callbacks...); + transformedIterate = transformationPolicy.Transform(iterate); + terminate |= Callback::StepTaken(*this, function, + transformedIterate, callbacks...); } // Update Step Size. if (iterate.n_rows > iterate.n_cols) { ps[idx1] = (1 - cs) * ps[idx0] + std::sqrt( - cs * (2 - cs) * muEffective) * covLower.t() * step; + cs * (2 - cs) * muEffective) * + eigvec * diagmat(1 / eigval) * eigvec.t() * step; } else { ps[idx1] = (1 - cs) * ps[idx0] + std::sqrt( - cs * (2 - cs) * muEffective) * step * covLower.t(); + cs * (2 - cs) * muEffective) * step * + eigvec * diagmat(1 / eigval) * eigvec.t(); } const ElemType psNorm = arma::norm(ps[idx1]); - sigma(idx1) = sigma(idx0) * std::exp(cs / ds * ( psNorm / enn - 1)); + sigma(idx1) = sigma(idx0) * std::exp(cs / ds * (psNorm / enn - 1)); + + if (std::isnan(sigma(idx1)) || sigma(idx1) > 1e14) + { + Warn << "The step size diverged to " << sigma(idx1) << "; " + << "terminating with failure. Try a smaller step size?" << std::endl; + + iterate = transformationPolicy.Transform(iterate); + + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } // Update covariance matrix. 
if ((psNorm / sqrt(1 - std::pow(1 - cs, 2 * i))) < h) @@ -242,12 +296,12 @@ typename MatType::elem_type CMAES::Optimize( if (iterate.n_rows > iterate.n_cols) { C[idx1] = (1 - c1 - cmu) * C[idx0] + c1 * (pc[idx1] * - pc[idx1].t() + (cc * (2 - cc)) * C[idx0]); + pc[idx1].t() + (cc * (2 - cc)) * C[idx0]); } else { C[idx1] = (1 - c1 - cmu) * C[idx0] + c1 * - (pc[idx1].t() * pc[idx1] + (cc * (2 - cc)) * C[idx0]); + (pc[idx1].t() * pc[idx1] + (cc * (2 - cc)) * C[idx0]); } } @@ -256,7 +310,7 @@ typename MatType::elem_type CMAES::Optimize( for (size_t j = 0; j < mu; ++j) { C[idx1] = C[idx1] + cmu * w(j) * - pStep[idx(j)] * pStep[idx(j)].t(); + pStep[idx(j)] * pStep[idx(j)].t(); } } else @@ -264,7 +318,7 @@ typename MatType::elem_type CMAES::Optimize( for (size_t j = 0; j < mu; ++j) { C[idx1] = C[idx1] + cmu * w(j) * - pStep[idx(j)].t() * pStep[idx(j)]; + pStep[idx(j)].t() * pStep[idx(j)]; } } @@ -279,36 +333,46 @@ typename MatType::elem_type CMAES::Optimize( else { C[idx1] = eigvec.cols(0, negativeEigval(0) - 1) * - arma::diagmat(eigval.subvec(0, negativeEigval(0) - 1)) * - eigvec.cols(0, negativeEigval(0) - 1).t(); + arma::diagmat(eigval.subvec(0, negativeEigval(0) - 1)) * + eigvec.cols(0, negativeEigval(0) - 1).t(); } } // Output current objective function. Info << "CMA-ES: iteration " << i << ", objective " << overallObjective - << "." << std::endl; + << "." << std::endl; if (std::isnan(overallObjective) || std::isinf(overallObjective)) { Warn << "CMA-ES: converged to " << overallObjective << "; " - << "terminating with failure. Try a smaller step size?" << std::endl; + << "terminating with failure. Try a smaller step size?" 
<< std::endl; + iterate = transformationPolicy.Transform(iterate); Callback::EndOptimization(*this, function, iterate, callbacks...); return overallObjective; } if (std::abs(lastObjective - overallObjective) < tolerance) { - Info << "CMA-ES: minimized within tolerance " << tolerance << "; " + if (steps > patience) { + Info << "CMA-ES: minimized within tolerance " << tolerance << "; " << "terminating optimization." << std::endl; - Callback::EndOptimization(*this, function, iterate, callbacks...); - return overallObjective; + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + } + else { + steps = 0; } + steps++; + lastObjective = overallObjective; } + iterate = transformationPolicy.Transform(iterate); Callback::EndOptimization(*this, function, iterate, callbacks...); return overallObjective; } diff --git a/include/ensmallen_bits/cmaes/not_empty_transformation.hpp b/include/ensmallen_bits/cmaes/not_empty_transformation.hpp new file mode 100644 index 000000000..cdf18c10d --- /dev/null +++ b/include/ensmallen_bits/cmaes/not_empty_transformation.hpp @@ -0,0 +1,36 @@ +/** + * @file not_empty_transformation.hpp + * @author Suvarsha Chennareddy + * + * Check whether TransformationPolicyType is EmptyTransformation. + * + * ensmallen is free software; you may redistribute it and/or modify it under + * the terms of the 3-clause BSD license. You should have received a copy of + * the 3-clause BSD license along with ensmallen. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef NOT_EMPTY_TRANSFORMATION +#define NOT_EMPTY_TRANSFORMATION + + /* + This partial specialization is used to throw an exception when the + TransformationPolicyType is EmptyTransformation and call a + constructor with parameters 'lowerBound' and 'upperBound' otherwise. 
+ This shall be removed when the deprecated constructor is removed in + the next major version of ensmallen. + */ +template +struct NotEmptyTransformation : std::true_type { + void Assign(T1& obj, double lowerBound, double upperBound) { + obj = T1(lowerBound, upperBound); + } +}; + +template class T, typename... A, typename... B> +struct NotEmptyTransformation, T> : std::false_type { + void Assign(T& obj, double lowerBound, double upperBound) { + throw std::logic_error("TransformationPolicyType is EmptyTransformation"); + } +}; + +#endif \ No newline at end of file diff --git a/include/ensmallen_bits/cmaes/transformation_policies/boundary_box_constraint.hpp b/include/ensmallen_bits/cmaes/transformation_policies/boundary_box_constraint.hpp new file mode 100644 index 000000000..bc8f937ae --- /dev/null +++ b/include/ensmallen_bits/cmaes/transformation_policies/boundary_box_constraint.hpp @@ -0,0 +1,162 @@ +/** + * @file boundary_box_constraint.hpp + * @author Suvarsha Chennareddy + * + * Boundary Box Transformation. + * + * + * ensmallen is free software; you may redistribute it and/or modify it under + * the terms of the 3-clause BSD license. You should have received a copy of + * the 3-clause BSD license along with ensmallen. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef ENSMALLEN_CMAES_BOUNDARY_BOX_TRANSFORMATION_HPP +#define ENSMALLEN_CMAES_BOUNDARY_BOX_TRANSFORMATION_HPP + +namespace ens { + +/** + * More often than not, coordinates must be bounded by some constraints. + * In a particular case, the domain of a specific function is restricted + * by boundaries. + * The implemented transformation transforms given coordinates into a region + * bounded by the given lower and upper bounds (a box). First, the + * coordinates are shifted into a feasible preimage bounded by lowerBound - al + * and upperBound + au where al and au are calculated internally. 
+ * These shifted coordinates are then transformed into coordinates bounded by + * lower_bound and upper_bound. It is an identity transformation in between + * the lower and upper bounds. + * + * For more information, check the original implementation in C by N. Hansen: + * https://github.com/CMA-ES/c-cmaes/blob/master/src/boundary_transformation.c + * + * @tparam MatType The matrix type of the coordinates and bounds. + */ +template +class BoundaryBoxConstraint +{ +public: + + /** + * Construct the boundary box constraint policy. + */ + BoundaryBoxConstraint() + { /* Nothing to do. */ } + + /** + * Construct the boundary box constraint policy. + * + * @param lowerBound The lower bound of the coordinates. + * @param upperBound The upper bound of the coordinates. + */ + BoundaryBoxConstraint(const MatType& lowerBound, + const MatType& upperBound) : + lowerBound(lowerBound), + upperBound(upperBound) + {} + + /** + * Construct the boundary box constraint policy. + * + * @param lowerBound The lower bound (for every dimension) of the coordinates. + * @param upperBound The upper bound (for every dimension) of the coordinates. + */ + BoundaryBoxConstraint(const typename MatType::elem_type lowerBound, + const typename MatType::elem_type upperBound) : + lowerBound({ (typename MatType::elem_type) lowerBound }), + upperBound({ (typename MatType::elem_type) upperBound }) + {} + + /** + * Map the given coordinates to the range + * [lowerBound, upperBound] + * + * @param x Given coordinates. + * @return Transformed coordinates. + */ + MatType Transform(const MatType& x) + { + typedef typename MatType::elem_type ElemType; + double diff, al, au, xlow, xup, r; + size_t Bi, Bj; + MatType y = x; + for (size_t i = 0; i < x.n_rows; i++) + { + Bi = (i < lowerBound.n_rows) ? i : (lowerBound.n_rows - 1); + for (size_t j = 0; j < x.n_cols; j++) + { + Bj = (j < lowerBound.n_cols) ? 
j : (lowerBound.n_cols - 1); + + diff = (upperBound(Bi, Bj) - lowerBound(Bi, Bj)) / 2.0; + al = std::min(diff, (1 + std::abs(lowerBound(Bi, Bj))) / 20.0); + au = std::min(diff, (1 + std::abs(upperBound(Bi, Bj))) / 20.0); + xlow = lowerBound(Bi, Bj) - 2 * al - diff; + xup = upperBound(Bi, Bj) + 2 * au + diff; + r = 2 * (2 * diff + al + au); + + // Shift y into feasible pre-image. + if (y(i, j) < xlow) + { + y(i,j) += (ElemType)(r * (1 + (int)((xlow - y(i, j)) / r))); + } + if (y(i, j) > xup) + { + y(i, j) -= (ElemType)(r * (1 + (int)((y(i, j) - xup) / r))); + } + if (y(i, j) < lowerBound(Bi, Bj) - al) + { + y(i, j) += (ElemType)(2 * (lowerBound(Bi, Bj) - al - y(i, j))); + } + if (y(i, j) > upperBound(Bi, Bj) + au) + { + y(i, j) -= (ElemType)(2 * (y(i, j) - upperBound(Bi, Bj) - au)); + } + + // Boundary transformation. + if (y(i, j) < lowerBound(Bi, Bj) + al) + { + y(i, j) = (ElemType)(lowerBound(Bi, Bj) + + (y(i, j) - (lowerBound(Bi, Bj) - al)) * + (y(i, j) - (lowerBound(Bi, Bj) - al)) / 4.0 / al); + } + else if (y(i,j) > upperBound(Bi,Bj) - au) + { + y(i, j) = (ElemType)(upperBound(Bi, Bj) - + (y(i, j) - (upperBound(Bi, Bj) + au)) * + (y(i, j) - (upperBound(Bi, Bj) + au)) / 4.0 / au); + } + } + } + + return y; + } + + /** + * Return a suitable initial step size. + * + * @return initial step size. + */ + typename MatType::elem_type InitialStepSize() + { return 0.3 * (upperBound - lowerBound).min(); } + + //! Get the lower bound of decision variables. + MatType LowerBound() const { return lowerBound; } + //! Modify the lower bound of decision variables. + MatType& LowerBound() { return lowerBound; } + + //! Get the upper bound of decision variables. + MatType UpperBound() const { return upperBound; } + //! Modify the upper bound of decision variables. + MatType& UpperBound() { return upperBound; } + +private: + //! Lower bound of decision variables. + MatType lowerBound; + + //! Upper bound of decision variables. 
+ MatType upperBound; +}; + +} // namespace ens + +#endif diff --git a/include/ensmallen_bits/cmaes/transformation_policies/empty_transformation.hpp b/include/ensmallen_bits/cmaes/transformation_policies/empty_transformation.hpp new file mode 100644 index 000000000..eb8618116 --- /dev/null +++ b/include/ensmallen_bits/cmaes/transformation_policies/empty_transformation.hpp @@ -0,0 +1,47 @@ +/** + * @file empty_transformation.hpp + * @author Suvarsha Chennareddy + * + * Empty Transformation, can also be called an Identity Transformation. + * + * ensmallen is free software; you may redistribute it and/or modify it under + * the terms of the 3-clause BSD license. You should have received a copy of + * the 3-clause BSD license along with ensmallen. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef ENSMALLEN_CMAES_EMPTY_TRANSFORMATION_HPP +#define ENSMALLEN_CMAES_EMPTY_TRANSFORMATION_HPP + +namespace ens { + +/** + * This is an empty transformation. As the name indicates, it does + * not do anything. It is essentially an identity + * transformation and is meant to be used when there are no + * sorts of constraints on the coordinates. + * + * @tparam MatType The matrix type of the coordinates. + */ +template +class EmptyTransformation +{ + public: + /** + * Transforms coordinates to themselves (effectively no transformation). + * + * @param x Input coordinates. + * @return Transformed coordinates (the coordinates themselves). + */ + MatType Transform(const MatType& x) { return x; } + + /** + * Return a suitable initial step size. + * + * @return initial step size. 
+ */ + typename MatType::elem_type InitialStepSize() { return 1; } +}; + +} // namespace ens + +#endif diff --git a/include/ensmallen_bits/nsga2/nsga2_impl.hpp b/include/ensmallen_bits/nsga2/nsga2_impl.hpp index d28e69c5a..acfb736d4 100644 --- a/include/ensmallen_bits/nsga2/nsga2_impl.hpp +++ b/include/ensmallen_bits/nsga2/nsga2_impl.hpp @@ -423,9 +423,9 @@ inline bool NSGA2::Dominates( //! Assign crowding distance to the population. template inline void NSGA2::CrowdingDistanceAssignment( - const std::vector& front, - std::vector>& calculatedObjectives, - std::vector& crowdingDistance) + const std::vector& front, + std::vector>& calculatedObjectives, + std::vector& crowdingDistance) { // Convenience typedefs. typedef typename MatType::elem_type ElemType; diff --git a/include/ensmallen_bits/problems/zdt/zdt4_function.hpp b/include/ensmallen_bits/problems/zdt/zdt4_function.hpp index b7d77f06d..fad2ba912 100644 --- a/include/ensmallen_bits/problems/zdt/zdt4_function.hpp +++ b/include/ensmallen_bits/problems/zdt/zdt4_function.hpp @@ -83,7 +83,7 @@ namespace test { 10. * arma::cos(4 * arma::datum::pi * truncatedCoords)); ElemType g = 1. + 10. * static_cast(numVariables - 1) + sum; ElemType objectiveRatio = objectives(0) / g; - objectives(1) = g * (1. - std::sqrt(objectiveRatio)); + objectives(1) = g * (1. - std::sqrt(objectiveRatio)); return objectives; } @@ -157,4 +157,4 @@ namespace test { }; } //namespace test } //namespace ens -#endif \ No newline at end of file +#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 791477e9f..8ce3123fa 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,7 @@ # The tests that need to be compiled. 
set(ENSMALLEN_TESTS_SOURCES main.cpp + active_cmaes_test.cpp ada_belief_test.cpp ada_bound_test.cpp ada_delta_test.cpp diff --git a/tests/active_cmaes_test.cpp b/tests/active_cmaes_test.cpp new file mode 100644 index 000000000..24c2fd1e3 --- /dev/null +++ b/tests/active_cmaes_test.cpp @@ -0,0 +1,82 @@ +/** + * @file active_cmaes_test.cpp + * @author Suvarsha Chennareddy + * + * ensmallen is free software; you may redistribute it and/or modify it under + * the terms of the 3-clause BSD license. You should have received a copy of + * the 3-clause BSD license along with ensmallen. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#include +#include "catch.hpp" +#include "test_function_tools.hpp" + +using namespace ens; +using namespace ens::test; + +/** + * Run Active CMA-ES with the full selection policy on Rosenbrock function and + * make sure the results are acceptable. + */ +TEST_CASE("ActiveCMAESRosenbrockFunctionTest", "[ActiveCMAESTest]") +{ + BoundaryBoxConstraint<> b(0, 10); + ActiveCMAES> + activecmaes(0, b, 1, 0, 1e-8); + activecmaes.StepSize() = 0.01; + FunctionTest(activecmaes, 0.1, 0.1, 10); +} + +/** + * Run Active CMA-ES with the random selection policy on Rosenbrock function and + * make sure the results are acceptable. + */ +TEST_CASE("ApproxActiveCMAESRosenbrockFunctionTest", "[ActiveCMAESTest]") +{ + BoundaryBoxConstraint<> b(0, 10); + ApproxActiveCMAES> + activecmaes(2048, b, 1, 0, 1e-13); + activecmaes.StepSize() = 0.01; + FunctionTest(activecmaes, 0.1, 0.1, 10); +} + +/** + * Run Active CMA-ES with the full selection policy on Rosenbrock function and + * make sure the results are acceptable. Use arma::fmat. 
+ */ +TEST_CASE("ActiveCMAESRosenbrockFunctionFMatTest", "[ActiveCMAESTest]") +{ + BoundaryBoxConstraint b(0, 10); + ActiveCMAES> + activecmaes(0, b, 1, 0, 1e-8); + activecmaes.StepSize() = 0.01; + FunctionTest(activecmaes, 0.1, 0.1, 5); +} + +/** + * Run Active CMA-ES with the random selection policy on Rosenbrock function and + * make sure the results are acceptable. Use arma::fmat. + */ +TEST_CASE("ApproxActiveCMAESRosenbrockFunctionFMatTest", "[ActiveCMAESTest]") +{ + BoundaryBoxConstraint b(0, 10); + ApproxActiveCMAES> + activecmaes(2048, b, 1, 0, 1e-5); + activecmaes.StepSize() = 0.01; + FunctionTest(activecmaes, 0.1, 0.1, 10); +} + +/** + * Run Active CMA-ES with the random selection and empty transformation policies + * on logistic regression and make sure the results are acceptable. + * Use arma::fmat. + */ +TEST_CASE("ApproxActiveCMAESEmptyTransformationLogisticRegressionFMatTest", + "[ActiveCMAESTest]") +{ + ApproxActiveCMAES> + activecmaes(0, EmptyTransformation(), 16, 0, 1e-3); + activecmaes.StepSize() = 0.55; + LogisticRegressionFunctionTest(activecmaes, 0.01, 0.02, 5); +} diff --git a/tests/callbacks_test.cpp b/tests/callbacks_test.cpp index 79a7c4353..f5a5205eb 100644 --- a/tests/callbacks_test.cpp +++ b/tests/callbacks_test.cpp @@ -256,6 +256,17 @@ TEST_CASE("EarlyStopAtMinLossCustomLambdaTest", "[CallbacksTest]") REQUIRE(std::abs(coordinates[i]) >= 3.0); } +/** + * Make sure we invoke all callbacks (ActiveCMAES). + */ +TEST_CASE("ActiveCMAESCallbacksFullFunctionTest", "[CallbacksTest]") +{ + BoundaryBoxConstraint<> b(-1, 1); + ActiveCMAES> optimizer(0, b, 32, 3, 1e-3); + CallbacksFullFunctionTest(optimizer, true, false, false, false, true, true, + false, false, true); +} + /** * Make sure we invoke all callbacks (AdaBound). 
*/ @@ -312,9 +323,10 @@ TEST_CASE("BigBatchSGDCallbacksFullFunctionTest", "[CallbacksTest]") */ TEST_CASE("CMAESCallbacksFullFunctionTest", "[CallbacksTest]") { - CMAES<> optimizer(0, -1, 1, 32, 3, 1e-3); + BoundaryBoxConstraint<> b(-1, 1); + CMAES> optimizer(0, b, 32, 3, 1e-3); CallbacksFullFunctionTest(optimizer, true, false, false, false, true, true, - false, false, true); + false, false, true); } /** diff --git a/tests/cmaes_test.cpp b/tests/cmaes_test.cpp index 6dccb4505..75a9fe6b8 100644 --- a/tests/cmaes_test.cpp +++ b/tests/cmaes_test.cpp @@ -23,7 +23,9 @@ using namespace ens::test; */ TEST_CASE("CMAESLogisticRegressionTest", "[CMAESTest]") { - CMAES<> cmaes(0, -1, 1, 32, 200, 1e-3); + BoundaryBoxConstraint<> b(-10, 10); + CMAES> cmaes(0, b, 32, 500, 1e-3); + cmaes.StepSize() = 0.6; LogisticRegressionFunctionTest(cmaes, 0.003, 0.006, 5); } @@ -33,7 +35,9 @@ TEST_CASE("CMAESLogisticRegressionTest", "[CMAESTest]") */ TEST_CASE("ApproxCMAESLogisticRegressionTest", "[CMAESTest]") { - ApproxCMAES<> cmaes(0, -1, 1, 32, 200, 1e-3); + BoundaryBoxConstraint<> b(-10, 10); + ApproxCMAES> cmaes(256, b, 16, 500, 1e-3); + cmaes.StepSize() = 0.6; LogisticRegressionFunctionTest(cmaes, 0.003, 0.006, 5); } @@ -43,7 +47,8 @@ TEST_CASE("ApproxCMAESLogisticRegressionTest", "[CMAESTest]") */ TEST_CASE("CMAESLogisticRegressionFMatTest", "[CMAESTest]") { - CMAES<> cmaes(0, -1, 1, 32, 200, 1e-3); + BoundaryBoxConstraint b(-10, 10); + CMAES> cmaes(0, b, 32, 500, 1e-3); LogisticRegressionFunctionTest(cmaes, 0.01, 0.02, 5); } @@ -53,6 +58,20 @@ TEST_CASE("CMAESLogisticRegressionFMatTest", "[CMAESTest]") */ TEST_CASE("ApproxCMAESLogisticRegressionFMatTest", "[CMAESTest]") { - ApproxCMAES<> cmaes(0, -1, 1, 32, 200, 1e-3); + BoundaryBoxConstraint b(-10, 10); + ApproxCMAES> cmaes(0, b, 16, 500, 1e-3); + LogisticRegressionFunctionTest(cmaes, 0.01, 0.02, 5); +} + +/** + * Run CMA-ES with the random selection and empty transformation policies + * on logistic regression and make sure the 
results are acceptable. + * Use arma::fmat. + */ +TEST_CASE("ApproxCMAESEmptyTransformationLogisticRegressionFMatTest", + "[CMAESTest]") +{ + ApproxCMAES> + cmaes(0, EmptyTransformation(), 16, 500, 1e-3); LogisticRegressionFunctionTest(cmaes, 0.01, 0.02, 5); }