mlpack · gaurav-singh1998 · Apr 4, 2020 · Apr 4, 2020 · Apr 8, 2020 · Apr 8, 2020
diff --git a/HISTORY.md b/HISTORY.md
@@ -3,6 +3,9 @@
  * Fix CMake package export
     ([#198](https://github.com/mlpack/ensmallen/pull/198)).
 
+ * Fix CMA-ES inconsistencies
+    ([#193](https://github.com/mlpack/ensmallen/pull/193)).
+
 ### ensmallen 2.12.1: "Stir Crazy"
 ###### 2020-04-20
  * Fix total number of epochs and time estimation for ProgressBar callback
@@ -20,7 +23,7 @@
     ([#183](https://github.com/mlpack/ensmallen/pull/183)).
 
  * Remove deprecated methods from PrimalDualSolver implementation
-    ([#185](https://github.com/mlpack/ensmallen/pull/185).
+    ([#185](https://github.com/mlpack/ensmallen/pull/185)).
 
  * Update logo ([#186](https://github.com/mlpack/ensmallen/pull/186)).
 

diff --git a/doc/optimizers.md b/doc/optimizers.md
@@ -584,6 +584,7 @@ matrix within an iterative procedure using the covariance matrix.
  * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound`_`)`
  * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound, batchSize`_`)`
  * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound, batchSize, maxIterations, tolerance, selectionPolicy`_`)`
+ * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound, batchSize, maxIterations, tolerance, selectionPolicy, initialSigma`_`)`
 
 The _`SelectionPolicyType`_ template parameter refers to the strategy used to
 compute the (approximate) objective function.  The `FullSelection` and
@@ -606,10 +607,11 @@ For convenience the following types can be used:
 | `size_t` | **`maxIterations`** | Maximum number of iterations. | `1000` |
 | `double` | **`tolerance`** | Maximum absolute tolerance to terminate algorithm. | `1e-5` |
 | `SelectionPolicyType` | **`selectionPolicy`** | Instantiated selection policy used to calculate the objective. | `SelectionPolicyType()` |
+| `double` | **`initialSigma`** | The initial step size. | `0.6` |
 
 Attributes of the optimizer may also be changed via the member methods
 `Lambda()`, `LowerBound()`, `UpperBound()`, `BatchSize()`, `MaxIterations()`,
-`Tolerance()`, and `SelectionPolicy()`.
+`Tolerance()`, `SelectionPolicy()`, and `InitialSigma()`.
 
 The `selectionPolicy` attribute allows an instantiated `SelectionPolicyType` to
 be given.  The `FullSelection` policy has no need to be instantiated and thus

diff --git a/include/ensmallen_bits/cmaes/cmaes.hpp b/include/ensmallen_bits/cmaes/cmaes.hpp
@@ -66,6 +66,7 @@ class CMAES
    * @param maxIterations Maximum number of iterations allowed (0 means no
    *     limit).
    * @param tolerance Maximum absolute tolerance to terminate algorithm.
    * @param selectionPolicy Instantiated selection policy used to calculate the
    *     objective.
+   * @param initialSigma The initial step size.
    */
@@ -75,7 +76,8 @@ class CMAES
         const size_t batchSize = 32,
         const size_t maxIterations = 1000,
         const double tolerance = 1e-5,
-        const SelectionPolicyType& selectionPolicy = SelectionPolicyType());
+        const SelectionPolicyType& selectionPolicy = SelectionPolicyType(),
+        const double initialSigma = 0.6);
 
   /**
    * Optimize the given function using CMA-ES. The given starting point will be
@@ -97,19 +99,19 @@ class CMAES
                                        MatType& iterate,
                                        CallbackTypes&&... callbacks);
 
-  //! Get the step size.
+  //! Get the population size.
   size_t PopulationSize() const { return lambda; }
-  //! Modify the step size.
+  //! Modify the population size.
   size_t& PopulationSize() { return lambda; }
 
   //! Get the lower bound of decision variables.
   double LowerBound() const { return lowerBound; }
   //! Modify the lower bound of decision variables.
   double& LowerBound() { return lowerBound; }
 
-  //! Get the upper bound of decision variables
+  //! Get the upper bound of decision variables.
   double UpperBound() const { return upperBound; }
-  //! Modify the upper bound of decision variables
+  //! Modify the upper bound of decision variables.
   double& UpperBound() { return upperBound; }
 
   //! Get the batch size.
@@ -132,14 +134,19 @@ class CMAES
   //! Modify the selection policy.
   SelectionPolicyType& SelectionPolicy() { return selectionPolicy; }
 
+  //! Get the initial step size.
+  double InitialSigma() const { return initialSigma; }
+  //! Modify the initial step size.
+  double& InitialSigma() { return initialSigma; }
+
  private:
   //! Population size.
   size_t lambda;
 
   //! Lower bound of decision variables.
   double lowerBound;
 
-  //! Upper bound of decision variables
+  //! Upper bound of decision variables.
   double upperBound;
 
   //! The batch size for processing.
@@ -153,6 +160,15 @@ class CMAES
 
   //! The selection policy used to calculate the objective.
   SelectionPolicyType selectionPolicy;
+
+  //! Initial step size.
+  double initialSigma;
+
+  //! Map candidate values into [lowerBound, upperBound], and the inverse map.
+  template<typename BaseMatType>
+  void BoundaryTransform(BaseMatType& matrix);
+  template<typename BaseMatType>
+  void BoundaryTransformInverse(BaseMatType& matrix);
 };
 
 /**

diff --git a/include/ensmallen_bits/cmaes/cmaes_impl.hpp b/include/ensmallen_bits/cmaes/cmaes_impl.hpp
@@ -17,6 +17,7 @@
 
 // In case it hasn't been included yet.
 #include "cmaes.hpp"
+#include <assert.h>
 
 #include <ensmallen_bits/function.hpp>
 
@@ -29,15 +30,20 @@ CMAES<SelectionPolicyType>::CMAES(const size_t lambda,
                                   const size_t batchSize,
                                   const size_t maxIterations,
                                   const double tolerance,
-                                  const SelectionPolicyType& selectionPolicy) :
+                                  const SelectionPolicyType& selectionPolicy,
+                                  const double initialSigma) :
     lambda(lambda),
     lowerBound(lowerBound),
     upperBound(upperBound),
     batchSize(batchSize),
     maxIterations(maxIterations),
     tolerance(tolerance),
-    selectionPolicy(selectionPolicy)
-{ /* Nothing to do. */ }
+    selectionPolicy(selectionPolicy),
+    initialSigma(initialSigma)
+{
+  assert(this->lowerBound != this->upperBound && "The values of "
+        "lowerbound and upperbound must be different.");
+}
 
 //! Optimize the function (minimize).
 template<typename SelectionPolicyType>
@@ -78,7 +84,7 @@ typename MatType::elem_type CMAES<SelectionPolicyType>::Optimize(
 
   // Step size control parameters.
   BaseMatType sigma(2, 1); // sigma is vector-shaped.
-  sigma(0) = 0.3 * (upperBound - lowerBound);
+  sigma(0) = initialSigma;
   const double cs = (muEffective + 2) / (iterate.n_elem + muEffective + 5);
   const double ds = 1 + cs + 2 * std::max(std::sqrt((muEffective - 1) /
       (iterate.n_elem + 1)) - 1, 0.0);
@@ -99,13 +105,16 @@ typename MatType::elem_type CMAES<SelectionPolicyType>::Optimize(
 
   std::vector<BaseMatType> mPosition(2, BaseMatType(iterate.n_rows,
       iterate.n_cols));
-  mPosition[0] = lowerBound + arma::randu<BaseMatType>(
-      iterate.n_rows, iterate.n_cols) * (upperBound - lowerBound);
+  BaseMatType initialVal;
+  initialVal.randu(iterate.n_rows, iterate.n_cols);
+  initialVal += (BaseMatType)(iterateIn);
+  mPosition[0] = initialVal;
 
   BaseMatType step(iterate.n_rows, iterate.n_cols);
   step.zeros();
 
   // Calculate the first objective function.
+  BoundaryTransform<BaseMatType>(mPosition[0]);
   ElemType currentObjective = 0;
   for (size_t f = 0; f < numFunctions; f += batchSize)
   {
@@ -177,6 +186,7 @@ typename MatType::elem_type CMAES<SelectionPolicyType>::Optimize(
       pPosition[idx(j)] = mPosition[idx0] + sigma(idx0) * pStep[idx(j)];
 
       // Calculate the objective function.
+      BoundaryTransform<BaseMatType>(pPosition[idx(j)]);
       pObjective(idx(j)) = selectionPolicy.Select(function, batchSize,
           pPosition[idx(j)], callbacks...);
     }
@@ -191,6 +201,7 @@ typename MatType::elem_type CMAES<SelectionPolicyType>::Optimize(
     mPosition[idx1] = mPosition[idx0] + sigma(idx0) * step;
 
     // Calculate the objective function.
+    BoundaryTransform<BaseMatType>(mPosition[idx1]);
     currentObjective = selectionPolicy.Select(function, batchSize,
         mPosition[idx1], callbacks...);
 
@@ -313,6 +324,90 @@ typename MatType::elem_type CMAES<SelectionPolicyType>::Optimize(
   return overallObjective;
 }
 
+// Maps every element of the matrix into [lowerBound, upperBound], in place.
+// Periodic, mirrored map with a quadratic zone of width al / au at each bound.
+template<typename SelectionPolicyType>
+template<typename BaseMatType>
+void CMAES<SelectionPolicyType>::BoundaryTransform(BaseMatType& matrix)
+{
+  typedef typename BaseMatType::elem_type ElemType;
+  const double diff = (upperBound - lowerBound) / 2.0;
+  const double al = std::min(diff, (1 + std::abs(lowerBound)) / 20.0);
+  const double au = std::min(diff, (1 + std::abs(upperBound)) / 20.0);
+  // [xlow, xup] spans exactly one period r of the transformation.
+  const double xlow = lowerBound - 2 * al - diff;
+  const double xup = upperBound + 2 * au + diff;
+  const double r = 2 * (upperBound - lowerBound + al + au);
+
+  for (size_t col = 0; col < matrix.n_cols; col++)
+  {
+    for (size_t row = 0; row < matrix.n_rows; row++)
+    {
+      double y = matrix(row, col);
+      // Shift by a whole number of periods into [xlow, xup].  The count has
+      // to be floored: adding the raw quotient collapses every outlier onto
+      // the single point xup (or xlow).
+      if (y < xlow)
+        y += r * (1 + std::floor((xlow - y) / r));
+      else if (y > xup)
+        y -= r * (1 + std::floor((y - xup) / r));
+
+      // Mirror into [lowerBound - al, upperBound + au].  Not chained to the
+      // shift above, because a shifted value may still need mirroring.
+      if (y < lowerBound - al)
+        y += 2 * (lowerBound - al - y);
+      else if (y > upperBound + au)
+        y -= 2 * (y - upperBound - au);
+
+      // Quadratic zones next to the bounds; the interior is left unchanged.
+      if (y < lowerBound + al)
+      {
+        y = lowerBound + (y - (lowerBound - al)) *
+            (y - (lowerBound - al)) / 4.0 / al;
+      }
+      else if (y > upperBound - au)
+      {
+        y = upperBound - (y - (upperBound + au)) *
+            (y - (upperBound + au)) / 4.0 / au;
+      }
+
+      matrix(row, col) = static_cast<ElemType>(y);
+    }
+  }
+}
+
+// Inverse boundary transformation: maps every element lying in one of the
+// quadratic zones next to a bound back to its pre-image, in place.
+template<typename SelectionPolicyType>
+template<typename BaseMatType>
+void CMAES<SelectionPolicyType>::BoundaryTransformInverse(BaseMatType& matrix)
+{
+  typedef typename BaseMatType::elem_type ElemType;
+  const double half = (upperBound - lowerBound) / 2.0;
+  // Widths of the quadratic zones at the lower and upper bound.
+  const double zoneLo = std::min(half, (1 + std::abs(lowerBound)) / 20.0);
+  const double zoneHi = std::min(half, (1 + std::abs(upperBound)) / 20.0);
+
+  for (size_t c = 0; c < matrix.n_cols; ++c)
+  {
+    for (size_t r = 0; r < matrix.n_rows; ++r)
+    {
+      ElemType& value = matrix(r, c);
+      if (value < lowerBound + zoneLo)
+      {
+        // Inverts y = lowerBound + (x - lowerBound + zoneLo)^2 / (4 * zoneLo).
+        value = (ElemType)(lowerBound - zoneLo) + 2 *
+            std::sqrt(std::abs(zoneLo * (value - lowerBound)));
+      }
+      else if (value > upperBound - zoneHi)
+      {
+        value = (ElemType)(upperBound + zoneHi) - 2 *
+            std::sqrt(std::abs(zoneHi * (upperBound - value)));
+      }
+    }
+  }
+}
+
 } // namespace ens
 
 #endif
diff --git a/tests/cmaes_test.cpp b/tests/cmaes_test.cpp
@@ -50,7 +50,7 @@ TEST_CASE("CMAESLogisticRegressionTest", "[CMAESTest]")
         responses, testResponses, shuffledResponses);
     LogisticRegression<> lr(shuffledData, shuffledResponses, 0.5);
 
-    CMAES<> cmaes(0, -1, 1, 32, 200, 1e-3);
+    CMAES<> cmaes(0, -20, 20, 32, 200, 1e-3);
     arma::mat coordinates = lr.GetInitialPoint();
     cmaes.Optimize(lr, coordinates);
 
@@ -85,7 +85,7 @@ TEST_CASE("ApproxCMAESLogisticRegressionTest", "[CMAESTest]")
         responses, testResponses, shuffledResponses);
     LogisticRegression<> lr(shuffledData, shuffledResponses, 0.5);
 
-    ApproxCMAES<> cmaes(0, -1, 1, 32, 200, 1e-3);
+    ApproxCMAES<> cmaes(0, -20, 20, 32, 200, 1e-3);
     arma::mat coordinates = lr.GetInitialPoint();
     cmaes.Optimize(lr, coordinates);
 
@@ -120,7 +120,7 @@ TEST_CASE("CMAESLogisticRegressionFMatTest", "[CMAESTest]")
         responses, testResponses, shuffledResponses);
     LogisticRegression<arma::fmat> lr(shuffledData, shuffledResponses, 0.5);
 
-    CMAES<> cmaes(0, -1, 1, 32, 200, 1e-3);
+    CMAES<> cmaes(0, -20, 20, 32, 200, 1e-3);
     arma::fmat coordinates = lr.GetInitialPoint();
     cmaes.Optimize(lr, coordinates);
 
@@ -155,7 +155,7 @@ TEST_CASE("ApproxCMAESLogisticRegressionFMatTest", "[CMAESTest]")
         responses, testResponses, shuffledResponses);
     LogisticRegression<arma::fmat> lr(shuffledData, shuffledResponses, 0.5);
 
-    ApproxCMAES<> cmaes(0, -1, 1, 32, 200, 1e-3);
+    ApproxCMAES<> cmaes(0, -20, 20, 32, 200, 1e-3);
     arma::fmat coordinates = lr.GetInitialPoint();
     cmaes.Optimize(lr, coordinates);