
Allowing more flexible cost functions for optimizers #959

Merged: 49 commits, merged Jan 6, 2021. Changes shown from 17 commits.
* fe79cc6: allowing more parameters to cost function in gradient descent (albi3ro, Dec 9, 2020)
* 9362b82: multiple args and kwargs support for most optimizers (albi3ro, Dec 11, 2020)
* 5609f5d: improved structure, return format (albi3ro, Dec 14, 2020)
* 2c14788: edit changelog (albi3ro, Dec 14, 2020)
* 24d30e1: near finished version of the operators (albi3ro, Dec 15, 2020)
* 21e9e01: update doc about provided gradient form (albi3ro, Dec 16, 2020)
* a820b55: update test_optimize for new gradient form (albi3ro, Dec 16, 2020)
* ad0906a: testing multiple arguments, non-training args, keywords (albi3ro, Dec 16, 2020)
* ed4ac91: improved changelog (albi3ro, Dec 16, 2020)
* b30a552: linting (albi3ro, Dec 16, 2020)
* cb2b328: linting (albi3ro, Dec 16, 2020)
* 5c03e65: Merge remote-tracking branch 'origin/optimize_more_parameters' into o… (albi3ro, Dec 16, 2020)
* df0a7a8: Merge branch 'master' into optimize_more_parameters (albi3ro, Dec 16, 2020)
* 4cf358c: black formatting (albi3ro, Dec 16, 2020)
* b9a447d: Merge remote-tracking branch 'origin/optimize_more_parameters' into o… (albi3ro, Dec 16, 2020)
* 7624f24: different black parameters (albi3ro, Dec 16, 2020)
* 5e0c40b: Merge branch 'master' into optimize_more_parameters (josh146, Dec 17, 2020)
* 12598c8: Update .github/CHANGELOG.md (albi3ro, Dec 17, 2020)
* 4f35668: changelog conform to black (albi3ro, Dec 17, 2020)
* 193cb53: wording change (albi3ro, Dec 17, 2020)
* 095de9e: wording change (albi3ro, Dec 17, 2020)
* 9f189f9: comments on code example (albi3ro, Dec 17, 2020)
* b4b0d71: wording change (albi3ro, Dec 17, 2020)
* 83dcf94: Update pennylane/optimize/gradient_descent.py (albi3ro, Dec 17, 2020)
* a3c2534: Update pennylane/optimize/gradient_descent.py (albi3ro, Dec 17, 2020)
* b4ed411: Update pennylane/optimize/momentum.py (albi3ro, Dec 17, 2020)
* b3c3857: docs string wording (albi3ro, Dec 17, 2020)
* 29416d2: Update pennylane/optimize/rotosolve.py (albi3ro, Dec 17, 2020)
* dfc0092: Update pennylane/optimize/nesterov_momentum.py (albi3ro, Dec 17, 2020)
* 6cc787c: Update pennylane/optimize/rotosolve.py (albi3ro, Dec 17, 2020)
* 703942d: Update pennylane/optimize/adam.py (albi3ro, Dec 17, 2020)
* 6ca34cb: fix rotosolve (albi3ro, Dec 17, 2020)
* 4854c6b: improve docstrings (albi3ro, Dec 17, 2020)
* 16c6bde: Apply simple, local suggestions from code review (albi3ro, Dec 18, 2020)
* 7ddbcbc: Most code review comments implemented (albi3ro, Dec 18, 2020)
* c53adb7: black on new tests (albi3ro, Dec 18, 2020)
* d9d03a9: fix nesterov momentum (albi3ro, Dec 18, 2020)
* afd0cfa: Merge branch 'master' into optimize_more_parameters (antalszava, Dec 18, 2020)
* 751a030: Merge remote-tracking branch 'origin/optimize_more_parameters' into o… (albi3ro, Dec 18, 2020)
* 90257ba: actually add rotoselect kwargs this time. nesterov test (albi3ro, Dec 18, 2020)
* a35d782: ran black on rotoselect (albi3ro, Dec 21, 2020)
* 033af1b: minor docstring fixes (albi3ro, Dec 22, 2020)
* 3c58644: Merge branch 'master' into optimize_more_parameters (albi3ro, Dec 22, 2020)
* f39a839: name on changelog, tests in progress changing (albi3ro, Dec 28, 2020)
* 0eb4133: black (albi3ro, Jan 4, 2021)
* bfe0a4b: test rotosolve, fix rotosolve (albi3ro, Jan 4, 2021)
* c3d1e49: Merge branch 'master' into optimize_more_parameters (albi3ro, Jan 4, 2021)
* 761bfed: Merge branch 'master' into optimize_more_parameters (albi3ro, Jan 6, 2021)
* f7e9d67: remove import of mocker (albi3ro, Jan 6, 2021)
.github/CHANGELOG.md: 36 changes (36 additions, 0 deletions)

@@ -2,6 +2,41 @@

<h3>New features since last release</h3>

* Optimizers allow more flexible cost functions. The cost function passed to most optimizers
may accept any combination of trainable arguments, non-trainable arguments, and keywords.
  Any non-trainable, constant argument must be marked with `requires_grad=False`.
  The `RotoselectOptimizer` allows only keyword arguments.
[(#959)](https://github.com/PennyLaneAI/pennylane/pull/959)

The full changes apply to:

* `AdagradOptimizer`
* `AdamOptimizer`
* `GradientDescentOptimizer`
* `MomentumOptimizer`
* `NesterovMomentumOptimizer`
* `RMSPropOptimizer`
* `RotosolveOptimizer`

Example use:

```python
  def cost(x, y, data, scale=1.0):
      return scale * (x[0] - data) ** 2 + scale * (y - data) ** 2

x = np.array([1.], requires_grad=True)
y = np.array([1.0])
data = np.array([2.], requires_grad=False)

opt = qml.GradientDescentOptimizer()
x_new, y_new, data = opt.step(cost, x, y, data, scale=0.5)

(x_new, y_new, data), value = opt.step_and_cost(cost, x, y, data, scale=0.5)

params = (x, y, data)
params = opt.step(cost, *params)
```
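The trainable/non-trainable dispatch shown above can be sketched in plain Python. This is a minimal mock of the behaviour, not PennyLane's implementation; `MockTensor` and `toy_step` are hypothetical names:

```python
class MockTensor(float):
    """Hypothetical stand-in for a tensor that carries a requires_grad flag."""

    def __new__(cls, value, requires_grad=True):
        obj = super().__new__(cls, value)
        obj.requires_grad = requires_grad
        return obj


def toy_step(grads, args, stepsize=0.5):
    """Update only the trainable arguments; constants pass through untouched."""
    new_args = list(args)
    trained_index = 0
    for index, arg in enumerate(args):
        # Objects without the attribute default to trainable, as in the optimizers
        if getattr(arg, "requires_grad", True):
            new_args[index] = MockTensor(arg - stepsize * grads[trained_index])
            trained_index += 1
    return new_args


x = MockTensor(1.0)
data = MockTensor(2.0, requires_grad=False)
# For cost = 0.5 * (x - data)**2, the gradient w.r.t. x at (1.0, 2.0) is -1.0
stepped = toy_step([-1.0], [x, data])  # x moves toward data; data is unchanged
```

Note that the gradient list has one entry per trainable argument, so the optimizer walks it with a separate index from the argument list.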
Review comment (Member): Don't forget to add your name to contributors! (unless you have done that already, and I missed it)

* A new `qml.draw` function is available, allowing QNodes to be easily
drawn without execution by providing example input.
[(#962)](https://github.com/PennyLaneAI/pennylane/pull/962)
@@ -86,6 +121,7 @@
return qml.expval(qml.PauliZ(0))
```


<h3>Improvements</h3>

* A new test series, pennylane/devices/tests/test_compare_default_qubit.py, has been added, allowing to test if
pennylane/optimize/adagrad.py: 49 changes (37 additions, 12 deletions)

@@ -15,6 +15,7 @@
import math

from pennylane.utils import _flatten, unflatten
from pennylane.numpy import ndarray, tensor
from .gradient_descent import GradientDescentOptimizer


@@ -51,7 +52,7 @@ def __init__(self, stepsize=0.01, eps=1e-8):
self.eps = eps
self.accumulation = None

def apply_grad(self, grad, x):
def apply_grad(self, grad, args):
r"""Update the variables x to take a single optimization step. Flattens and unflattens
the inputs to maintain nested iterables as the parameters of the optimization.

@@ -63,21 +64,45 @@
Returns:
array: the new values :math:`x^{(t+1)}`
"""

x_flat = _flatten(x)
grad_flat = list(_flatten(grad))
args_new = list(args)

if self.accumulation is None:
self.accumulation = [g * g for g in grad_flat]
else:
self.accumulation = [a + g * g for a, g in zip(self.accumulation, grad_flat)]
self.accumulation = [None] * len(args)

trained_index = 0
for index, arg in enumerate(args):
if getattr(arg, "requires_grad", True):
x_flat = _flatten(arg)
grad_flat = list(_flatten(grad[trained_index]))
trained_index += 1

self._update_accumulation(index, grad_flat)

x_new_flat = [
e - (self._stepsize / math.sqrt(a + self.eps)) * g
for a, g, e in zip(self.accumulation[index], grad_flat, x_flat)
]

args_new[index] = unflatten(x_new_flat, arg)

x_new_flat = [
e - (self._stepsize / math.sqrt(a + self.eps)) * g
for a, g, e in zip(self.accumulation, grad_flat, x_flat)
]
if isinstance(arg, ndarray):
args_new[index] = args_new[index].view(tensor)
args_new[index].requires_grad = True

return unflatten(x_new_flat, x)
return args_new

def _update_accumulation(self, index, grad_flat):
r"""Update the accumulation at index with gradient

Args:
index (int): location of the arg to update
grad_flat (list): flattened list form of the gradient
"""
if self.accumulation[index] is None:
self.accumulation[index] = [g * g for g in grad_flat]
else:
self.accumulation[index] = [
a + g * g for a, g in zip(self.accumulation[index], grad_flat)
]
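As a sanity check on the update rule above, the Adagrad step the diff implements (accumulate squared gradients, then divide the step size by the root of the accumulation) can be sketched in plain Python. `adagrad_update` is a hypothetical helper for illustration, not part of PennyLane:

```python
import math


def adagrad_update(x_flat, grad_flat, accumulation, stepsize=0.01, eps=1e-8):
    """One Adagrad step on flattened parameters; returns new params and state."""
    if accumulation is None:
        # First step: accumulation starts as the squared gradient
        accumulation = [g * g for g in grad_flat]
    else:
        accumulation = [a + g * g for a, g in zip(accumulation, grad_flat)]
    x_new = [
        e - (stepsize / math.sqrt(a + eps)) * g
        for a, g, e in zip(accumulation, grad_flat, x_flat)
    ]
    return x_new, accumulation


x, acc = [1.0], None
x, acc = adagrad_update(x, [2.0], acc)  # accumulation becomes [4.0]
```

Because the accumulation only grows, the effective step size for each parameter shrinks over time, which is the defining behaviour of Adagrad.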

def reset(self):
"""Reset optimizer by erasing memory of past steps."""
pennylane/optimize/adam.py: 73 changes (50 additions, 23 deletions)

@@ -15,6 +15,7 @@
import math

from pennylane.utils import _flatten, unflatten
from pennylane.numpy import ndarray, tensor
from .gradient_descent import GradientDescentOptimizer


@@ -61,7 +62,7 @@ def __init__(self, stepsize=0.01, beta1=0.9, beta2=0.99, eps=1e-8):
self.sm = None
self.t = 0

def apply_grad(self, grad, x):
def apply_grad(self, grad, args):
r"""Update the variables x to take a single optimization step. Flattens and unflattens
the inputs to maintain nested iterables as the parameters of the optimization.

@@ -73,37 +74,63 @@
Returns:
array: the new values :math:`x^{(t+1)}`
"""

args_new = list(args)
self.t += 1

grad_flat = list(_flatten(grad))
x_flat = _flatten(x)
# Update step size (instead of correcting for bias)
new_stepsize = (
self._stepsize * math.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t)
)

# Update first moment
if self.fm is None:
self.fm = grad_flat
else:
self.fm = [self.beta1 * f + (1 - self.beta1) * g for f, g in zip(self.fm, grad_flat)]
self.fm = [None] * len(args)

# Update second moment
if self.sm is None:
self.sm = [g * g for g in grad_flat]
else:
self.sm = [
self.beta2 * f + (1 - self.beta2) * g * g for f, g in zip(self.sm, grad_flat)
]
self.sm = [None] * len(args)

# Update step size (instead of correcting for bias)
new_stepsize = (
self._stepsize * math.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t)
)
trained_index = 0
for index, arg in enumerate(args):
if getattr(arg, "requires_grad", True):
x_flat = _flatten(arg)
grad_flat = list(_flatten(grad[trained_index]))
trained_index += 1

x_new_flat = [
e - new_stepsize * f / (math.sqrt(s) + self.eps)
for f, s, e in zip(self.fm, self.sm, x_flat)
]
self._update_moments(index, grad_flat)

return unflatten(x_new_flat, x)
x_new_flat = [
e - new_stepsize * f / (math.sqrt(s) + self.eps)
for f, s, e in zip(self.fm[index], self.sm[index], x_flat)
]
args_new[index] = unflatten(x_new_flat, arg)

if isinstance(arg, ndarray):
args_new[index] = args_new[index].view(tensor)
args_new[index].requires_grad = True

return args_new

def _update_moments(self, index, grad_flat):
r"""Update the moments

Args:
index (int): the index of the trainable argument to update, out of the trainable params
grad_flat (list): the flattened gradient for that trainable param
"""
# update first moment
if self.fm[index] is None:
self.fm[index] = grad_flat
else:
self.fm[index] = [
self.beta1 * f + (1 - self.beta1) * g for f, g in zip(self.fm[index], grad_flat)
]

# update second moment
if self.sm[index] is None:
self.sm[index] = [g * g for g in grad_flat]
else:
self.sm[index] = [
self.beta2 * f + (1 - self.beta2) * g * g for f, g in zip(self.sm[index], grad_flat)
]
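The two-moment bookkeeping above, with the bias correction folded into the step size as the diff's comment notes, can be checked with a plain-Python sketch; `adam_update` is a hypothetical single-argument helper, not PennyLane's API:

```python
import math


def adam_update(x_flat, grad_flat, fm, sm, t,
                stepsize=0.01, beta1=0.9, beta2=0.99, eps=1e-8):
    """One Adam step on flattened parameters, mirroring the diff's update rule."""
    # Bias correction is folded into the step size instead of into the moments
    new_stepsize = stepsize * math.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
    # First moment: exponential moving average of the gradient
    fm = grad_flat if fm is None else [
        beta1 * f + (1 - beta1) * g for f, g in zip(fm, grad_flat)
    ]
    # Second moment: exponential moving average of the squared gradient
    sm = [g * g for g in grad_flat] if sm is None else [
        beta2 * s + (1 - beta2) * g * g for s, g in zip(sm, grad_flat)
    ]
    x_new = [
        e - new_stepsize * f / (math.sqrt(s) + eps)
        for f, s, e in zip(fm, sm, x_flat)
    ]
    return x_new, fm, sm


x, fm, sm = [1.0], None, None
x, fm, sm = adam_update(x, [2.0], fm, sm, t=1)
```

At `t=1` with these defaults the corrected step size works out to exactly `stepsize`, so the first update is roughly a plain gradient-descent step of size `stepsize` in sign-of-gradient direction.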

def reset(self):
"""Reset optimizer by erasing memory of past steps."""
pennylane/optimize/gradient_descent.py: 67 changes (46 additions, 21 deletions)

@@ -15,6 +15,7 @@

from pennylane._grad import grad as get_gradient
from pennylane.utils import _flatten, unflatten
from pennylane.numpy import ndarray, tensor


class GradientDescentOptimizer:
@@ -47,86 +48,110 @@ def update_stepsize(self, stepsize):
"""
self._stepsize = stepsize

def step_and_cost(self, objective_fn, x, grad_fn=None):
def step_and_cost(self, objective_fn, *args, grad_fn=None, **kwargs):
"""Update x with one step of the optimizer and return the corresponding objective
function value prior to the step.

Review comment (Contributor): Suggested change: replace "Update x" with "Update args" in the docstring.

Review comment (Contributor): There are a few xs in some docstrings that should be updated to args with requires_grad=True. Suggested change: """Update differentiable arguments with one step of the optimizer and return the corresponding objective function value prior to the step."""

Args:
objective_fn (function): the objective function for optimization
x (array): NumPy array containing the current values of the variables to be updated
*args : Variable length argument list for objective function
Review thread:
* Contributor: Suggested change: `*args (list): argument list for objective function`.
* Author (albi3ro): So `*args`, with the star, is neither a tuple nor a list; it's an unpacked tuple. I've looked up examples of how to document `*args` and `**kwargs` in docstrings. At least for the Google style, they don't specify a particular type for `*args` and `**kwargs`, just what they do / get used for.
* Contributor: Oh, right. My bad! Then this is probably the right way to do it. Feel free to mark all of these comments as resolved. 🙂
grad_fn (function): Optional gradient function of the
objective function with respect to the variables ``x``.
If ``None``, the gradient function is computed automatically.
Must match shape of autograd derivative.
Review comment (Contributor): Might be nicer to clarify a bit better what shape means in this case, e.g. "will always be a tuple containing ...".

**kwargs : Variable length of keywords for the cost function

Returns:
tuple: the new variable values :math:`x^{(t+1)}` and the objective function output
prior to the step
"""

g, forward = self.compute_grad(objective_fn, x, grad_fn=grad_fn)
x_out = self.apply_grad(g, x)
g, forward = self.compute_grad(objective_fn, args, kwargs, grad_fn=grad_fn)
new_args = self.apply_grad(g, args)

if forward is None:
forward = objective_fn(x)
forward = objective_fn(*args, **kwargs)

return x_out, forward
if len(new_args) == 1:
return new_args[0], forward
return new_args, forward

def step(self, objective_fn, x, grad_fn=None):
def step(self, objective_fn, *args, grad_fn=None, **kwargs):
"""Update x with one step of the optimizer.

Args:
objective_fn (function): the objective function for optimization
x (array): NumPy array containing the current values of the variables to be updated
*args : Variable length argument list for objective function
Review thread:
* Contributor: Suggested change: `*args (list): argument list for objective function`.
* Author (albi3ro): `args` itself is a tuple, but `*args` is an unpacked tuple and its components are not confined to a particular type.
grad_fn (function): Optional gradient function of the
objective function with respect to the variables ``x``.
If ``None``, the gradient function is computed automatically.
Must match shape of autograd derivative.
Review comment (Member): Rather than saying 'must match shape of...', perhaps it's better to explicitly say what the output of a custom grad function should look like, e.g. "The provided grad function must have output of shape ...".

**kwargs : Variable length of keywords for the cost function

Returns:
array: the new variable values :math:`x^{(t+1)}`
"""
g, _ = self.compute_grad(objective_fn, x, grad_fn=grad_fn)
x_out = self.apply_grad(g, x)

return x_out
g, _ = self.compute_grad(objective_fn, args, kwargs, grad_fn=grad_fn)
new_args = self.apply_grad(g, args)

if len(new_args) == 1:
return new_args[0]

return new_args

@staticmethod
def compute_grad(objective_fn, x, grad_fn=None):
def compute_grad(objective_fn, args, kwargs, grad_fn=None):
r"""Compute gradient of the objective_fn at the point x and return it along with the
objective function forward pass (if available).

Args:
objective_fn (function): the objective function for optimization
x (array): NumPy array containing the current values of the variables to be updated
args (tuple(array)): Tuple of NumPy arrays containing the current values for the
objective function
kwargs (dict): Keywords for the cost function
grad_fn (function): Optional gradient function of the objective function with respect to
the variables ``x``. If ``None``, the gradient function is computed automatically.
Must match shape of autograd derivative.
Review thread:
* Member: Rather than saying 'must match shape of...', perhaps it's better to explicitly say what the output of a custom grad function should look like, e.g. "The provided grad function must have output of shape ...".
* Author (albi3ro): I'm just struggling with a good way to describe the form. (df/d(trainable argument 0), df/d(trainable argument 1), ...) just doesn't look very nice.
* Member: How about: The output of the `grad_fn` function must be a sequence of shape `(num_trainable_args,)`.

Returns:
tuple: The NumPy array containing the gradient :math:`\nabla f(x^{(t)})` and the
objective function output. If ``grad_fn`` is provided, the objective function
will not be evaluated and instead ``None`` will be returned.
"""
g = get_gradient(objective_fn) if grad_fn is None else grad_fn
grad = g(x)
grad = g(*args, **kwargs)
forward = getattr(g, "forward", None)

return grad, forward
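The shape convention discussed in the thread above (one gradient entry per trainable argument, non-trainable arguments skipped) can be illustrated with a finite-difference stand-in for the autograd call; `toy_grad` and `trainable_indices` are hypothetical names for illustration only:

```python
def toy_grad(fn, trainable_indices, *args, h=1e-6, **kwargs):
    """Finite-difference gradient with one entry per trainable scalar argument."""
    base = fn(*args, **kwargs)
    grads = []
    for i in trainable_indices:
        shifted = list(args)
        shifted[i] = args[i] + h  # nudge only this argument
        grads.append((fn(*shifted, **kwargs) - base) / h)
    return tuple(grads)


def cost(x, y, data, scale=1.0):
    return scale * (x - data) ** 2 + scale * (y - data) ** 2


# x and y (indices 0 and 1) are trainable; data is a constant, so it gets no entry
g = toy_grad(cost, (0, 1), 1.0, 1.0, 2.0, scale=0.5)
```

The result has length two, matching the two trainable arguments, which is exactly the "sequence of shape `(num_trainable_args,)`" wording proposed in the review.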

def apply_grad(self, grad, x):
r"""Update the variables x to take a single optimization step. Flattens and unflattens
def apply_grad(self, grad, args):
Review thread:
* Contributor (trbromley): Just some feedback from the hackathon based upon a submission that uses compute_grad and apply_grad: they had some confusion because they needed to switch from passing x=params to args=[params] and also (for compute_grad) they needed to explicitly pass kwargs={}. Should we consider making these methods similar to step(*args, **kwargs)? Or, alternatively, just make them hidden methods.
* Contributor: @trbromley I tend to agree, especially if this would be more of a user-facing method (not sure how much of an issue it would be otherwise). There was a brief discussion re. this when this was under review. Perhaps just making them hidden by preceding with an underscore would be good here. 🤔 @albi3ro @josh146
* Member: I'm somewhat ambivalent here. We definitely considered compute_grad and apply_grad 'private' methods when first designing the optimizers, as they were not intended to be used as the public optimizer API. But it probably makes more sense to keep them public, and standardize the signatures, since the fact that users were finding and using them suggests they add value.
* Author (albi3ro): Unless there's a larger demand from users for this function, it'd be inefficient to unpack and repack the values. It would add operations for no real purpose.
* Member: I'm in favour of 'doing nothing' for now unless there is more feedback and demand.
r"""Update the variables to take a single optimization step. Flattens and unflattens
the inputs to maintain nested iterables as the parameters of the optimization.

Args:
grad (array): The gradient of the objective
function at point :math:`x^{(t)}`: :math:`\nabla f(x^{(t)})`
x (array): the current value of the variables :math:`x^{(t)}`
x (tuple(array)): the current value of the variables :math:`x^{(t)}`

Returns:
array: the new values :math:`x^{(t+1)}`
"""
args_new = list(args)

trained_index = 0
for index, arg in enumerate(args):
if getattr(arg, "requires_grad", True):
x_flat = _flatten(arg)
grad_flat = _flatten(grad[trained_index])
trained_index += 1

x_new_flat = [e - self._stepsize * g for g, e in zip(grad_flat, x_flat)]

x_flat = _flatten(x)
grad_flat = _flatten(grad)
args_new[index] = unflatten(x_new_flat, args[index])

x_new_flat = [e - self._stepsize * g for g, e in zip(grad_flat, x_flat)]
if isinstance(arg, ndarray):
args_new[index] = args_new[index].view(tensor)
args_new[index].requires_grad = True

return unflatten(x_new_flat, x)
return args_new
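Putting the pieces together, a toy version of the whole step (compute one gradient per trainable argument, then update only those arguments) might look like the following; `gd_step` and the explicit `trainable` mask are hypothetical simplifications of the class above, which reads the flag off each argument instead:

```python
def gd_step(grad_fn, args, trainable, stepsize=0.5, **kwargs):
    """Toy gradient-descent step over a mixed trainable/constant argument list."""
    grads = grad_fn(*args, **kwargs)  # one entry per trainable argument
    new_args = list(args)
    trained_index = 0
    for index, is_trainable in enumerate(trainable):
        if is_trainable:
            new_args[index] = args[index] - stepsize * grads[trained_index]
            trained_index += 1
    return new_args


# Gradient of cost = scale*(x - data)**2 + scale*(y - data)**2, w.r.t. x and y only
def grad_fn(x, y, data, scale=1.0):
    return (2 * scale * (x - data), 2 * scale * (y - data))


new_x, new_y, new_data = gd_step(
    grad_fn, [1.0, 1.0, 2.0], [True, True, False], scale=0.5
)
```

As in the changelog example, the constant `data` passes through unchanged while `x` and `y` each take a gradient step toward it.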