From b83d9203e0afad6b76a437e533fa86b95379934c Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Wed, 24 Nov 2021 11:21:25 -0500
Subject: [PATCH 01/27] Add vecMatrixProd

---
 pennylane_lightning/src/tests/Test_Util.cpp |  50 +++++++++
 pennylane_lightning/src/util/Util.hpp       | 111 ++++++++++++++++++++
 2 files changed, 161 insertions(+)
diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp
index 546d592e4f..9b29ab52e4 100644
--- a/pennylane_lightning/src/tests/Test_Util.cpp
+++ b/pennylane_lightning/src/tests/Test_Util.cpp
@@ -197,6 +197,56 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util]", float, double) {
                               Contains("Invalid m & n for the input matrix"));
         }
     }
+    SECTION("vecMatrixProd") {
+        SECTION("Simple Iterative") {
+            for (size_t m = 2; m < 8; m++) {
+                std::vector<double> mat(m * m, 1);
+                std::vector<double> v_in(m, 1);
+                std::vector<double> v_expected(
+                    m, m);
+                std::vector<double> v_out =
+                    Util::vecMatrixProd(v_in, mat, m, m);
+                CAPTURE(v_out);
+                CAPTURE(v_expected);
+                for (size_t i = 0; i < m; i++) {
+                    CHECK(v_out[i] == v_expected[i]);
+                }
+            }
+        }
+        SECTION("Zero Vector") {
+            for (size_t m = 2; m < 8; m++) {
+                std::vector<double> mat(m * m, 1);
+                std::vector<double> v_in(m, 0);
+                std::vector<double> v_expected(
+                    m, 0);
+                std::vector<double> v_out =
+                    Util::vecMatrixProd(v_in, mat, m, m);
+                CAPTURE(v_out);
+                CAPTURE(v_expected);
+                for (size_t i = 0; i < m; i++) {
+                    CHECK(v_out[i] == v_expected[i]);
+                }
+            }
+        }
+        SECTION("Random Matrix") {
+            std::vector<float> v_in{
+                1.0, 2.0, 3.0, 4.0};
+            std::vector<float> mat{
+                1.0 ,  0.1,  0.2,
+                0.2,  0.6,  0.1,
+                0.4, -0.7,  1.2,
+                -0.5, -0.6,  0.7};
+            std::vector<float> v_expected{
+                0.6, -3.2,  6.8};
+            std::vector<float> v_out = 
+                Util::vecMatrixProd(v_in, mat, 4, 3);
+            CAPTURE(v_out);
+            CAPTURE(v_expected);
+            for (size_t i = 0; i < 3; i++) {
+                CHECK(std::abs(v_out[i] - v_expected[i]) < 0.000001);
+            }
+        }
+    }
     SECTION("Transpose") {
         SECTION("Simple Matrix") {
             for (size_t m = 2; m < 8; m++) {
diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index 6a811f0086..9e3f8232df 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -502,6 +502,117 @@ inline auto matrixVecProd(const std::vector<std::complex<T>> mat,
     return v_out;
 }
 
+/**
+ * @brief Calculates transpose of a matrix recursively and Cache-Friendly
+ * using blacking and Cache-optimized techniques.
+ */
+template <class T, size_t BLOCKSIZE = 32> // NOLINT(readability-magic-numbers)
+inline static void CFTranspose(const T *mat,
+                               T *mat_t, size_t m, size_t n,
+                               size_t m1, size_t m2, size_t n1, size_t n2) {
+    size_t r;
+    size_t s;
+
+    size_t r1;
+    size_t s1;
+    size_t r2;
+    size_t s2;
+
+    r1 = m2 - m1;
+    s1 = n2 - n1;
+
+    if (r1 >= s1 && r1 > BLOCKSIZE) {
+        r2 = (m1 + m2) / 2;
+        CFTranspose(mat, mat_t, m, n, m1, r2, n1, n2);
+        m1 = r2;
+        CFTranspose(mat, mat_t, m, n, m1, m2, n1, n2);
+    } else if (s1 > BLOCKSIZE) {
+        s2 = (n1 + n2) / 2;
+        CFTranspose(mat, mat_t, m, n, m1, m2, n1, s2);
+        n1 = s2;
+        CFTranspose(mat, mat_t, m, n, m1, m2, n1, n2);
+    } else {
+        for (r = m1; r < m2; r++) {
+            for (s = n1; s < n2; s++) {
+                mat_t[s * m + r] = mat[r * n + s];
+            }
+        }
+    }
+}
+
+/**
+ * @brief Calculates vector-matrix product.
+ *
+ * @tparam T Floating point precision type.
+ * @param v_in Data array representing a vector of shape m * 1.
+ * @param mat Data array representing a flattened (row-major) m * n matrix.
+ * @param v_out Pre-allocated, zero-initialized array of length n; the result
+ *              `mat_t \times v_in` (`mat_t`: `mat` transposed) is added to it.
+ * @param m Number of rows of `mat`.
+ * @param n Number of columns of `mat`.
+ */
+template <class T>
+inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, 
+                        size_t m, size_t n) {
+    if (!v_out) {
+        return;
+    }
+
+    // v_in m * 1
+    // mat m * n
+    // return  mat'[n*m] * v_in[m*1]  
+    // v_out n * 1
+    size_t i;
+    size_t j;
+    
+    T z = static_cast<T>(0.0);
+    bool allzero = true;
+    for (j = 0; j < m; j++) {
+        if (v_in[j] != z) {
+            allzero = false;
+            break;
+        }
+    }
+
+    if (allzero) {
+        return;
+    }
+
+    T *mat_t = new T[m*n];
+    CFTranspose(mat, mat_t, m, n, 0, m, 0, n);
+
+    for (i = 0; i < n; i++) {
+        for (j = 0; j < m; j++) {
+            v_out[i] += mat_t[i*m+j] * v_in[j];
+        }
+    }
+
+    delete[] mat_t;
+}
+
+/**
+ * @brief Calculates the vactor-matrix product using the best available method.
+ *
+ * @see template <class T> inline void vecMatrixProd(const T *v_in, 
+ * const T *mat, T *v_out, size_t m, size_t n)
+ */
+template <class T>
+inline auto vecMatrixProd(const std::vector<T> v_in,
+                        const std::vector<T> mat, 
+                        size_t m, size_t n)
+    -> std::vector<T> {
+    if (v_in.size() != m) {
+        throw std::invalid_argument("Invalid size for the input vector");
+    }
+    if (mat.size() != m * n) {
+        throw std::invalid_argument("Invalid m & n for the input matrix");
+    }
+
+    std::vector<T> v_out(n);
+    vecMatrixProd(v_in.data(), mat.data(), v_out.data(), m, n);
+    return v_out;
+}
+
 /**
  * @brief Calculates transpose of a matrix recursively and Cache-Friendly
  * using blacking and Cache-optimized techniques.

From c3c7525e398331bc763a660c59e92319e1930aa7 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Wed, 24 Nov 2021 20:13:55 -0500
Subject: [PATCH 02/27] Add vjp to lightning_qubit.py and tests

---
 pennylane_lightning/lightning_qubit.py | 134 ++++++++++++
 tests/test_vector_jacobian_product.py  | 286 +++++++++++++++++++++++++
 2 files changed, 420 insertions(+)
 create mode 100644 tests/test_vector_jacobian_product.py

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 9a47d3d7c6..4f317427d2 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -18,6 +18,7 @@
 from warnings import warn
 
 import numpy as np
+from numpy.lib.function_base import vectorize
 from pennylane import (
     BasisState,
     DeviceError,
@@ -26,6 +27,7 @@
     QubitUnitary,
 )
 import pennylane as qml
+from pennylane import math
 from pennylane.devices import DefaultQubit
 from pennylane.operation import Expectation
 
@@ -236,6 +238,138 @@ def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False):
         )
         return jac
 
+    def _compute_vjp_tensordot(self, dy, jac, num=None):
+        if jac is None:
+            return None
+        
+        dy_reshaped = math.reshape(dy, [-1])
+        num = math.shape(dy_reshaped)[0] if num is None else num
+        jac = math.convert_like(jac, dy_reshaped) if not isinstance(dy_reshaped, np.ndarray) else jac
+        jac = math.reshape(jac, [num, -1])
+
+        try:
+            if math.allclose(dy, 0):
+                return math.convert_like(np.zeros([jac.shape[1]]), dy)
+        except (AttributeError, TypeError):
+            pass
+
+        return math.tensordot(jac, dy_reshaped, [[0], [0]])
+
+    def vector_jacobian_product(self, tape, dy, num=None, starting_state=None, use_device_state=False):
+        """Generate the the vector-Jacobian products of a tape.
+        
+        Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
+        .. math::
+            \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
+                \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
+                \vdots &\ddots &\vdots\\
+                \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
+            \end{pmatrix}.
+        During backpropagation, the chain rule is applied. For example, consider the
+        cost function :math:`h = y\circ f: \mathbb{R}^n \rightarrow \mathbb{R}`,
+        where :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`.
+        The gradient is:
+        .. math::
+            \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}} \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
+            = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).
+        Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can write this in the form
+        of a matrix multiplication:
+        .. math:: \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=1}^m d\mathbf{y}_i ~ \mathbf{J}_{ij}.
+        Thus, we can see that the gradient of the cost function is given by the so-called
+        **vector-Jacobian product**; the product of the row-vector :math:`d\mathbf{y}`, representing
+        the gradient of subsequent components of the cost function, and :math:`\mathbf{J}`,
+        the Jacobian of the current node of interest.
+
+        Args:
+            tape (.QuantumTape): quantum tape to differentiate
+            dy (tensor_like): Gradient-output vector. Must have shape
+                matching the output shape of the corresponding tape.
+            num (int): The length of the flattened ``dy`` argument. This is an
+                optional argument, but can be useful to provide if ``dy`` potentially
+                has no shape (for example, due to tracing or just-in-time compilation).
+            starting_state: Passed unchanged to ``adjoint_jacobian``; defaults to ``None``.
+            use_device_state: Passed unchanged to ``adjoint_jacobian``; defaults to ``False``.
+        Returns:
+            tensor_like or None: Vector-Jacobian product. Returns None if the tape
+            has no trainable parameters.  
+        """
+        num_params = len(tape.trainable_params)
+        if num_params == 0:
+            # The tape has no trainable parameters; the VJP
+            # is simply none.
+            return None
+        
+        try:
+            # If the dy vector is zero, then the
+            # corresponding element of the VJP will be zero,
+            # and we can avoid a quantum computation.
+            if math.allclose(dy, 0):
+                return math.convert_like(np.zeros([num_params]), dy)
+        except (AttributeError, TypeError):
+            pass
+        
+        jac = self.adjoint_jacobian(tape, starting_state=starting_state, use_device_state=use_device_state)
+
+        return self._compute_vjp_tensordot(dy, jac, num=num)
+
+    def batch_vector_jacobian_product(self, tapes, dys, num=None, reduction="append", starting_state=None, use_device_state=False):
+        """Generate the the vector-Jacobian products of a batch of tapes.
+        
+        Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
+        .. math::
+            \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
+                \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
+                \vdots &\ddots &\vdots\\
+                \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
+            \end{pmatrix}.
+        During backpropagation, the chain rule is applied. For example, consider the
+        cost function :math:`h = y\circ f: \mathbb{R}^n \rightarrow \mathbb{R}`,
+        where :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`.
+        The gradient is:
+        .. math::
+            \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}} \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
+            = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).
+        Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can write this in the form
+        of a matrix multiplication:
+        .. math:: \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=1}^m d\mathbf{y}_i ~ \mathbf{J}_{ij}.
+        Thus, we can see that the gradient of the cost function is given by the so-called
+        **vector-Jacobian product**; the product of the row-vector :math:`d\mathbf{y}`, representing
+        the gradient of subsequent components of the cost function, and :math:`\mathbf{J}`,
+        the Jacobian of the current node of interest.
+
+        Args:
+            tapes (Sequence[.QuantumTape]): sequence of quantum tapes to differentiate
+            dys (Sequence[tensor_like]): Sequence of gradient-output vectors ``dy``. Must be the
+                same length as ``tapes``. Each ``dy`` tensor should have shape
+                matching the output shape of the corresponding tape.
+            num (int): The length of the flattened ``dy`` argument. This is an
+                optional argument, but can be useful to provide if ``dy`` potentially
+                has no shape (for example, due to tracing or just-in-time compilation).
+            reduction (str): Determines how the vector-Jacobian products are returned.
+                If ``append``, then the output of the function will be of the form
+                ``List[tensor_like]``, with each element corresponding to the VJP of each
+                input tape. If ``extend``, then the output VJPs will be concatenated.
+            starting_state: Passed unchanged to ``vector_jacobian_product`` for every tape.
+            use_device_state: Passed unchanged to ``vector_jacobian_product`` for every tape.
+        Returns:
+            List[tensor_like or None]: list of vector-Jacobian products. ``None`` elements correspond
+            to tapes with no trainable parameters.
+        """
+        vjps = []
+
+        # Loop through the tapes and dys vector
+        for tape, dy in zip(tapes, dys):
+            vjp = self.vector_jacobian_product(tape, dy, num=num, starting_state=starting_state, use_device_state=use_device_state)
+            if vjp is None:
+                if reduction == "append":
+                    vjps.append(None)
+                continue
+            if isinstance(reduction, str):
+                getattr(vjps, reduction)(vjp)
+            elif callable(reduction):
+                reduction(vjps, vjp)
+
+        return vjps
 
 if not CPP_BINARY_AVAILABLE:
 
diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
new file mode 100644
index 0000000000..6468f6facb
--- /dev/null
+++ b/tests/test_vector_jacobian_product.py
@@ -0,0 +1,286 @@
+# Copyright 2018-2021 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Tests for the ``vector_jacobian_product`` method of LightningQubit.
+"""
+import pytest
+
+import pennylane as qml
+from pennylane import numpy as np
+
+class TestComputeVJPTensordot:
+    """Tests for the numeric computation of VJPs' Tensordots"""
+
+    @pytest.fixture
+    def dev(self):
+        return qml.device("lightning.qubit", wires=2)
+
+    def test_computation(self, dev):
+        """Test that the correct VJP is returned"""
+        dy = np.array([[1.0, 2.0], [3.0, 4.0]])
+        jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
+
+        vjp = dev._compute_vjp_tensordot(dy, jac)
+
+        assert vjp.shape == (3,)
+        assert np.all(vjp == np.tensordot(dy, jac, axes=[[0, 1], [0, 1]]))
+
+    def test_jacobian_is_none(self, dev):
+        """A None Jacobian returns a None VJP"""
+
+        dy = np.array([[1.0, 2.0], [3.0, 4.0]])
+        jac = None
+
+        vjp = dev._compute_vjp_tensordot(dy, jac)
+        assert vjp is None
+
+    def test_zero_dy(self, dev):
+        """A zero dy vector will return a zero matrix"""
+        dy = np.zeros([2, 2])
+        jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
+
+        vjp = dev._compute_vjp_tensordot(dy, jac)
+        assert np.all(vjp == np.zeros([3]))
+
+
+class TestVectorJacobianProduct:
+    """Tests for the vector_jacobian_product function"""
+
+    @pytest.fixture
+    def dev(self):
+        return qml.device("lightning.qubit", wires=2)
+
+    def test_no_trainable_parameters(self, dev):
+        """A tape with no trainable parameters will simply return None"""
+        x = 0.4
+
+        with qml.tape.QuantumTape() as tape:
+            qml.RX(x, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape.trainable_params = {}
+        dy = np.array([1.0])
+        vjp = dev.vector_jacobian_product(tape, dy)
+
+        assert vjp is None 
+
+    def test_zero_dy(self, dev):
+        """A zero dy vector will return no tapes and a zero matrix"""
+        x = 0.4
+        y = 0.6
+
+        with qml.tape.QuantumTape() as tape:
+            qml.RX(x, wires=0)
+            qml.RX(y, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape.trainable_params = {0, 1}
+        dy = np.array([0.0])
+        vjp = dev.vector_jacobian_product(tape, dy)
+
+        assert np.all(vjp == np.zeros([len(tape.trainable_params)]))
+
+    def test_single_expectation_value(self, tol, dev):
+        """Tests correct output shape and evaluation for a tape
+        with a single expval output"""
+        x = 0.543
+        y = -0.654
+
+        with qml.tape.JacobianTape() as tape:
+            qml.RX(x, wires=[0])
+            qml.RY(y, wires=[1])
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0) @ qml.PauliX(1))
+
+        tape.trainable_params = {0, 1}
+        dy = np.array([1.0])
+
+        vjp = dev.vector_jacobian_product(tape, dy)
+
+        expected = np.array([-np.sin(y) * np.sin(x), np.cos(y) * np.cos(x)])
+        assert np.allclose(vjp, expected, atol=tol, rtol=0)
+
+    def test_multiple_expectation_values(self, tol, dev):
+        """Tests correct output shape and evaluation for a tape
+        with multiple expval outputs"""
+        x = 0.543
+        y = -0.654
+
+        with qml.tape.JacobianTape() as tape:
+            qml.RX(x, wires=[0])
+            qml.RY(y, wires=[1])
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+            qml.expval(qml.PauliX(1))
+
+        tape.trainable_params = {0, 1}
+        dy = np.array([1.0, 2.0])
+
+        vjp = dev.vector_jacobian_product(tape, dy)
+
+        expected = np.array([-np.sin(x), 2 * np.cos(y)])
+        assert np.allclose(vjp, expected, atol=tol, rtol=0)
+
+    def test_prob_expectation_values(self, tol, dev):
+        """Tests correct output shape and evaluation for a tape
+        with prob and expval outputs"""
+        x = 0.543
+        y = -0.654
+
+        with qml.tape.JacobianTape() as tape:
+            qml.RX(x, wires=[0])
+            qml.RY(y, wires=[1])
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+            qml.probs(wires=[0, 1])
+
+        tape.trainable_params = {0, 1}
+        dy = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+
+        with pytest.raises(qml.QuantumFunctionError, match="Adjoint differentiation method does"):
+            dev.vector_jacobian_product(tape, dy)
+
+class TestBatchVectorJacobianProduct:
+    """Tests for the batch_vector_jacobian_product function"""
+
+    @pytest.fixture
+    def dev(self):
+        return qml.device("lightning.qubit", wires=2)
+
+    def test_one_tape_no_trainable_parameters(self, dev):
+        """A tape with no trainable parameters will simply return None"""
+
+        with qml.tape.QuantumTape() as tape1:
+            qml.RX(0.4, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        with qml.tape.JacobianTape() as tape2:
+            qml.RX(0.4, wires=0)
+            qml.RX(0.6, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape1.trainable_params = {}
+        tape2.trainable_params = {0, 1}
+
+        tapes = [tape1, tape2]
+        dys = [np.array([1.0]), np.array([1.0])]
+
+        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+
+        assert vjps[0] is None
+        assert vjps[1] is not None
+
+    def test_all_tapes_no_trainable_parameters(self, dev):
+        """If all tapes have no trainable parameters all outputs will be None"""
+
+        with qml.tape.QuantumTape() as tape1:
+            qml.RX(0.4, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        with qml.tape.QuantumTape() as tape2:
+            qml.RX(0.4, wires=0)
+            qml.RX(0.6, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape1.trainable_params = set()
+        tape2.trainable_params = set()
+
+        tapes = [tape1, tape2]
+        dys = [np.array([1.0]), np.array([1.0])]
+
+        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+
+        assert vjps[0] is None
+        assert vjps[1] is None 
+
+    def test_zero_dy(self, dev):
+        """A zero dy vector will return no tapes and a zero matrix"""
+
+        with qml.tape.QuantumTape() as tape1:
+            qml.RX(0.4, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        with qml.tape.JacobianTape() as tape2:
+            qml.RX(0.4, wires=0)
+            qml.RX(0.6, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape1.trainable_params = {0}
+        tape2.trainable_params = {0, 1}
+
+        tapes = [tape1, tape2]
+        dys = [np.array([0.0]), np.array([1.0])]
+
+        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+
+        assert np.allclose(vjps[0], 0)
+
+
+    def test_reduction_append(self, dev):
+        """Test the 'append' reduction strategy"""
+
+        with qml.tape.JacobianTape() as tape1:
+            qml.RX(0.4, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        with qml.tape.JacobianTape() as tape2:
+            qml.RX(0.4, wires=0)
+            qml.RX(0.6, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape1.trainable_params = {0}
+        tape2.trainable_params = {0, 1}
+
+        tapes = [tape1, tape2]
+        dys = [np.array([1.0]), np.array([1.0])]
+
+        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+
+        assert len(vjps) == 2
+        assert all(isinstance(v, np.ndarray) for v in vjps)
+        assert all(len(v) == len(t.trainable_params) for t, v in zip(tapes, vjps))
+
+    def test_reduction_extend(self, dev):
+        """Test the 'extend' reduction strategy"""
+
+        with qml.tape.JacobianTape() as tape1:
+            qml.RX(0.4, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        with qml.tape.JacobianTape() as tape2:
+            qml.RX(0.4, wires=0)
+            qml.RX(0.6, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape1.trainable_params = {0}
+        tape2.trainable_params = {0, 1}
+
+        tapes = [tape1, tape2]
+        dys = [np.array([1.0]), np.array([1.0])]
+
+        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+
+        assert sum(len(t) for t in vjps) == sum(len(t.trainable_params) for t in tapes)
\ No newline at end of file

From 77fc34aedb3009a0f54e54ff55264478839552fb Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Wed, 24 Nov 2021 20:17:35 -0500
Subject: [PATCH 03/27] Update formatting

---
 pennylane_lightning/lightning_qubit.py      | 27 +++++++++++++++------
 pennylane_lightning/src/tests/Test_Util.cpp | 22 ++++++-----------
 pennylane_lightning/src/util/Util.hpp       | 23 ++++++++----------
 tests/test_vector_jacobian_product.py       |  9 ++++---
 4 files changed, 41 insertions(+), 40 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 4f317427d2..fe7e1cf5f6 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -241,10 +241,12 @@ def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False):
     def _compute_vjp_tensordot(self, dy, jac, num=None):
         if jac is None:
             return None
-        
+
         dy_reshaped = math.reshape(dy, [-1])
         num = math.shape(dy_reshaped)[0] if num is None else num
-        jac = math.convert_like(jac, dy_reshaped) if not isinstance(dy_reshaped, np.ndarray) else jac
+        jac = (
+            math.convert_like(jac, dy_reshaped) if not isinstance(dy_reshaped, np.ndarray) else jac
+        )
         jac = math.reshape(jac, [num, -1])
 
         try:
@@ -255,7 +257,9 @@ def _compute_vjp_tensordot(self, dy, jac, num=None):
 
         return math.tensordot(jac, dy_reshaped, [[0], [0]])
 
-    def vector_jacobian_product(self, tape, dy, num=None, starting_state=None, use_device_state=False):
+    def vector_jacobian_product(
+        self, tape, dy, num=None, starting_state=None, use_device_state=False
+    ):
         """Generate the the vector-Jacobian products of a tape.
         
         Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
@@ -298,7 +302,7 @@ def vector_jacobian_product(self, tape, dy, num=None, starting_state=None, use_d
             # The tape has no trainable parameters; the VJP
             # is simply none.
             return None
-        
+
         try:
             # If the dy vector is zero, then the
             # corresponding element of the VJP will be zero,
@@ -307,12 +311,16 @@ def vector_jacobian_product(self, tape, dy, num=None, starting_state=None, use_d
                 return math.convert_like(np.zeros([num_params]), dy)
         except (AttributeError, TypeError):
             pass
-        
-        jac = self.adjoint_jacobian(tape, starting_state=starting_state, use_device_state=use_device_state)
+
+        jac = self.adjoint_jacobian(
+            tape, starting_state=starting_state, use_device_state=use_device_state
+        )
 
         return self._compute_vjp_tensordot(dy, jac, num=num)
 
-    def batch_vector_jacobian_product(self, tapes, dys, num=None, reduction="append", starting_state=None, use_device_state=False):
+    def batch_vector_jacobian_product(
+        self, tapes, dys, num=None, reduction="append", starting_state=None, use_device_state=False
+    ):
         """Generate the the vector-Jacobian products of a batch of tapes.
         
         Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
@@ -359,7 +367,9 @@ def batch_vector_jacobian_product(self, tapes, dys, num=None, reduction="append"
 
         # Loop through the tapes and dys vector
         for tape, dy in zip(tapes, dys):
-            vjp = self.vector_jacobian_product(tape, dy, num=num, starting_state=starting_state, use_device_state=use_device_state)
+            vjp = self.vector_jacobian_product(
+                tape, dy, num=num, starting_state=starting_state, use_device_state=use_device_state
+            )
             if vjp is None:
                 if reduction == "append":
                     vjps.append(None)
@@ -371,6 +381,7 @@ def batch_vector_jacobian_product(self, tapes, dys, num=None, reduction="append"
 
         return vjps
 
+
 if not CPP_BINARY_AVAILABLE:
 
     class LightningQubit(DefaultQubit):
diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp
index 9b29ab52e4..dce88d8930 100644
--- a/pennylane_lightning/src/tests/Test_Util.cpp
+++ b/pennylane_lightning/src/tests/Test_Util.cpp
@@ -202,8 +202,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util]", float, double) {
             for (size_t m = 2; m < 8; m++) {
                 std::vector<double> mat(m * m, 1);
                 std::vector<double> v_in(m, 1);
-                std::vector<double> v_expected(
-                    m, m);
+                std::vector<double> v_expected(m, m);
                 std::vector<double> v_out =
                     Util::vecMatrixProd(v_in, mat, m, m);
                 CAPTURE(v_out);
@@ -217,8 +216,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util]", float, double) {
             for (size_t m = 2; m < 8; m++) {
                 std::vector<double> mat(m * m, 1);
                 std::vector<double> v_in(m, 0);
-                std::vector<double> v_expected(
-                    m, 0);
+                std::vector<double> v_expected(m, 0);
                 std::vector<double> v_out =
                     Util::vecMatrixProd(v_in, mat, m, m);
                 CAPTURE(v_out);
@@ -229,17 +227,11 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util]", float, double) {
             }
         }
         SECTION("Random Matrix") {
-            std::vector<float> v_in{
-                1.0, 2.0, 3.0, 4.0};
-            std::vector<float> mat{
-                1.0 ,  0.1,  0.2,
-                0.2,  0.6,  0.1,
-                0.4, -0.7,  1.2,
-                -0.5, -0.6,  0.7};
-            std::vector<float> v_expected{
-                0.6, -3.2,  6.8};
-            std::vector<float> v_out = 
-                Util::vecMatrixProd(v_in, mat, 4, 3);
+            std::vector<float> v_in{1.0, 2.0, 3.0, 4.0};
+            std::vector<float> mat{1.0, 0.1,  0.2, 0.2,  0.6,  0.1,
+                                   0.4, -0.7, 1.2, -0.5, -0.6, 0.7};
+            std::vector<float> v_expected{0.6, -3.2, 6.8};
+            std::vector<float> v_out = Util::vecMatrixProd(v_in, mat, 4, 3);
             CAPTURE(v_out);
             CAPTURE(v_expected);
             for (size_t i = 0; i < 3; i++) {
diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index 3f6d496476..2509c3a859 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -507,8 +507,7 @@ inline auto matrixVecProd(const std::vector<std::complex<T>> mat,
  * using blacking and Cache-optimized techniques.
  */
 template <class T, size_t BLOCKSIZE = 32> // NOLINT(readability-magic-numbers)
-inline static void CFTranspose(const T *mat,
-                               T *mat_t, size_t m, size_t n,
+inline static void CFTranspose(const T *mat, T *mat_t, size_t m, size_t n,
                                size_t m1, size_t m2, size_t n1, size_t n2) {
     size_t r;
     size_t s;
@@ -552,19 +551,19 @@ inline static void CFTranspose(const T *mat,
  * @param n Number of columns of `mat`.
  */
 template <class T>
-inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, 
-                        size_t m, size_t n) {
+inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t m,
+                          size_t n) {
     if (!v_out) {
         return;
     }
 
     // v_in m * 1
     // mat m * n
-    // return  mat'[n*m] * v_in[m*1]  
+    // return  mat'[n*m] * v_in[m*1]
     // v_out n * 1
     size_t i;
     size_t j;
-    
+
     T z = static_cast<T>(0.0);
     bool allzero = true;
     for (j = 0; j < m; j++) {
@@ -578,12 +577,12 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out,
         return;
     }
 
-    T *mat_t = new T[m*n];
+    T *mat_t = new T[m * n];
     CFTranspose(mat, mat_t, m, n, 0, m, 0, n);
 
     for (i = 0; i < n; i++) {
         for (j = 0; j < m; j++) {
-            v_out[i] += mat_t[i*m+j] * v_in[j];
+            v_out[i] += mat_t[i * m + j] * v_in[j];
         }
     }
 
@@ -593,14 +592,12 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out,
 /**
  * @brief Calculates the vactor-matrix product using the best available method.
  *
- * @see template <class T> inline void vecMatrixProd(const T *v_in, 
+ * @see template <class T> inline void vecMatrixProd(const T *v_in,
  * const T *mat, T *v_out, size_t m, size_t n)
  */
 template <class T>
-inline auto vecMatrixProd(const std::vector<T> v_in,
-                        const std::vector<T> mat, 
-                        size_t m, size_t n)
-    -> std::vector<T> {
+inline auto vecMatrixProd(const std::vector<T> v_in, const std::vector<T> mat,
+                          size_t m, size_t n) -> std::vector<T> {
     if (v_in.size() != m) {
         throw std::invalid_argument("Invalid size for the input vector");
     }
diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
index 6468f6facb..f668556fd5 100644
--- a/tests/test_vector_jacobian_product.py
+++ b/tests/test_vector_jacobian_product.py
@@ -19,6 +19,7 @@
 import pennylane as qml
 from pennylane import numpy as np
 
+
 class TestComputeVJPTensordot:
     """Tests for the numeric computation of VJPs' Tensordots"""
 
@@ -74,7 +75,7 @@ def test_no_trainable_parameters(self, dev):
         dy = np.array([1.0])
         vjp = dev.vector_jacobian_product(tape, dy)
 
-        assert vjp is None 
+        assert vjp is None
 
     def test_zero_dy(self, dev):
         """A zero dy vector will return no tapes and a zero matrix"""
@@ -153,6 +154,7 @@ def test_prob_expectation_values(self, tol, dev):
         with pytest.raises(qml.QuantumFunctionError, match="Adjoint differentiation method does"):
             dev.vector_jacobian_product(tape, dy)
 
+
 class TestBatchVectorJacobianProduct:
     """Tests for the batch_vector_jacobian_product function"""
 
@@ -208,7 +210,7 @@ def test_all_tapes_no_trainable_parameters(self, dev):
         vjps = dev.batch_vector_jacobian_product(tapes, dys)
 
         assert vjps[0] is None
-        assert vjps[1] is None 
+        assert vjps[1] is None
 
     def test_zero_dy(self, dev):
         """A zero dy vector will return no tapes and a zero matrix"""
@@ -234,7 +236,6 @@ def test_zero_dy(self, dev):
 
         assert np.allclose(vjps[0], 0)
 
-
     def test_reduction_append(self, dev):
         """Test the 'append' reduction strategy"""
 
@@ -283,4 +284,4 @@ def test_reduction_extend(self, dev):
 
         vjps = dev.batch_vector_jacobian_product(tapes, dys)
 
-        assert sum(len(t) for t in vjps) == sum(len(t.trainable_params) for t in tapes)
\ No newline at end of file
+        assert sum(len(t) for t in vjps) == sum(len(t.trainable_params) for t in tapes)

From ac39aaae04d8c9bbd09bcb95401fd024aaf8b3b3 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 01:52:04 -0500
Subject: [PATCH 04/27] Add class VectorJacobianProduct

---
 .../src/algorithms/AdjointDiff.cpp            |  3 +
 .../src/algorithms/AdjointDiff.hpp            | 90 ++++++++++++++++++-
 pennylane_lightning/src/util/Util.hpp         |  4 -
 3 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.cpp b/pennylane_lightning/src/algorithms/AdjointDiff.cpp
index 4b2b1ec8ad..3b2bd7d925 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.cpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.cpp
@@ -18,6 +18,9 @@
 template class Pennylane::Algorithms::AdjointJacobian<float>;
 template class Pennylane::Algorithms::AdjointJacobian<double>;
 
+template class Pennylane::Algorithms::VectorJacobianProduct<float>;
+template class Pennylane::Algorithms::VectorJacobianProduct<double>;
+
 template class Pennylane::Algorithms::ObsDatum<float>;
 template class Pennylane::Algorithms::ObsDatum<double>;
 template class Pennylane::Algorithms::ObsDatum<std::complex<float>>;
diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index ffffa12f55..88f6d9c4c6 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -762,7 +762,95 @@ template <class T = double> class AdjointJacobian {
             }
         }
     }
-};
+}; // class AdjointJacobian
+
+/**
+ * @brief Represent the class to compute the vector-Jacobian products
+ * following the implementation in Pennylane.
+ *
+ * @tparam T Floating-point precision.
+ */
+template <class T = double> class VectorJacobianProduct {
+  private:
+    void getRowMajor(std::vector<T> &res,
+                     const std::vector<std::vector<T>> &jac, size_t len = 0) {
+        if (!jac.size()) {
+            return;
+        }
+
+        const size_t r_len = jac.size();
+        const size_t c_len = jac.front().size();
+        const size_t t_len = len ? len : r_len * c_len;
+
+        if (res.size() != t_len) {
+            res.resize(t_len);
+        }
+
+        size_t k = 0;
+        for (size_t i = 0; i < r_len; i++) {
+            for (size_t j = 0; j < c_len; j++) {
+                res[k] = jac[i][j];
+                k++;
+            }
+        }
+    }
+
+  public:
+    VectorJacobianProduct() = default;
+
+    /**
+     * @brief Computes the vector-Jacobian product for a given vector of
+     * gradient outputs and a Jacobian.
+     *
+     * @param jac Jacobian matrix from `AdjointJacobian`.
+     * @param dy Gradient-output vector.
+     */
+    auto tensorDot(const std::vector<std::vector<T>> &jac,
+                   const std::vector<T> &dy) -> std::vector<T> {
+
+        const size_t r_len = jac.size();
+        const size_t c_len = jac.front().size();
+        const size_t t_len = r_len * c_len;
+
+        std::vector<T> jac_row(t_len);
+        getRowMajor(jac_row, jac, t_len);
+
+        return Util::vecMatrixProd(dy, jac_row, r_len, c_len);
+    }
+
+    /**
+     * @brief Calculates the VectorJacobianProduct for the statevector
+     * for the selected set of parametric gates using `AdjointJacobian`.
+     *
+     * @param vjp Preallocated vector for vector-jacobian product data results.
+     * @param psi Pointer to the statevector data.
+     * @param num_elements Length of the statevector data.
+     * @param dy Gradient-output vector.
+     * @param observables Observables for which to calculate Jacobian.
+     * @param operations Operations used to create given state.
+     * @param trainableParams List of parameters participating in Jacobian
+     * calculation.
+     * @param apply_operations Indicate whether to apply operations to psi prior
+     * to calculation.
+     */
+    void vectorJacobianProduct(std::vector<T> &vjp, const std::complex<T> *psi,
+                               size_t num_elements, const std::vector<T> &dy,
+                               const std::vector<ObsDatum<T>> &observables,
+                               const OpsData<T> &operations,
+                               const std::vector<size_t> &trainableParams,
+                               bool apply_operations = false) {
+        if (!vjp.size() || !dy.size()) {
+            return;
+        }
+
+        AdjointJacobian<T> adj;
+        std::vector<std::vector<T>> jac(dy.size());
+        adj.adjointJacobian(psi, num_elements, jac, observables, operations,
+                            trainableParams, apply_operations);
+        vjp = tensorDot(jac, dy);
+    }
+
+}; // class VectorJacobianProduct
 
 } // namespace Algorithms
 } // namespace Pennylane
\ No newline at end of file
diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index 2509c3a859..a43b436811 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -557,10 +557,6 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t m,
         return;
     }
 
-    // v_in m * 1
-    // mat m * n
-    // return  mat'[n*m] * v_in[m*1]
-    // v_out n * 1
     size_t i;
     size_t j;
 

From 1210aac0336d488f5cceaf1f84705925bad120d8 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 01:56:57 -0500
Subject: [PATCH 05/27] Update formatting

---
 pennylane_lightning/src/algorithms/AdjointDiff.hpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index 88f6d9c4c6..3487a91ad9 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -807,7 +807,6 @@ template <class T = double> class VectorJacobianProduct {
      */
     auto tensorDot(const std::vector<std::vector<T>> &jac,
                    const std::vector<T> &dy) -> std::vector<T> {
-
         const size_t r_len = jac.size();
         const size_t c_len = jac.front().size();
         const size_t t_len = r_len * c_len;
@@ -849,7 +848,6 @@ template <class T = double> class VectorJacobianProduct {
                             trainableParams, apply_operations);
         vjp = tensorDot(jac, dy);
     }
-
 }; // class VectorJacobianProduct
 
 } // namespace Algorithms

From 90ea7ede61865d394db5e4419ac2b10a86798d6b Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 02:04:23 -0500
Subject: [PATCH 06/27] Update C++ class

---
 pennylane_lightning/src/algorithms/AdjointDiff.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index 3487a91ad9..a2bba1eabd 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -773,14 +773,14 @@ template <class T = double> class AdjointJacobian {
 template <class T = double> class VectorJacobianProduct {
   private:
     void getRowMajor(std::vector<T> &res,
-                     const std::vector<std::vector<T>> &jac, size_t len = 0) {
-        if (!jac.size()) {
+                     const std::vector<std::vector<T>> &jac, size_t len = 0u) {
+        if (jac.empty()) {
             return;
         }
 
         const size_t r_len = jac.size();
         const size_t c_len = jac.front().size();
-        const size_t t_len = len ? len : r_len * c_len;
+        const size_t t_len = len == 0u ? len : r_len * c_len;
 
         if (res.size() != t_len) {
             res.resize(t_len);
@@ -838,7 +838,7 @@ template <class T = double> class VectorJacobianProduct {
                                const OpsData<T> &operations,
                                const std::vector<size_t> &trainableParams,
                                bool apply_operations = false) {
-        if (!vjp.size() || !dy.size()) {
+        if (vjp.empty() || dy.empty()) {
             return;
         }
 

From 94ced72c00016654b17ef2b14c6cbe0d168b2f67 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 02:26:11 -0500
Subject: [PATCH 07/27] Update clang-tidy

---
 pennylane_lightning/src/algorithms/AdjointDiff.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index a2bba1eabd..947d5088a4 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -773,14 +773,14 @@ template <class T = double> class AdjointJacobian {
 template <class T = double> class VectorJacobianProduct {
   private:
     void getRowMajor(std::vector<T> &res,
-                     const std::vector<std::vector<T>> &jac, size_t len = 0u) {
+                     const std::vector<std::vector<T>> &jac, size_t len = 0U) {
         if (jac.empty()) {
             return;
         }
 
         const size_t r_len = jac.size();
         const size_t c_len = jac.front().size();
-        const size_t t_len = len == 0u ? len : r_len * c_len;
+        const size_t t_len = len != 0U ? len : r_len * c_len;
 
         if (res.size() != t_len) {
             res.resize(t_len);

From b219561d92cc65a09c9cb555d8d79cefaf057263 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 13:09:30 -0500
Subject: [PATCH 08/27] Update VectorJacobianProduct public methods

---
 .../src/algorithms/AdjointDiff.hpp            | 46 ++++++++++++++++---
 pennylane_lightning/src/util/Util.hpp         | 32 +++++++++++--
 2 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index 947d5088a4..e8ecf58644 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -772,6 +772,15 @@ template <class T = double> class AdjointJacobian {
  */
 template <class T = double> class VectorJacobianProduct {
   private:
+    /**
+     * @brief Computes the vector-Jacobian product for a given vector of
+     * gradient outputs and a Jacobian.
+     *
+     * @param res Preallocated vector for row-major ordered `jac` matrix
+     * representation.
+     * @param jac Jacobian matrix from `AdjointJacobian`.
+     * @param len Total allocation size of `jac`.
+     */
     void getRowMajor(std::vector<T> &res,
                      const std::vector<std::vector<T>> &jac, size_t len = 0U) {
         if (jac.empty()) {
@@ -802,11 +811,17 @@ template <class T = double> class VectorJacobianProduct {
      * @brief Computes the vector-Jacobian product for a given vector of
      * gradient outputs and a Jacobian.
      *
+     * @param vjp Preallocated vector for vector-jacobian product data results.
      * @param jac Jacobian matrix from `AdjointJacobian`.
      * @param dy Gradient-output vector.
      */
-    auto tensorDot(const std::vector<std::vector<T>> &jac,
-                   const std::vector<T> &dy) -> std::vector<T> {
+    void tensorDot(std::vector<T> &vjp, const std::vector<std::vector<T>> &jac,
+                   const std::vector<T> &dy) {
+        if (jac.empty() || dy.empty()) {
+            vjp.clear();
+            return;
+        }
+
         const size_t r_len = jac.size();
         const size_t c_len = jac.front().size();
         const size_t t_len = r_len * c_len;
@@ -814,7 +829,7 @@ template <class T = double> class VectorJacobianProduct {
         std::vector<T> jac_row(t_len);
         getRowMajor(jac_row, jac, t_len);
 
-        return Util::vecMatrixProd(dy, jac_row, r_len, c_len);
+        Util::vecMatrixProd(vjp, dy, jac_row, r_len, c_len);
     }
 
     /**
@@ -838,15 +853,34 @@ template <class T = double> class VectorJacobianProduct {
                                const OpsData<T> &operations,
                                const std::vector<size_t> &trainableParams,
                                bool apply_operations = false) {
-        if (vjp.empty() || dy.empty()) {
+        size_t num_params = trainableParams.size();
+        size_t num_obs = observables.size();
+
+        if (dy.size() != num_obs) {
+            throw std::invalid_argument(
+                "Invalid size for the gradient-output vector");
+        }
+        if (num_params == 0U) {
+            vjp.clear();
             return;
         }
+        if (vjp.size() != num_params) {
+            vjp.resize(num_params);
+        }
+
+        const bool allzero =
+            std::all_of(dy.cbegin(), dy.cend(), [](T e) { return e == 0; });
+        if (allzero) {
+            return;
+        }
+
+        std::vector<std::vector<T>> jac(num_obs, std::vector<T>(num_params, 0));
 
         AdjointJacobian<T> adj;
-        std::vector<std::vector<T>> jac(dy.size());
         adj.adjointJacobian(psi, num_elements, jac, observables, operations,
                             trainableParams, apply_operations);
-        vjp = tensorDot(jac, dy);
+
+        tensorDot(vjp, jac, dy);
     }
 }; // class VectorJacobianProduct
 
diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index a43b436811..11ccaf51b9 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -546,7 +546,7 @@ inline static void CFTranspose(const T *mat, T *mat_t, size_t m, size_t n,
  * @param v_in Data array repr. a vector of shape m * 1.
  * @param mat Data array repr. a flatten (row-wise) matrix m * n.
  * @param v_out Pre-allocated data array to store the result that is
- *              `mat_t \times v_in` where `mat_t` is transpose of `mat`.
+ *              `mat_t \times v_in` where `mat_t` is the transpose of `mat`.
  * @param m Number of rows of `mat`.
  * @param n Number of columns of `mat`.
  */
@@ -560,7 +560,7 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t m,
     size_t i;
     size_t j;
 
-    T z = static_cast<T>(0.0);
+    constexpr T z = static_cast<T>(0.0);
     bool allzero = true;
     for (j = 0; j < m; j++) {
         if (v_in[j] != z) {
@@ -588,11 +588,11 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t m,
 /**
  * @brief Calculates the vactor-matrix product using the best available method.
  *
- * @see template <class T> inline void vecMatrixProd(const T *v_in,
+ * @see inline void vecMatrixProd(const T *v_in,
  * const T *mat, T *v_out, size_t m, size_t n)
  */
 template <class T>
-inline auto vecMatrixProd(const std::vector<T> v_in, const std::vector<T> mat,
+inline auto vecMatrixProd(const std::vector<T> &v_in, const std::vector<T> &mat,
                           size_t m, size_t n) -> std::vector<T> {
     if (v_in.size() != m) {
         throw std::invalid_argument("Invalid size for the input vector");
@@ -603,9 +603,33 @@ inline auto vecMatrixProd(const std::vector<T> v_in, const std::vector<T> mat,
 
     std::vector<T> v_out(n);
     vecMatrixProd(v_in.data(), mat.data(), v_out.data(), m, n);
+
     return v_out;
 }
 
+/**
+ * @brief Calculates the vector-matrix product using the best available method.
+ *
+ * @see inline void vecMatrixProd(const T *v_in,
+ * const T *mat, T *v_out, size_t m, size_t n)
+ */
+template <class T>
+inline void vecMatrixProd(std::vector<T> &v_out, const std::vector<T> &v_in,
+                          const std::vector<T> &mat, size_t m, size_t n) {
+    if (mat.size() != m * n) {
+        throw std::invalid_argument("Invalid m & n for the input matrix");
+    }
+    if (v_in.size() != m) {
+        throw std::invalid_argument("Invalid size for the input vector");
+    }
+    if (v_out.size() != n) {
+        throw std::invalid_argument(
+            "Invalid pre-allocated size for the result");
+    }
+
+    vecMatrixProd(v_in.data(), mat.data(), v_out.data(), m, n);
+}
+
 /**
  * @brief Calculates transpose of a matrix recursively and Cache-Friendly
  * using blacking and Cache-optimized techniques.

From 9c163b8454bec532e844086fca8da016e40e8b7d Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 13:31:33 -0500
Subject: [PATCH 09/27] Add Test_VJP.cpp

---
 pennylane_lightning/src/tests/CMakeLists.txt  |  5 +-
 .../src/tests/Test_VectorJacobianProduct.cpp  | 64 +++++++++++++++++++
 2 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp

diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt
index b343c589ec..1bab182e5d 100644
--- a/pennylane_lightning/src/tests/CMakeLists.txt
+++ b/pennylane_lightning/src/tests/CMakeLists.txt
@@ -32,13 +32,14 @@ include(Catch)
 add_executable(runner runner_main.cpp)
 target_link_libraries(runner lightning_simulator lightning_utils lightning_algorithms Catch2::Catch2)
 
-target_sources(runner PRIVATE   Test_AdjDiff.cpp
+target_sources(runner PRIVATE   Test_Util.cpp
                                 Test_Bindings.cpp
+                                Test_AdjDiff.cpp
+                                Test_VectorJacobianProduct.cpp
                                 Test_StateVector_Nonparam.cpp 
                                 Test_StateVector_Param.cpp 
                                 Test_StateVectorManaged_Nonparam.cpp 
                                 Test_StateVectorManaged_Param.cpp 
-                                Test_Util.cpp
 )
 
 target_compile_options(runner PRIVATE "$<$<CONFIG:DEBUG>:-Wall>")
diff --git a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
new file mode 100644
index 0000000000..385f54898c
--- /dev/null
+++ b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
@@ -0,0 +1,64 @@
+#define _USE_MATH_DEFINES
+
+#include <algorithm>
+#include <cmath>
+#include <complex>
+#include <iostream>
+#include <limits>
+#include <type_traits>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include <catch2/catch.hpp>
+
+#include "AdjointDiff.hpp"
+#include "StateVector.hpp"
+#include "Util.hpp"
+
+#include "TestHelpers.hpp"
+
+using namespace Pennylane;
+using namespace Pennylane::Algorithms;
+
+/**
+ * @brief Tests the constructability of the AdjointDiff.hpp classes.
+ *
+ */
+TEMPLATE_TEST_CASE("VectorJacobianProduct::VectorJacobianProduct",
+                   "[VectorJacobianProduct]", float, double) {
+    SECTION("VectorJacobianProduct") {
+        REQUIRE(std::is_constructible<VectorJacobianProduct<>>::value);
+    }
+    SECTION("VectorJacobianProduct<TestType> {}") {
+        REQUIRE(std::is_constructible<VectorJacobianProduct<TestType>>::value);
+    }
+}
+
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z",
+          "[VectorJacobianProduct]") {
+    AdjointJacobian<double> adj;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+
+    {
+        const size_t num_qubits = 1;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+        auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+
+        for (const auto &p : param) {
+            auto ops = adj.createOpsData({"RX"}, {{p}}, {{0}}, {false});
+
+            std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+            cdata[0] = std::complex<double>{1, 0};
+
+            StateVector<double> psi(cdata.data(), cdata.size());
+            adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
+                                ops, {0}, true);
+            CAPTURE(jacobian);
+            CHECK(-sin(p) == Approx(jacobian[0].front()));
+        }
+    }
+}
\ No newline at end of file

From 516d306bdbb236f7e422cdc5baa8eaaaa6dccd93 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 19:34:39 -0500
Subject: [PATCH 10/27] Update cpp tests

---
 .../src/algorithms/AdjointDiff.hpp            |  87 +++--
 pennylane_lightning/src/tests/CMakeLists.txt  |   2 +-
 .../src/tests/Test_VectorJacobianProduct.cpp  | 346 +++++++++++++++++-
 pennylane_lightning/src/util/Util.hpp         |   3 +-
 4 files changed, 400 insertions(+), 38 deletions(-)

diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index e8ecf58644..032c058bce 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -770,7 +770,8 @@ template <class T = double> class AdjointJacobian {
  *
  * @tparam T Floating-point precision.
  */
-template <class T = double> class VectorJacobianProduct {
+template <class T = double>
+class VectorJacobianProduct : public AdjointJacobian<T> {
   private:
     /**
      * @brief Computes the vector-Jacobian product for a given vector of
@@ -813,30 +814,36 @@ template <class T = double> class VectorJacobianProduct {
      *
      * @param vjp Preallocated vector for vector-jacobian product data results.
      * @param jac Jacobian matrix from `AdjointJacobian`.
-     * @param dy Gradient-output vector.
+     * @param dy_row Gradient-output vector.
      */
     void tensorDot(std::vector<T> &vjp, const std::vector<std::vector<T>> &jac,
-                   const std::vector<T> &dy) {
-        if (jac.empty() || dy.empty()) {
+                   const std::vector<T> &dy_row) {
+        if (jac.empty() || dy_row.empty()) {
             vjp.clear();
             return;
         }
 
-        const size_t r_len = jac.size();
-        const size_t c_len = jac.front().size();
-        const size_t t_len = r_len * c_len;
+        const size_t jac_len[2]{jac.size(), jac.front().size()};
+        if (dy_row.size() != jac_len[0]) {
+            throw std::invalid_argument(
+                "Invalid size for gradient-output vector");
+        }
 
+        const size_t t_len = jac_len[0] * jac_len[1];
         std::vector<T> jac_row(t_len);
         getRowMajor(jac_row, jac, t_len);
 
-        Util::vecMatrixProd(vjp, dy, jac_row, r_len, c_len);
+        Util::vecMatrixProd(vjp, dy_row, jac_row, jac_len[0], jac_len[1]);
     }
 
     /**
      * @brief Calculates the VectorJacobianProduct for the statevector
      * for the selected set of parametric gates using `AdjointJacobian`.
      *
-     * @param vjp Preallocated vector for vector-jacobian product data results.
+     * @param vjp Preallocated vector for vector-jacobian product data results
+     * of size `trainableParams.size()`.
+     * @param jac Preallocated Jacobian matrix from `AdjointJacobian` of size
+     * `observables.size() * trainableParams.size()`.
      * @param psi Pointer to the statevector data.
      * @param num_elements Length of the statevector data.
      * @param dy Gradient-output vector.
@@ -847,40 +854,60 @@ template <class T = double> class VectorJacobianProduct {
      * @param apply_operations Indicate whether to apply operations to psi prior
      * to calculation.
      */
-    void vectorJacobianProduct(std::vector<T> &vjp, const std::complex<T> *psi,
-                               size_t num_elements, const std::vector<T> &dy,
+    void vectorJacobianProduct(std::vector<T> &vjp,
+                               std::vector<std::vector<T>> &jac,
+                               const std::vector<std::vector<T>> &dy,
+                               const std::complex<T> *psi, size_t num_elements,
                                const std::vector<ObsDatum<T>> &observables,
                                const OpsData<T> &operations,
                                const std::vector<size_t> &trainableParams,
                                bool apply_operations = false) {
-        size_t num_params = trainableParams.size();
-        size_t num_obs = observables.size();
+        const size_t num_params = trainableParams.size();
 
-        if (dy.size() != num_obs) {
-            throw std::invalid_argument(
-                "Invalid size for the gradient-output vector");
-        }
-        if (num_params == 0U) {
+        if (num_params == 0U || dy.empty()) {
             vjp.clear();
             return;
         }
-        if (vjp.size() != num_params) {
-            vjp.resize(num_params);
-        }
 
-        const bool allzero =
-            std::all_of(dy.cbegin(), dy.cend(), [](T e) { return e == 0; });
+        const size_t t_len = dy.size() * dy.front().size();
+        std::vector<T> dy_row(t_len);
+        getRowMajor(dy_row, dy, t_len);
+
+        const bool allzero = std::all_of(dy_row.cbegin(), dy_row.cend(),
+                                         [](T e) { return e == 0; });
         if (allzero) {
+            vjp.resize(num_params);
             return;
         }
 
-        std::vector<std::vector<T>> jac(num_obs, std::vector<T>(num_params, 0));
-
-        AdjointJacobian<T> adj;
-        adj.adjointJacobian(psi, num_elements, jac, observables, operations,
-                            trainableParams, apply_operations);
-
-        tensorDot(vjp, jac, dy);
+        this->adjointJacobian(psi, num_elements, jac, observables, operations,
+                              trainableParams, apply_operations);
+
+        // // -- debug -- // //
+        // const size_t num_obs = observables.size();
+        // std::cerr << "num_params: " << num_params << std::endl;
+        // std::cerr << "num_obs: " << num_obs << std::endl;
+        // std::cerr << "vjp.size(): " << vjp.size() << std::endl;
+        // std::cerr << "jac.size(): " << jac.size() << std::endl;
+        // std::cerr << "jac.front().size(): " << jac.front().size() <<
+        // std::endl; std::cerr << "dy.size(): " << dy.size() << std::endl;
+        // std::cerr << "dy.front().size(): " << dy.front().size() << std::endl;
+        // for (auto &j: jac) {
+        //     for (auto &i: j) {
+        //         std::cerr << "jac[i]: " << i << std::endl;
+        //     }
+        // }
+        // std::cerr << "------------------------------" << std::endl;
+        // // -- debug -- // //
+
+        tensorDot(vjp, jac, dy_row);
+
+        // // -- debug -- // //
+        // for (auto &j: jac) {
+        //     std::cerr << "vjp[j]: " << j << std::endl;
+        // }
+        // std::cerr << "------------------------------" << std::endl;
+        // // -- debug -- // //
     }
 }; // class VectorJacobianProduct
 
diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt
index 1bab182e5d..fda5ca67be 100644
--- a/pennylane_lightning/src/tests/CMakeLists.txt
+++ b/pennylane_lightning/src/tests/CMakeLists.txt
@@ -33,13 +33,13 @@ add_executable(runner runner_main.cpp)
 target_link_libraries(runner lightning_simulator lightning_utils lightning_algorithms Catch2::Catch2)
 
 target_sources(runner PRIVATE   Test_Util.cpp
-                                Test_Bindings.cpp
                                 Test_AdjDiff.cpp
                                 Test_VectorJacobianProduct.cpp
                                 Test_StateVector_Nonparam.cpp 
                                 Test_StateVector_Param.cpp 
                                 Test_StateVectorManaged_Nonparam.cpp 
                                 Test_StateVectorManaged_Param.cpp 
+                                Test_Bindings.cpp
 )
 
 target_compile_options(runner PRIVATE "$<$<CONFIG:DEBUG>:-Wall>")
diff --git a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
index 385f54898c..dadae71ca6 100644
--- a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
+++ b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
@@ -37,7 +37,7 @@ TEMPLATE_TEST_CASE("VectorJacobianProduct::VectorJacobianProduct",
 
 TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z",
           "[VectorJacobianProduct]") {
-    AdjointJacobian<double> adj;
+    VectorJacobianProduct<double> VJP;
     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
 
     {
@@ -47,18 +47,354 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z",
         auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
 
         for (const auto &p : param) {
-            auto ops = adj.createOpsData({"RX"}, {{p}}, {{0}}, {false});
+            auto ops = VJP.createOpsData({"RX"}, {{p}}, {{0}}, {false});
 
             std::vector<std::complex<double>> cdata(0b1 << num_qubits);
             cdata[0] = std::complex<double>{1, 0};
 
             StateVector<double> psi(cdata.data(), cdata.size());
-            adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
-                                ops, {0}, true);
+            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                      psi.getLength(), {obs}, ops, {0}, true);
+
             CAPTURE(jacobian);
             CHECK(-sin(p) == Approx(jacobian[0].front()));
         }
     }
-}
\ No newline at end of file
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RY, Obs=X",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 1;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+
+        auto obs = ObsDatum<double>({"PauliX"}, {{}}, {{0}});
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        for (const auto &p : param) {
+            auto ops = VJP.createOpsData({"RY"}, {{p}}, {{0}}, {false});
+
+            std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+            cdata[0] = std::complex<double>{1, 0};
+
+            StateVector<double> psi(cdata.data(), cdata.size());
+            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                      psi.getLength(), {obs}, ops, {0}, true);
+
+            CAPTURE(jacobian);
+            CHECK(cos(p) == Approx(jacobian[0].front()).margin(1e-7));
+        }
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=[Z,Z]",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 2;
+        const size_t num_params = 1;
+        const size_t num_obs = 2;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+
+        auto ops = VJP.createOpsData({"RX"}, {{param[0]}}, {{0}}, {false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs1, obs2}, ops, {0},
+                                  true);
+
+        CAPTURE(jacobian);
+        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(0.0 == Approx(jacobian[1][0]).margin(1e-7));
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z]",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 3;
+        const size_t num_obs = 3;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+        auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
+
+        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+                                     {{param[0]}, {param[1]}, {param[2]}},
+                                     {{0}, {1}, {2}}, {false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs1, obs2, obs3}, ops,
+                                  {0, 1, 2}, true);
+
+        CAPTURE(jacobian);
+        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(-sin(param[1]) == Approx(jacobian[1][1]).margin(1e-7));
+        CHECK(-sin(param[2]) == Approx(jacobian[2][2]).margin(1e-7));
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z], "
+    "TParams=[0,2]",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 3;
+        const size_t num_obs = 3;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<size_t> t_params{0, 2};
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+        auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
+
+        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+                                     {{param[0]}, {param[1]}, {param[2]}},
+                                     {{0}, {1}, {2}}, {false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs1, obs2, obs3}, ops,
+                                  t_params, true);
+
+        CAPTURE(jacobian);
+        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(0 == Approx(jacobian[1][1]).margin(1e-7));
+        CHECK(-sin(param[2]) == Approx(jacobian[2][1]).margin(1e-7));
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[ZZZ]",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs = ObsDatum<double>({"PauliZ", "PauliZ", "PauliZ"},
+                                    {{}, {}, {}}, {{0}, {1}, {2}});
+        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+                                     {{param[0]}, {param[1]}, {param[2]}},
+                                     {{0}, {1}, {2}}, {false, false, false});
+
+        // adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
+        //                     ops, {0, 1, 2}, true);
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs}, ops, {0, 1, 2}, true);
+
+        CAPTURE(jacobian);
+        CHECK(-0.1755096592645253 == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(0.26478810666384334 == Approx(jacobian[0][1]).margin(1e-7));
+        CHECK(-0.6312451595102775 == Approx(jacobian[0][2]).margin(1e-7));
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX]",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 6;
+        const size_t num_obs = 1;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs = ObsDatum<double>({"PauliX", "PauliX", "PauliX"},
+                                    {{}, {}, {}}, {{0}, {1}, {2}});
+        auto ops = VJP.createOpsData(
+            {"RZ", "RY", "RZ", "CNOT", "CNOT", "RZ", "RY", "RZ"},
+            {{param[0]},
+             {param[1]},
+             {param[2]},
+             {},
+             {},
+             {param[0]},
+             {param[1]},
+             {param[2]}},
+            {{0}, {0}, {0}, {0, 1}, {1, 2}, {1}, {1}, {1}},
+            {false, false, false, false, false, false, false, false});
+
+        // adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
+        //                     ops, {0, 1, 2, 3, 4, 5}, true);
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs}, ops,
+                                  {0, 1, 2, 3, 4, 5}, true);
+
+        CAPTURE(jacobian);
+        CHECK(0.0 == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(-0.674214427 == Approx(jacobian[0][1]).margin(1e-7));
+        CHECK(0.275139672 == Approx(jacobian[0][2]).margin(1e-7));
+        CHECK(0.275139672 == Approx(jacobian[0][3]).margin(1e-7));
+        CHECK(-0.0129093062 == Approx(jacobian[0][4]).margin(1e-7));
+        CHECK(0.323846156 == Approx(jacobian[0][5]).margin(1e-7));
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Decomposed Rot gate, non "
+    "computational basis state",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 1;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+
+        const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 7);
+        std::unordered_map<double, std::vector<double>> expec_results{
+            {thetas[0], {0, -9.90819496e-01, 0}},
+            {thetas[1], {-8.18996553e-01, 1.62526544e-01, 0}},
+            {thetas[2], {-0.203949, 0.48593716, 0}},
+            {thetas[3], {0, 1, 0}},
+            {thetas[4], {-2.03948985e-01, 4.85937177e-01, 0}},
+            {thetas[5], {-8.18996598e-01, 1.62526487e-01, 0}},
+            {thetas[6], {0, -9.90819511e-01, 0}}};
+
+        for (const auto &theta : thetas) {
+            std::vector<double> local_params{theta, std::pow(theta, 3),
+                                             SQRT2<double>() * theta};
+            std::vector<std::vector<double>> jacobian(
+                num_obs, std::vector<double>(num_params, 0));
+            std::vector<double> vjp_res(num_params);
+            std::vector<std::vector<double>> dy(
+                1, std::vector<double>(num_obs, 1.0));
+
+            std::vector<std::complex<double>> cdata{INVSQRT2<double>(),
+                                                    -INVSQRT2<double>()};
+            StateVector<double> psi(cdata.data(), cdata.size());
+
+            auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+            auto ops = VJP.createOpsData(
+                {"RZ", "RY", "RZ"},
+                {{local_params[0]}, {local_params[1]}, {local_params[2]}},
+                {{0}, {0}, {0}}, {false, false, false});
+
+            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                      psi.getLength(), {obs}, ops, {0, 1, 2},
+                                      true);
+
+            CAPTURE(theta);
+            CAPTURE(jacobian);
+            CHECK(expec_results[theta][0] ==
+                  Approx(jacobian[0][0]).margin(1e-7));
+            CHECK(expec_results[theta][1] ==
+                  Approx(jacobian[0][1]).margin(1e-7));
+            CHECK(expec_results[theta][2] ==
+                  Approx(jacobian[0][2]).margin(1e-7));
+        }
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and TParams",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 2;
+        const std::vector<size_t> t_params{1, 2, 3};
+        const size_t num_obs = 1;
+
+        const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 8);
+
+        std::vector<double> local_params{0.543, 0.54, 0.1,  0.5, 1.3,
+                                         -2.3,  0.5,  -0.5, 0.5};
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(t_params.size(), 0));
+        std::vector<double> vjp_res(t_params.size());
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata{ONE<double>(), ZERO<double>(),
+                                                ZERO<double>(), ZERO<double>()};
+        StateVector<double> psi(cdata.data(), cdata.size());
+
+        auto obs = ObsDatum<double>({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}});
+        auto ops = VJP.createOpsData(
+            {"Hadamard", "RX", "CNOT", "RZ", "RY", "RZ", "RZ", "RY", "RZ", "RZ",
+             "RY", "CNOT"},
+            {{},
+             {local_params[0]},
+             {},
+             {local_params[1]},
+             {local_params[2]},
+             {local_params[3]},
+             {local_params[4]},
+             {local_params[5]},
+             {local_params[6]},
+             {local_params[7]},
+             {local_params[8]},
+             {}},
+            {{0}, {0}, {0, 1}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {1}, {0, 1}},
+            {false, false, false, false, false, false, false, false, false,
+             false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs}, ops, t_params, true);
+
+        std::vector<double> expected{-0.71429188, 0.04998561, -0.71904837};
+        CHECK(expected[0] == Approx(jacobian[0][0]));
+        CHECK(expected[1] == Approx(jacobian[0][1]));
+        CHECK(expected[2] == Approx(jacobian[0][2]));
+    }
+}
diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index 11ccaf51b9..2b20fe8e96 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -623,8 +623,7 @@ inline void vecMatrixProd(std::vector<T> &v_out, const std::vector<T> &v_in,
         throw std::invalid_argument("Invalid size for the input vector");
     }
     if (v_out.size() != n) {
-        throw std::invalid_argument(
-            "Invalid pre-allocated size for the result");
+        throw std::invalid_argument("Invalid preallocated size for the result");
     }
 
     vecMatrixProd(v_in.data(), mat.data(), v_out.data(), m, n);

From d833c4464468f0a3156c2dd4635f8d66edc52840 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 19:35:14 -0500
Subject: [PATCH 11/27] Update cpp tests

---
 .../src/algorithms/AdjointDiff.hpp            |  46 +-
 .../src/tests/Test_VectorJacobianProduct.cpp  | 664 +++++++++---------
 2 files changed, 356 insertions(+), 354 deletions(-)

diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index 032c058bce..c40ccfd672 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -833,6 +833,7 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
         std::vector<T> jac_row(t_len);
         getRowMajor(jac_row, jac, t_len);
 
+
         Util::vecMatrixProd(vjp, dy_row, jac_row, jac_len[0], jac_len[1]);
     }
 
@@ -873,41 +874,42 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
         std::vector<T> dy_row(t_len);
         getRowMajor(dy_row, dy, t_len);
 
-        const bool allzero = std::all_of(dy_row.cbegin(), dy_row.cend(),
-                                         [](T e) { return e == 0; });
+        const bool allzero =
+            std::all_of(dy_row.cbegin(), dy_row.cend(), [](T e) { return e == 0; });
         if (allzero) {
             vjp.resize(num_params);
             return;
         }
 
         this->adjointJacobian(psi, num_elements, jac, observables, operations,
-                              trainableParams, apply_operations);
+                            trainableParams, apply_operations);
 
         // // -- debug -- // //
-        // const size_t num_obs = observables.size();
-        // std::cerr << "num_params: " << num_params << std::endl;
-        // std::cerr << "num_obs: " << num_obs << std::endl;
-        // std::cerr << "vjp.size(): " << vjp.size() << std::endl;
-        // std::cerr << "jac.size(): " << jac.size() << std::endl;
-        // std::cerr << "jac.front().size(): " << jac.front().size() <<
-        // std::endl; std::cerr << "dy.size(): " << dy.size() << std::endl;
-        // std::cerr << "dy.front().size(): " << dy.front().size() << std::endl;
-        // for (auto &j: jac) {
-        //     for (auto &i: j) {
-        //         std::cerr << "jac[i]: " << i << std::endl;
-        //     }
-        // }
-        // std::cerr << "------------------------------" << std::endl;
+        const size_t num_obs = observables.size();
+        std::cerr << "num_params: " << num_params << std::endl;
+        std::cerr << "num_obs: " << num_obs << std::endl;
+        std::cerr << "vjp.size(): " << vjp.size() << std::endl;
+        std::cerr << "jac.size(): " << jac.size() << std::endl;
+        std::cerr << "jac.front().size(): " << jac.front().size() << std::endl;
+        std::cerr << "dy.size(): " << dy.size() << std::endl;
+        std::cerr << "dy.front().size(): " << dy.front().size() << std::endl;
+        for (auto &j: jac) {
+            for (auto &i: j) {
+                std::cerr << "jac[i]: " << i << std::endl; 
+            }
+        }
+        std::cerr << "------------------------------" << std::endl;
         // // -- debug -- // //
 
         tensorDot(vjp, jac, dy_row);
 
+
+        for (auto &j: jac) {
+            std::cerr << "vjp[j]: " << j << std::endl;
+        }
+        std::cerr << "------------------------------" << std::endl;
         // // -- debug -- // //
-        // for (auto &j: jac) {
-        //     std::cerr << "vjp[j]: " << j << std::endl;
-        // }
-        // std::cerr << "------------------------------" << std::endl;
-        // // -- debug -- // //
+
     }
 }; // class VectorJacobianProduct
 
diff --git a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
index dadae71ca6..2804c47b18 100644
--- a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
+++ b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
@@ -66,335 +66,335 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z",
         }
     }
 }
-TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RY, Obs=X",
-          "[VectorJacobianProduct]") {
-    VectorJacobianProduct<double> VJP;
-    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-    {
-        const size_t num_qubits = 1;
-        const size_t num_params = 3;
-        const size_t num_obs = 1;
-
-        auto obs = ObsDatum<double>({"PauliX"}, {{}}, {{0}});
-        std::vector<std::vector<double>> jacobian(
-            num_obs, std::vector<double>(num_params, 0));
-        std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
-
-        for (const auto &p : param) {
-            auto ops = VJP.createOpsData({"RY"}, {{p}}, {{0}}, {false});
-
-            std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-            cdata[0] = std::complex<double>{1, 0};
-
-            StateVector<double> psi(cdata.data(), cdata.size());
-            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-                                      psi.getLength(), {obs}, ops, {0}, true);
-
-            CAPTURE(jacobian);
-            CHECK(cos(p) == Approx(jacobian[0].front()).margin(1e-7));
-        }
-    }
-}
-TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=[Z,Z]",
-          "[VectorJacobianProduct]") {
-    VectorJacobianProduct<double> VJP;
-    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-    {
-        const size_t num_qubits = 2;
-        const size_t num_params = 1;
-        const size_t num_obs = 2;
-        std::vector<std::vector<double>> jacobian(
-            num_obs, std::vector<double>(num_params, 0));
-        std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
-
-        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-        StateVector<double> psi(cdata.data(), cdata.size());
-        cdata[0] = std::complex<double>{1, 0};
-
-        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
-        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
-
-        auto ops = VJP.createOpsData({"RX"}, {{param[0]}}, {{0}}, {false});
-
-        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-                                  psi.getLength(), {obs1, obs2}, ops, {0},
-                                  true);
-
-        CAPTURE(jacobian);
-        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
-        CHECK(0.0 == Approx(jacobian[1][0]).margin(1e-7));
-    }
-}
-TEST_CASE(
-    "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z]",
-    "[VectorJacobianProduct]") {
-    VectorJacobianProduct<double> VJP;
-    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-    {
-        const size_t num_qubits = 3;
-        const size_t num_params = 3;
-        const size_t num_obs = 3;
-        std::vector<std::vector<double>> jacobian(
-            num_obs, std::vector<double>(num_params, 0));
-        std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
-
-        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-        StateVector<double> psi(cdata.data(), cdata.size());
-        cdata[0] = std::complex<double>{1, 0};
-
-        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
-        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
-        auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
-
-        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
-                                     {{param[0]}, {param[1]}, {param[2]}},
-                                     {{0}, {1}, {2}}, {false, false, false});
-
-        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-                                  psi.getLength(), {obs1, obs2, obs3}, ops,
-                                  {0, 1, 2}, true);
-
-        CAPTURE(jacobian);
-        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
-        CHECK(-sin(param[1]) == Approx(jacobian[1][1]).margin(1e-7));
-        CHECK(-sin(param[2]) == Approx(jacobian[2][2]).margin(1e-7));
-    }
-}
-TEST_CASE(
-    "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z], "
-    "TParams=[0,2]",
-    "[VectorJacobianProduct]") {
-    VectorJacobianProduct<double> VJP;
-    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-    {
-        const size_t num_qubits = 3;
-        const size_t num_params = 3;
-        const size_t num_obs = 3;
-        std::vector<std::vector<double>> jacobian(
-            num_obs, std::vector<double>(num_params, 0));
-        std::vector<size_t> t_params{0, 2};
-        std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
-
-        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-        StateVector<double> psi(cdata.data(), cdata.size());
-        cdata[0] = std::complex<double>{1, 0};
-
-        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
-        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
-        auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
-
-        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
-                                     {{param[0]}, {param[1]}, {param[2]}},
-                                     {{0}, {1}, {2}}, {false, false, false});
-
-        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-                                  psi.getLength(), {obs1, obs2, obs3}, ops,
-                                  t_params, true);
-
-        CAPTURE(jacobian);
-        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
-        CHECK(0 == Approx(jacobian[1][1]).margin(1e-7));
-        CHECK(-sin(param[2]) == Approx(jacobian[2][1]).margin(1e-7));
-    }
-}
-TEST_CASE(
-    "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[ZZZ]",
-    "[VectorJacobianProduct]") {
-    VectorJacobianProduct<double> VJP;
-    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-    {
-        const size_t num_qubits = 3;
-        const size_t num_params = 3;
-        const size_t num_obs = 1;
-        std::vector<std::vector<double>> jacobian(
-            num_obs, std::vector<double>(num_params, 0));
-        std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
-
-        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-        StateVector<double> psi(cdata.data(), cdata.size());
-        cdata[0] = std::complex<double>{1, 0};
-
-        auto obs = ObsDatum<double>({"PauliZ", "PauliZ", "PauliZ"},
-                                    {{}, {}, {}}, {{0}, {1}, {2}});
-        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
-                                     {{param[0]}, {param[1]}, {param[2]}},
-                                     {{0}, {1}, {2}}, {false, false, false});
-
-        // adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
-        //                     ops, {0, 1, 2}, true);
-        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-                                  psi.getLength(), {obs}, ops, {0, 1, 2}, true);
-
-        CAPTURE(jacobian);
-        CHECK(-0.1755096592645253 == Approx(jacobian[0][0]).margin(1e-7));
-        CHECK(0.26478810666384334 == Approx(jacobian[0][1]).margin(1e-7));
-        CHECK(-0.6312451595102775 == Approx(jacobian[0][2]).margin(1e-7));
-    }
-}
-TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX]",
-          "[VectorJacobianProduct]") {
-    VectorJacobianProduct<double> VJP;
-    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-    {
-        const size_t num_qubits = 3;
-        const size_t num_params = 6;
-        const size_t num_obs = 1;
-        std::vector<std::vector<double>> jacobian(
-            num_obs, std::vector<double>(num_params, 0));
-        std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
-
-        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-        StateVector<double> psi(cdata.data(), cdata.size());
-        cdata[0] = std::complex<double>{1, 0};
-
-        auto obs = ObsDatum<double>({"PauliX", "PauliX", "PauliX"},
-                                    {{}, {}, {}}, {{0}, {1}, {2}});
-        auto ops = VJP.createOpsData(
-            {"RZ", "RY", "RZ", "CNOT", "CNOT", "RZ", "RY", "RZ"},
-            {{param[0]},
-             {param[1]},
-             {param[2]},
-             {},
-             {},
-             {param[0]},
-             {param[1]},
-             {param[2]}},
-            {{0}, {0}, {0}, {0, 1}, {1, 2}, {1}, {1}, {1}},
-            {false, false, false, false, false, false, false, false});
-
-        // adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
-        //                     ops, {0, 1, 2, 3, 4, 5}, true);
-        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-                                  psi.getLength(), {obs}, ops,
-                                  {0, 1, 2, 3, 4, 5}, true);
-
-        CAPTURE(jacobian);
-        CHECK(0.0 == Approx(jacobian[0][0]).margin(1e-7));
-        CHECK(-0.674214427 == Approx(jacobian[0][1]).margin(1e-7));
-        CHECK(0.275139672 == Approx(jacobian[0][2]).margin(1e-7));
-        CHECK(0.275139672 == Approx(jacobian[0][3]).margin(1e-7));
-        CHECK(-0.0129093062 == Approx(jacobian[0][4]).margin(1e-7));
-        CHECK(0.323846156 == Approx(jacobian[0][5]).margin(1e-7));
-    }
-}
-TEST_CASE(
-    "VectorJacobianProduct::vectorJacobianProduct Decomposed Rot gate, non "
-    "computational basis state",
-    "[VectorJacobianProduct]") {
-    VectorJacobianProduct<double> VJP;
-
-    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-    {
-        const size_t num_qubits = 1;
-        const size_t num_params = 3;
-        const size_t num_obs = 1;
-
-        const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 7);
-        std::unordered_map<double, std::vector<double>> expec_results{
-            {thetas[0], {0, -9.90819496e-01, 0}},
-            {thetas[1], {-8.18996553e-01, 1.62526544e-01, 0}},
-            {thetas[2], {-0.203949, 0.48593716, 0}},
-            {thetas[3], {0, 1, 0}},
-            {thetas[4], {-2.03948985e-01, 4.85937177e-01, 0}},
-            {thetas[5], {-8.18996598e-01, 1.62526487e-01, 0}},
-            {thetas[6], {0, -9.90819511e-01, 0}}};
-
-        for (const auto &theta : thetas) {
-            std::vector<double> local_params{theta, std::pow(theta, 3),
-                                             SQRT2<double>() * theta};
-            std::vector<std::vector<double>> jacobian(
-                num_obs, std::vector<double>(num_params, 0));
-            std::vector<double> vjp_res(num_params);
-            std::vector<std::vector<double>> dy(
-                1, std::vector<double>(num_obs, 1.0));
-
-            std::vector<std::complex<double>> cdata{INVSQRT2<double>(),
-                                                    -INVSQRT2<double>()};
-            StateVector<double> psi(cdata.data(), cdata.size());
-
-            auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
-            auto ops = VJP.createOpsData(
-                {"RZ", "RY", "RZ"},
-                {{local_params[0]}, {local_params[1]}, {local_params[2]}},
-                {{0}, {0}, {0}}, {false, false, false});
-
-            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-                                      psi.getLength(), {obs}, ops, {0, 1, 2},
-                                      true);
-
-            CAPTURE(theta);
-            CAPTURE(jacobian);
-            CHECK(expec_results[theta][0] ==
-                  Approx(jacobian[0][0]).margin(1e-7));
-            CHECK(expec_results[theta][1] ==
-                  Approx(jacobian[0][1]).margin(1e-7));
-            CHECK(expec_results[theta][2] ==
-                  Approx(jacobian[0][2]).margin(1e-7));
-        }
-    }
-}
-TEST_CASE(
-    "VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and TParams",
-    "[VectorJacobianProduct]") {
-    VectorJacobianProduct<double> VJP;
-    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-    {
-        const size_t num_qubits = 2;
-        const std::vector<size_t> t_params{1, 2, 3};
-        const size_t num_obs = 1;
-
-        const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 8);
-
-        std::vector<double> local_params{0.543, 0.54, 0.1,  0.5, 1.3,
-                                         -2.3,  0.5,  -0.5, 0.5};
-        std::vector<std::vector<double>> jacobian(
-            num_obs, std::vector<double>(t_params.size(), 0));
-        std::vector<double> vjp_res(t_params.size());
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
-
-        std::vector<std::complex<double>> cdata{ONE<double>(), ZERO<double>(),
-                                                ZERO<double>(), ZERO<double>()};
-        StateVector<double> psi(cdata.data(), cdata.size());
-
-        auto obs = ObsDatum<double>({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}});
-        auto ops = VJP.createOpsData(
-            {"Hadamard", "RX", "CNOT", "RZ", "RY", "RZ", "RZ", "RY", "RZ", "RZ",
-             "RY", "CNOT"},
-            {{},
-             {local_params[0]},
-             {},
-             {local_params[1]},
-             {local_params[2]},
-             {local_params[3]},
-             {local_params[4]},
-             {local_params[5]},
-             {local_params[6]},
-             {local_params[7]},
-             {local_params[8]},
-             {}},
-            {{0}, {0}, {0, 1}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {1}, {0, 1}},
-            {false, false, false, false, false, false, false, false, false,
-             false, false, false});
-
-        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-                                  psi.getLength(), {obs}, ops, t_params, true);
-
-        std::vector<double> expected{-0.71429188, 0.04998561, -0.71904837};
-        CHECK(expected[0] == Approx(jacobian[0][0]));
-        CHECK(expected[1] == Approx(jacobian[0][1]));
-        CHECK(expected[2] == Approx(jacobian[0][2]));
-    }
-}
+// TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RY, Obs=X",
+//           "[VectorJacobianProduct]") {
+//     VectorJacobianProduct<double> VJP;
+//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+//     {
+//         const size_t num_qubits = 1;
+//         const size_t num_params = 3;
+//         const size_t num_obs = 1;
+
+//         auto obs = ObsDatum<double>({"PauliX"}, {{}}, {{0}});
+//         std::vector<std::vector<double>> jacobian(
+//             num_obs, std::vector<double>(num_params, 0));
+//         std::vector<double> vjp_res(num_params);
+//         std::vector<std::vector<double>> dy(1,
+//                                             std::vector<double>(num_obs, 1.0));
+
+//         for (const auto &p : param) {
+//             auto ops = VJP.createOpsData({"RY"}, {{p}}, {{0}}, {false});
+
+//             std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+//             cdata[0] = std::complex<double>{1, 0};
+
+//             StateVector<double> psi(cdata.data(), cdata.size());
+//             VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+//                                       psi.getLength(), {obs}, ops, {0}, true);
+
+//             CAPTURE(jacobian);
+//             CHECK(cos(p) == Approx(jacobian[0].front()).margin(1e-7));
+//         }
+//     }
+// }
+// TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=[Z,Z]",
+//           "[VectorJacobianProduct]") {
+//     VectorJacobianProduct<double> VJP;
+//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+//     {
+//         const size_t num_qubits = 2;
+//         const size_t num_params = 1;
+//         const size_t num_obs = 2;
+//         std::vector<std::vector<double>> jacobian(
+//             num_obs, std::vector<double>(num_params, 0));
+//         std::vector<double> vjp_res(num_params);
+//         std::vector<std::vector<double>> dy(1,
+//                                             std::vector<double>(num_obs, 1.0));
+
+//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+//         StateVector<double> psi(cdata.data(), cdata.size());
+//         cdata[0] = std::complex<double>{1, 0};
+
+//         auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+//         auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+
+//         auto ops = VJP.createOpsData({"RX"}, {{param[0]}}, {{0}}, {false});
+
+//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+//                                   psi.getLength(), {obs1, obs2}, ops, {0},
+//                                   true);
+
+//         CAPTURE(jacobian);
+//         CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+//         CHECK(0.0 == Approx(jacobian[1][0]).margin(1e-7));
+//     }
+// }
+// TEST_CASE(
+//     "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z]",
+//     "[VectorJacobianProduct]") {
+//     VectorJacobianProduct<double> VJP;
+//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+//     {
+//         const size_t num_qubits = 3;
+//         const size_t num_params = 3;
+//         const size_t num_obs = 3;
+//         std::vector<std::vector<double>> jacobian(
+//             num_obs, std::vector<double>(num_params, 0));
+//         std::vector<double> vjp_res(num_params);
+//         std::vector<std::vector<double>> dy(1,
+//                                             std::vector<double>(num_obs, 1.0));
+
+//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+//         StateVector<double> psi(cdata.data(), cdata.size());
+//         cdata[0] = std::complex<double>{1, 0};
+
+//         auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+//         auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+//         auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
+
+//         auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+//                                      {{param[0]}, {param[1]}, {param[2]}},
+//                                      {{0}, {1}, {2}}, {false, false, false});
+
+//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+//                                   psi.getLength(), {obs1, obs2, obs3}, ops,
+//                                   {0, 1, 2}, true);
+
+//         CAPTURE(jacobian);
+//         CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+//         CHECK(-sin(param[1]) == Approx(jacobian[1][1]).margin(1e-7));
+//         CHECK(-sin(param[2]) == Approx(jacobian[2][2]).margin(1e-7));
+//     }
+// }
+// TEST_CASE(
+//     "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z], "
+//     "TParams=[0,2]",
+//     "[VectorJacobianProduct]") {
+//     VectorJacobianProduct<double> VJP;
+//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+//     {
+//         const size_t num_qubits = 3;
+//         const size_t num_params = 3;
+//         const size_t num_obs = 3;
+//         std::vector<std::vector<double>> jacobian(
+//             num_obs, std::vector<double>(num_params, 0));
+//         std::vector<size_t> t_params{0, 2};
+//         std::vector<double> vjp_res(num_params);
+//         std::vector<std::vector<double>> dy(1,
+//                                             std::vector<double>(num_obs, 1.0));
+
+//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+//         StateVector<double> psi(cdata.data(), cdata.size());
+//         cdata[0] = std::complex<double>{1, 0};
+
+//         auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+//         auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+//         auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
+
+//         auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+//                                      {{param[0]}, {param[1]}, {param[2]}},
+//                                      {{0}, {1}, {2}}, {false, false, false});
+
+//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+//                                   psi.getLength(), {obs1, obs2, obs3}, ops,
+//                                   t_params, true);
+
+//         CAPTURE(jacobian);
+//         CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+//         CHECK(0 == Approx(jacobian[1][1]).margin(1e-7));
+//         CHECK(-sin(param[2]) == Approx(jacobian[2][1]).margin(1e-7));
+//     }
+// }
+// TEST_CASE(
+//     "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[ZZZ]",
+//     "[VectorJacobianProduct]") {
+//     VectorJacobianProduct<double> VJP;
+//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+//     {
+//         const size_t num_qubits = 3;
+//         const size_t num_params = 3;
+//         const size_t num_obs = 1;
+//         std::vector<std::vector<double>> jacobian(
+//             num_obs, std::vector<double>(num_params, 0));
+//         std::vector<double> vjp_res(num_params);
+//         std::vector<std::vector<double>> dy(1,
+//                                             std::vector<double>(num_obs, 1.0));
+
+//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+//         StateVector<double> psi(cdata.data(), cdata.size());
+//         cdata[0] = std::complex<double>{1, 0};
+
+//         auto obs = ObsDatum<double>({"PauliZ", "PauliZ", "PauliZ"},
+//                                     {{}, {}, {}}, {{0}, {1}, {2}});
+//         auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+//                                      {{param[0]}, {param[1]}, {param[2]}},
+//                                      {{0}, {1}, {2}}, {false, false, false});
+
+//         // adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
+//         //                     ops, {0, 1, 2}, true);
+//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+//                                   psi.getLength(), {obs}, ops, {0, 1, 2}, true);
+
+//         CAPTURE(jacobian);
+//         CHECK(-0.1755096592645253 == Approx(jacobian[0][0]).margin(1e-7));
+//         CHECK(0.26478810666384334 == Approx(jacobian[0][1]).margin(1e-7));
+//         CHECK(-0.6312451595102775 == Approx(jacobian[0][2]).margin(1e-7));
+//     }
+// }
+// TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX]",
+//           "[VectorJacobianProduct]") {
+//     VectorJacobianProduct<double> VJP;
+//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+//     {
+//         const size_t num_qubits = 3;
+//         const size_t num_params = 6;
+//         const size_t num_obs = 1;
+//         std::vector<std::vector<double>> jacobian(
+//             num_obs, std::vector<double>(num_params, 0));
+//         std::vector<double> vjp_res(num_params);
+//         std::vector<std::vector<double>> dy(1,
+//                                             std::vector<double>(num_obs, 1.0));
+
+//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+//         StateVector<double> psi(cdata.data(), cdata.size());
+//         cdata[0] = std::complex<double>{1, 0};
+
+//         auto obs = ObsDatum<double>({"PauliX", "PauliX", "PauliX"},
+//                                     {{}, {}, {}}, {{0}, {1}, {2}});
+//         auto ops = VJP.createOpsData(
+//             {"RZ", "RY", "RZ", "CNOT", "CNOT", "RZ", "RY", "RZ"},
+//             {{param[0]},
+//              {param[1]},
+//              {param[2]},
+//              {},
+//              {},
+//              {param[0]},
+//              {param[1]},
+//              {param[2]}},
+//             {{0}, {0}, {0}, {0, 1}, {1, 2}, {1}, {1}, {1}},
+//             {false, false, false, false, false, false, false, false});
+
+//         // adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
+//         //                     ops, {0, 1, 2, 3, 4, 5}, true);
+//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+//                                   psi.getLength(), {obs}, ops,
+//                                   {0, 1, 2, 3, 4, 5}, true);
+
+//         CAPTURE(jacobian);
+//         CHECK(0.0 == Approx(jacobian[0][0]).margin(1e-7));
+//         CHECK(-0.674214427 == Approx(jacobian[0][1]).margin(1e-7));
+//         CHECK(0.275139672 == Approx(jacobian[0][2]).margin(1e-7));
+//         CHECK(0.275139672 == Approx(jacobian[0][3]).margin(1e-7));
+//         CHECK(-0.0129093062 == Approx(jacobian[0][4]).margin(1e-7));
+//         CHECK(0.323846156 == Approx(jacobian[0][5]).margin(1e-7));
+//     }
+// }
+// TEST_CASE(
+//     "VectorJacobianProduct::vectorJacobianProduct Decomposed Rot gate, non "
+//     "computational basis state",
+//     "[VectorJacobianProduct]") {
+//     VectorJacobianProduct<double> VJP;
+
+//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+//     {
+//         const size_t num_qubits = 1;
+//         const size_t num_params = 3;
+//         const size_t num_obs = 1;
+
+//         const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 7);
+//         std::unordered_map<double, std::vector<double>> expec_results{
+//             {thetas[0], {0, -9.90819496e-01, 0}},
+//             {thetas[1], {-8.18996553e-01, 1.62526544e-01, 0}},
+//             {thetas[2], {-0.203949, 0.48593716, 0}},
+//             {thetas[3], {0, 1, 0}},
+//             {thetas[4], {-2.03948985e-01, 4.85937177e-01, 0}},
+//             {thetas[5], {-8.18996598e-01, 1.62526487e-01, 0}},
+//             {thetas[6], {0, -9.90819511e-01, 0}}};
+
+//         for (const auto &theta : thetas) {
+//             std::vector<double> local_params{theta, std::pow(theta, 3),
+//                                              SQRT2<double>() * theta};
+//             std::vector<std::vector<double>> jacobian(
+//                 num_obs, std::vector<double>(num_params, 0));
+//             std::vector<double> vjp_res(num_params);
+//             std::vector<std::vector<double>> dy(
+//                 1, std::vector<double>(num_obs, 1.0));
+
+//             std::vector<std::complex<double>> cdata{INVSQRT2<double>(),
+//                                                     -INVSQRT2<double>()};
+//             StateVector<double> psi(cdata.data(), cdata.size());
+
+//             auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+//             auto ops = VJP.createOpsData(
+//                 {"RZ", "RY", "RZ"},
+//                 {{local_params[0]}, {local_params[1]}, {local_params[2]}},
+//                 {{0}, {0}, {0}}, {false, false, false});
+
+//             VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+//                                       psi.getLength(), {obs}, ops, {0, 1, 2},
+//                                       true);
+
+//             CAPTURE(theta);
+//             CAPTURE(jacobian);
+//             CHECK(expec_results[theta][0] ==
+//                   Approx(jacobian[0][0]).margin(1e-7));
+//             CHECK(expec_results[theta][1] ==
+//                   Approx(jacobian[0][1]).margin(1e-7));
+//             CHECK(expec_results[theta][2] ==
+//                   Approx(jacobian[0][2]).margin(1e-7));
+//         }
+//     }
+// }
+// TEST_CASE(
+//     "VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and TParams",
+//     "[VectorJacobianProduct]") {
+//     VectorJacobianProduct<double> VJP;
+//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+//     {
+//         const size_t num_qubits = 2;
+//         const std::vector<size_t> t_params{1, 2, 3};
+//         const size_t num_obs = 1;
+
+//         const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 8);
+
+//         std::vector<double> local_params{0.543, 0.54, 0.1,  0.5, 1.3,
+//                                          -2.3,  0.5,  -0.5, 0.5};
+//         std::vector<std::vector<double>> jacobian(
+//             num_obs, std::vector<double>(t_params.size(), 0));
+//         std::vector<double> vjp_res(t_params.size());
+//         std::vector<std::vector<double>> dy(1,
+//                                             std::vector<double>(num_obs, 1.0));
+
+//         std::vector<std::complex<double>> cdata{ONE<double>(), ZERO<double>(),
+//                                                 ZERO<double>(), ZERO<double>()};
+//         StateVector<double> psi(cdata.data(), cdata.size());
+
+//         auto obs = ObsDatum<double>({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}});
+//         auto ops = VJP.createOpsData(
+//             {"Hadamard", "RX", "CNOT", "RZ", "RY", "RZ", "RZ", "RY", "RZ", "RZ",
+//              "RY", "CNOT"},
+//             {{},
+//              {local_params[0]},
+//              {},
+//              {local_params[1]},
+//              {local_params[2]},
+//              {local_params[3]},
+//              {local_params[4]},
+//              {local_params[5]},
+//              {local_params[6]},
+//              {local_params[7]},
+//              {local_params[8]},
+//              {}},
+//             {{0}, {0}, {0, 1}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {1}, {0, 1}},
+//             {false, false, false, false, false, false, false, false, false,
+//              false, false, false});
+
+//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+//                                   psi.getLength(), {obs}, ops, t_params, true);
+
+//         std::vector<double> expected{-0.71429188, 0.04998561, -0.71904837};
+//         CHECK(expected[0] == Approx(jacobian[0][0]));
+//         CHECK(expected[1] == Approx(jacobian[0][1]));
+//         CHECK(expected[2] == Approx(jacobian[0][2]));
+//     }
+// }

From 74aeeacb69e290abad70e90f5fbdaecfdb976a30 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Thu, 25 Nov 2021 22:14:38 -0500
Subject: [PATCH 12/27] Complete adding cpp tests

---
 .../src/algorithms/AdjointDiff.hpp            |  41 +-
 .../src/tests/Test_VectorJacobianProduct.cpp  | 904 +++++++++++-------
 pennylane_lightning/src/util/Util.hpp         |  11 +-
 3 files changed, 584 insertions(+), 372 deletions(-)

diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index c40ccfd672..0a7a4dedf2 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -823,18 +823,18 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
             return;
         }
 
-        const size_t jac_len[2]{jac.size(), jac.front().size()};
-        if (dy_row.size() != jac_len[0]) {
+        const size_t r_len = jac.size();
+        const size_t c_len = jac.front().size();
+        if (dy_row.size() != r_len) {
             throw std::invalid_argument(
                 "Invalid size for gradient-output vector");
         }
 
-        const size_t t_len = jac_len[0] * jac_len[1];
+        const size_t t_len = r_len * c_len;
         std::vector<T> jac_row(t_len);
         getRowMajor(jac_row, jac, t_len);
 
-
-        Util::vecMatrixProd(vjp, dy_row, jac_row, jac_len[0], jac_len[1]);
+        Util::vecMatrixProd(vjp, dy_row, jac_row, r_len, c_len);
     }
 
     /**
@@ -874,42 +874,17 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
         std::vector<T> dy_row(t_len);
         getRowMajor(dy_row, dy, t_len);
 
-        const bool allzero =
-            std::all_of(dy_row.cbegin(), dy_row.cend(), [](T e) { return e == 0; });
+        const bool allzero = std::all_of(dy_row.cbegin(), dy_row.cend(),
+                                         [](T e) { return e == 0; });
         if (allzero) {
             vjp.resize(num_params);
             return;
         }
 
         this->adjointJacobian(psi, num_elements, jac, observables, operations,
-                            trainableParams, apply_operations);
-
-        // // -- debug -- // //
-        const size_t num_obs = observables.size();
-        std::cerr << "num_params: " << num_params << std::endl;
-        std::cerr << "num_obs: " << num_obs << std::endl;
-        std::cerr << "vjp.size(): " << vjp.size() << std::endl;
-        std::cerr << "jac.size(): " << jac.size() << std::endl;
-        std::cerr << "jac.front().size(): " << jac.front().size() << std::endl;
-        std::cerr << "dy.size(): " << dy.size() << std::endl;
-        std::cerr << "dy.front().size(): " << dy.front().size() << std::endl;
-        for (auto &j: jac) {
-            for (auto &i: j) {
-                std::cerr << "jac[i]: " << i << std::endl; 
-            }
-        }
-        std::cerr << "------------------------------" << std::endl;
-        // // -- debug -- // //
+                              trainableParams, apply_operations);
 
         tensorDot(vjp, jac, dy_row);
-
-
-        for (auto &j: jac) {
-            std::cerr << "vjp[j]: " << j << std::endl;
-        }
-        std::cerr << "------------------------------" << std::endl;
-        // // -- debug -- // //
-
     }
 }; // class VectorJacobianProduct
 
diff --git a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
index 2804c47b18..7c65ceaca4 100644
--- a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
+++ b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
@@ -35,7 +35,42 @@ TEMPLATE_TEST_CASE("VectorJacobianProduct::VectorJacobianProduct",
     }
 }
 
-TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z",
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={0}",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+
+    {
+        const size_t num_qubits = 1;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+        auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1, std::vector<double>(num_obs, 0));
+
+        for (const auto &p : param) {
+            auto ops = VJP.createOpsData({"RX"}, {{p}}, {{0}}, {false});
+
+            std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+            cdata[0] = std::complex<double>{1, 0};
+
+            StateVector<double> psi(cdata.data(), cdata.size());
+            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                      psi.getLength(), {obs}, ops, {0}, true);
+
+            CAPTURE(jacobian);
+            CHECK(0 == Approx(jacobian[0].front()));
+
+            CAPTURE(vjp_res);
+            CHECK(0 == Approx(vjp_res[0]));
+            CHECK(0 == Approx(vjp_res[1]));
+            CHECK(0 == Approx(vjp_res[2]));
+        }
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={1}",
           "[VectorJacobianProduct]") {
     VectorJacobianProduct<double> VJP;
     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
@@ -63,338 +98,541 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z",
 
             CAPTURE(jacobian);
             CHECK(-sin(p) == Approx(jacobian[0].front()));
+
+            CAPTURE(vjp_res);
+            CHECK(-sin(p) == Approx(vjp_res[0]));
+            CHECK(0 == Approx(vjp_res[1]));
+            CHECK(0 == Approx(vjp_res[2]));
+        }
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={0.4}",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+
+    {
+        const size_t num_qubits = 1;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+        auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 0.4));
+
+        for (const auto &p : param) {
+            auto ops = VJP.createOpsData({"RX"}, {{p}}, {{0}}, {false});
+
+            std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+            cdata[0] = std::complex<double>{1, 0};
+
+            StateVector<double> psi(cdata.data(), cdata.size());
+            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                      psi.getLength(), {obs}, ops, {0}, true);
+
+            CAPTURE(jacobian);
+            CHECK(-sin(p) == Approx(jacobian[0].front()));
+
+            CAPTURE(vjp_res);
+            CHECK(vjp_res.size() == num_params);
+            CHECK(-sin(p) * 0.4 == Approx(vjp_res[0]));
+            CHECK(0 == Approx(vjp_res[1]));
+            CHECK(0 == Approx(vjp_res[2]));
+        }
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RY, Obs=X dy={0.4}",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 1;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+
+        auto obs = ObsDatum<double>({"PauliX"}, {{}}, {{0}});
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 0.4));
+
+        for (const auto &p : param) {
+            auto ops = VJP.createOpsData({"RY"}, {{p}}, {{0}}, {false});
+
+            std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+            cdata[0] = std::complex<double>{1, 0};
+
+            StateVector<double> psi(cdata.data(), cdata.size());
+            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                      psi.getLength(), {obs}, ops, {0}, true);
+
+            CAPTURE(jacobian);
+            CHECK(cos(p) == Approx(jacobian[0].front()).margin(1e-7));
+
+            CAPTURE(vjp_res);
+            CHECK(vjp_res.size() == num_params);
+            CHECK(cos(p) * 0.4 == Approx(vjp_res[0]));
+            CHECK(0 == Approx(vjp_res[1]));
+            CHECK(0 == Approx(vjp_res[2]));
+        }
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=[Z,Z] dy={1}",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 2;
+        const size_t num_params = 1;
+        const size_t num_obs = 2;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1, std::vector<double>(num_obs, 1));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+
+        auto ops = VJP.createOpsData({"RX"}, {{param[0]}}, {{0}}, {false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs1, obs2}, ops, {0},
+                                  true);
+
+        CAPTURE(jacobian);
+        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(0.0 == Approx(jacobian[1][0]).margin(1e-7));
+
+        CAPTURE(vjp_res);
+        CHECK(vjp_res.size() == num_params);
+        CHECK(-sin(param[0]) == Approx(vjp_res[0]));
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], "
+          "Obs=[Z,Z,Z], dy={0.4}",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 3;
+        const size_t num_obs = 3;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 0.4));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+        auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
+
+        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+                                     {{param[0]}, {param[1]}, {param[2]}},
+                                     {{0}, {1}, {2}}, {false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs1, obs2, obs3}, ops,
+                                  {0, 1, 2}, true);
+
+        CAPTURE(jacobian);
+        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(-sin(param[1]) == Approx(jacobian[1][1]).margin(1e-7));
+        CHECK(-sin(param[2]) == Approx(jacobian[2][2]).margin(1e-7));
+
+        CAPTURE(vjp_res);
+        CHECK(vjp_res.size() == num_params);
+        CHECK(-sin(param[0]) * 0.4 == Approx(vjp_res[0]));
+        CHECK(-sin(param[1]) * 0.4 == Approx(vjp_res[1]));
+        CHECK(-sin(param[2]) * 0.4 == Approx(vjp_res[2]));
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z], "
+    "TParams=[0,2], dy={1}",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 3;
+        const size_t num_obs = 3;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<size_t> t_params{0, 2};
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+        auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
+        auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
+
+        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+                                     {{param[0]}, {param[1]}, {param[2]}},
+                                     {{0}, {1}, {2}}, {false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs1, obs2, obs3}, ops,
+                                  t_params, true);
+
+        CAPTURE(jacobian);
+        CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(0 == Approx(jacobian[1][1]).margin(1e-7));
+        CHECK(-sin(param[2]) == Approx(jacobian[2][1]).margin(1e-7));
+
+        CAPTURE(vjp_res);
+        CHECK(vjp_res.size() == num_params);
+        CHECK(-sin(param[0]) == Approx(vjp_res[0]));
+        CHECK(0 == Approx(vjp_res[2]));
+        CHECK(-sin(param[2]) == Approx(vjp_res[1]));
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], "
+          "Obs=[ZZZ], dy={0.4}",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 0.4));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs = ObsDatum<double>({"PauliZ", "PauliZ", "PauliZ"},
+                                    {{}, {}, {}}, {{0}, {1}, {2}});
+        auto ops = VJP.createOpsData({"RX", "RX", "RX"},
+                                     {{param[0]}, {param[1]}, {param[2]}},
+                                     {{0}, {1}, {2}}, {false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs}, ops, {0, 1, 2}, true);
+
+        CAPTURE(jacobian);
+        CHECK(-0.1755096592645253 == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(0.26478810666384334 == Approx(jacobian[0][1]).margin(1e-7));
+        CHECK(-0.6312451595102775 == Approx(jacobian[0][2]).margin(1e-7));
+
+        CAPTURE(vjp_res);
+        CHECK(vjp_res.size() == num_params);
+        CHECK(-0.1755096592645253 * 0.4 == Approx(vjp_res[0]));
+        CHECK(0.26478810666384334 * 0.4 == Approx(vjp_res[1]));
+        CHECK(-0.6312451595102775 * 0.4 == Approx(vjp_res[2]));
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX], dy={1}",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 6;
+        const size_t num_obs = 1;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs = ObsDatum<double>({"PauliX", "PauliX", "PauliX"},
+                                    {{}, {}, {}}, {{0}, {1}, {2}});
+        auto ops = VJP.createOpsData(
+            {"RZ", "RY", "RZ", "CNOT", "CNOT", "RZ", "RY", "RZ"},
+            {{param[0]},
+             {param[1]},
+             {param[2]},
+             {},
+             {},
+             {param[0]},
+             {param[1]},
+             {param[2]}},
+            {{0}, {0}, {0}, {0, 1}, {1, 2}, {1}, {1}, {1}},
+            {false, false, false, false, false, false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs}, ops,
+                                  {0, 1, 2, 3, 4, 5}, true);
+
+        CAPTURE(jacobian);
+        CHECK(0.0 == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(-0.674214427 == Approx(jacobian[0][1]).margin(1e-7));
+        CHECK(0.275139672 == Approx(jacobian[0][2]).margin(1e-7));
+        CHECK(0.275139672 == Approx(jacobian[0][3]).margin(1e-7));
+        CHECK(-0.0129093062 == Approx(jacobian[0][4]).margin(1e-7));
+        CHECK(0.323846156 == Approx(jacobian[0][5]).margin(1e-7));
+
+        CAPTURE(vjp_res);
+        CHECK(vjp_res.size() == num_params);
+        CHECK(0.0 == Approx(vjp_res[0]).margin(1e-7));
+        CHECK(-0.674214427 == Approx(vjp_res[1]).margin(1e-7));
+        CHECK(0.275139672 == Approx(vjp_res[2]).margin(1e-7));
+        CHECK(0.275139672 == Approx(vjp_res[3]).margin(1e-7));
+        CHECK(-0.0129093062 == Approx(vjp_res[4]).margin(1e-7));
+        CHECK(0.323846156 == Approx(vjp_res[5]).margin(1e-7));
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX], "
+          "dy={-0.2}",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 3;
+        const size_t num_params = 6;
+        const size_t num_obs = 1;
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(num_params, 0));
+        std::vector<double> vjp_res(num_params);
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, -0.2));
+
+        std::vector<std::complex<double>> cdata(0b1 << num_qubits);
+        StateVector<double> psi(cdata.data(), cdata.size());
+        cdata[0] = std::complex<double>{1, 0};
+
+        auto obs = ObsDatum<double>({"PauliX", "PauliX", "PauliX"},
+                                    {{}, {}, {}}, {{0}, {1}, {2}});
+        auto ops = VJP.createOpsData(
+            {"RZ", "RY", "RZ", "CNOT", "CNOT", "RZ", "RY", "RZ"},
+            {{param[0]},
+             {param[1]},
+             {param[2]},
+             {},
+             {},
+             {param[0]},
+             {param[1]},
+             {param[2]}},
+            {{0}, {0}, {0}, {0, 1}, {1, 2}, {1}, {1}, {1}},
+            {false, false, false, false, false, false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs}, ops,
+                                  {0, 1, 2, 3, 4, 5}, true);
+
+        CAPTURE(jacobian);
+        CHECK(0.0 == Approx(jacobian[0][0]).margin(1e-7));
+        CHECK(-0.674214427 == Approx(jacobian[0][1]).margin(1e-7));
+        CHECK(0.275139672 == Approx(jacobian[0][2]).margin(1e-7));
+        CHECK(0.275139672 == Approx(jacobian[0][3]).margin(1e-7));
+        CHECK(-0.0129093062 == Approx(jacobian[0][4]).margin(1e-7));
+        CHECK(0.323846156 == Approx(jacobian[0][5]).margin(1e-7));
+
+        CAPTURE(vjp_res);
+        CHECK(vjp_res.size() == num_params);
+        CHECK(-0.0 == Approx(vjp_res[0]).margin(1e-7));
+        CHECK(-0.2 * -0.674214427 == Approx(vjp_res[1]).margin(1e-7));
+        CHECK(-0.2 * 0.275139672 == Approx(vjp_res[2]).margin(1e-7));
+        CHECK(-0.2 * 0.275139672 == Approx(vjp_res[3]).margin(1e-7));
+        CHECK(-0.2 * -0.0129093062 == Approx(vjp_res[4]).margin(1e-7));
+        CHECK(-0.2 * 0.323846156 == Approx(vjp_res[5]).margin(1e-7));
+    }
+}
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Decomposed Rot gate, non "
+    "computational basis state",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 1;
+        const size_t num_params = 3;
+        const size_t num_obs = 1;
+
+        const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 7);
+        std::unordered_map<double, std::vector<double>> expec_results{
+            {thetas[0], {0, -9.90819496e-01, 0}},
+            {thetas[1], {-8.18996553e-01, 1.62526544e-01, 0}},
+            {thetas[2], {-0.203949, 0.48593716, 0}},
+            {thetas[3], {0, 1, 0}},
+            {thetas[4], {-2.03948985e-01, 4.85937177e-01, 0}},
+            {thetas[5], {-8.18996598e-01, 1.62526487e-01, 0}},
+            {thetas[6], {0, -9.90819511e-01, 0}}};
+
+        for (const auto &theta : thetas) {
+            std::vector<double> local_params{theta, std::pow(theta, 3),
+                                             SQRT2<double>() * theta};
+            std::vector<std::vector<double>> jacobian(
+                num_obs, std::vector<double>(num_params, 0));
+            std::vector<double> vjp_res(num_params);
+            std::vector<std::vector<double>> dy(
+                1, std::vector<double>(num_obs, 1.0));
+
+            std::vector<std::complex<double>> cdata{INVSQRT2<double>(),
+                                                    -INVSQRT2<double>()};
+            StateVector<double> psi(cdata.data(), cdata.size());
+
+            auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
+            auto ops = VJP.createOpsData(
+                {"RZ", "RY", "RZ"},
+                {{local_params[0]}, {local_params[1]}, {local_params[2]}},
+                {{0}, {0}, {0}}, {false, false, false});
+
+            VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                      psi.getLength(), {obs}, ops, {0, 1, 2},
+                                      true);
+
+            CAPTURE(theta);
+            CAPTURE(jacobian);
+            CHECK(expec_results[theta][0] ==
+                  Approx(jacobian[0][0]).margin(1e-7));
+            CHECK(expec_results[theta][1] ==
+                  Approx(jacobian[0][1]).margin(1e-7));
+            CHECK(expec_results[theta][2] ==
+                  Approx(jacobian[0][2]).margin(1e-7));
+
+            CAPTURE(vjp_res);
+            CHECK(vjp_res.size() == num_params);
+            CHECK(expec_results[theta][0] == Approx(vjp_res[0]).margin(1e-7));
+            CHECK(expec_results[theta][1] == Approx(vjp_res[1]).margin(1e-7));
+            CHECK(expec_results[theta][2] == Approx(vjp_res[2]).margin(1e-7));
         }
     }
 }
-// TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RY, Obs=X",
-//           "[VectorJacobianProduct]") {
-//     VectorJacobianProduct<double> VJP;
-//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-//     {
-//         const size_t num_qubits = 1;
-//         const size_t num_params = 3;
-//         const size_t num_obs = 1;
-
-//         auto obs = ObsDatum<double>({"PauliX"}, {{}}, {{0}});
-//         std::vector<std::vector<double>> jacobian(
-//             num_obs, std::vector<double>(num_params, 0));
-//         std::vector<double> vjp_res(num_params);
-//         std::vector<std::vector<double>> dy(1,
-//                                             std::vector<double>(num_obs, 1.0));
-
-//         for (const auto &p : param) {
-//             auto ops = VJP.createOpsData({"RY"}, {{p}}, {{0}}, {false});
-
-//             std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-//             cdata[0] = std::complex<double>{1, 0};
-
-//             StateVector<double> psi(cdata.data(), cdata.size());
-//             VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-//                                       psi.getLength(), {obs}, ops, {0}, true);
-
-//             CAPTURE(jacobian);
-//             CHECK(cos(p) == Approx(jacobian[0].front()).margin(1e-7));
-//         }
-//     }
-// }
-// TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=[Z,Z]",
-//           "[VectorJacobianProduct]") {
-//     VectorJacobianProduct<double> VJP;
-//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-//     {
-//         const size_t num_qubits = 2;
-//         const size_t num_params = 1;
-//         const size_t num_obs = 2;
-//         std::vector<std::vector<double>> jacobian(
-//             num_obs, std::vector<double>(num_params, 0));
-//         std::vector<double> vjp_res(num_params);
-//         std::vector<std::vector<double>> dy(1,
-//                                             std::vector<double>(num_obs, 1.0));
-
-//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-//         StateVector<double> psi(cdata.data(), cdata.size());
-//         cdata[0] = std::complex<double>{1, 0};
-
-//         auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
-//         auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
-
-//         auto ops = VJP.createOpsData({"RX"}, {{param[0]}}, {{0}}, {false});
-
-//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-//                                   psi.getLength(), {obs1, obs2}, ops, {0},
-//                                   true);
-
-//         CAPTURE(jacobian);
-//         CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
-//         CHECK(0.0 == Approx(jacobian[1][0]).margin(1e-7));
-//     }
-// }
-// TEST_CASE(
-//     "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z]",
-//     "[VectorJacobianProduct]") {
-//     VectorJacobianProduct<double> VJP;
-//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-//     {
-//         const size_t num_qubits = 3;
-//         const size_t num_params = 3;
-//         const size_t num_obs = 3;
-//         std::vector<std::vector<double>> jacobian(
-//             num_obs, std::vector<double>(num_params, 0));
-//         std::vector<double> vjp_res(num_params);
-//         std::vector<std::vector<double>> dy(1,
-//                                             std::vector<double>(num_obs, 1.0));
-
-//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-//         StateVector<double> psi(cdata.data(), cdata.size());
-//         cdata[0] = std::complex<double>{1, 0};
-
-//         auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
-//         auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
-//         auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
-
-//         auto ops = VJP.createOpsData({"RX", "RX", "RX"},
-//                                      {{param[0]}, {param[1]}, {param[2]}},
-//                                      {{0}, {1}, {2}}, {false, false, false});
-
-//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-//                                   psi.getLength(), {obs1, obs2, obs3}, ops,
-//                                   {0, 1, 2}, true);
-
-//         CAPTURE(jacobian);
-//         CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
-//         CHECK(-sin(param[1]) == Approx(jacobian[1][1]).margin(1e-7));
-//         CHECK(-sin(param[2]) == Approx(jacobian[2][2]).margin(1e-7));
-//     }
-// }
-// TEST_CASE(
-//     "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[Z,Z,Z], "
-//     "TParams=[0,2]",
-//     "[VectorJacobianProduct]") {
-//     VectorJacobianProduct<double> VJP;
-//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-//     {
-//         const size_t num_qubits = 3;
-//         const size_t num_params = 3;
-//         const size_t num_obs = 3;
-//         std::vector<std::vector<double>> jacobian(
-//             num_obs, std::vector<double>(num_params, 0));
-//         std::vector<size_t> t_params{0, 2};
-//         std::vector<double> vjp_res(num_params);
-//         std::vector<std::vector<double>> dy(1,
-//                                             std::vector<double>(num_obs, 1.0));
-
-//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-//         StateVector<double> psi(cdata.data(), cdata.size());
-//         cdata[0] = std::complex<double>{1, 0};
-
-//         auto obs1 = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
-//         auto obs2 = ObsDatum<double>({"PauliZ"}, {{}}, {{1}});
-//         auto obs3 = ObsDatum<double>({"PauliZ"}, {{}}, {{2}});
-
-//         auto ops = VJP.createOpsData({"RX", "RX", "RX"},
-//                                      {{param[0]}, {param[1]}, {param[2]}},
-//                                      {{0}, {1}, {2}}, {false, false, false});
-
-//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-//                                   psi.getLength(), {obs1, obs2, obs3}, ops,
-//                                   t_params, true);
-
-//         CAPTURE(jacobian);
-//         CHECK(-sin(param[0]) == Approx(jacobian[0][0]).margin(1e-7));
-//         CHECK(0 == Approx(jacobian[1][1]).margin(1e-7));
-//         CHECK(-sin(param[2]) == Approx(jacobian[2][1]).margin(1e-7));
-//     }
-// }
-// TEST_CASE(
-//     "VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], Obs=[ZZZ]",
-//     "[VectorJacobianProduct]") {
-//     VectorJacobianProduct<double> VJP;
-//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-//     {
-//         const size_t num_qubits = 3;
-//         const size_t num_params = 3;
-//         const size_t num_obs = 1;
-//         std::vector<std::vector<double>> jacobian(
-//             num_obs, std::vector<double>(num_params, 0));
-//         std::vector<double> vjp_res(num_params);
-//         std::vector<std::vector<double>> dy(1,
-//                                             std::vector<double>(num_obs, 1.0));
-
-//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-//         StateVector<double> psi(cdata.data(), cdata.size());
-//         cdata[0] = std::complex<double>{1, 0};
-
-//         auto obs = ObsDatum<double>({"PauliZ", "PauliZ", "PauliZ"},
-//                                     {{}, {}, {}}, {{0}, {1}, {2}});
-//         auto ops = VJP.createOpsData({"RX", "RX", "RX"},
-//                                      {{param[0]}, {param[1]}, {param[2]}},
-//                                      {{0}, {1}, {2}}, {false, false, false});
-
-//         // adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
-//         //                     ops, {0, 1, 2}, true);
-//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-//                                   psi.getLength(), {obs}, ops, {0, 1, 2}, true);
-
-//         CAPTURE(jacobian);
-//         CHECK(-0.1755096592645253 == Approx(jacobian[0][0]).margin(1e-7));
-//         CHECK(0.26478810666384334 == Approx(jacobian[0][1]).margin(1e-7));
-//         CHECK(-0.6312451595102775 == Approx(jacobian[0][2]).margin(1e-7));
-//     }
-// }
-// TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX]",
-//           "[VectorJacobianProduct]") {
-//     VectorJacobianProduct<double> VJP;
-//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-//     {
-//         const size_t num_qubits = 3;
-//         const size_t num_params = 6;
-//         const size_t num_obs = 1;
-//         std::vector<std::vector<double>> jacobian(
-//             num_obs, std::vector<double>(num_params, 0));
-//         std::vector<double> vjp_res(num_params);
-//         std::vector<std::vector<double>> dy(1,
-//                                             std::vector<double>(num_obs, 1.0));
-
-//         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
-//         StateVector<double> psi(cdata.data(), cdata.size());
-//         cdata[0] = std::complex<double>{1, 0};
-
-//         auto obs = ObsDatum<double>({"PauliX", "PauliX", "PauliX"},
-//                                     {{}, {}, {}}, {{0}, {1}, {2}});
-//         auto ops = VJP.createOpsData(
-//             {"RZ", "RY", "RZ", "CNOT", "CNOT", "RZ", "RY", "RZ"},
-//             {{param[0]},
-//              {param[1]},
-//              {param[2]},
-//              {},
-//              {},
-//              {param[0]},
-//              {param[1]},
-//              {param[2]}},
-//             {{0}, {0}, {0}, {0, 1}, {1, 2}, {1}, {1}, {1}},
-//             {false, false, false, false, false, false, false, false});
-
-//         // adj.adjointJacobian(psi.getData(), psi.getLength(), jacobian, {obs},
-//         //                     ops, {0, 1, 2, 3, 4, 5}, true);
-//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-//                                   psi.getLength(), {obs}, ops,
-//                                   {0, 1, 2, 3, 4, 5}, true);
-
-//         CAPTURE(jacobian);
-//         CHECK(0.0 == Approx(jacobian[0][0]).margin(1e-7));
-//         CHECK(-0.674214427 == Approx(jacobian[0][1]).margin(1e-7));
-//         CHECK(0.275139672 == Approx(jacobian[0][2]).margin(1e-7));
-//         CHECK(0.275139672 == Approx(jacobian[0][3]).margin(1e-7));
-//         CHECK(-0.0129093062 == Approx(jacobian[0][4]).margin(1e-7));
-//         CHECK(0.323846156 == Approx(jacobian[0][5]).margin(1e-7));
-//     }
-// }
-// TEST_CASE(
-//     "VectorJacobianProduct::vectorJacobianProduct Decomposed Rot gate, non "
-//     "computational basis state",
-//     "[VectorJacobianProduct]") {
-//     VectorJacobianProduct<double> VJP;
-
-//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-//     {
-//         const size_t num_qubits = 1;
-//         const size_t num_params = 3;
-//         const size_t num_obs = 1;
-
-//         const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 7);
-//         std::unordered_map<double, std::vector<double>> expec_results{
-//             {thetas[0], {0, -9.90819496e-01, 0}},
-//             {thetas[1], {-8.18996553e-01, 1.62526544e-01, 0}},
-//             {thetas[2], {-0.203949, 0.48593716, 0}},
-//             {thetas[3], {0, 1, 0}},
-//             {thetas[4], {-2.03948985e-01, 4.85937177e-01, 0}},
-//             {thetas[5], {-8.18996598e-01, 1.62526487e-01, 0}},
-//             {thetas[6], {0, -9.90819511e-01, 0}}};
-
-//         for (const auto &theta : thetas) {
-//             std::vector<double> local_params{theta, std::pow(theta, 3),
-//                                              SQRT2<double>() * theta};
-//             std::vector<std::vector<double>> jacobian(
-//                 num_obs, std::vector<double>(num_params, 0));
-//             std::vector<double> vjp_res(num_params);
-//             std::vector<std::vector<double>> dy(
-//                 1, std::vector<double>(num_obs, 1.0));
-
-//             std::vector<std::complex<double>> cdata{INVSQRT2<double>(),
-//                                                     -INVSQRT2<double>()};
-//             StateVector<double> psi(cdata.data(), cdata.size());
-
-//             auto obs = ObsDatum<double>({"PauliZ"}, {{}}, {{0}});
-//             auto ops = VJP.createOpsData(
-//                 {"RZ", "RY", "RZ"},
-//                 {{local_params[0]}, {local_params[1]}, {local_params[2]}},
-//                 {{0}, {0}, {0}}, {false, false, false});
-
-//             VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-//                                       psi.getLength(), {obs}, ops, {0, 1, 2},
-//                                       true);
-
-//             CAPTURE(theta);
-//             CAPTURE(jacobian);
-//             CHECK(expec_results[theta][0] ==
-//                   Approx(jacobian[0][0]).margin(1e-7));
-//             CHECK(expec_results[theta][1] ==
-//                   Approx(jacobian[0][1]).margin(1e-7));
-//             CHECK(expec_results[theta][2] ==
-//                   Approx(jacobian[0][2]).margin(1e-7));
-//         }
-//     }
-// }
-// TEST_CASE(
-//     "VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and TParams",
-//     "[VectorJacobianProduct]") {
-//     VectorJacobianProduct<double> VJP;
-//     std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
-//     {
-//         const size_t num_qubits = 2;
-//         const std::vector<size_t> t_params{1, 2, 3};
-//         const size_t num_obs = 1;
-
-//         const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 8);
-
-//         std::vector<double> local_params{0.543, 0.54, 0.1,  0.5, 1.3,
-//                                          -2.3,  0.5,  -0.5, 0.5};
-//         std::vector<std::vector<double>> jacobian(
-//             num_obs, std::vector<double>(t_params.size(), 0));
-//         std::vector<double> vjp_res(t_params.size());
-//         std::vector<std::vector<double>> dy(1,
-//                                             std::vector<double>(num_obs, 1.0));
-
-//         std::vector<std::complex<double>> cdata{ONE<double>(), ZERO<double>(),
-//                                                 ZERO<double>(), ZERO<double>()};
-//         StateVector<double> psi(cdata.data(), cdata.size());
-
-//         auto obs = ObsDatum<double>({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}});
-//         auto ops = VJP.createOpsData(
-//             {"Hadamard", "RX", "CNOT", "RZ", "RY", "RZ", "RZ", "RY", "RZ", "RZ",
-//              "RY", "CNOT"},
-//             {{},
-//              {local_params[0]},
-//              {},
-//              {local_params[1]},
-//              {local_params[2]},
-//              {local_params[3]},
-//              {local_params[4]},
-//              {local_params[5]},
-//              {local_params[6]},
-//              {local_params[7]},
-//              {local_params[8]},
-//              {}},
-//             {{0}, {0}, {0, 1}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {1}, {0, 1}},
-//             {false, false, false, false, false, false, false, false, false,
-//              false, false, false});
-
-//         VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
-//                                   psi.getLength(), {obs}, ops, t_params, true);
-
-//         std::vector<double> expected{-0.71429188, 0.04998561, -0.71904837};
-//         CHECK(expected[0] == Approx(jacobian[0][0]));
-//         CHECK(expected[1] == Approx(jacobian[0][1]));
-//         CHECK(expected[2] == Approx(jacobian[0][2]));
-//     }
-// }
+TEST_CASE(
+    "VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and TParams",
+    "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 2;
+        const std::vector<size_t> t_params{1, 2, 3};
+        const size_t num_obs = 1;
+
+        const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 8);
+
+        std::vector<double> local_params{0.543, 0.54, 0.1,  0.5, 1.3,
+                                         -2.3,  0.5,  -0.5, 0.5};
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(t_params.size(), 0));
+        std::vector<double> vjp_res(t_params.size());
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, 1.0));
+
+        std::vector<std::complex<double>> cdata{ONE<double>(), ZERO<double>(),
+                                                ZERO<double>(), ZERO<double>()};
+        StateVector<double> psi(cdata.data(), cdata.size());
+
+        auto obs = ObsDatum<double>({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}});
+        auto ops = VJP.createOpsData(
+            {"Hadamard", "RX", "CNOT", "RZ", "RY", "RZ", "RZ", "RY", "RZ", "RZ",
+             "RY", "CNOT"},
+            {{},
+             {local_params[0]},
+             {},
+             {local_params[1]},
+             {local_params[2]},
+             {local_params[3]},
+             {local_params[4]},
+             {local_params[5]},
+             {local_params[6]},
+             {local_params[7]},
+             {local_params[8]},
+             {}},
+            {{0}, {0}, {0, 1}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {1}, {0, 1}},
+            {false, false, false, false, false, false, false, false, false,
+             false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs}, ops, t_params, true);
+
+        std::vector<double> expected{-0.71429188, 0.04998561, -0.71904837};
+        CHECK(expected[0] == Approx(jacobian[0][0]));
+        CHECK(expected[1] == Approx(jacobian[0][1]));
+        CHECK(expected[2] == Approx(jacobian[0][2]));
+
+        CAPTURE(vjp_res);
+        CHECK(vjp_res.size() == t_params.size());
+        CHECK(expected[0] == Approx(vjp_res[0]).margin(1e-7));
+        CHECK(expected[1] == Approx(vjp_res[1]).margin(1e-7));
+        CHECK(expected[2] == Approx(vjp_res[2]).margin(1e-7));
+    }
+}
+TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and "
+          "TParams, dy={-0.5}",
+          "[VectorJacobianProduct]") {
+    VectorJacobianProduct<double> VJP;
+    std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
+    {
+        const size_t num_qubits = 2;
+        const std::vector<size_t> t_params{1, 2, 3};
+        const size_t num_obs = 1;
+
+        const auto thetas = Util::linspace(-2 * M_PI, 2 * M_PI, 8);
+
+        std::vector<double> local_params{0.543, 0.54, 0.1,  0.5, 1.3,
+                                         -2.3,  0.5,  -0.5, 0.5};
+        std::vector<std::vector<double>> jacobian(
+            num_obs, std::vector<double>(t_params.size(), 0));
+        std::vector<double> vjp_res(t_params.size());
+        std::vector<std::vector<double>> dy(1,
+                                            std::vector<double>(num_obs, -0.5));
+
+        std::vector<std::complex<double>> cdata{ONE<double>(), ZERO<double>(),
+                                                ZERO<double>(), ZERO<double>()};
+        StateVector<double> psi(cdata.data(), cdata.size());
+
+        auto obs = ObsDatum<double>({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}});
+        auto ops = VJP.createOpsData(
+            {"Hadamard", "RX", "CNOT", "RZ", "RY", "RZ", "RZ", "RY", "RZ", "RZ",
+             "RY", "CNOT"},
+            {{},
+             {local_params[0]},
+             {},
+             {local_params[1]},
+             {local_params[2]},
+             {local_params[3]},
+             {local_params[4]},
+             {local_params[5]},
+             {local_params[6]},
+             {local_params[7]},
+             {local_params[8]},
+             {}},
+            {{0}, {0}, {0, 1}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {1}, {0, 1}},
+            {false, false, false, false, false, false, false, false, false,
+             false, false, false});
+
+        VJP.vectorJacobianProduct(vjp_res, jacobian, dy, psi.getData(),
+                                  psi.getLength(), {obs}, ops, t_params, true);
+
+        std::vector<double> expected{-0.71429188, 0.04998561, -0.71904837};
+        CHECK(expected[0] == Approx(jacobian[0][0]));
+        CHECK(expected[1] == Approx(jacobian[0][1]));
+        CHECK(expected[2] == Approx(jacobian[0][2]));
+
+        CAPTURE(vjp_res);
+        CHECK(vjp_res.size() == t_params.size());
+        CHECK(-0.5 * expected[0] == Approx(vjp_res[0]).margin(1e-7));
+        CHECK(-0.5 * expected[1] == Approx(vjp_res[1]).margin(1e-7));
+        CHECK(-0.5 * expected[2] == Approx(vjp_res[2]).margin(1e-7));
+    }
+}
\ No newline at end of file
diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index 2b20fe8e96..a4c93125d3 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -568,21 +568,20 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t m,
             break;
         }
     }
-
     if (allzero) {
         return;
     }
 
-    T *mat_t = new T[m * n];
-    CFTranspose(mat, mat_t, m, n, 0, m, 0, n);
+    std::vector<T> mat_t(m*n);
+    CFTranspose(mat, mat_t.data(), m, n, 0, m, 0, n);
 
     for (i = 0; i < n; i++) {
+        T t = z;
         for (j = 0; j < m; j++) {
-            v_out[i] += mat_t[i * m + j] * v_in[j];
+            t += mat_t[i * m + j] * v_in[j];
         }
+        v_out[i] = t;
     }
-
-    delete[] mat_t;
 }
 
 /**

From e2f8bd663a6c1d363f3b120336d8510f0a03f1eb Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Fri, 26 Nov 2021 09:53:13 -0500
Subject: [PATCH 13/27] Update formatting w/ clang-tidy-12

---
 pennylane_lightning/src/util/Util.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index a4c93125d3..9497e22c9e 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -572,7 +572,7 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t m,
         return;
     }
 
-    std::vector<T> mat_t(m*n);
+    std::vector<T> mat_t(m * n);
     CFTranspose(mat, mat_t.data(), m, n, 0, m, 0, n);
 
     for (i = 0; i < n; i++) {

From 6d0b6109bc32e9b2322d90bafc84eacdf375f628 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Fri, 26 Nov 2021 10:58:56 -0500
Subject: [PATCH 14/27] Add Bindings

---
 pennylane_lightning/lightning_qubit.py        | 95 ++++++++++++++++++-
 pennylane_lightning/src/bindings/Bindings.cpp | 53 +++++++++++
 2 files changed, 143 insertions(+), 5 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index ece28e89f4..42667517f9 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -44,6 +44,7 @@
             StateVectorC64,
             StateVectorC128,
             AdjointJacobianC128,
+            VectorJacobianProductC128,
         )
     else:
         from .lightning_qubit_ops import (
@@ -51,6 +52,7 @@
             StateVectorC64,
             StateVectorC128,
             AdjointJacobianC128,
+            VectorJacobianProductC128,
         )
     from ._serialize import _serialize_obs, _serialize_ops
 
@@ -270,7 +272,7 @@ def _compute_vjp_tensordot(self, dy, jac, num=None):
         return math.tensordot(jac, dy_reshaped, [[0], [0]])
 
     def vector_jacobian_product(
-        self, tape, dy, num=None, starting_state=None, use_device_state=False
+        self, tape, dy, num=None, starting_state=None, use_device_state=False, pybind=False,
     ):
         """Generate the the vector-Jacobian products of a tape.
         
@@ -305,10 +307,17 @@ def vector_jacobian_product(
             has no shape (for example, due to tracing or just-in-time compilation).
             starting_state (): ...
             use_device_state (): ...
+            pybind (bool): 
         Returns:
             tensor_like or None: Vector-Jacobian product. Returns None if the tape
             has no trainable parameters.  
         """
+        if self.shots is not None:
+            warn(
+                "Requested adjoint differentiation to be computed with finite shots."
+                " The derivative is always exact when using the adjoint differentiation method.",
+                UserWarning,
+            )
         num_params = len(tape.trainable_params)
         if num_params == 0:
             # The tape has no trainable parameters; the VJP
@@ -324,11 +333,87 @@ def vector_jacobian_product(
         except (AttributeError, TypeError):
             pass
 
-        jac = self.adjoint_jacobian(
-            tape, starting_state=starting_state, use_device_state=use_device_state
-        )
+        if not pybind:
+            jac = self.adjoint_jacobian(
+                tape, starting_state=starting_state, use_device_state=use_device_state
+            )
+
+            return self._compute_vjp_tensordot(dy, jac, num=num)
+        
+        for m in tape.measurements:
+            if m.return_type is not Expectation:
+                raise QuantumFunctionError(
+                    "Adjoint differentiation method does not support"
+                    f" measurement {m.return_type.value}"
+                )
+            if not isinstance(m.obs, qml.operation.Tensor):
+                if isinstance(m.obs, qml.Projector):
+                    raise QuantumFunctionError(
+                        "Adjoint differentiation method does not support the Projector observable"
+                    )
+                if isinstance(m.obs, qml.Hermitian):
+                    raise QuantumFunctionError(
+                        "Lightning adjoint differentiation method does not currently support the Hermitian observable"
+                    )
+            else:
+                if any([isinstance(o, qml.Projector) for o in m.obs.non_identity_obs]):
+                    raise QuantumFunctionError(
+                        "Adjoint differentiation method does not support the Projector observable"
+                    )
+                if any([isinstance(o, qml.Hermitian) for o in m.obs.non_identity_obs]):
+                    raise QuantumFunctionError(
+                        "Lightning adjoint differentiation method does not currently support the Hermitian observable"
+                    )
+
+        for op in tape.operations:
+            if (
+                op.num_params > 1 and not isinstance(op, qml.Rot)
+            ) or op.name in UNSUPPORTED_PARAM_GATES_ADJOINT:
+                raise QuantumFunctionError(
+                    f"The {op.name} operation is not supported using "
+                    'the "adjoint" differentiation method'
+                )
+
+        # Initialization of state
+        if starting_state is not None:
+            ket = np.ravel(starting_state)
+        else:
+            if not use_device_state:
+                self.reset()
+                self.execute(tape)
+            ket = np.ravel(self._pre_rotated_state)
+
+        # adj = AdjointJacobianC128()
+        VJP = VectorJacobianProductC128()
 
-        return self._compute_vjp_tensordot(dy, jac, num=num)
+        obs_serialized = _serialize_obs(tape, self.wire_map)
+        ops_serialized, use_sp = _serialize_ops(tape, self.wire_map)
+
+        ops_serialized = VJP.create_ops_list(*ops_serialized)
+
+        trainable_params = sorted(tape.trainable_params)
+        first_elem = 1 if trainable_params[0] == 0 else 0
+
+        tp_shift = (
+            trainable_params if not use_sp else [i - 1 for i in trainable_params[first_elem:]]
+        )  # exclude first index if explicitly setting sv
+
+        # jac = adj.adjoint_jacobian(
+        #     StateVectorC128(ket),
+        #     obs_serialized,
+        #     ops_serialized,
+        #     tp_shift,
+        #     tape.num_params,
+        # )
+        vjp_res = VJP.vjp(
+            dy,
+            StateVectorC128(ket),
+            obs_serialized,
+            ops_serialized,
+            tp_shift,
+            tape.num_params,
+        )
+        return vjp_res        
 
     def batch_vector_jacobian_product(
         self, tapes, dys, num=None, reduction="append", starting_state=None, use_device_state=False
diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp
index 41d4b922bf..421e98a21c 100644
--- a/pennylane_lightning/src/bindings/Bindings.cpp
+++ b/pennylane_lightning/src/bindings/Bindings.cpp
@@ -805,6 +805,59 @@ void lightning_class_bindings(py::module &m) {
                                      observables, operations, trainableParams);
                  return py::array_t<Param_t>(py::cast(jac));
              });
+
+    class_name = "VectorJacobianProductC" + bitsize;
+    py::class_<VectorJacobianProduct<PrecisionT>>(m, class_name.c_str())
+        .def(py::init<>())
+        .def("create_ops_list", &VectorJacobianProduct<PrecisionT>::createOpsData)
+        .def("create_ops_list",
+             [](VectorJacobianProduct<PrecisionT> &vjp,
+                const std::vector<std::string> &ops_name,
+                const std::vector<np_arr_r> &ops_params,
+                const std::vector<std::vector<size_t>> &ops_wires,
+                const std::vector<bool> &ops_inverses,
+                const std::vector<np_arr_c> &ops_matrices) {
+                 std::vector<std::vector<PrecisionT>> conv_params(
+                     ops_params.size());
+                 std::vector<std::vector<std::complex<PrecisionT>>>
+                     conv_matrices(ops_matrices.size());
+                 static_cast<void>(adj);
+                 for (size_t op = 0; op < ops_name.size(); op++) {
+                     const auto p_buffer = ops_params[op].request();
+                     const auto m_buffer = ops_matrices[op].request();
+                     if (p_buffer.size) {
+                         const auto *const p_ptr =
+                             static_cast<const Param_t *>(p_buffer.ptr);
+                         conv_params[op] =
+                             std::vector<Param_t>{p_ptr, p_ptr + p_buffer.size};
+                     }
+                     if (m_buffer.size) {
+                         const auto m_ptr =
+                             static_cast<const std::complex<Param_t> *>(
+                                 m_buffer.ptr);
+                         conv_matrices[op] = std::vector<std::complex<Param_t>>{
+                             m_ptr, m_ptr + m_buffer.size};
+                     }
+                 }
+                 return OpsData<PrecisionT>{ops_name, conv_params, ops_wires,
+                                            ops_inverses, conv_matrices};
+             })
+        .def("vjp", &VectorJacobianProduct<PrecisionT>::vectorJacobianProduct)
+        .def("vjp",
+             [](VectorJacobianProduct<PrecisionT> &v,
+                const std::vector<std::vector<T>> &dy, 
+                const StateVecBinder<PrecisionT> &sv,
+                const std::vector<ObsDatum<PrecisionT>> &observables,
+                const OpsData<PrecisionT> &operations,
+                const std::vector<size_t> &trainableParams, size_t num_params) {
+                 std::vector<std::vector<PrecisionT>> jac(
+                     observables.size(),
+                     std::vector<PrecisionT>(num_params, 0));
+                 std::vector<PrecisionT> vjp_res(num_params);
+                 adj.adjointJacobian(vjp_res, jac, dy, sv.getData(), sv.getLength(),
+                                     observables, operations, trainableParams);
+                 return py::array_t<Param_t>(py::cast(vjp_res));
+             });
 }
 
 /**

From d249d5ea54a53370babb21fd98283e7862ced0fa Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Fri, 26 Nov 2021 11:24:15 -0500
Subject: [PATCH 15/27] Update Bindings

---
 pennylane_lightning/lightning_qubit.py        | 12 ++++--
 pennylane_lightning/src/bindings/Bindings.cpp | 37 ++++++++++---------
 2 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 42667517f9..702c23c872 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -272,7 +272,13 @@ def _compute_vjp_tensordot(self, dy, jac, num=None):
         return math.tensordot(jac, dy_reshaped, [[0], [0]])
 
     def vector_jacobian_product(
-        self, tape, dy, num=None, starting_state=None, use_device_state=False, pybind=False,
+        self,
+        tape,
+        dy,
+        num=None,
+        starting_state=None,
+        use_device_state=False,
+        pybind=False,
     ):
         """Generate the the vector-Jacobian products of a tape.
         
@@ -339,7 +345,7 @@ def vector_jacobian_product(
             )
 
             return self._compute_vjp_tensordot(dy, jac, num=num)
-        
+
         for m in tape.measurements:
             if m.return_type is not Expectation:
                 raise QuantumFunctionError(
@@ -413,7 +419,7 @@ def vector_jacobian_product(
             tp_shift,
             tape.num_params,
         )
-        return vjp_res        
+        return vjp_res
 
     def batch_vector_jacobian_product(
         self, tapes, dys, num=None, reduction="append", starting_state=None, use_device_state=False
diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp
index 421e98a21c..154b12ac4d 100644
--- a/pennylane_lightning/src/bindings/Bindings.cpp
+++ b/pennylane_lightning/src/bindings/Bindings.cpp
@@ -809,9 +809,10 @@ void lightning_class_bindings(py::module &m) {
     class_name = "VectorJacobianProductC" + bitsize;
     py::class_<VectorJacobianProduct<PrecisionT>>(m, class_name.c_str())
         .def(py::init<>())
-        .def("create_ops_list", &VectorJacobianProduct<PrecisionT>::createOpsData)
         .def("create_ops_list",
-             [](VectorJacobianProduct<PrecisionT> &vjp,
+             &VectorJacobianProduct<PrecisionT>::createOpsData)
+        .def("create_ops_list",
+             [](VectorJacobianProduct<PrecisionT> &v,
                 const std::vector<std::string> &ops_name,
                 const std::vector<np_arr_r> &ops_params,
                 const std::vector<std::vector<size_t>> &ops_wires,
@@ -821,7 +822,7 @@ void lightning_class_bindings(py::module &m) {
                      ops_params.size());
                  std::vector<std::vector<std::complex<PrecisionT>>>
                      conv_matrices(ops_matrices.size());
-                 static_cast<void>(adj);
+                 static_cast<void>(v);
                  for (size_t op = 0; op < ops_name.size(); op++) {
                      const auto p_buffer = ops_params[op].request();
                      const auto m_buffer = ops_matrices[op].request();
@@ -843,21 +844,21 @@ void lightning_class_bindings(py::module &m) {
                                             ops_inverses, conv_matrices};
              })
         .def("vjp", &VectorJacobianProduct<PrecisionT>::vectorJacobianProduct)
-        .def("vjp",
-             [](VectorJacobianProduct<PrecisionT> &v,
-                const std::vector<std::vector<T>> &dy, 
-                const StateVecBinder<PrecisionT> &sv,
-                const std::vector<ObsDatum<PrecisionT>> &observables,
-                const OpsData<PrecisionT> &operations,
-                const std::vector<size_t> &trainableParams, size_t num_params) {
-                 std::vector<std::vector<PrecisionT>> jac(
-                     observables.size(),
-                     std::vector<PrecisionT>(num_params, 0));
-                 std::vector<PrecisionT> vjp_res(num_params);
-                 adj.adjointJacobian(vjp_res, jac, dy, sv.getData(), sv.getLength(),
-                                     observables, operations, trainableParams);
-                 return py::array_t<Param_t>(py::cast(vjp_res));
-             });
+        .def("vjp", [](VectorJacobianProduct<PrecisionT> &v,
+                       const std::vector<std::vector<PrecisionT>> &dy,
+                       const StateVecBinder<PrecisionT> &sv,
+                       const std::vector<ObsDatum<PrecisionT>> &observables,
+                       const OpsData<PrecisionT> &operations,
+                       const std::vector<size_t> &trainableParams,
+                       size_t num_params) {
+            std::vector<std::vector<PrecisionT>> jac(
+                observables.size(), std::vector<PrecisionT>(num_params, 0));
+            std::vector<PrecisionT> vjp_res(num_params);
+            v.vectorJacobianProduct(vjp_res, jac, dy, sv.getData(),
+                                    sv.getLength(), observables, operations,
+                                    trainableParams);
+            return py::array_t<Param_t>(py::cast(vjp_res));
+        });
 }
 
 /**

From 09cbfcdd638ea346eabf72e338c6ec13bc1725a8 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Fri, 26 Nov 2021 14:18:52 -0500
Subject: [PATCH 16/27] Fix issue with vjp pybinds and Add more tests

---
 pennylane_lightning/lightning_qubit.py        | 46 +++++++------
 .../src/algorithms/AdjointDiff.hpp            | 15 +++--
 pennylane_lightning/src/bindings/Bindings.cpp |  2 +-
 .../src/tests/Test_VectorJacobianProduct.cpp  | 37 ++++-------
 tests/test_vector_jacobian_product.py         | 66 +++++++++++++------
 5 files changed, 95 insertions(+), 71 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 702c23c872..ba97704dd9 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -278,7 +278,7 @@ def vector_jacobian_product(
         num=None,
         starting_state=None,
         use_device_state=False,
-        pybind=False,
+        vjp_pybind=True,
     ):
         """Generate the the vector-Jacobian products of a tape.
         
@@ -309,11 +309,12 @@ def vector_jacobian_product(
             dy (tensor_like): Gradient-output vector. Must have shape
                 matching the output shape of the corresponding tape.
             num (int): The length of the flattened ``dy`` argument. This is an
-            optional argument, but can be useful to provide if ``dy`` potentially
-            has no shape (for example, due to tracing or just-in-time compilation).
-            starting_state (): ...
-            use_device_state (): ...
-            pybind (bool): 
+                optional argument, but can be useful to provide if ``dy`` potentially
+                has no shape (for example, due to tracing or just-in-time compilation).
+            starting_state (array[int]): Starting state indeces.
+            use_device_state (bool): Use device if `True` in case of `starting_state=None`.  
+            vjp_pybind (bool): Use the `VectorJacobianProduct` class in `lighting.qubit` if `True`, 
+                otherwise `adjoint_jacobian` will be called to compute vjp. 
         Returns:
             tensor_like or None: Vector-Jacobian product. Returns None if the tape
             has no trainable parameters.  
@@ -339,7 +340,7 @@ def vector_jacobian_product(
         except (AttributeError, TypeError):
             pass
 
-        if not pybind:
+        if not vjp_pybind:
             jac = self.adjoint_jacobian(
                 tape, starting_state=starting_state, use_device_state=use_device_state
             )
@@ -404,15 +405,8 @@ def vector_jacobian_product(
             trainable_params if not use_sp else [i - 1 for i in trainable_params[first_elem:]]
         )  # exclude first index if explicitly setting sv
 
-        # jac = adj.adjoint_jacobian(
-        #     StateVectorC128(ket),
-        #     obs_serialized,
-        #     ops_serialized,
-        #     tp_shift,
-        #     tape.num_params,
-        # )
         vjp_res = VJP.vjp(
-            dy,
+            math.reshape(dy, [-1]),
             StateVectorC128(ket),
             obs_serialized,
             ops_serialized,
@@ -422,7 +416,14 @@ def vector_jacobian_product(
         return vjp_res
 
     def batch_vector_jacobian_product(
-        self, tapes, dys, num=None, reduction="append", starting_state=None, use_device_state=False
+        self,
+        tapes,
+        dys,
+        num=None,
+        reduction="append",
+        starting_state=None,
+        use_device_state=False,
+        vjp_pybind=True,
     ):
         """Generate the the vector-Jacobian products of a batch of tapes.
         
@@ -459,9 +460,11 @@ def batch_vector_jacobian_product(
             reduction (str): Determines how the vector-Jacobian products are returned.
                 If ``append``, then the output of the function will be of the form
                 ``List[tensor_like]``, with each element corresponding to the VJP of each
-            starting_state (): ...
-            use_device_state (): ...
                 input tape. If ``extend``, then the output VJPs will be concatenated.
+            starting_state (array[int]): Starting state indeces.
+            use_device_state (bool): Use device if `True` in case of `starting_state=None`.  
+            vjp_pybind (bool): Use the `VectorJacobianProduct` class in `lighting.qubit` if `True`, 
+                otherwise `adjoint_jacobian` will be called to compute vjp. 
         Returns:
             List[tensor_like or None]: list of vector-Jacobian products. ``None`` elements corresponds
             to tapes with no trainable parameters.
@@ -471,7 +474,12 @@ def batch_vector_jacobian_product(
         # Loop through the tapes and dys vector
         for tape, dy in zip(tapes, dys):
             vjp = self.vector_jacobian_product(
-                tape, dy, num=num, starting_state=starting_state, use_device_state=use_device_state
+                tape,
+                dy,
+                num=num,
+                starting_state=starting_state,
+                use_device_state=use_device_state,
+                vjp_pybind=vjp_pybind,
             )
             if vjp is None:
                 if reduction == "append":
diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index 0a7a4dedf2..7bf2c170e9 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -857,7 +857,7 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
      */
     void vectorJacobianProduct(std::vector<T> &vjp,
                                std::vector<std::vector<T>> &jac,
-                               const std::vector<std::vector<T>> &dy,
+                               const std::vector<T> &dy,
                                const std::complex<T> *psi, size_t num_elements,
                                const std::vector<ObsDatum<T>> &observables,
                                const OpsData<T> &operations,
@@ -870,12 +870,13 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
             return;
         }
 
-        const size_t t_len = dy.size() * dy.front().size();
-        std::vector<T> dy_row(t_len);
-        getRowMajor(dy_row, dy, t_len);
+        // std::vector<T> dy_row;
+        // const size_t t_len = dy.size() * dy.front().size();
+        // dy_row.resize(t_len);
+        // getRowMajor(dy_row, dy, t_len);
 
-        const bool allzero = std::all_of(dy_row.cbegin(), dy_row.cend(),
-                                         [](T e) { return e == 0; });
+        const bool allzero =
+            std::all_of(dy.cbegin(), dy.cend(), [](T e) { return e == 0; });
         if (allzero) {
             vjp.resize(num_params);
             return;
@@ -884,7 +885,7 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
         this->adjointJacobian(psi, num_elements, jac, observables, operations,
                               trainableParams, apply_operations);
 
-        tensorDot(vjp, jac, dy_row);
+        tensorDot(vjp, jac, dy);
     }
 }; // class VectorJacobianProduct
 
diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp
index 154b12ac4d..580bae326e 100644
--- a/pennylane_lightning/src/bindings/Bindings.cpp
+++ b/pennylane_lightning/src/bindings/Bindings.cpp
@@ -845,7 +845,7 @@ void lightning_class_bindings(py::module &m) {
              })
         .def("vjp", &VectorJacobianProduct<PrecisionT>::vectorJacobianProduct)
         .def("vjp", [](VectorJacobianProduct<PrecisionT> &v,
-                       const std::vector<std::vector<PrecisionT>> &dy,
+                       const std::vector<PrecisionT> &dy,
                        const StateVecBinder<PrecisionT> &sv,
                        const std::vector<ObsDatum<PrecisionT>> &observables,
                        const OpsData<PrecisionT> &operations,
diff --git a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
index 7c65ceaca4..dfec34413f 100644
--- a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
+++ b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
@@ -48,7 +48,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={0}",
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1, std::vector<double>(num_obs, 0));
+        std::vector<double> dy(num_obs, 0);
 
         for (const auto &p : param) {
             auto ops = VJP.createOpsData({"RX"}, {{p}}, {{0}}, {false});
@@ -83,8 +83,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={1}",
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
+        std::vector<double> dy(num_obs, 1.0);
 
         for (const auto &p : param) {
             auto ops = VJP.createOpsData({"RX"}, {{p}}, {{0}}, {false});
@@ -119,8 +118,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={0.4}",
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 0.4));
+        std::vector<double> dy(num_obs, 0.4);
 
         for (const auto &p : param) {
             auto ops = VJP.createOpsData({"RX"}, {{p}}, {{0}}, {false});
@@ -156,8 +154,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RY, Obs=X dy={0.4}",
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 0.4));
+        std::vector<double> dy(num_obs, 0.4);
 
         for (const auto &p : param) {
             auto ops = VJP.createOpsData({"RY"}, {{p}}, {{0}}, {false});
@@ -192,7 +189,7 @@ TEST_CASE(
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1, std::vector<double>(num_obs, 1));
+        std::vector<double> dy(num_obs, 1);
 
         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
         StateVector<double> psi(cdata.data(), cdata.size());
@@ -228,8 +225,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], "
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 0.4));
+        std::vector<double> dy(num_obs, 0.4);
 
         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
         StateVector<double> psi(cdata.data(), cdata.size());
@@ -273,8 +269,7 @@ TEST_CASE(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<size_t> t_params{0, 2};
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
+        std::vector<double> dy(num_obs, 1);
 
         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
         StateVector<double> psi(cdata.data(), cdata.size());
@@ -316,8 +311,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], "
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 0.4));
+        std::vector<double> dy(num_obs, 0.4);
 
         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
         StateVector<double> psi(cdata.data(), cdata.size());
@@ -356,8 +350,7 @@ TEST_CASE(
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
+        std::vector<double> dy(num_obs, 1);
 
         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
         StateVector<double> psi(cdata.data(), cdata.size());
@@ -412,8 +405,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX], "
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(num_params, 0));
         std::vector<double> vjp_res(num_params);
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, -0.2));
+        std::vector<double> dy(num_obs, -0.2);
 
         std::vector<std::complex<double>> cdata(0b1 << num_qubits);
         StateVector<double> psi(cdata.data(), cdata.size());
@@ -484,8 +476,7 @@ TEST_CASE(
             std::vector<std::vector<double>> jacobian(
                 num_obs, std::vector<double>(num_params, 0));
             std::vector<double> vjp_res(num_params);
-            std::vector<std::vector<double>> dy(
-                1, std::vector<double>(num_obs, 1.0));
+            std::vector<double> dy(num_obs, 1);
 
             std::vector<std::complex<double>> cdata{INVSQRT2<double>(),
                                                     -INVSQRT2<double>()};
@@ -535,8 +526,7 @@ TEST_CASE(
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(t_params.size(), 0));
         std::vector<double> vjp_res(t_params.size());
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, 1.0));
+        std::vector<double> dy(num_obs, 1);
 
         std::vector<std::complex<double>> cdata{ONE<double>(), ZERO<double>(),
                                                 ZERO<double>(), ZERO<double>()};
@@ -594,8 +584,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and "
         std::vector<std::vector<double>> jacobian(
             num_obs, std::vector<double>(t_params.size(), 0));
         std::vector<double> vjp_res(t_params.size());
-        std::vector<std::vector<double>> dy(1,
-                                            std::vector<double>(num_obs, -0.5));
+        std::vector<double> dy(num_obs, -0.5);
 
         std::vector<std::complex<double>> cdata{ONE<double>(), ZERO<double>(),
                                                 ZERO<double>(), ZERO<double>()};
diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
index f668556fd5..00614624c3 100644
--- a/tests/test_vector_jacobian_product.py
+++ b/tests/test_vector_jacobian_product.py
@@ -62,7 +62,8 @@ class TestVectorJacobianProduct:
     def dev(self):
         return qml.device("lightning.qubit", wires=2)
 
-    def test_no_trainable_parameters(self, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_no_trainable_parameters(self, dev, vjp_pybind):
         """A tape with no trainable parameters will simply return None"""
         x = 0.4
 
@@ -73,11 +74,28 @@ def test_no_trainable_parameters(self, dev):
 
         tape.trainable_params = {}
         dy = np.array([1.0])
-        vjp = dev.vector_jacobian_product(tape, dy)
+        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
 
         assert vjp is None
 
-    def test_zero_dy(self, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_no_trainable_parameters_(self, dev, vjp_pybind):
+        """A tape with no trainable parameters will simply return None"""
+        x = 0.4
+
+        with qml.tape.QuantumTape() as tape:
+            qml.RX(x, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape.trainable_params = {}
+        dy = np.array([1.0])
+        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
+
+        assert vjp is None
+
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_zero_dy(self, dev, vjp_pybind):
         """A zero dy vector will return no tapes and a zero matrix"""
         x = 0.4
         y = 0.6
@@ -90,11 +108,12 @@ def test_zero_dy(self, dev):
 
         tape.trainable_params = {0, 1}
         dy = np.array([0.0])
-        vjp = dev.vector_jacobian_product(tape, dy)
+        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
 
         assert np.all(vjp == np.zeros([len(tape.trainable_params)]))
 
-    def test_single_expectation_value(self, tol, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_single_expectation_value(self, tol, dev, vjp_pybind):
         """Tests correct output shape and evaluation for a tape
         with a single expval output"""
         x = 0.543
@@ -109,12 +128,13 @@ def test_single_expectation_value(self, tol, dev):
         tape.trainable_params = {0, 1}
         dy = np.array([1.0])
 
-        vjp = dev.vector_jacobian_product(tape, dy)
+        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
 
         expected = np.array([-np.sin(y) * np.sin(x), np.cos(y) * np.cos(x)])
         assert np.allclose(vjp, expected, atol=tol, rtol=0)
 
-    def test_multiple_expectation_values(self, tol, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_multiple_expectation_values(self, tol, dev, vjp_pybind):
         """Tests correct output shape and evaluation for a tape
         with multiple expval outputs"""
         x = 0.543
@@ -130,12 +150,13 @@ def test_multiple_expectation_values(self, tol, dev):
         tape.trainable_params = {0, 1}
         dy = np.array([1.0, 2.0])
 
-        vjp = dev.vector_jacobian_product(tape, dy)
+        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
 
         expected = np.array([-np.sin(x), 2 * np.cos(y)])
         assert np.allclose(vjp, expected, atol=tol, rtol=0)
 
-    def test_prob_expectation_values(self, tol, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_prob_expectation_values(self, dev, vjp_pybind):
         """Tests correct output shape and evaluation for a tape
         with prob and expval outputs"""
         x = 0.543
@@ -152,7 +173,7 @@ def test_prob_expectation_values(self, tol, dev):
         dy = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
 
         with pytest.raises(qml.QuantumFunctionError, match="Adjoint differentiation method does"):
-            dev.vector_jacobian_product(tape, dy)
+            dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
 
 
 class TestBatchVectorJacobianProduct:
@@ -162,7 +183,8 @@ class TestBatchVectorJacobianProduct:
     def dev(self):
         return qml.device("lightning.qubit", wires=2)
 
-    def test_one_tape_no_trainable_parameters(self, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_one_tape_no_trainable_parameters(self, dev, vjp_pybind):
         """A tape with no trainable parameters will simply return None"""
 
         with qml.tape.QuantumTape() as tape1:
@@ -182,12 +204,13 @@ def test_one_tape_no_trainable_parameters(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert vjps[0] is None
         assert vjps[1] is not None
 
-    def test_all_tapes_no_trainable_parameters(self, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_all_tapes_no_trainable_parameters(self, dev, vjp_pybind):
         """If all tapes have no trainable parameters all outputs will be None"""
 
         with qml.tape.QuantumTape() as tape1:
@@ -207,12 +230,13 @@ def test_all_tapes_no_trainable_parameters(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert vjps[0] is None
         assert vjps[1] is None
 
-    def test_zero_dy(self, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_zero_dy(self, dev, vjp_pybind):
         """A zero dy vector will return no tapes and a zero matrix"""
 
         with qml.tape.QuantumTape() as tape1:
@@ -232,11 +256,12 @@ def test_zero_dy(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([0.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert np.allclose(vjps[0], 0)
 
-    def test_reduction_append(self, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_reduction_append(self, dev, vjp_pybind):
         """Test the 'append' reduction strategy"""
 
         with qml.tape.JacobianTape() as tape1:
@@ -256,13 +281,14 @@ def test_reduction_append(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert len(vjps) == 2
         assert all(isinstance(v, np.ndarray) for v in vjps)
         assert all(len(v) == len(t.trainable_params) for t, v in zip(tapes, vjps))
 
-    def test_reduction_extend(self, dev):
+    @pytest.mark.parametrize("vjp_pybind", [True, False])
+    def test_reduction_extend(self, dev, vjp_pybind):
         """Test the 'extend' reduction strategy"""
 
         with qml.tape.JacobianTape() as tape1:
@@ -282,6 +308,6 @@ def test_reduction_extend(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys)
+        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert sum(len(t) for t in vjps) == sum(len(t.trainable_params) for t in tapes)

From ed76b80fa358f9d36ba02981b8293b210eba99cb Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Fri, 26 Nov 2021 14:28:00 -0500
Subject: [PATCH 17/27] Update #181

---
 .github/CHANGELOG.md                   |  3 +++
 pennylane_lightning/lightning_qubit.py |  2 +-
 tests/test_vector_jacobian_product.py  | 12 ++++++------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
index e087079375..03db8657af 100644
--- a/.github/CHANGELOG.md
+++ b/.github/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ### New features since last release
 
+* Add VJP support to PL-Lightning.
+[(#181)](https://github.com/PennyLaneAI/pennylane-lightning/pull/181)
+
 ### Breaking changes
 
 ### Improvements
diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index ba97704dd9..741def17ea 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -415,7 +415,7 @@ def vector_jacobian_product(
         )
         return vjp_res
 
-    def batch_vector_jacobian_product(
+    def batch_vjp(
         self,
         tapes,
         dys,
diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
index 00614624c3..db8f393d4d 100644
--- a/tests/test_vector_jacobian_product.py
+++ b/tests/test_vector_jacobian_product.py
@@ -177,7 +177,7 @@ def test_prob_expectation_values(self, dev, vjp_pybind):
 
 
 class TestBatchVectorJacobianProduct:
-    """Tests for the batch_vector_jacobian_product function"""
+    """Tests for the batch_vjp function"""
 
     @pytest.fixture
     def dev(self):
@@ -204,7 +204,7 @@ def test_one_tape_no_trainable_parameters(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert vjps[0] is None
         assert vjps[1] is not None
@@ -230,7 +230,7 @@ def test_all_tapes_no_trainable_parameters(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert vjps[0] is None
         assert vjps[1] is None
@@ -256,7 +256,7 @@ def test_zero_dy(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([0.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert np.allclose(vjps[0], 0)
 
@@ -281,7 +281,7 @@ def test_reduction_append(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert len(vjps) == 2
         assert all(isinstance(v, np.ndarray) for v in vjps)
@@ -308,6 +308,6 @@ def test_reduction_extend(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vector_jacobian_product(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
 
         assert sum(len(t) for t in vjps) == sum(len(t.trainable_params) for t in tapes)

From 99e7928b412705becb8529172e789fb0f8412ac1 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Fri, 26 Nov 2021 15:17:00 -0500
Subject: [PATCH 18/27] Apply codecov suggestions

---
 pennylane_lightning/lightning_qubit.py             | 9 +--------
 pennylane_lightning/src/algorithms/AdjointDiff.hpp | 5 -----
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 741def17ea..0b60f7edab 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -19,7 +19,6 @@
 import platform, os, sys
 
 import numpy as np
-from numpy.lib.function_base import vectorize
 from pennylane import (
     BasisState,
     DeviceError,
@@ -272,13 +271,7 @@ def _compute_vjp_tensordot(self, dy, jac, num=None):
         return math.tensordot(jac, dy_reshaped, [[0], [0]])
 
     def vector_jacobian_product(
-        self,
-        tape,
-        dy,
-        num=None,
-        starting_state=None,
-        use_device_state=False,
-        vjp_pybind=True,
+        self, tape, dy, num=None, starting_state=None, use_device_state=False, vjp_pybind=True
     ):
         """Generate the the vector-Jacobian products of a tape.
         
diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index 7bf2c170e9..a024d5e2ea 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -870,11 +870,6 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
             return;
         }
 
-        // std::vector<T> dy_row;
-        // const size_t t_len = dy.size() * dy.front().size();
-        // dy_row.resize(t_len);
-        // getRowMajor(dy_row, dy, t_len);
-
         const bool allzero =
             std::all_of(dy.cbegin(), dy.cend(), [](T e) { return e == 0; });
         if (allzero) {

From 205f874d9dfd213383f99742f87897319699f1ba Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Tue, 30 Nov 2021 15:30:48 -0500
Subject: [PATCH 19/27] Apply code review suggestions

---
 pennylane_lightning/lightning_qubit.py        | 107 ++++-----
 .../src/algorithms/AdjointDiff.cpp            |   3 -
 .../src/algorithms/AdjointDiff.hpp            | 120 ----------
 .../src/algorithms/CMakeLists.txt             |   2 +-
 .../src/algorithms/JacobianProd.cpp           |  19 ++
 .../src/algorithms/JacobianProd.hpp           | 158 +++++++++++++
 pennylane_lightning/src/bindings/Bindings.cpp |   1 +
 pennylane_lightning/src/tests/CMakeLists.txt  |  10 +-
 .../src/tests/Test_VectorJacobianProduct.cpp  |   1 +
 pennylane_lightning/src/util/Util.hpp         |  30 ++-
 tests/test_vector_jacobian_product.py         | 218 +++++++++++++-----
 11 files changed, 412 insertions(+), 257 deletions(-)
 create mode 100644 pennylane_lightning/src/algorithms/JacobianProd.cpp
 create mode 100644 pennylane_lightning/src/algorithms/JacobianProd.hpp

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 0b60f7edab..7c26e57f70 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -251,47 +251,34 @@ def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False):
         )
         return jac
 
-    def _compute_vjp_tensordot(self, dy, jac, num=None):
-        if jac is None:
-            return None
-
-        dy_reshaped = math.reshape(dy, [-1])
-        num = math.shape(dy_reshaped)[0] if num is None else num
-        jac = (
-            math.convert_like(jac, dy_reshaped) if not isinstance(dy_reshaped, np.ndarray) else jac
-        )
-        jac = math.reshape(jac, [num, -1])
-
-        try:
-            if math.allclose(dy, 0):
-                return math.convert_like(np.zeros([jac.shape[1]]), dy)
-        except (AttributeError, TypeError):
-            pass
-
-        return math.tensordot(jac, dy_reshaped, [[0], [0]])
-
-    def vector_jacobian_product(
-        self, tape, dy, num=None, starting_state=None, use_device_state=False, vjp_pybind=True
-    ):
+    def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_state=False):
         """Generate the the vector-Jacobian products of a tape.
         
         Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
+
         .. math::
+
             \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
                 \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
                 \vdots &\ddots &\vdots\\
                 \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
             \end{pmatrix}.
+
         During backpropagation, the chain rule is applied. For example, consider the
         cost function :math:`h = y\circ f: \mathbb{R}^n \rightarrow \mathbb{R}`,
         where :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`.
         The gradient is:
+
         .. math::
+
             \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}} \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
             = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).
+
         Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can write this in the form
         of a matrix multiplication:
+
         .. math:: \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=0}^m d\mathbf{y}_i ~ \mathbf{J}_{ij}.
+
         Thus, we can see that the gradient of the cost function is given by the so-called
         **vector-Jacobian product**; the product of the row-vector :math:`d\mathbf{y}`, representing
         the gradient of subsequent components of the cost function, and :math:`\mathbf{J}`,
@@ -301,13 +288,14 @@ def vector_jacobian_product(
             tape (.QuantumTape): quantum tape to differentiate
             dy (tensor_like): Gradient-output vector. Must have shape
                 matching the output shape of the corresponding tape.
-            num (int): The length of the flattened ``dy`` argument. This is an
-                optional argument, but can be useful to provide if ``dy`` potentially
-                has no shape (for example, due to tracing or just-in-time compilation).
-            starting_state (array[int]): Starting state indeces.
-            use_device_state (bool): Use device if `True` in case of `starting_state=None`.  
-            vjp_pybind (bool): Use the `VectorJacobianProduct` class in `lighting.qubit` if `True`, 
-                otherwise `adjoint_jacobian` will be called to compute vjp. 
+
+        Keyword Args:
+            starting_state (tensor_like): post-forward pass state to start execution with. It should be
+                complex-valued. Takes precedence over ``use_device_state``.
+            use_device_state (bool): use current device state to initialize. A forward pass of the same
+                circuit should be the last thing the device has executed. If a ``starting_state`` is
+                provided, that takes precedence.
+
         Returns:
             tensor_like or None: Vector-Jacobian product. Returns None if the tape
             has no trainable parameters.  
@@ -324,21 +312,11 @@ def vector_jacobian_product(
             # is simply none.
             return None
 
-        try:
-            # If the dy vector is zero, then the
-            # corresponding element of the VJP will be zero,
-            # and we can avoid a quantum computation.
-            if math.allclose(dy, 0):
-                return math.convert_like(np.zeros([num_params]), dy)
-        except (AttributeError, TypeError):
-            pass
-
-        if not vjp_pybind:
-            jac = self.adjoint_jacobian(
-                tape, starting_state=starting_state, use_device_state=use_device_state
-            )
-
-            return self._compute_vjp_tensordot(dy, jac, num=num)
+        # If the dy vector is zero, then the
+        # corresponding element of the VJP will be zero,
+        # and we can avoid a quantum computation.
+        if math.allclose(dy, 0):
+            return math.convert_like(np.zeros([num_params]), dy)
 
         for m in tape.measurements:
             if m.return_type is not Expectation:
@@ -383,7 +361,6 @@ def vector_jacobian_product(
                 self.execute(tape)
             ket = np.ravel(self._pre_rotated_state)
 
-        # adj = AdjointJacobianC128()
         VJP = VectorJacobianProductC128()
 
         obs_serialized = _serialize_obs(tape, self.wire_map)
@@ -398,7 +375,7 @@ def vector_jacobian_product(
             trainable_params if not use_sp else [i - 1 for i in trainable_params[first_elem:]]
         )  # exclude first index if explicitly setting sv
 
-        vjp_res = VJP.vjp(
+        vjp_tensor = VJP.vjp(
             math.reshape(dy, [-1]),
             StateVectorC128(ket),
             obs_serialized,
@@ -406,37 +383,38 @@ def vector_jacobian_product(
             tp_shift,
             tape.num_params,
         )
-        return vjp_res
+        return vjp_tensor
 
     def batch_vjp(
-        self,
-        tapes,
-        dys,
-        num=None,
-        reduction="append",
-        starting_state=None,
-        use_device_state=False,
-        vjp_pybind=True,
+        self, tapes, dys, reduction="append", starting_state=None, use_device_state=False
     ):
         """Generate the the vector-Jacobian products of a batch of tapes.
-        
+
         Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
+
         .. math::
+
             \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
                 \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
                 \vdots &\ddots &\vdots\\
                 \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
             \end{pmatrix}.
+
         During backpropagation, the chain rule is applied. For example, consider the
         cost function :math:`h = y\circ f: \mathbb{R}^n \rightarrow \mathbb{R}`,
         where :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`.
         The gradient is:
+
         .. math::
+
             \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}} \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
             = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).
+
         Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can write this in the form
         of a matrix multiplication:
+
         .. math:: \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=0}^m d\mathbf{y}_i ~ \mathbf{J}_{ij}.
+
         Thus, we can see that the gradient of the cost function is given by the so-called
         **vector-Jacobian product**; the product of the row-vector :math:`d\mathbf{y}`, representing
         the gradient of subsequent components of the cost function, and :math:`\mathbf{J}`,
@@ -447,17 +425,18 @@ def batch_vjp(
             dys (Sequence[tensor_like]): Sequence of gradient-output vectors ``dy``. Must be the
                 same length as ``tapes``. Each ``dy`` tensor should have shape
                 matching the output shape of the corresponding tape.
-            num (int): The length of the flattened ``dy`` argument. This is an
-            optional argument, but can be useful to provide if ``dy`` potentially
-            has no shape (for example, due to tracing or just-in-time compilation).
+
+        Keyword Args:
             reduction (str): Determines how the vector-Jacobian products are returned.
                 If ``append``, then the output of the function will be of the form
                 ``List[tensor_like]``, with each element corresponding to the VJP of each
                 input tape. If ``extend``, then the output VJPs will be concatenated.
-            starting_state (array[int]): Starting state indeces.
-            use_device_state (bool): Use device if `True` in case of `starting_state=None`.  
-            vjp_pybind (bool): Use the `VectorJacobianProduct` class in `lighting.qubit` if `True`, 
-                otherwise `adjoint_jacobian` will be called to compute vjp. 
+            starting_state (tensor_like): post-forward pass state to start execution with. It should be
+                complex-valued. Takes precedence over ``use_device_state``.
+            use_device_state (bool): use current device state to initialize. A forward pass of the same
+                circuit should be the last thing the device has executed. If a ``starting_state`` is
+                provided, that takes precedence.
+
         Returns:
             List[tensor_like or None]: list of vector-Jacobian products. ``None`` elements corresponds
             to tapes with no trainable parameters.
@@ -469,10 +448,8 @@ def batch_vjp(
             vjp = self.vector_jacobian_product(
                 tape,
                 dy,
-                num=num,
                 starting_state=starting_state,
                 use_device_state=use_device_state,
-                vjp_pybind=vjp_pybind,
             )
             if vjp is None:
                 if reduction == "append":
diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.cpp b/pennylane_lightning/src/algorithms/AdjointDiff.cpp
index 3b2bd7d925..4b2b1ec8ad 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.cpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.cpp
@@ -18,9 +18,6 @@
 template class Pennylane::Algorithms::AdjointJacobian<float>;
 template class Pennylane::Algorithms::AdjointJacobian<double>;
 
-template class Pennylane::Algorithms::VectorJacobianProduct<float>;
-template class Pennylane::Algorithms::VectorJacobianProduct<double>;
-
 template class Pennylane::Algorithms::ObsDatum<float>;
 template class Pennylane::Algorithms::ObsDatum<double>;
 template class Pennylane::Algorithms::ObsDatum<std::complex<float>>;
diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
index a024d5e2ea..74217681a9 100644
--- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp
+++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp
@@ -764,125 +764,5 @@ template <class T = double> class AdjointJacobian {
     }
 }; // class AdjointJacobian
 
-/**
- * @brief Represent the class to compute the vector-Jacobian products
- * following the implementation in Pennylane.
- *
- * @tparam T Floating-point precision.
- */
-template <class T = double>
-class VectorJacobianProduct : public AdjointJacobian<T> {
-  private:
-    /**
-     * @brief Computes the vector-Jacobian product for a given vector of
-     * gradient outputs and a Jacobian.
-     *
-     * @param res Prealloacted vector for row-major ordered `jac` matrix
-     * representation.
-     * @param jac Jacobian matrix from `AdjointJacobian`.
-     * @param len Total allocation size of `jac`.
-     */
-    void getRowMajor(std::vector<T> &res,
-                     const std::vector<std::vector<T>> &jac, size_t len = 0U) {
-        if (jac.empty()) {
-            return;
-        }
-
-        const size_t r_len = jac.size();
-        const size_t c_len = jac.front().size();
-        const size_t t_len = len != 0U ? len : r_len * c_len;
-
-        if (res.size() != t_len) {
-            res.resize(t_len);
-        }
-
-        size_t k = 0;
-        for (size_t i = 0; i < r_len; i++) {
-            for (size_t j = 0; j < c_len; j++) {
-                res[k] = jac[i][j];
-                k++;
-            }
-        }
-    }
-
-  public:
-    VectorJacobianProduct() = default;
-
-    /**
-     * @brief Computes the vector-Jacobian product for a given vector of
-     * gradient outputs and a Jacobian.
-     *
-     * @param vjp Preallocated vector for vector-jacobian product data results.
-     * @param jac Jacobian matrix from `AdjointJacobian`.
-     * @param dy_row Gradient-output vector.
-     */
-    void tensorDot(std::vector<T> &vjp, const std::vector<std::vector<T>> &jac,
-                   const std::vector<T> &dy_row) {
-        if (jac.empty() || dy_row.empty()) {
-            vjp.clear();
-            return;
-        }
-
-        const size_t r_len = jac.size();
-        const size_t c_len = jac.front().size();
-        if (dy_row.size() != r_len) {
-            throw std::invalid_argument(
-                "Invalid size for gradient-output vector");
-        }
-
-        const size_t t_len = r_len * c_len;
-        std::vector<T> jac_row(t_len);
-        getRowMajor(jac_row, jac, t_len);
-
-        Util::vecMatrixProd(vjp, dy_row, jac_row, r_len, c_len);
-    }
-
-    /**
-     * @brief Calculates the VectorJacobianProduct for the statevector
-     * for the selected set of parametric gates using `AdjointJacobian`.
-     *
-     * @param vjp Preallocated vector for vector-jacobian product data results
-     * of size `trainableParams.size()`.
-     * @param jac Preallocated Jacobian matrix from `AdjointJacobian` of size
-     * `observables.size() * trainableParams.size()`.
-     * @param psi Pointer to the statevector data.
-     * @param num_elements Length of the statevector data.
-     * @param dy Gradient-output vector.
-     * @param observables Observables for which to calculate Jacobian.
-     * @param operations Operations used to create given state.
-     * @param trainableParams List of parameters participating in Jacobian
-     * calculation.
-     * @param apply_operations Indicate whether to apply operations to psi prior
-     * to calculation.
-     */
-    void vectorJacobianProduct(std::vector<T> &vjp,
-                               std::vector<std::vector<T>> &jac,
-                               const std::vector<T> &dy,
-                               const std::complex<T> *psi, size_t num_elements,
-                               const std::vector<ObsDatum<T>> &observables,
-                               const OpsData<T> &operations,
-                               const std::vector<size_t> &trainableParams,
-                               bool apply_operations = false) {
-        const size_t num_params = trainableParams.size();
-
-        if (num_params == 0U || dy.empty()) {
-            vjp.clear();
-            return;
-        }
-
-        const bool allzero =
-            std::all_of(dy.cbegin(), dy.cend(), [](T e) { return e == 0; });
-        if (allzero) {
-            vjp.resize(num_params);
-            return;
-        }
-
-        this->adjointJacobian(psi, num_elements, jac, observables, operations,
-                              trainableParams, apply_operations);
-
-        tensorDot(vjp, jac, dy);
-    }
-}; // class VectorJacobianProduct
-
 } // namespace Algorithms
 } // namespace Pennylane
\ No newline at end of file
diff --git a/pennylane_lightning/src/algorithms/CMakeLists.txt b/pennylane_lightning/src/algorithms/CMakeLists.txt
index da826874bf..29a50307f2 100644
--- a/pennylane_lightning/src/algorithms/CMakeLists.txt
+++ b/pennylane_lightning/src/algorithms/CMakeLists.txt
@@ -1,7 +1,7 @@
 project(lightning_algorithms LANGUAGES CXX)
 set(CMAKE_CXX_STANDARD 17)
 
-set(ALGORITHM_FILES AdjointDiff.hpp AdjointDiff.cpp CACHE INTERNAL "" FORCE)
+set(ALGORITHM_FILES AdjointDiff.hpp AdjointDiff.cpp JacobianProd.hpp JacobianProd.cpp CACHE INTERNAL "" FORCE)
 add_library(lightning_algorithms STATIC ${ALGORITHM_FILES})
 
 target_link_libraries(lightning_algorithms PRIVATE pennylane_lightning_compile_options
diff --git a/pennylane_lightning/src/algorithms/JacobianProd.cpp b/pennylane_lightning/src/algorithms/JacobianProd.cpp
new file mode 100644
index 0000000000..9768b59284
--- /dev/null
+++ b/pennylane_lightning/src/algorithms/JacobianProd.cpp
@@ -0,0 +1,19 @@
+// Copyright 2021 Xanadu Quantum Technologies Inc.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "JacobianProd.hpp"
+
+// explicit instantiation
+template class Pennylane::Algorithms::VectorJacobianProduct<float>;
+template class Pennylane::Algorithms::VectorJacobianProduct<double>;
\ No newline at end of file
diff --git a/pennylane_lightning/src/algorithms/JacobianProd.hpp b/pennylane_lightning/src/algorithms/JacobianProd.hpp
new file mode 100644
index 0000000000..548834c474
--- /dev/null
+++ b/pennylane_lightning/src/algorithms/JacobianProd.hpp
@@ -0,0 +1,158 @@
+// Copyright 2021 Xanadu Quantum Technologies Inc.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <complex>
+#include <cstring>
+#include <numeric>
+#include <stdexcept>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include "AdjointDiff.hpp"
+#include "Error.hpp"
+#include "StateVector.hpp"
+#include "StateVectorManaged.hpp"
+#include "Util.hpp"
+
+#include <iostream>
+
+namespace Pennylane {
+namespace Algorithms {
+
+/**
+ * @brief Computes vector-Jacobian products on top of the adjoint Jacobian,
+ * following the implementation in Pennylane.
+ *
+ * @tparam T Floating-point precision.
+ */
+template <class T = double>
+class VectorJacobianProduct : public AdjointJacobian<T> {
+  private:
+    /**
+     * @brief Copies a Jacobian stored as a vector of rows into a flat,
+     * row-major ordered vector. Does nothing when `jac` is empty.
+     *
+     * @param res Preallocated vector for row-major ordered `jac` matrix
+     * representation; resized when its size differs from the target size.
+     * @param jac Jacobian matrix from `AdjointJacobian`.
+     * @param len Target size of `res`; 0 means rows * columns of `jac`.
+     */
+    void getRowMajor(std::vector<T> &res,
+                     const std::vector<std::vector<T>> &jac, size_t len = 0U) {
+        if (jac.empty()) {
+            return;
+        }
+
+        const size_t r_len = jac.size();
+        const size_t c_len = jac.front().size();
+        const size_t t_len = len != 0U ? len : r_len * c_len;
+
+        if (res.size() != t_len) {
+            res.resize(t_len);
+        }
+
+        size_t k = 0;
+        for (size_t i = 0; i < r_len; i++) {
+            for (size_t j = 0; j < c_len; j++) {
+                res[k] = jac[i][j];
+                k++;
+            }
+        }
+    }
+
+  public:
+    VectorJacobianProduct() = default;
+
+    /**
+     * @brief Computes the vector-Jacobian product for a given vector of
+     * gradient outputs and a Jacobian.
+     *
+     * @param vjp Output vector for the product; cleared if an input is empty.
+     * @param jac Jacobian matrix from `AdjointJacobian`.
+     * @param dy_row Gradient-output vector; one entry per row of `jac`.
+     */
+    void tensorDot(std::vector<T> &vjp, const std::vector<std::vector<T>> &jac,
+                   const std::vector<T> &dy_row) {
+        if (jac.empty() || dy_row.empty()) {
+            vjp.clear();
+            return;
+        }
+
+        const size_t r_len = jac.size();
+        const size_t c_len = jac.front().size();
+        if (dy_row.size() != r_len) {
+            throw std::invalid_argument(
+                "Invalid size for gradient-output vector");
+        }
+
+        const size_t t_len = r_len * c_len;
+        std::vector<T> jac_row(t_len);
+        getRowMajor(jac_row, jac, t_len);
+
+        Util::vecMatrixProd(vjp, dy_row, jac_row, r_len, c_len);
+    }
+
+    /**
+     * @brief Calculates the VectorJacobianProduct for the statevector
+     * for the selected set of parametric gates using `AdjointJacobian`.
+     *
+     * @param vjp Preallocated vector for vector-jacobian product data results
+     * of size `trainableParams.size()`.
+     * @param jac Preallocated Jacobian matrix from `AdjointJacobian` of size
+     * `observables.size() * trainableParams.size()`.
+     * @param psi Pointer to the statevector data.
+     * @param num_elements Length of the statevector data.
+     * @param dy Gradient-output vector; an all-zero `dy` yields a zero `vjp`.
+     * @param observables Observables for which to calculate Jacobian.
+     * @param operations Operations used to create given state.
+     * @param trainableParams List of parameters participating in Jacobian
+     * calculation.
+     * @param apply_operations Indicate whether to apply operations to psi prior
+     * to calculation.
+     */
+    void vectorJacobianProduct(std::vector<T> &vjp,
+                               std::vector<std::vector<T>> &jac,
+                               const std::vector<T> &dy,
+                               const std::complex<T> *psi, size_t num_elements,
+                               const std::vector<ObsDatum<T>> &observables,
+                               const OpsData<T> &operations,
+                               const std::vector<size_t> &trainableParams,
+                               bool apply_operations = false) {
+        const size_t num_params = trainableParams.size();
+
+        if (num_params == 0U || dy.empty()) {
+            vjp.clear();
+            return;
+        }
+
+        const bool allzero =
+            std::all_of(dy.cbegin(), dy.cend(), [](T e) { return e == 0; });
+        if (allzero) { // zero dy => zero VJP; skip the adjoint pass
+            vjp.assign(num_params, T{0}); // overwrite stale caller data
+            return;
+        }
+
+        this->adjointJacobian(psi, num_elements, jac, observables, operations,
+                              trainableParams, apply_operations);
+
+        tensorDot(vjp, jac, dy);
+    }
+}; // class VectorJacobianProduct
+
+} // namespace Algorithms
+} // namespace Pennylane
\ No newline at end of file
diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp
index 580bae326e..89fdf80312 100644
--- a/pennylane_lightning/src/bindings/Bindings.cpp
+++ b/pennylane_lightning/src/bindings/Bindings.cpp
@@ -17,6 +17,7 @@
 #include <vector>
 
 #include "AdjointDiff.hpp"
+#include "JacobianProd.hpp"
 #include "StateVector.hpp"
 #include "pybind11/complex.h"
 #include "pybind11/numpy.h"
diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt
index fda5ca67be..5b05bf70ab 100644
--- a/pennylane_lightning/src/tests/CMakeLists.txt
+++ b/pennylane_lightning/src/tests/CMakeLists.txt
@@ -32,14 +32,14 @@ include(Catch)
 add_executable(runner runner_main.cpp)
 target_link_libraries(runner lightning_simulator lightning_utils lightning_algorithms Catch2::Catch2)
 
-target_sources(runner PRIVATE   Test_Util.cpp
-                                Test_AdjDiff.cpp
-                                Test_VectorJacobianProduct.cpp
+target_sources(runner PRIVATE   Test_AdjDiff.cpp
+                                Test_Bindings.cpp
                                 Test_StateVector_Nonparam.cpp 
                                 Test_StateVector_Param.cpp 
                                 Test_StateVectorManaged_Nonparam.cpp 
                                 Test_StateVectorManaged_Param.cpp 
-                                Test_Bindings.cpp
+                                Test_Util.cpp
+                                Test_VectorJacobianProduct.cpp
 )
 
 target_compile_options(runner PRIVATE "$<$<CONFIG:DEBUG>:-Wall>")
@@ -49,4 +49,4 @@ if(ENABLE_NATIVE)
     target_compile_options(runner PRIVATE -march=native)
 endif()
 
-catch_discover_tests(runner)
+catch_discover_tests(runner)
\ No newline at end of file
diff --git a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
index dfec34413f..cf39b97f9d 100644
--- a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
+++ b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp
@@ -13,6 +13,7 @@
 #include <catch2/catch.hpp>
 
 #include "AdjointDiff.hpp"
+#include "JacobianProd.hpp"
 #include "StateVector.hpp"
 #include "Util.hpp"
 
diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index 9497e22c9e..baa4356353 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -504,7 +504,18 @@ inline auto matrixVecProd(const std::vector<std::complex<T>> mat,
 
 /**
  * @brief Calculates transpose of a matrix recursively and Cache-Friendly
- * using blacking and Cache-optimized techniques.
+ * using blocking and Cache-optimized techniques.
+ *
+ * @tparam T Floating point precision type.
+ * @tparam BLOCKSIZE Size of submatrices in the blocking technique.
+ * @param mat Data array representing a flattened (row-wise) m * n matrix.
+ * @param mat_t Pre-allocated data array to store the transpose of `mat`.
+ * @param m Number of rows of `mat`.
+ * @param n Number of columns of `mat`.
+ * @param m1 Index of the first row.
+ * @param m2 Index of the last row.
+ * @param n1 Index of the first column.
+ * @param n2 Index of the last column.
  */
 template <class T, size_t BLOCKSIZE = 32> // NOLINT(readability-magic-numbers)
 inline static void CFTranspose(const T *mat, T *mat_t, size_t m, size_t n,
@@ -609,8 +620,8 @@ inline auto vecMatrixProd(const std::vector<T> &v_in, const std::vector<T> &mat,
 /**
  * @brief Calculates the vactor-matrix product using the best available method.
  *
- * @see inline void vecMatrixProd(const T *v_in,
- * const T *mat, T *v_out, size_t m, size_t n)
+ * @see inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t
+ * m, size_t n)
  */
 template <class T>
 inline void vecMatrixProd(std::vector<T> &v_out, const std::vector<T> &v_in,
@@ -630,7 +641,18 @@ inline void vecMatrixProd(std::vector<T> &v_out, const std::vector<T> &v_in,
 
 /**
  * @brief Calculates transpose of a matrix recursively and Cache-Friendly
- * using blacking and Cache-optimized techniques.
+ * using blocking and Cache-optimized techniques.
+ *
+ * @tparam T Floating point precision type.
+ * @tparam BLOCKSIZE Size of submatrices in the blocking technique.
+ * @param mat Data array representing a flattened (row-wise) m * n matrix.
+ * @param mat_t Pre-allocated data array to store the transpose of `mat`.
+ * @param m Number of rows of `mat`.
+ * @param n Number of columns of `mat`.
+ * @param m1 Index of the first row.
+ * @param m2 Index of the last row.
+ * @param n1 Index of the first column.
+ * @param n2 Index of the last column.
  */
 template <class T, size_t BLOCKSIZE = 32> // NOLINT(readability-magic-numbers)
 inline static void CFTranspose(const std::complex<T> *mat,
diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
index db8f393d4d..e14f461eb6 100644
--- a/tests/test_vector_jacobian_product.py
+++ b/tests/test_vector_jacobian_product.py
@@ -20,50 +20,160 @@
 from pennylane import numpy as np
 
 
-class TestComputeVJPTensordot:
-    """Tests for the numeric computation of VJPs' Tensordots"""
+class TestVectorJacobianProduct:
+    """Tests for the vector_jacobian_product function"""
 
     @pytest.fixture
     def dev(self):
         return qml.device("lightning.qubit", wires=2)
 
-    def test_computation(self, dev):
-        """Test that the correct VJP is returned"""
-        dy = np.array([[1.0, 2.0], [3.0, 4.0]])
-        jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
+    def test_use_device_state(self, tol, dev):
+        """Tests that when using the device state, the correct answer is still returned."""
 
-        vjp = dev._compute_vjp_tensordot(dy, jac)
+        x, y, z = [0.5, 0.3, -0.7]
 
-        assert vjp.shape == (3,)
-        assert np.all(vjp == np.tensordot(dy, jac, axes=[[0, 1], [0, 1]]))
+        with qml.tape.JacobianTape() as tape:
+            qml.RX(0.4, wires=[0])
+            qml.Rot(x, y, z, wires=[0])
+            qml.RY(-0.2, wires=[0])
+            qml.expval(qml.PauliZ(0))
 
-    def test_jacobian_is_none(self, dev):
-        """A None Jacobian returns a None VJP"""
+        tape.trainable_params = {1, 2, 3}
 
-        dy = np.array([[1.0, 2.0], [3.0, 4.0]])
-        jac = None
+        dy = np.array([1.0])
 
-        vjp = dev._compute_vjp_tensordot(dy, jac)
-        assert vjp is None
+        vjp1 = dev.vector_jacobian_product(tape, dy)
 
-    def test_zero_dy(self, dev):
-        """A zero dy vector will return a zero matrix"""
-        dy = np.zeros([2, 2])
-        jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
+        tape.execute(dev)
+        vjp2 = dev.vector_jacobian_product(tape, dy, use_device_state=True)
 
-        vjp = dev._compute_vjp_tensordot(dy, jac)
-        assert np.all(vjp == np.zeros([3]))
+        assert np.allclose(vjp1, vjp2, atol=tol, rtol=0)
 
+    def test_provide_starting_state(self, tol, dev):
+        """Tests provides correct answer when provided starting state."""
+        x, y, z = [0.5, 0.3, -0.7]
 
-class TestVectorJacobianProduct:
-    """Tests for the vector_jacobian_product function"""
+        with qml.tape.JacobianTape() as tape:
+            qml.RX(0.4, wires=[0])
+            qml.Rot(x, y, z, wires=[0])
+            qml.RY(-0.2, wires=[0])
+            qml.expval(qml.PauliZ(0))
 
-    @pytest.fixture
-    def dev(self):
-        return qml.device("lightning.qubit", wires=2)
+        tape.trainable_params = {1, 2, 3}
+
+        dy = np.array([1.0])
+
+        vjp1 = dev.vector_jacobian_product(tape, dy)
+
+        tape.execute(dev)
+        vjp2 = dev.vector_jacobian_product(tape, dy, starting_state=dev._pre_rotated_state)
+
+        assert np.allclose(vjp1, vjp2, atol=tol, rtol=0)
+
+    def test_not_expval(self, dev):
+        """Test if a QuantumFunctionError is raised for a tape with measurements that are not
+        expectation values"""
+
+        with qml.tape.JacobianTape() as tape:
+            qml.RX(0.1, wires=0)
+            qml.var(qml.PauliZ(0))
+
+        dy = np.array([1.0])
+
+        with pytest.raises(qml.QuantumFunctionError, match="Adjoint differentiation method does"):
+            dev.vector_jacobian_product(tape, dy)
+
+    def test_finite_shots_warns(self):
+        """Tests warning raised when finite shots specified"""
+
+        dev = qml.device("lightning.qubit", wires=1, shots=1)
+
+        with qml.tape.JacobianTape() as tape:
+            qml.expval(qml.PauliZ(0))
+
+        dy = np.array([1.0])
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_no_trainable_parameters(self, dev, vjp_pybind):
+        with pytest.warns(
+            UserWarning, match="Requested adjoint differentiation to be computed with finite shots."
+        ):
+            dev.vector_jacobian_product(tape, dy)
+
+    from pennylane_lightning import LightningQubit as lq
+
+    @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required")
+    def test_unsupported_op(self, dev):
+        """Test if a QuantumFunctionError is raised for an unsupported operation, i.e.,
+        multi-parameter operations that are not qml.Rot"""
+
+        with qml.tape.JacobianTape() as tape:
+            qml.CRot(0.1, 0.2, 0.3, wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        dy = np.array([1.0])
+
+        with pytest.raises(
+            qml.QuantumFunctionError, match="The CRot operation is not supported using the"
+        ):
+            dev.vector_jacobian_product(tape, dy)
+
+        with qml.tape.JacobianTape() as tape:
+            qml.SingleExcitation(0.1, wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        with pytest.raises(
+            qml.QuantumFunctionError,
+            match="The SingleExcitation operation is not supported using the",
+        ):
+            dev.vector_jacobian_product(tape, dy)
+
+    @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required")
+    def test_proj_unsupported(self, dev):
+        """Test if a QuantumFunctionError is raised for a Projector observable"""
+        with qml.tape.JacobianTape() as tape:
+            qml.CRX(0.1, wires=[0, 1])
+            qml.expval(qml.Projector([0, 1], wires=[0, 1]))
+
+        dy = np.array([1.0])
+
+        with pytest.raises(
+            qml.QuantumFunctionError, match="differentiation method does not support the Projector"
+        ):
+            dev.vector_jacobian_product(tape, dy)
+
+        with qml.tape.JacobianTape() as tape:
+            qml.CRX(0.1, wires=[0, 1])
+            qml.expval(qml.Projector([0], wires=[0]) @ qml.PauliZ(0))
+
+        with pytest.raises(
+            qml.QuantumFunctionError, match="differentiation method does not support the Projector"
+        ):
+            dev.vector_jacobian_product(tape, dy)
+
+    @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required")
+    def test_unsupported_hermitian_expectation(self, dev):
+        obs = np.array([[1, 0], [0, -1]], dtype=np.complex128, requires_grad=False)
+
+        with qml.tape.JacobianTape() as tape:
+            qml.RY(0.1, wires=(0,))
+            qml.expval(qml.Hermitian(obs, wires=(0,)))
+
+        dy = np.array([1.0])
+
+        with pytest.raises(
+            qml.QuantumFunctionError, match="Lightning adjoint differentiation method does not"
+        ):
+            dev.vector_jacobian_product(tape, dy)
+
+        with qml.tape.JacobianTape() as tape:
+            qml.RY(0.1, wires=(0,))
+            qml.expval(qml.Hermitian(obs, wires=(0,)) @ qml.PauliZ(wires=1))
+
+        with pytest.raises(
+            qml.QuantumFunctionError, match="Lightning adjoint differentiation method does not"
+        ):
+            dev.vector_jacobian_product(tape, dy)
+
+    def test_no_trainable_parameters(self, dev):
         """A tape with no trainable parameters will simply return None"""
         x = 0.4
 
@@ -74,12 +184,11 @@ def test_no_trainable_parameters(self, dev, vjp_pybind):
 
         tape.trainable_params = {}
         dy = np.array([1.0])
-        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
+        vjp = dev.vector_jacobian_product(tape, dy)
 
         assert vjp is None
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_no_trainable_parameters_(self, dev, vjp_pybind):
+    def test_no_trainable_parameters_(self, dev):
         """A tape with no trainable parameters will simply return None"""
         x = 0.4
 
@@ -90,12 +199,11 @@ def test_no_trainable_parameters_(self, dev, vjp_pybind):
 
         tape.trainable_params = {}
         dy = np.array([1.0])
-        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
+        vjp = dev.vector_jacobian_product(tape, dy)
 
         assert vjp is None
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_zero_dy(self, dev, vjp_pybind):
+    def test_zero_dy(self, dev):
         """A zero dy vector will return no tapes and a zero matrix"""
         x = 0.4
         y = 0.6
@@ -108,12 +216,11 @@ def test_zero_dy(self, dev, vjp_pybind):
 
         tape.trainable_params = {0, 1}
         dy = np.array([0.0])
-        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
+        vjp = dev.vector_jacobian_product(tape, dy)
 
         assert np.all(vjp == np.zeros([len(tape.trainable_params)]))
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_single_expectation_value(self, tol, dev, vjp_pybind):
+    def test_single_expectation_value(self, tol, dev):
         """Tests correct output shape and evaluation for a tape
         with a single expval output"""
         x = 0.543
@@ -128,13 +235,12 @@ def test_single_expectation_value(self, tol, dev, vjp_pybind):
         tape.trainable_params = {0, 1}
         dy = np.array([1.0])
 
-        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
+        vjp = dev.vector_jacobian_product(tape, dy)
 
         expected = np.array([-np.sin(y) * np.sin(x), np.cos(y) * np.cos(x)])
         assert np.allclose(vjp, expected, atol=tol, rtol=0)
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_multiple_expectation_values(self, tol, dev, vjp_pybind):
+    def test_multiple_expectation_values(self, tol, dev):
         """Tests correct output shape and evaluation for a tape
         with multiple expval outputs"""
         x = 0.543
@@ -150,13 +256,12 @@ def test_multiple_expectation_values(self, tol, dev, vjp_pybind):
         tape.trainable_params = {0, 1}
         dy = np.array([1.0, 2.0])
 
-        vjp = dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
+        vjp = dev.vector_jacobian_product(tape, dy)
 
         expected = np.array([-np.sin(x), 2 * np.cos(y)])
         assert np.allclose(vjp, expected, atol=tol, rtol=0)
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_prob_expectation_values(self, dev, vjp_pybind):
+    def test_prob_expectation_values(self, dev):
         """Tests correct output shape and evaluation for a tape
         with prob and expval outputs"""
         x = 0.543
@@ -173,7 +278,7 @@ def test_prob_expectation_values(self, dev, vjp_pybind):
         dy = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
 
         with pytest.raises(qml.QuantumFunctionError, match="Adjoint differentiation method does"):
-            dev.vector_jacobian_product(tape, dy, vjp_pybind=vjp_pybind)
+            dev.vector_jacobian_product(tape, dy)
 
 
 class TestBatchVectorJacobianProduct:
@@ -183,8 +288,7 @@ class TestBatchVectorJacobianProduct:
     def dev(self):
         return qml.device("lightning.qubit", wires=2)
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_one_tape_no_trainable_parameters(self, dev, vjp_pybind):
+    def test_one_tape_no_trainable_parameters(self, dev):
         """A tape with no trainable parameters will simply return None"""
 
         with qml.tape.QuantumTape() as tape1:
@@ -204,13 +308,12 @@ def test_one_tape_no_trainable_parameters(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys)
 
         assert vjps[0] is None
         assert vjps[1] is not None
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_all_tapes_no_trainable_parameters(self, dev, vjp_pybind):
+    def test_all_tapes_no_trainable_parameters(self, dev):
         """If all tapes have no trainable parameters all outputs will be None"""
 
         with qml.tape.QuantumTape() as tape1:
@@ -230,13 +333,12 @@ def test_all_tapes_no_trainable_parameters(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys)
 
         assert vjps[0] is None
         assert vjps[1] is None
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_zero_dy(self, dev, vjp_pybind):
+    def test_zero_dy(self, dev):
         """A zero dy vector will return no tapes and a zero matrix"""
 
         with qml.tape.QuantumTape() as tape1:
@@ -256,12 +358,11 @@ def test_zero_dy(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([0.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys)
 
         assert np.allclose(vjps[0], 0)
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_reduction_append(self, dev, vjp_pybind):
+    def test_reduction_append(self, dev):
         """Test the 'append' reduction strategy"""
 
         with qml.tape.JacobianTape() as tape1:
@@ -281,14 +382,13 @@ def test_reduction_append(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys)
 
         assert len(vjps) == 2
         assert all(isinstance(v, np.ndarray) for v in vjps)
         assert all(len(v) == len(t.trainable_params) for t, v in zip(tapes, vjps))
 
-    @pytest.mark.parametrize("vjp_pybind", [True, False])
-    def test_reduction_extend(self, dev, vjp_pybind):
+    def test_reduction_extend(self, dev):
         """Test the 'extend' reduction strategy"""
 
         with qml.tape.JacobianTape() as tape1:
@@ -308,6 +408,6 @@ def test_reduction_extend(self, dev, vjp_pybind):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, vjp_pybind=vjp_pybind)
+        vjps = dev.batch_vjp(tapes, dys)
 
         assert sum(len(t) for t in vjps) == sum(len(t.trainable_params) for t in tapes)

From 25b93e82cd1bba122e7858f3637141c40b80f53e Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Tue, 30 Nov 2021 16:09:31 -0500
Subject: [PATCH 20/27] Fix rendering math formulas in docs

---
 pennylane_lightning/lightning_qubit.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 7c26e57f70..9cef172057 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -252,7 +252,7 @@ def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False):
         return jac
 
     def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_state=False):
-        """Generate the the vector-Jacobian products of a tape.
+        r"""Generate the the vector-Jacobian products of a tape.
         
         Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
 
@@ -388,7 +388,7 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
     def batch_vjp(
         self, tapes, dys, reduction="append", starting_state=None, use_device_state=False
     ):
-        """Generate the the vector-Jacobian products of a batch of tapes.
+        r"""Generate the the vector-Jacobian products of a batch of tapes.
 
         Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
 

From b9416e60bc7900b16c0b17632d0a60750fd43bf6 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Tue, 30 Nov 2021 16:15:31 -0500
Subject: [PATCH 21/27] Apply code factor suggestions

---
 pennylane_lightning/lightning_qubit.py | 66 ++------------------------
 1 file changed, 3 insertions(+), 63 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 9cef172057..3f52767122 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -252,37 +252,7 @@ def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False):
         return jac
 
     def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_state=False):
-        r"""Generate the the vector-Jacobian products of a tape.
-        
-        Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
-
-        .. math::
-
-            \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
-                \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
-                \vdots &\ddots &\vdots\\
-                \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
-            \end{pmatrix}.
-
-        During backpropagation, the chain rule is applied. For example, consider the
-        cost function :math:`h = y\circ f: \mathbb{R}^n \rightarrow \mathbb{R}`,
-        where :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`.
-        The gradient is:
-
-        .. math::
-
-            \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}} \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
-            = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).
-
-        Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can write this in the form
-        of a matrix multiplication:
-
-        .. math:: \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=0}^m d\mathbf{y}_i ~ \mathbf{J}_{ij}.
-
-        Thus, we can see that the gradient of the cost function is given by the so-called
-        **vector-Jacobian product**; the product of the row-vector :math:`d\mathbf{y}`, representing
-        the gradient of subsequent components of the cost function, and :math:`\mathbf{J}`,
-        the Jacobian of the current node of interest.
+        """Generate the vector-Jacobian products of a tape.
 
         Args:
             tape (.QuantumTape): quantum tape to differentiate
@@ -298,7 +268,7 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
 
         Returns:
             tensor_like or None: Vector-Jacobian product. Returns None if the tape
-            has no trainable parameters.  
+            has no trainable parameters.
         """
         if self.shots is not None:
             warn(
@@ -388,37 +358,7 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
     def batch_vjp(
         self, tapes, dys, reduction="append", starting_state=None, use_device_state=False
     ):
-        r"""Generate the the vector-Jacobian products of a batch of tapes.
-
-        Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by
-
-        .. math::
-
-            \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
-                \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
-                \vdots &\ddots &\vdots\\
-                \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
-            \end{pmatrix}.
-
-        During backpropagation, the chain rule is applied. For example, consider the
-        cost function :math:`h = y\circ f: \mathbb{R}^n \rightarrow \mathbb{R}`,
-        where :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`.
-        The gradient is:
-
-        .. math::
-
-            \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}} \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
-            = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).
-
-        Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can write this in the form
-        of a matrix multiplication:
-
-        .. math:: \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=0}^m d\mathbf{y}_i ~ \mathbf{J}_{ij}.
-
-        Thus, we can see that the gradient of the cost function is given by the so-called
-        **vector-Jacobian product**; the product of the row-vector :math:`d\mathbf{y}`, representing
-        the gradient of subsequent components of the cost function, and :math:`\mathbf{J}`,
-        the Jacobian of the current node of interest.
+        """Generate the vector-Jacobian products of a batch of tapes.
 
         Args:
             tapes (Sequence[.QuantumTape]): sequence of quantum tapes to differentiate

From 30534c05cbf125af18dca8344bb6068a31c03d2d Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Tue, 30 Nov 2021 16:32:05 -0500
Subject: [PATCH 22/27] Update python tests

---
 tests/test_vector_jacobian_product.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
index e14f461eb6..0d4c9cad3e 100644
--- a/tests/test_vector_jacobian_product.py
+++ b/tests/test_vector_jacobian_product.py
@@ -382,7 +382,7 @@ def test_reduction_append(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys)
+        vjps = dev.batch_vjp(tapes, dys, reduction="append")
 
         assert len(vjps) == 2
         assert all(isinstance(v, np.ndarray) for v in vjps)
@@ -408,6 +408,6 @@ def test_reduction_extend(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys)
+        vjps = dev.batch_vjp(tapes, dys, reduction="extend")
 
-        assert sum(len(t) for t in vjps) == sum(len(t.trainable_params) for t in tapes)
+        assert len(vjps) == sum(len(t.trainable_params) for t in tapes)

From 20b1e12de790e57aed265f36812d7cce34cbef34 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Tue, 30 Nov 2021 16:55:22 -0500
Subject: [PATCH 23/27] Update vector_jacobian_product method

---
 pennylane_lightning/lightning_qubit.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 3f52767122..29ed3c9f39 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -276,15 +276,12 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
                 " The derivative is always exact when using the adjoint differentiation method.",
                 UserWarning,
             )
+
         num_params = len(tape.trainable_params)
+
         if num_params == 0:
-            # The tape has no trainable parameters; the VJP
-            # is simply none.
             return None
 
-        # If the dy vector is zero, then the
-        # corresponding element of the VJP will be zero,
-        # and we can avoid a quantum computation.
         if math.allclose(dy, 0):
             return math.convert_like(np.zeros([num_params]), dy)
 
@@ -345,7 +342,7 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
             trainable_params if not use_sp else [i - 1 for i in trainable_params[first_elem:]]
         )  # exclude first index if explicitly setting sv
 
-        vjp_tensor = VJP.vjp(
+        return VJP.vjp(
             math.reshape(dy, [-1]),
             StateVectorC128(ket),
             obs_serialized,
@@ -353,7 +350,6 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
             tp_shift,
             tape.num_params,
         )
-        return vjp_tensor
 
     def batch_vjp(
         self, tapes, dys, reduction="append", starting_state=None, use_device_state=False

From c0f42024f3764a0e8c427211dcf16af753ec282b Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Tue, 30 Nov 2021 17:52:14 -0500
Subject: [PATCH 24/27] Add adjoint_diff_support_check method

---
 pennylane_lightning/lightning_qubit.py        | 65 +++++++------------
 .../src/algorithms/JacobianProd.hpp           | 16 -----
 2 files changed, 23 insertions(+), 58 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 29ed3c9f39..14c6914ed6 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -174,17 +174,15 @@ def apply_lightning(self, state, operations):
 
         return np.reshape(state_vector, state.shape)
 
-    def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False):
-        if self.shots is not None:
-            warn(
-                "Requested adjoint differentiation to be computed with finite shots."
-                " The derivative is always exact when using the adjoint differentiation method.",
-                UserWarning,
-            )
+    def adjoint_diff_support_check(self, tape):
+        """Check Lightning adjoint differentiation method support for a tape.
 
-        if len(tape.trainable_params) == 0:
-            return np.array(0)
+        Raise ``QuantumFunctionError`` in case of not supported measurements, observables,
+        or operations in the Lightning adjoint differentiation method for a given tape.
 
+        Args:
+            tape (.QuantumTape): quantum tape to differentiate
+        """
         for m in tape.measurements:
             if m.return_type is not Expectation:
                 raise QuantumFunctionError(
@@ -219,6 +217,20 @@ def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False):
                     'the "adjoint" differentiation method'
                 )
 
+    def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False):
+        if self.shots is not None:
+            warn(
+                "Requested adjoint differentiation to be computed with finite shots."
+                " The derivative is always exact when using the adjoint differentiation method.",
+                UserWarning,
+            )
+
+        if len(tape.trainable_params) == 0:
+            return np.array(0)
+
+        # Check adjoint diff support
+        self.adjoint_diff_support_check(tape)
+
         # Initialization of state
         if starting_state is not None:
             ket = np.ravel(starting_state)
@@ -285,39 +297,8 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
         if math.allclose(dy, 0):
             return math.convert_like(np.zeros([num_params]), dy)
 
-        for m in tape.measurements:
-            if m.return_type is not Expectation:
-                raise QuantumFunctionError(
-                    "Adjoint differentiation method does not support"
-                    f" measurement {m.return_type.value}"
-                )
-            if not isinstance(m.obs, qml.operation.Tensor):
-                if isinstance(m.obs, qml.Projector):
-                    raise QuantumFunctionError(
-                        "Adjoint differentiation method does not support the Projector observable"
-                    )
-                if isinstance(m.obs, qml.Hermitian):
-                    raise QuantumFunctionError(
-                        "Lightning adjoint differentiation method does not currently support the Hermitian observable"
-                    )
-            else:
-                if any([isinstance(o, qml.Projector) for o in m.obs.non_identity_obs]):
-                    raise QuantumFunctionError(
-                        "Adjoint differentiation method does not support the Projector observable"
-                    )
-                if any([isinstance(o, qml.Hermitian) for o in m.obs.non_identity_obs]):
-                    raise QuantumFunctionError(
-                        "Lightning adjoint differentiation method does not currently support the Hermitian observable"
-                    )
-
-        for op in tape.operations:
-            if (
-                op.num_params > 1 and not isinstance(op, qml.Rot)
-            ) or op.name in UNSUPPORTED_PARAM_GATES_ADJOINT:
-                raise QuantumFunctionError(
-                    f"The {op.name} operation is not supported using "
-                    'the "adjoint" differentiation method'
-                )
+        # Check adjoint diff support
+        self.adjoint_diff_support_check(tape)
 
         # Initialization of state
         if starting_state is not None:
diff --git a/pennylane_lightning/src/algorithms/JacobianProd.hpp b/pennylane_lightning/src/algorithms/JacobianProd.hpp
index 548834c474..3f69819e54 100644
--- a/pennylane_lightning/src/algorithms/JacobianProd.hpp
+++ b/pennylane_lightning/src/algorithms/JacobianProd.hpp
@@ -13,23 +13,7 @@
 // limitations under the License.
 #pragma once
 
-#include <complex>
-#include <cstring>
-#include <numeric>
-#include <stdexcept>
-#include <type_traits>
-#include <unordered_map>
-#include <utility>
-#include <variant>
-#include <vector>
-
 #include "AdjointDiff.hpp"
-#include "Error.hpp"
-#include "StateVector.hpp"
-#include "StateVectorManaged.hpp"
-#include "Util.hpp"
-
-#include <iostream>
 
 namespace Pennylane {
 namespace Algorithms {

From 3d0bf52fe8ec9855ce5fda64003a9fcf0caded46 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Tue, 30 Nov 2021 18:14:57 -0500
Subject: [PATCH 25/27] Add more tests for batch_vjp

---
 pennylane_lightning/lightning_qubit.py |  6 ++--
 tests/test_vector_jacobian_product.py  | 50 ++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 14c6914ed6..0192adb48f 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -20,6 +20,7 @@
 
 import numpy as np
 from pennylane import (
+    math,
     BasisState,
     DeviceError,
     QuantumFunctionError,
@@ -27,7 +28,6 @@
     QubitUnitary,
 )
 import pennylane as qml
-from pennylane import math
 from pennylane.devices import DefaultQubit
 from pennylane.operation import Expectation
 
@@ -177,8 +177,8 @@ def apply_lightning(self, state, operations):
     def adjoint_diff_support_check(self, tape):
         """Check Lightning adjoint differentiation method support for a tape.
 
-        Raise ``QuantumFunctionError`` in case of not supported measurements, observables,
-        or operations in the Lightning adjoint differentiation method for a given tape.
+        Raise ``QuantumFunctionError`` if ``tape`` contains measurements, observables,
+        or operations that are not supported by the Lightning adjoint differentiation method.
 
         Args:
             tape (.QuantumTape): quantum tape to differentiate
diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
index 0d4c9cad3e..5c18dee3f5 100644
--- a/tests/test_vector_jacobian_product.py
+++ b/tests/test_vector_jacobian_product.py
@@ -388,6 +388,32 @@ def test_reduction_append(self, dev):
         assert all(isinstance(v, np.ndarray) for v in vjps)
         assert all(len(v) == len(t.trainable_params) for t, v in zip(tapes, vjps))
 
+    def test_reduction_append_callable(self, dev):
+        """Test the 'append' reduction strategy passed as a callable (``list.append``)"""
+
+        with qml.tape.JacobianTape() as tape1:
+            qml.RX(0.4, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        with qml.tape.JacobianTape() as tape2:
+            qml.RX(0.4, wires=0)
+            qml.RX(0.6, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape1.trainable_params = {0}
+        tape2.trainable_params = {0, 1}
+
+        tapes = [tape1, tape2]
+        dys = [np.array([1.0]), np.array([1.0])]
+
+        vjps = dev.batch_vjp(tapes, dys, reduction=list.append)
+
+        assert len(vjps) == 2
+        assert all(isinstance(v, np.ndarray) for v in vjps)
+        assert all(len(v) == len(t.trainable_params) for t, v in zip(tapes, vjps))
+
     def test_reduction_extend(self, dev):
         """Test the 'extend' reduction strategy"""
 
@@ -411,3 +437,27 @@ def test_reduction_extend(self, dev):
         vjps = dev.batch_vjp(tapes, dys, reduction="extend")
 
         assert len(vjps) == sum(len(t.trainable_params) for t in tapes)
+
+    def test_reduction_extend_callable(self, dev):
+        """Test the 'extend' reduction strategy passed as a callable (``list.extend``)"""
+
+        with qml.tape.JacobianTape() as tape1:
+            qml.RX(0.4, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        with qml.tape.JacobianTape() as tape2:
+            qml.RX(0.4, wires=0)
+            qml.RX(0.6, wires=0)
+            qml.CNOT(wires=[0, 1])
+            qml.expval(qml.PauliZ(0))
+
+        tape1.trainable_params = {0}
+        tape2.trainable_params = {0, 1}
+
+        tapes = [tape1, tape2]
+        dys = [np.array([1.0]), np.array([1.0])]
+
+        vjps = dev.batch_vjp(tapes, dys, reduction=list.extend)
+
+        assert len(vjps) == sum(len(t.trainable_params) for t in tapes)

From 6bca8307884d297655a3e849540ab7d0f11e7819 Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Wed, 1 Dec 2021 12:10:29 -0500
Subject: [PATCH 26/27] Update VJP Python bindings

---
 pennylane_lightning/lightning_qubit.py        |  67 ++++++++--
 .../src/algorithms/JacobianProd.hpp           |  33 ++++-
 pennylane_lightning/src/bindings/Bindings.cpp |  13 +-
 pennylane_lightning/src/tests/Test_Util.cpp   |  24 ++--
 pennylane_lightning/src/util/Util.hpp         |  30 +++--
 tests/test_vector_jacobian_product.py         | 121 +++++++++++++++---
 6 files changed, 237 insertions(+), 51 deletions(-)

diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py
index 0192adb48f..a0797a2196 100644
--- a/pennylane_lightning/lightning_qubit.py
+++ b/pennylane_lightning/lightning_qubit.py
@@ -279,8 +279,8 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
                 provided, that takes precedence.
 
         Returns:
-            tensor_like or None: Vector-Jacobian product. Returns None if the tape
-            has no trainable parameters.
+            tuple[array or None, tensor_like or None]: A tuple of the adjoint Jacobian and the vector-Jacobian
+            product. Both are ``None`` if the tape has no trainable parameters; the Jacobian is ``None`` if ``dy`` is zero.
         """
         if self.shots is not None:
             warn(
@@ -292,10 +292,10 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
         num_params = len(tape.trainable_params)
 
         if num_params == 0:
-            return None
+            return None, None
 
         if math.allclose(dy, 0):
-            return math.convert_like(np.zeros([num_params]), dy)
+            return None, math.convert_like(np.zeros([num_params]), dy)
 
         # Check adjoint diff support
         self.adjoint_diff_support_check(tape)
@@ -323,7 +323,7 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
             trainable_params if not use_sp else [i - 1 for i in trainable_params[first_elem:]]
         )  # exclude first index if explicitly setting sv
 
-        return VJP.vjp(
+        jac, vjp = VJP.vjp(
             math.reshape(dy, [-1]),
             StateVectorC128(ket),
             obs_serialized,
@@ -331,6 +331,52 @@ def vector_jacobian_product(self, tape, dy, starting_state=None, use_device_stat
             tp_shift,
             tape.num_params,
         )
+        return jac, vjp
+
+    def compute_vjp(self, dy, jac, num=None):
+        """Convenience function to compute the vector-Jacobian product for a given
+        vector of gradient outputs and a Jacobian.
+
+        Args:
+            dy (tensor_like): vector of gradient outputs
+            jac (tensor_like): Jacobian matrix. For an n-dimensional ``dy``
+                vector, the first n-dimensions of ``jac`` should match
+                the shape of ``dy``.
+
+        Keyword Args:
+            num (int): The length of the flattened ``dy`` argument. This is an
+                optional argument, but can be useful to provide if ``dy`` potentially
+                has no shape (for example, due to tracing or just-in-time compilation).
+
+        Returns:
+            tensor_like: the vector-Jacobian product
+        """
+        if jac is None:
+            return None
+
+        dy_row = math.reshape(dy, [-1])
+
+        if num is None:
+            num = math.shape(dy_row)[0]
+
+        if not isinstance(dy_row, np.ndarray):
+            jac = math.convert_like(jac, dy_row)
+
+        jac = math.reshape(jac, [num, -1])
+        num_params = jac.shape[1]
+
+        if math.allclose(dy, 0):
+            return math.convert_like(np.zeros([num_params]), dy)
+
+        VJP = VectorJacobianProductC128()
+
+        vjp_tensor = VJP.compute_vjp_from_jac(
+            math.reshape(jac, [-1]),
+            dy_row,
+            num,
+            num_params,
+        )
+        return vjp_tensor
 
     def batch_vjp(
         self, tapes, dys, reduction="append", starting_state=None, use_device_state=False
@@ -355,14 +401,16 @@ def batch_vjp(
                 provided, that takes precedence.
 
         Returns:
-            List[tensor_like or None]: list of vector-Jacobian products. ``None`` elements corresponds
+            tuple[List[array or None], List[tensor_like or None]]: A tuple containing a list
+            of adjoint-jacobians and a list of vector-Jacobian products. ``None`` elements correspond
             to tapes with no trainable parameters.
         """
         vjps = []
+        jacs = []
 
         # Loop through the tapes and dys vector
         for tape, dy in zip(tapes, dys):
-            vjp = self.vector_jacobian_product(
+            jac, vjp = self.vector_jacobian_product(
                 tape,
                 dy,
                 starting_state=starting_state,
@@ -371,13 +419,16 @@ def batch_vjp(
             if vjp is None:
                 if reduction == "append":
                     vjps.append(None)
+                    jacs.append(jac)
                 continue
             if isinstance(reduction, str):
                 getattr(vjps, reduction)(vjp)
+                getattr(jacs, reduction)(jac)
             elif callable(reduction):
                 reduction(vjps, vjp)
+                reduction(jacs, jac)
 
-        return vjps
+        return jacs, vjps
 
 
 if not CPP_BINARY_AVAILABLE:
diff --git a/pennylane_lightning/src/algorithms/JacobianProd.hpp b/pennylane_lightning/src/algorithms/JacobianProd.hpp
index 3f69819e54..8c6d2967cd 100644
--- a/pennylane_lightning/src/algorithms/JacobianProd.hpp
+++ b/pennylane_lightning/src/algorithms/JacobianProd.hpp
@@ -70,8 +70,8 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
      * @param jac Jacobian matrix from `AdjointJacobian`.
      * @param dy_row Gradient-output vector.
      */
-    void tensorDot(std::vector<T> &vjp, const std::vector<std::vector<T>> &jac,
-                   const std::vector<T> &dy_row) {
+    void computeVJP(std::vector<T> &vjp, const std::vector<std::vector<T>> &jac,
+                    const std::vector<T> &dy_row) {
         if (jac.empty() || dy_row.empty()) {
             vjp.clear();
             return;
@@ -81,7 +81,7 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
         const size_t c_len = jac.front().size();
         if (dy_row.size() != r_len) {
             throw std::invalid_argument(
-                "Invalid size for gradient-output vector");
+                "Invalid size for the gradient-output vector");
         }
 
         const size_t t_len = r_len * c_len;
@@ -91,6 +91,31 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
         Util::vecMatrixProd(vjp, dy_row, jac_row, r_len, c_len);
     }
 
+    /**
+     * @brief Computes the vector-Jacobian product for a given vector of
+     * gradient outputs and a Jacobian.
+     *
+     * @param vjp Preallocated vector for vector-jacobian product data results.
+     * @param jac Row-wise flatten Jacobian matrix of shape m * n.
+     * @param dy_row Gradient-output vector.
+     * @param m Number of rows of `jac`.
+     * @param n Number of columns of `jac`.
+     */
+    void _computeVJP(std::vector<T> &vjp, const std::vector<T> &jac,
+                     const std::vector<T> &dy_row, size_t m, size_t n) {
+        if (jac.empty() || dy_row.empty()) {
+            vjp.clear();
+            return;
+        }
+
+        if (dy_row.size() != m) {
+            throw std::invalid_argument(
+                "Invalid size for the gradient-output vector");
+        }
+
+        Util::vecMatrixProd(vjp, dy_row, jac, m, n);
+    }
+
     /**
      * @brief Calculates the VectorJacobianProduct for the statevector
      * for the selected set of parametric gates using `AdjointJacobian`.
@@ -134,7 +159,7 @@ class VectorJacobianProduct : public AdjointJacobian<T> {
         this->adjointJacobian(psi, num_elements, jac, observables, operations,
                               trainableParams, apply_operations);
 
-        tensorDot(vjp, jac, dy);
+        computeVJP(vjp, jac, dy);
     }
 }; // class VectorJacobianProduct
 
diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp
index 89fdf80312..aa11bf0857 100644
--- a/pennylane_lightning/src/bindings/Bindings.cpp
+++ b/pennylane_lightning/src/bindings/Bindings.cpp
@@ -844,6 +844,16 @@ void lightning_class_bindings(py::module &m) {
                  return OpsData<PrecisionT>{ops_name, conv_params, ops_wires,
                                             ops_inverses, conv_matrices};
              })
+        .def("compute_vjp_from_jac",
+             &VectorJacobianProduct<PrecisionT>::computeVJP)
+        .def("compute_vjp_from_jac",
+             [](VectorJacobianProduct<PrecisionT> &v,
+                const std::vector<PrecisionT> &jac,
+                const std::vector<PrecisionT> &dy_row, size_t m, size_t n) {
+                 std::vector<PrecisionT> vjp_res(n);
+                 v._computeVJP(vjp_res, jac, dy_row, m, n);
+                 return py::array_t<Param_t>(py::cast(vjp_res));
+             })
         .def("vjp", &VectorJacobianProduct<PrecisionT>::vectorJacobianProduct)
         .def("vjp", [](VectorJacobianProduct<PrecisionT> &v,
                        const std::vector<PrecisionT> &dy,
@@ -858,7 +868,8 @@ void lightning_class_bindings(py::module &m) {
             v.vectorJacobianProduct(vjp_res, jac, dy, sv.getData(),
                                     sv.getLength(), observables, operations,
                                     trainableParams);
-            return py::array_t<Param_t>(py::cast(vjp_res));
+            return py::make_tuple(py::array_t<Param_t>(py::cast(jac)),
+                                  py::array_t<Param_t>(py::cast(vjp_res)));
         });
 }
 
diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp
index dce88d8930..7fa4bc6992 100644
--- a/pennylane_lightning/src/tests/Test_Util.cpp
+++ b/pennylane_lightning/src/tests/Test_Util.cpp
@@ -193,8 +193,10 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util]", float, double) {
                               Contains("Invalid size for the input vector"));
             CHECK_THROWS_AS(Util::matrixVecProd(mat, v_in, 2, 2),
                             std::invalid_argument);
-            CHECK_THROWS_WITH(Util::matrixVecProd(mat, v_in, 2, 2),
-                              Contains("Invalid m & n for the input matrix"));
+            CHECK_THROWS_WITH(
+                Util::matrixVecProd(mat, v_in, 2, 2),
+                Contains(
+                    "Invalid number of rows and columns for the input matrix"));
         }
     }
     SECTION("vecMatrixProd") {
@@ -286,8 +288,10 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util]", float, double) {
             using namespace Catch::Matchers;
             std::vector<std::complex<double>> mat(2 * 3, {1, 1});
             CHECK_THROWS_AS(Util::Transpose(mat, 2, 2), std::invalid_argument);
-            CHECK_THROWS_WITH(Util::Transpose(mat, 2, 2),
-                              Contains("Invalid m & n for the input matrix"));
+            CHECK_THROWS_WITH(
+                Util::Transpose(mat, 2, 2),
+                Contains(
+                    "Invalid number of rows and columns for the input matrix"));
         }
     }
     SECTION("matrixMatProd") {
@@ -371,14 +375,14 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util]", float, double) {
             std::vector<std::complex<double>> m_right(3 * 4, {1, 1});
             CHECK_THROWS_AS(Util::matrixMatProd(m_left, m_right, 2, 3, 4),
                             std::invalid_argument);
-            CHECK_THROWS_WITH(
-                Util::matrixMatProd(m_left, m_right, 2, 3, 4),
-                Contains("Invalid m & k for the input left matrix"));
+            CHECK_THROWS_WITH(Util::matrixMatProd(m_left, m_right, 2, 3, 4),
+                              Contains("Invalid number of rows and columns for "
+                                       "the input left matrix"));
             CHECK_THROWS_AS(Util::matrixMatProd(m_left, m_right, 2, 3, 3),
                             std::invalid_argument);
-            CHECK_THROWS_WITH(
-                Util::matrixMatProd(m_left, m_right, 2, 3, 3),
-                Contains("Invalid k & n for the input right matrix"));
+            CHECK_THROWS_WITH(Util::matrixMatProd(m_left, m_right, 2, 3, 3),
+                              Contains("Invalid number of rows and columns for "
+                                       "the input right matrix"));
         }
     }
 }
diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp
index baa4356353..f7af4b4707 100644
--- a/pennylane_lightning/src/util/Util.hpp
+++ b/pennylane_lightning/src/util/Util.hpp
@@ -491,7 +491,8 @@ inline auto matrixVecProd(const std::vector<std::complex<T>> mat,
                           size_t n, bool transpose = false)
     -> std::vector<std::complex<T>> {
     if (mat.size() != m * n) {
-        throw std::invalid_argument("Invalid m & n for the input matrix");
+        throw std::invalid_argument(
+            "Invalid number of rows and columns for the input matrix");
     }
     if (v_in.size() != n) {
         throw std::invalid_argument("Invalid size for the input vector");
@@ -608,7 +609,8 @@ inline auto vecMatrixProd(const std::vector<T> &v_in, const std::vector<T> &mat,
         throw std::invalid_argument("Invalid size for the input vector");
     }
     if (mat.size() != m * n) {
-        throw std::invalid_argument("Invalid m & n for the input matrix");
+        throw std::invalid_argument(
+            "Invalid number of rows and columns for the input matrix");
     }
 
     std::vector<T> v_out(n);
@@ -627,7 +629,8 @@ template <class T>
 inline void vecMatrixProd(std::vector<T> &v_out, const std::vector<T> &v_in,
                           const std::vector<T> &mat, size_t m, size_t n) {
     if (mat.size() != m * n) {
-        throw std::invalid_argument("Invalid m & n for the input matrix");
+        throw std::invalid_argument(
+            "Invalid number of rows and columns for the input matrix");
     }
     if (v_in.size() != m) {
         throw std::invalid_argument("Invalid size for the input vector");
@@ -702,7 +705,8 @@ template <class T>
 inline auto Transpose(const std::vector<std::complex<T>> mat, size_t m,
                       size_t n) -> std::vector<std::complex<T>> {
     if (mat.size() != m * n) {
-        throw std::invalid_argument("Invalid m & n for the input matrix");
+        throw std::invalid_argument(
+            "Invalid number of rows and columns for the input matrix");
     }
 
     std::vector<std::complex<T>> mat_t(n * m);
@@ -715,9 +719,9 @@ inline auto Transpose(const std::vector<std::complex<T>> mat, size_t m,
  *
  * @tparam T Floating point precision type.
  * @tparam STRIDE Size of stride in the cache-blocking technique
- * @param m_left Row-wise flatten matrix of size m * k.
- * @param m_right Row-wise flatten matrix of size k * n.
- * @param m_out Pre-allocated row-wise flatten matrix of size m * n.
+ * @param m_left Row-wise flatten matrix of shape m * k.
+ * @param m_right Row-wise flatten matrix of shape k * n.
+ * @param m_out Pre-allocated row-wise flatten matrix of shape m * n.
  * @param m Number of rows of `m_left`.
  * @param n Number of columns of `m_right`.
  * @param k Number of rows of `m_right`.
@@ -786,9 +790,9 @@ inline void omp_matrixMatProd(const std::complex<T> *m_left,
  * @brief Calculates matrix-matrix product using the best avaiable method.
  *
  * @tparam T Floating point precision type.
- * @param m_left Row-wise flatten matrix of size m * k.
- * @param m_right Row-wise flatten matrix of size k * n.
- * @param m_out Pre-allocated row-wise flatten matrix of size m * n.
+ * @param m_left Row-wise flatten matrix of shape m * k.
+ * @param m_right Row-wise flatten matrix of shape k * n.
+ * @param m_out Pre-allocated row-wise flatten matrix of shape m * n.
  * @param m Number of rows of `m_left`.
  * @param n Number of columns of `m_right`.
  * @param k Number of rows of `m_right`.
@@ -837,10 +841,12 @@ inline auto matrixMatProd(const std::vector<std::complex<T>> m_left,
                           size_t n, size_t k, bool transpose = false)
     -> std::vector<std::complex<T>> {
     if (m_left.size() != m * k) {
-        throw std::invalid_argument("Invalid m & k for the input left matrix");
+        throw std::invalid_argument(
+            "Invalid number of rows and columns for the input left matrix");
     }
     if (m_right.size() != k * n) {
-        throw std::invalid_argument("Invalid k & n for the input right matrix");
+        throw std::invalid_argument(
+            "Invalid number of rows and columns for the input right matrix");
     }
 
     std::vector<std::complex<T>> m_out(m * n);
diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
index 5c18dee3f5..97012ae22b 100644
--- a/tests/test_vector_jacobian_product.py
+++ b/tests/test_vector_jacobian_product.py
@@ -20,6 +20,59 @@
 from pennylane import numpy as np
 
 
+class TestComputeVJP:
+    """Tests for the numeric computation of VJPs"""
+
+    @pytest.fixture
+    def dev(self):
+        return qml.device("lightning.qubit", wires=2)
+
+    def test_computation(self, dev):
+        """Test that the correct VJP is returned"""
+        dy = np.array([[1.0, 2.0], [3.0, 4.0]])
+        jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
+
+        vjp = dev.compute_vjp(dy, jac)
+
+        assert vjp.shape == (3,)
+        assert np.all(vjp == np.tensordot(dy, jac, axes=[[0, 1], [0, 1]]))
+
+    def test_computation_num(self, dev):
+        """Test that the correct VJP is returned when ``num`` is given explicitly"""
+        dy = np.array([[1.0, 2.0], [3.0, 4.0]])
+        jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
+
+        vjp = dev.compute_vjp(dy, jac, num=4)
+
+        assert vjp.shape == (3,)
+        assert np.all(vjp == np.tensordot(dy, jac, axes=[[0, 1], [0, 1]]))
+
+    def test_computation_num_error(self, dev):
+        """Test that an error is raised when ``num`` does not match the size of ``dy``"""
+        dy = np.array([[1.0, 2.0], [3.0, 4.0]])
+        jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
+
+        with pytest.raises(ValueError, match="Invalid size for the gradient-output vector"):
+            dev.compute_vjp(dy, jac, num=3)
+
+    def test_jacobian_is_none(self, dev):
+        """A None Jacobian returns a None VJP"""
+
+        dy = np.array([[1.0, 2.0], [3.0, 4.0]])
+        jac = None
+
+        vjp = dev.compute_vjp(dy, jac)
+        assert vjp is None
+
+    def test_zero_dy(self, dev):
+        """A zero dy vector will return a zero matrix"""
+        dy = np.zeros([2, 2])
+        jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
+
+        vjp = dev.compute_vjp(dy, jac)
+        assert np.all(vjp == np.zeros([3]))
+
+
 class TestVectorJacobianProduct:
     """Tests for the vector_jacobian_product function"""
 
@@ -42,12 +95,13 @@ def test_use_device_state(self, tol, dev):
 
         dy = np.array([1.0])
 
-        vjp1 = dev.vector_jacobian_product(tape, dy)
+        jac1, vjp1 = dev.vector_jacobian_product(tape, dy)
 
         tape.execute(dev)
-        vjp2 = dev.vector_jacobian_product(tape, dy, use_device_state=True)
+        jac2, vjp2 = dev.vector_jacobian_product(tape, dy, use_device_state=True)
 
         assert np.allclose(vjp1, vjp2, atol=tol, rtol=0)
+        assert np.allclose(jac1, jac2, atol=tol, rtol=0)
 
     def test_provide_starting_state(self, tol, dev):
         """Tests provides correct answer when provided starting state."""
@@ -63,12 +117,13 @@ def test_provide_starting_state(self, tol, dev):
 
         dy = np.array([1.0])
 
-        vjp1 = dev.vector_jacobian_product(tape, dy)
+        jac1, vjp1 = dev.vector_jacobian_product(tape, dy)
 
         tape.execute(dev)
-        vjp2 = dev.vector_jacobian_product(tape, dy, starting_state=dev._pre_rotated_state)
+        jac2, vjp2 = dev.vector_jacobian_product(tape, dy, starting_state=dev._pre_rotated_state)
 
         assert np.allclose(vjp1, vjp2, atol=tol, rtol=0)
+        assert np.allclose(jac1, jac2, atol=tol, rtol=0)
 
     def test_not_expval(self, dev):
         """Test if a QuantumFunctionError is raised for a tape with measurements that are not
@@ -184,9 +239,10 @@ def test_no_trainable_parameters(self, dev):
 
         tape.trainable_params = {}
         dy = np.array([1.0])
-        vjp = dev.vector_jacobian_product(tape, dy)
+        jac, vjp = dev.vector_jacobian_product(tape, dy)
 
         assert vjp is None
+        assert jac is None
 
     def test_no_trainable_parameters_(self, dev):
         """A tape with no trainable parameters will simply return None"""
@@ -199,9 +255,10 @@ def test_no_trainable_parameters_(self, dev):
 
         tape.trainable_params = {}
         dy = np.array([1.0])
-        vjp = dev.vector_jacobian_product(tape, dy)
+        jac, vjp = dev.vector_jacobian_product(tape, dy)
 
         assert vjp is None
+        assert jac is None
 
     def test_zero_dy(self, dev):
         """A zero dy vector will return no tapes and a zero matrix"""
@@ -216,9 +273,10 @@ def test_zero_dy(self, dev):
 
         tape.trainable_params = {0, 1}
         dy = np.array([0.0])
-        vjp = dev.vector_jacobian_product(tape, dy)
+        jac, vjp = dev.vector_jacobian_product(tape, dy)
 
         assert np.all(vjp == np.zeros([len(tape.trainable_params)]))
+        assert jac is None
 
     def test_single_expectation_value(self, tol, dev):
         """Tests correct output shape and evaluation for a tape
@@ -235,10 +293,13 @@ def test_single_expectation_value(self, tol, dev):
         tape.trainable_params = {0, 1}
         dy = np.array([1.0])
 
-        vjp = dev.vector_jacobian_product(tape, dy)
+        jac1, vjp = dev.vector_jacobian_product(tape, dy)
+
+        jac2 = dev.adjoint_jacobian(tape)
 
         expected = np.array([-np.sin(y) * np.sin(x), np.cos(y) * np.cos(x)])
         assert np.allclose(vjp, expected, atol=tol, rtol=0)
+        assert np.allclose(jac1, jac2, atol=tol, rtol=0)
 
     def test_multiple_expectation_values(self, tol, dev):
         """Tests correct output shape and evaluation for a tape
@@ -256,10 +317,13 @@ def test_multiple_expectation_values(self, tol, dev):
         tape.trainable_params = {0, 1}
         dy = np.array([1.0, 2.0])
 
-        vjp = dev.vector_jacobian_product(tape, dy)
+        jac1, vjp = dev.vector_jacobian_product(tape, dy)
+
+        jac2 = dev.adjoint_jacobian(tape)
 
         expected = np.array([-np.sin(x), 2 * np.cos(y)])
         assert np.allclose(vjp, expected, atol=tol, rtol=0)
+        assert np.allclose(jac1, jac2, atol=tol, rtol=0)
 
     def test_prob_expectation_values(self, dev):
         """Tests correct output shape and evaluation for a tape
@@ -308,11 +372,14 @@ def test_one_tape_no_trainable_parameters(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys)
+        jacs, vjps = dev.batch_vjp(tapes, dys)
 
         assert vjps[0] is None
         assert vjps[1] is not None
 
+        assert jacs[0] is None
+        assert jacs[1] is not None
+
     def test_all_tapes_no_trainable_parameters(self, dev):
         """If all tapes have no trainable parameters all outputs will be None"""
 
@@ -333,11 +400,14 @@ def test_all_tapes_no_trainable_parameters(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys)
+        jacs, vjps = dev.batch_vjp(tapes, dys)
 
         assert vjps[0] is None
         assert vjps[1] is None
 
+        assert jacs[0] is None
+        assert jacs[1] is None
+
     def test_zero_dy(self, dev):
         """A zero dy vector will return no tapes and a zero matrix"""
 
@@ -358,9 +428,10 @@ def test_zero_dy(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([0.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys)
+        jacs, vjps = dev.batch_vjp(tapes, dys)
 
         assert np.allclose(vjps[0], 0)
+        assert jacs[0] is None
 
     def test_reduction_append(self, dev):
         """Test the 'append' reduction strategy"""
@@ -382,12 +453,18 @@ def test_reduction_append(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, reduction="append")
+        jacs, vjps = dev.batch_vjp(tapes, dys, reduction="append")
+
+        jac0 = dev.adjoint_jacobian(tape1)
+        jac1 = dev.adjoint_jacobian(tape2)
 
         assert len(vjps) == 2
         assert all(isinstance(v, np.ndarray) for v in vjps)
         assert all(len(v) == len(t.trainable_params) for t, v in zip(tapes, vjps))
 
+        assert np.allclose(jacs[0], jac0)
+        assert np.allclose(jacs[1], jac1)
+
     def test_reduction_append_callable(self, dev):
         """Test the 'append' reduction strategy"""
 
@@ -408,12 +485,18 @@ def test_reduction_append_callable(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, reduction=list.append)
+        jacs, vjps = dev.batch_vjp(tapes, dys, reduction=list.append)
+
+        jac0 = dev.adjoint_jacobian(tape1)
+        jac1 = dev.adjoint_jacobian(tape2)
 
         assert len(vjps) == 2
         assert all(isinstance(v, np.ndarray) for v in vjps)
         assert all(len(v) == len(t.trainable_params) for t, v in zip(tapes, vjps))
 
+        assert np.allclose(jacs[0], jac0)
+        assert np.allclose(jacs[1], jac1)
+
     def test_reduction_extend(self, dev):
         """Test the 'extend' reduction strategy"""
 
@@ -434,10 +517,16 @@ def test_reduction_extend(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, reduction="extend")
+        jacs, vjps = dev.batch_vjp(tapes, dys, reduction="extend")
+
+        jac0 = dev.adjoint_jacobian(tape1)
+        jac1 = dev.adjoint_jacobian(tape2)
 
         assert len(vjps) == sum(len(t.trainable_params) for t in tapes)
 
+        assert np.allclose(jacs[0], jac0)
+        assert np.allclose(jacs[1], jac1)
+
     def test_reduction_extend_callable(self, dev):
         """Test the 'extend' reduction strategy"""
 
@@ -458,6 +547,6 @@ def test_reduction_extend_callable(self, dev):
         tapes = [tape1, tape2]
         dys = [np.array([1.0]), np.array([1.0])]
 
-        vjps = dev.batch_vjp(tapes, dys, reduction=list.extend)
+        _, vjps = dev.batch_vjp(tapes, dys, reduction=list.extend)
 
         assert len(vjps) == sum(len(t.trainable_params) for t in tapes)

From 07423020b1f7bf5dae9c5f16fbb06082f75bca2a Mon Sep 17 00:00:00 2001
From: Ali Asadi <ali@xanadu.ai>
Date: Wed, 1 Dec 2021 12:23:00 -0500
Subject: [PATCH 27/27] Update tests

---
 tests/test_vector_jacobian_product.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/test_vector_jacobian_product.py b/tests/test_vector_jacobian_product.py
index 97012ae22b..9377a8fd70 100644
--- a/tests/test_vector_jacobian_product.py
+++ b/tests/test_vector_jacobian_product.py
@@ -27,25 +27,27 @@ class TestComputeVJP:
     def dev(self):
         return qml.device("lightning.qubit", wires=2)
 
-    def test_computation(self, dev):
+    def test_computation(self, tol, dev):
         """Test that the correct VJP is returned"""
         dy = np.array([[1.0, 2.0], [3.0, 4.0]])
         jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
 
         vjp = dev.compute_vjp(dy, jac)
+        expected = np.tensordot(dy, jac, axes=[[0, 1], [0, 1]])
 
         assert vjp.shape == (3,)
-        assert np.all(vjp == np.tensordot(dy, jac, axes=[[0, 1], [0, 1]]))
+        assert np.allclose(vjp, expected, atol=tol, rtol=0)
 
-    def test_computation_num(self, dev):
+    def test_computation_num(self, tol, dev):
         """Test that the correct VJP is returned"""
         dy = np.array([[1.0, 2.0], [3.0, 4.0]])
         jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]])
 
         vjp = dev.compute_vjp(dy, jac, num=4)
+        expected = np.tensordot(dy, jac, axes=[[0, 1], [0, 1]])
 
         assert vjp.shape == (3,)
-        assert np.all(vjp == np.tensordot(dy, jac, axes=[[0, 1], [0, 1]]))
+        assert np.allclose(vjp, expected, atol=tol, rtol=0)
 
     def test_computation_num_error(self, dev):
         """Test that the correct VJP is returned"""