Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add inverse support for gate operation in lightning.tensor #753

Merged
merged 21 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Release 0.37.0-dev

### New features since last release
* Add `inverse` support for gate operations in `lightning.tensor` in the C++ layer.
multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved
[(#753)](https://github.com/PennyLaneAI/pennylane-lightning/pull/753)

* Add `observable` and `expval` support to `cutensornet` backed `lightning.tensor` C++ layer.
[(#728)](https://github.com/PennyLaneAI/pennylane-lightning/pull/728)

Expand Down Expand Up @@ -29,6 +32,9 @@

### Improvements

* Set `state_tensor` as `const` for the `MeasurementTNCuda` class.
[(#753)](https://github.com/PennyLaneAI/pennylane-lightning/pull/753)

* Updated Kokkos version and support to 4.3.01.
[(#725)](https://github.com/PennyLaneAI/pennylane-lightning/pull/725)

Expand Down
3 changes: 2 additions & 1 deletion pennylane_lightning/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.37.0-dev28"

multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved
__version__ = "0.37.0-dev29"
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
public:
MPSTNCuda() = delete;

// TODO: Add method to the constructor to allow the user to select methods at
multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved
// runtime in the C++ layer
explicit MPSTNCuda(const std::size_t numQubits,
const std::size_t maxBondDim)
: BaseType(numQubits), maxBondDim_(maxBondDim),
Expand All @@ -90,6 +92,8 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
initTensors_();
}

// TODO: Add method to the constructor to allow the user to select methods at
// runtime in the C++ layer
explicit MPSTNCuda(const std::size_t numQubits,
const std::size_t maxBondDim, DevTag<int> dev_tag)
: BaseType(numQubits, dev_tag), maxBondDim_(maxBondDim),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
public:
TNCudaBase() = delete;

// TODO: Add method to the constructor to allow the user to select methods at
// runtime in the C++ layer
explicit TNCudaBase(const std::size_t numQubits, int device_id = 0,
cudaStream_t stream_id = 0)
: BaseType(numQubits), handle_(make_shared_tncuda_handle()),
Expand All @@ -98,6 +100,8 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
/* cutensornetState_t * */ &quantumState_));
}

// TODO: Add method to the constructor to allow the user to select methods at
// runtime in the C++ layer
explicit TNCudaBase(const std::size_t numQubits, DevTag<int> dev_tag)
: BaseType(numQubits), handle_(make_shared_tncuda_handle()),
dev_tag_(dev_tag),
Expand Down Expand Up @@ -236,6 +240,13 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
bool adjoint = false,
const std::vector<PrecisionT> &params = {0.0},
const std::vector<ComplexT> &gate_matrix = {}) {
// TODO: Need to revisit this line of code for the exact TN backend.
// We should be able to turn on or skip this check based on the backend,
// if(getMethod() == "mps") { ... }
PL_ABORT_IF(
wires.size() > 2,
"Unsupported gate: MPS method only supports 1, 2-wires gates");

auto &&par = (params.empty()) ? std::vector<PrecisionT>{0.0} : params;
DataBuffer<PrecisionT, int> dummy_device_data(
Pennylane::Util::exp2(wires.size()), getDevTag());
Expand All @@ -259,17 +270,18 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
/* void * */ static_cast<void *>(dummy_device_data.getData()),
/* const int64_t *tensorModeStrides */ nullptr,
/* const int32_t immutable */ 1,
/* const int32_t adjoint */ adjoint,
/* const int32_t adjoint */ 0,
/* const int32_t unitary */ 1,
/* int64_t * */ &id));
if (!gate_matrix.empty()) {
auto gate_key = std::make_pair(opName, par);
std::vector<CFP_t> matrix_cu =
cuUtil::complexToCu<ComplexT>(gate_matrix);
gate_cache_->add_gate(static_cast<std::size_t>(id), gate_key,
matrix_cu);
matrix_cu, adjoint);
multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved
} else {
gate_cache_->add_gate(static_cast<std::size_t>(id), opName, par);
gate_cache_->add_gate(static_cast<std::size_t>(id), opName, par,
multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved
adjoint);
}
PL_CUTENSORNET_IS_SUCCESS(cutensornetStateUpdateTensorOperator(
/* const cutensornetHandle_t */ getTNCudaHandle(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,18 @@ template <class PrecisionT> class TNCudaGateCache {
* @param gate_name String representing the name of the given gate.
* @param gate_param Vector of parameter values. `{}` if non-parametric
* gate.
* @param adjoint Boolean value indicating if the gate requires adjoint.
multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved
*/
void add_gate(const std::size_t gate_id, const std::string &gate_name,
[[maybe_unused]] std::vector<PrecisionT> gate_param = {}) {
[[maybe_unused]] std::vector<PrecisionT> gate_param = {},
bool adjoint = false) {
auto gate_key = std::make_pair(gate_name, gate_param);

auto &gateMap =
cuGates::DynamicGateDataAccess<PrecisionT>::getInstance();

add_gate(gate_id, gate_key, gateMap.getGateData(gate_name, gate_param));
add_gate(gate_id, gate_key, gateMap.getGateData(gate_name, gate_param),
adjoint);
}
/**
* @brief Add gate numerical value to the cache, indexed by the id of gate
Expand All @@ -93,10 +96,12 @@ template <class PrecisionT> class TNCudaGateCache {
* its associated parameter value.
* @param gate_data_host Vector of complex floating point values
* representing the gate data on host.
* @param adjoint Boolean value indicating if the gate requires adjoint.
*/

multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved
void add_gate(const std::size_t gate_id, gate_key_info gate_key,
const std::vector<CFP_t> &gate_data_host) {
const std::vector<CFP_t> &gate_data_host,
bool adjoint = false) {
const std::size_t rank = Pennylane::Util::log2(gate_data_host.size());
auto modes = std::vector<std::size_t>(rank, 0);
auto extents = std::vector<std::size_t>(rank, 2);
Expand All @@ -108,8 +113,29 @@ template <class PrecisionT> class TNCudaGateCache {
std::piecewise_construct, std::forward_as_tuple(gate_id),
std::forward_as_tuple(gate_key, std::move(tensor)));

device_gates_.at(gate_id).second.getDataBuffer().CopyHostDataToGpu(
gate_data_host.data(), gate_data_host.size());
if (adjoint) {
// TODO: This is a temporary solution for gates data transpose.
// There should be a better way to handle this, but there is not
// a big performance issue for now since the size of gates is small.
std::vector<CFP_t> data_host_transpose(gate_data_host.size());

std::size_t col_size = 1 << (rank / 2);
std::size_t row_size = 1 << (rank / 2);
multiphaseCFD marked this conversation as resolved.
Show resolved Hide resolved

for (std::size_t idx = 0; idx < gate_data_host.size(); idx++) {
std::size_t col = idx / row_size;
std::size_t row = idx % row_size;

data_host_transpose.at(row * col_size + col) = {
gate_data_host.at(idx).x, -gate_data_host.at(idx).y};
}

device_gates_.at(gate_id).second.getDataBuffer().CopyHostDataToGpu(
data_host_transpose.data(), data_host_transpose.size());
} else {
device_gates_.at(gate_id).second.getDataBuffer().CopyHostDataToGpu(
gate_data_host.data(), gate_data_host.size());
}

total_alloc_bytes_ += (sizeof(CFP_t) * gate_data_host.size());
}
Expand Down
Loading
Loading