Skip to content

Commit

Permalink
Fix rebase conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
maliasadi committed Aug 21, 2024
2 parents 561a364 + b387cfc commit cfba50d
Show file tree
Hide file tree
Showing 31 changed files with 728 additions and 313 deletions.
11 changes: 10 additions & 1 deletion .github/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
* Add LightningGPU Linux (aarch64+GraceHopper) wheels to PyPI.
[(#815)](https://github.com/PennyLaneAI/pennylane-lightning/pull/815)

* Add the analytic `qml.probs()` measurement support to `lightning.tensor`.
[(#830)](https://github.com/PennyLaneAI/pennylane-lightning/pull/830)

* Add `qml.state()` measurement support to `lightning.tensor`.
[(#827)](https://github.com/PennyLaneAI/pennylane-lightning/pull/827)

Expand All @@ -30,6 +33,12 @@

### Improvements

* Multiple calls to the `append_mps_final_state()` API are allowed in `lightning.tensor`.
[(#830)](https://github.com/PennyLaneAI/pennylane-lightning/pull/830)

* Update `generate_samples` in `LightningKokkos` and `LightningGPU` to support `qml.measurements.Shots` type instances.
[(#839)](https://github.com/PennyLaneAI/pennylane-lightning/pull/839)

* LightningQubit gains native support for the `PauliRot` gate.
[(#834)](https://github.com/PennyLaneAI/pennylane-lightning/pull/834)

Expand Down Expand Up @@ -139,7 +148,7 @@

This release contains contributions from (in alphabetical order):

Ali Asadi, Astral Cai, Amintor Dusko, Vincent Michaud-Rioux, Erick Ochoa Lopez, Lee J. O'Riordan, Mudit Pandey, Shuli Shu, Raul Torres, Paul Haochen Wang
Ali Asadi, Astral Cai, Ahmed Darwish, Amintor Dusko, Vincent Michaud-Rioux, Erick Ochoa Lopez, Lee J. O'Riordan, Mudit Pandey, Shuli Shu, Raul Torres, Paul Haochen Wang

---

Expand Down
2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.38.0-dev39"
__version__ = "0.38.0-dev41"
7 changes: 7 additions & 0 deletions pennylane_lightning/core/src/bindings/Bindings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,13 @@ void registerLightningTensorBackendAgnosticMeasurements(PyClass &pyclass) {
return M.expval(*ob);
},
"Expected value of an observable object.")
.def(
"probs",
[](MeasurementsT &M, const std::vector<std::size_t> &wires) {
return py::array_t<typename TensorNetT::PrecisionT>(
py::cast(M.probs(wires)));
},
"Probabilities of a set of wires.")
.def(
"var",
[](MeasurementsT &M, const std::shared_ptr<ObservableT> &ob) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
using BaseType = TNCudaBase<Precision, MPSTNCuda>;

MPSStatus MPSInitialized_ = MPSStatus::MPSInitNotSet;
MPSStatus MPSFinalized_ = MPSStatus::MPSFinalizedNotSet;

const std::size_t maxBondDim_;

Expand Down Expand Up @@ -215,21 +214,18 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
*/
void append_mps_final_state(double cutoff = 0,
std::string cutoff_mode = "abs") {
if (MPSFinalized_ == MPSStatus::MPSFinalizedNotSet) {
MPSFinalized_ = MPSStatus::MPSFinalizedSet;
PL_CUTENSORNET_IS_SUCCESS(cutensornetStateFinalizeMPS(
/* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
/* cutensornetState_t */ BaseType::getQuantumState(),
/* cutensornetBoundaryCondition_t */
CUTENSORNET_BOUNDARY_CONDITION_OPEN,
/* const int64_t *const extentsOut[] */
getSitesExtentsPtr().data(),
/*strides=*/nullptr));
}
PL_CUTENSORNET_IS_SUCCESS(cutensornetStateFinalizeMPS(
/* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
/* cutensornetState_t */ BaseType::getQuantumState(),
/* cutensornetBoundaryCondition_t */
CUTENSORNET_BOUNDARY_CONDITION_OPEN,
/* const int64_t *const extentsOut[] */
getSitesExtentsPtr().data(),
/*strides=*/nullptr));

// Optional: SVD
cutensornetTensorSVDAlgo_t algo =
CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; // default
CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; // default option

PL_CUTENSORNET_IS_SUCCESS(cutensornetStateConfigure(
/* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
Expand Down Expand Up @@ -257,6 +253,21 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
BaseType::computeState(
const_cast<int64_t **>(getSitesExtentsPtr().data()),
reinterpret_cast<void **>(getTensorsOutDataPtr().data()));

// TODO: This is a dummy tensor update to allow multiple calls to the
// `append_mps_final_state` method as well as appending additional
// operations to the graph. This is a temporary solution and this line
// can be removed in the future when the `cutensornet` backend allows
// multiple calls to the `cutensornetStateFinalizeMPS` method. For more
// details, please see the `cutensornet` high-level API workflow logic
// [here]
// (https://docs.nvidia.com/cuda/cuquantum/latest/cutensornet/api/functions.html#high-level-tensor-network-api).
// In order to proceed with the following gate operations or
// measurements after calling the `cutensornetStateCompute()` API, we
// have to call the `cutensornetStateUpdateTensor()` API, which is
// wrapped inside the `dummy_tensor_update()` method.
//
BaseType::dummy_tensor_update();
}

/**
Expand All @@ -276,7 +287,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {

PL_ABORT_IF(log2(avail_gpu_memory) < BaseType::getNumQubits(),
"State tensor size exceeds the available GPU memory!");
this->get_state_tensor(res);
BaseType::get_state_tensor(res);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
using ComplexT = std::complex<PrecisionT>;
using BaseType = TensornetBase<PrecisionT, Derived>;
SharedTNCudaHandle handle_;
SharedCublasCaller cublascaller_;
cudaDataType_t typeData_;
DevTag<int> dev_tag_;
cutensornetComputeType_t typeCompute_;
Expand All @@ -78,6 +79,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
explicit TNCudaBase(const std::size_t numQubits, int device_id = 0,
cudaStream_t stream_id = 0)
: BaseType(numQubits), handle_(make_shared_tncuda_handle()),
cublascaller_(make_shared_cublas_caller()),
dev_tag_({device_id, stream_id}),
gate_cache_(std::make_shared<TNCudaGateCache<PrecisionT>>(dev_tag_)) {
// TODO this code block could be moved to base class and need to revisit
Expand Down Expand Up @@ -108,7 +110,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
// runtime in the C++ layer
explicit TNCudaBase(const std::size_t numQubits, DevTag<int> dev_tag)
: BaseType(numQubits), handle_(make_shared_tncuda_handle()),
dev_tag_(dev_tag),
cublascaller_(make_shared_cublas_caller()), dev_tag_(dev_tag),
gate_cache_(std::make_shared<TNCudaGateCache<PrecisionT>>(dev_tag_)) {
// TODO this code block could be moved to base class and need to revisit
// when working on copy ctor
Expand Down Expand Up @@ -155,6 +157,15 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
return handle_.get();
}

/**
 * @brief Access the CublasCaller held by this object.
 *
 * The caller is created in the constructors (via
 * `make_shared_cublas_caller()`) and stored in the `cublascaller_` member;
 * this accessor dereferences that member.
 *
 * @return Const reference to the CublasCaller that `cublascaller_` points
 * to.
 */
auto getCublasCaller() const -> const CublasCaller & {
return *cublascaller_;
}

/**
* @brief Get the quantum state pointer.
*
Expand Down Expand Up @@ -299,7 +310,6 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
/* int32_t unitary*/ 1));
}

protected:
/**
* @brief Get the state vector representation of a tensor network.
*
Expand All @@ -323,7 +333,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
}

/**
* @brief Get a slice of the state tensor
* @brief Get a slice of the full state tensor
*
* @param tensor_data Pointer to the device memory for state tensor data.
* @param tensor_data_size Size of the state tensor data.
Expand All @@ -334,17 +344,61 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
void get_state_tensor(CFP_t *tensor_data,
const std::size_t tensor_data_size,
const std::vector<std::size_t> &wires,
const int32_t numHyperSamples = 1) {
// NOTE: this is a solution to get the full state tensor
// TODO: projected_modes and projectedModeValues are to be updated for
// prob() support.
const int32_t numHyperSamples = 1) const {
auto stateModes = cuUtil::NormalizeCastIndices<std::size_t, int32_t>(
wires, BaseType::getNumQubits());

std::vector<int32_t> projected_modes{};

std::vector<int64_t> projectedModeValues{};
for (int32_t idx = 0;
idx < static_cast<int32_t>(BaseType::getNumQubits()); idx++) {
auto it = std::find(stateModes.begin(), stateModes.end(), idx);
if (it == stateModes.end()) {
projected_modes.emplace_back(idx);
}
}

std::vector<int64_t> projectedModeValues(projected_modes.size(), 0);

if (projected_modes.empty()) {
get_accessor_(tensor_data, tensor_data_size, projected_modes,
projectedModeValues, numHyperSamples);
} else {
DataBuffer<CFP_t, int> tmp(tensor_data_size, getDevTag(), true);

const std::size_t projected_modes_size = size_t(1)
<< projected_modes.size();
for (std::size_t idx = 0; idx < projected_modes_size; idx++) {
for (std::size_t j = 0; j < projected_modes.size(); j++) {
projectedModeValues[j] = (idx >> j) & 1;
}

get_accessor_(tmp.getData(), tensor_data_size, projected_modes,
projectedModeValues, numHyperSamples);
// Copy the data to the output tensor
scaleAndAddC_CUDA(std::complex<PrecisionT>{1.0, 0.0},
tmp.getData(), tensor_data, tmp.getLength(),
getDevTag().getDeviceID(),
getDevTag().getStreamID(), getCublasCaller());
}
}
}

private:
/**
* @brief Get accessor of a state tensor
*
* @param tensor_data Pointer to the device memory for state tensor data.
* @param tensor_data_size Size of the tensor data.
* @param projected_modes Projected modes to get the state tensor for.
* @param projectedModeValues Values of the projected modes.
* @param numHyperSamples Number of hyper samples to use in the calculation
* and is set to 1 by default.
*/
void get_accessor_(CFP_t *tensor_data, const std::size_t tensor_data_size,
const std::vector<int32_t> &projected_modes,
const std::vector<int64_t> &projectedModeValues,
const int32_t numHyperSamples = 1) const {
cutensornetStateAccessor_t accessor;
PL_CUTENSORNET_IS_SUCCESS(cutensornetCreateAccessor(
/* const cutensornetHandle_t */ getTNCudaHandle(),
Expand Down Expand Up @@ -415,17 +469,40 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {

CFP_t scale_scalar_cu{scale_scalar.real(), scale_scalar.imag()};

SharedCublasCaller cublascaller = make_shared_cublas_caller();

scaleC_CUDA<CFP_t, CFP_t>(scale_scalar_cu, tensor_data,
tensor_data_size, getDevTag().getDeviceID(),
getDevTag().getStreamID(), *cublascaller);
getDevTag().getStreamID(), getCublasCaller());

PL_CUTENSORNET_IS_SUCCESS(
cutensornetDestroyWorkspaceDescriptor(workDesc));
PL_CUTENSORNET_IS_SUCCESS(cutensornetDestroyAccessor(accessor));
}

protected:
/**
 * @brief Re-submit an already registered gate tensor to the quantum state.
 *
 * This is a workaround that allows `append_mps_final_state()` to be called
 * more than once and further operations to be appended afterwards: a
 * tensor operator of the state is "updated" with its own, unchanged data
 * through `cutensornetStateUpdateTensorOperator()`.
 *
 * If the gate cache holds no gate yet, an `Identity` gate is first applied
 * on wire 0 so that there is an operator to update. The operator that gets
 * updated is the one whose id is reported by
 * `gate_cache_->get_cache_head_idx()`.
 */
void dummy_tensor_update() {
    // Guarantee that at least one cached gate exists before querying the
    // cache head.
    if (gate_cache_->is_empty()) {
        applyOperation("Identity", {0}, false);
    }

    const std::size_t head_id = gate_cache_->get_cache_head_idx();

    // Device buffer of the very same gate: the update leaves the operator
    // data unchanged.
    void *gate_device_data =
        static_cast<void *>(gate_cache_->get_gate_device_ptr(head_id));

    PL_CUTENSORNET_IS_SUCCESS(cutensornetStateUpdateTensorOperator(
        /* const cutensornetHandle_t */ getTNCudaHandle(),
        /* cutensornetState_t */ getQuantumState(),
        /* int64_t tensorId*/ static_cast<int64_t>(head_id),
        /* void* */ gate_device_data,
        /* int32_t unitary*/ 1));
}

/**
* @brief Save quantumState information to data provided by a user
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,22 @@ template <class PrecisionT> class TNCudaGateCache {
return device_gates_.at(gate_id).second.getDataBuffer().getData();
}

/**
 * @brief Get the key (index of the gate) of the first element in
 * `device_gates_`.
 *
 * @note The iterator returned by `begin()` is dereferenced unconditionally,
 * so `device_gates_` must not be empty when this is called; check with
 * `is_empty()` first.
 *
 * @return size_t Key of the first element in `device_gates_`.
 */
auto get_cache_head_idx() const -> std::size_t {
    return device_gates_.begin()->first;
}

/**
 * @brief Check whether `device_gates_` holds no entries.
 *
 * @return bool `true` if `device_gates_` is empty, `false` otherwise.
 */
auto is_empty() const -> bool { return device_gates_.empty(); }

private:
const DevTag<int> device_tag_;
std::size_t total_alloc_bytes_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,17 @@ TEMPLATE_TEST_CASE("MPSTNCuda::Gates::Hadamard", "[MPSTNCuda_Nonparam]", float,
const std::size_t index = GENERATE(0, 1, 2);
MPSTNCuda<TestType> mps_state{num_qubits, maxExtent, dev_tag};

mps_state.append_mps_final_state();

mps_state.applyOperation("Hadamard", {index}, inverse);

mps_state.append_mps_final_state();

mps_state.applyOperation("Identity", {index}, inverse);

// Test for multiple final states appendings
mps_state.append_mps_final_state();

cp_t expected(1.0 / std::sqrt(2), 0);

auto results = mps_state.getDataVector();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
cmake_minimum_required(VERSION 3.20)

project(${PL_BACKEND}_measurements LANGUAGES CXX)
project(${PL_BACKEND}_measurements LANGUAGES CXX C CUDA)

add_library(${PL_BACKEND}_measurements INTERFACE)
if(NOT DEFINED CMAKE_CUDA20_STANDARD_COMPILE_OPTION)
set(CMAKE_CUDA20_STANDARD_COMPILE_OPTION "")
set(CMAKE_CUDA20_EXTENSION_COMPILE_OPTION "")
endif()

set(LTENSOR_MPS_FILES cuda_kernels_measures.cu CACHE INTERNAL "" FORCE)

add_library(${PL_BACKEND}_measurements STATIC ${LTENSOR_MPS_FILES})

target_include_directories(${PL_BACKEND}_measurements INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})

target_link_libraries(${PL_BACKEND}_measurements INTERFACE lightning_compile_options
target_link_libraries(${PL_BACKEND}_measurements PUBLIC lightning_compile_options
lightning_external_libs
${PL_BACKEND}
${PL_BACKEND}_utils
Expand Down
Loading

0 comments on commit cfba50d

Please sign in to comment.