Commit

Merge branch 'add_LGPUMPI' into add_py_LGPUMPI
vincentmr authored Oct 23, 2023
2 parents (6ad1c7c + 84e2eb6), commit d3af819
Showing 15 changed files with 103 additions and 22 deletions.
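
The recurring change across these files: each MPI test setup gains two sanity checks, CHECK(mpi_manager.getSize() == 2) to assert the suite is launched with exactly two MPI ranks, and CHECK(nDevices >= 2) to assert at least two GPUs are visible per node before ranks are mapped to devices. A minimal standalone sketch of the pattern (illustrative only: it uses raw MPI and assert in place of the repository's MPIManager wrapper and Catch2's CHECK):

// Sketch of the guard-and-device-selection pattern added throughout this
// commit, as a self-contained MPI/CUDA program (assumes an MPI
// implementation and the CUDA runtime are installed).
#include <cassert>
#include <cuda_runtime.h>
#include <mpi.h>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int size = 0;
    int rank = 0;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    assert(size == 2); // the MPI tests are written for exactly two ranks

    int nDevices = 0; // number of GPU devices visible on this node
    cudaGetDeviceCount(&nDevices);
    assert(nDevices >= 2); // one GPU per rank when both ranks share a node

    // Round-robin rank-to-device mapping, as in the test fixtures.
    int deviceId = rank % nDevices;
    cudaSetDevice(deviceId);

    MPI_Finalize();
    return 0;
}

The rank % nDevices mapping would let ranks share devices round-robin; the new checks pin the suite to the two-rank, multi-GPU configuration it is written for.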

============================================================

@@ -58,6 +58,7 @@ template <typename TypeList> void testAdjointJacobian() {
 using ComplexT = typename StateVectorT::ComplexT;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 const std::vector<PrecisionT> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};

@@ -81,6 +82,7 @@ template <typename TypeList> void testAdjointJacobian() {

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -131,6 +133,7 @@ template <typename TypeList> void testAdjointJacobian() {

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -181,6 +184,7 @@ template <typename TypeList> void testAdjointJacobian() {

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -293,6 +297,7 @@ template <typename TypeList> void testAdjointJacobian() {

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -344,6 +349,7 @@ template <typename TypeList> void testAdjointJacobian() {

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);

============================================================

@@ -68,6 +68,7 @@ template <typename TypeList> void testProbabilities() {
 size_t num_qubits = 3;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -77,6 +78,7 @@ template <typename TypeList> void testProbabilities() {

 int nDevices = 0;
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -127,6 +129,7 @@ template <typename TypeList> void testNamedObsExpval() {
 size_t num_qubits = 3;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -136,6 +139,7 @@ template <typename TypeList> void testNamedObsExpval() {

 int nDevices = 0;
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -197,6 +201,7 @@ template <typename TypeList> void testHermitianObsExpval() {
 size_t num_qubits = 3;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -206,6 +211,7 @@ template <typename TypeList> void testHermitianObsExpval() {

 int nDevices = 0;
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -294,6 +300,7 @@ template <typename TypeList> void testNamedObsVar() {
 size_t num_qubits = 3;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -303,6 +310,7 @@ template <typename TypeList> void testNamedObsVar() {

 int nDevices = 0;
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -365,6 +373,7 @@ template <typename TypeList> void testHermitianObsVar() {
 size_t num_qubits = 3;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -374,6 +383,7 @@ template <typename TypeList> void testHermitianObsVar() {

 int nDevices = 0;
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -470,6 +480,7 @@ template <typename TypeList> void testSamples() {
 size_t num_qubits = 3;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -479,6 +490,7 @@ template <typename TypeList> void testSamples() {

 int nDevices = 0;
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);

============================================================

@@ -422,20 +422,6 @@ template <typename TypeList> void testHamiltonianBase() {

 REQUIRE(ham1->getWires() == std::vector<size_t>{0, 5, 9});
 }
-
-/*
-DYNAMIC_SECTION("applyInPlace must fail - "
-<< StateVectorMPIToName<StateVectorT>::name) {
-auto ham =
-HamiltonianT::create({PrecisionT{1.0}, h, h}, {zz, x1, x2});
-auto st_data = createZeroState<ComplexT>(2);
-StateVectorT state_vector(st_data.data(), st_data.size());
-REQUIRE_THROWS_AS(ham->applyInPlace(state_vector),
-LightningException);
-}
-*/
 }
 testHamiltonianBase<typename TypeList::Next>();
 }

============================================================

@@ -1867,6 +1867,7 @@ class StateVectorCudaMPI
 // LCOV_EXCL_STOP

 cuDoubleComplex expect_;
+
 // compute expectation
 PL_CUSTATEVEC_IS_SUCCESS(custatevecComputeExpectation(
 /* custatevecHandle_t */ handle_.get(),

============================================================

@@ -47,6 +47,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=RX, Obs=[Z,Z]",
 using StateVectorT = StateVectorCudaMPI<double>;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 AdjointJacobianMPI<StateVectorT> adj;
 std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
@@ -66,6 +67,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=RX, Obs=[Z,Z]",

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -104,6 +106,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=[QubitStateVector, "
 using StateVectorT = StateVectorCudaMPI<double>;

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 AdjointJacobianMPI<StateVectorT> adj;
 std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
@@ -124,6 +127,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=[QubitStateVector, "

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -167,6 +171,7 @@ TEST_CASE(
 std::vector<double> jacobian_serial(num_obs * tp.size(), 0);

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -177,6 +182,7 @@ TEST_CASE(

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -234,6 +240,7 @@ TEST_CASE(
 std::vector<double> jacobian_serial(num_obs * tp.size(), 0);

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -244,6 +251,7 @@ TEST_CASE(

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -297,6 +305,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]",
 std::vector<double> jacobian_serial(num_obs * tp.size(), 0);

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -307,6 +316,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]",

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -358,6 +368,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=Mixed, Obs=[XXX]",
 std::vector<double> jacobian_serial(num_obs * tp.size(), 0);

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -368,6 +379,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=Mixed, Obs=[XXX]",

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -436,6 +448,7 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPUMPI Op=[RX,RX,RX], "
 std::vector<double> jacobian_serial(num_obs * tp.size(), 0);

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -446,6 +459,7 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPUMPI Op=[RX,RX,RX], "

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -504,6 +518,7 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Test HermitianObs",
 std::vector<double> jacobian2_serial(num_obs * tp.size(), 0);

 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -514,6 +529,7 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Test HermitianObs",

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);

============================================================

@@ -64,6 +64,7 @@ using namespace Pennylane::LightningGPU::MPI;
 using cp_t = std::complex<TestType>; \
 using PrecisionT = TestType; \
 MPIManager mpi_manager(MPI_COMM_WORLD); \
+CHECK(mpi_manager.getSize() == 2); \
 size_t mpi_buffersize = 1; \
 size_t nGlobalIndexBits = \
 std::bit_width(static_cast<size_t>(mpi_manager.getSize())) - 1; \
@@ -82,6 +83,7 @@ using namespace Pennylane::LightningGPU::MPI;
 mpi_manager.Barrier(); \
 int nDevices = 0; \
 cudaGetDeviceCount(&nDevices); \
+CHECK(nDevices >= 2); \
 int deviceId = mpi_manager.getRank() % nDevices; \
 cudaSetDevice(deviceId); \
 DevTag<int> dt_local(deviceId, 0); \
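
For context on the macro above: the number of global index bits, i.e. qubit indices whose amplitudes are distributed across ranks, is derived from the communicator size as std::bit_width(size) - 1. A quick worked check of that formula (a hypothetical standalone snippet, assuming C++20 for std::bit_width):

#include <bit>
#include <cstddef>
#include <iostream>

int main() {
    // With the two ranks these tests now require: bit_width(2) - 1 == 1,
    // so one qubit index is global and each rank holds half the state vector.
    for (std::size_t size : {2, 4, 8}) {
        std::size_t nGlobalIndexBits = std::bit_width(size) - 1;
        std::cout << size << " ranks -> " << nGlobalIndexBits
                  << " global index bits\n";
    }
    return 0;
}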

============================================================

@@ -90,6 +90,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetStateVector",
 using PrecisionT = TestType;
 using cp_t = std::complex<PrecisionT>;
 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -132,6 +133,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetStateVector",

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -165,6 +167,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates",
 using PrecisionT = TestType;
 using cp_t = std::complex<PrecisionT>;
 MPIManager mpi_manager(MPI_COMM_WORLD);
+CHECK(mpi_manager.getSize() == 2);

 size_t mpi_buffersize = 1;

@@ -193,6 +196,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates",

 int nDevices = 0; // Number of GPU devices per node
 cudaGetDeviceCount(&nDevices);
+CHECK(nDevices >= 2);
 int deviceId = mpi_manager.getRank() % nDevices;
 cudaSetDevice(deviceId);
 DevTag<int> dt_local(deviceId, 0);
@@ -219,6 +223,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates",
 using cp_t = std::complex<TestType>; \
 using PrecisionT = TestType; \
 MPIManager mpi_manager(MPI_COMM_WORLD); \
+CHECK(mpi_manager.getSize() == 2); \
 size_t mpi_buffersize = 1; \
 size_t nGlobalIndexBits = \
 std::bit_width(static_cast<size_t>(mpi_manager.getSize())) - 1; \
@@ -237,6 +242,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates",
 mpi_manager.Barrier(); \
 int nDevices = 0; \
 cudaGetDeviceCount(&nDevices); \
+CHECK(nDevices >= 2); \
 int deviceId = mpi_manager.getRank() % nDevices; \
 cudaSetDevice(deviceId); \
 DevTag<int> dt_local(deviceId, 0); \

============================================================

@@ -63,6 +63,7 @@ using namespace Pennylane::LightningGPU::MPI;
 using cp_t = std::complex<TestType>; \
 using PrecisionT = TestType; \
 MPIManager mpi_manager(MPI_COMM_WORLD); \
+CHECK(mpi_manager.getSize() == 2); \
 size_t mpi_buffersize = 1; \
 size_t nGlobalIndexBits = \
 std::bit_width(static_cast<size_t>(mpi_manager.getSize())) - 1; \
@@ -81,6 +82,7 @@ using namespace Pennylane::LightningGPU::MPI;
 mpi_manager.Barrier(); \
 int nDevices = 0; \
 cudaGetDeviceCount(&nDevices); \
+CHECK(nDevices >= 2); \
 int deviceId = mpi_manager.getRank() % nDevices; \
 cudaSetDevice(deviceId); \
 DevTag<int> dt_local(deviceId, 0); \
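
With these guards in place, each MPI test binary fails fast unless it runs with exactly two ranks and sees at least two GPUs per node; a launch along the lines of mpirun -np 2 ./test_binary matches the asserted configuration (launcher and binary name depend on the local build and are shown only as an illustration).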