Refactor CUDA Utils Python binding #801

Merged (12 commits) on Jul 17, 2024
5 changes: 4 additions & 1 deletion .github/CHANGELOG.md
@@ -15,6 +15,9 @@

### Improvements

* Refactor CUDA utils Python bindings to a separate module.
[(#801)](https://github.com/PennyLaneAI/pennylane-lightning/pull/801)

* Parallelize Lightning-Qubit `probs` with OpenMP when using the `-DLQ_ENABLE_KERNEL_OMP=1` CMake argument.
[(#800)](https://github.com/PennyLaneAI/pennylane-lightning/pull/800)

@@ -47,7 +50,7 @@

This release contains contributions from (in alphabetical order):

Amintor Dusko, Vincent Michaud-Rioux
Amintor Dusko, Vincent Michaud-Rioux, Shuli Shu

---

2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.38.0-dev8"
__version__ = "0.38.0-dev9"
101 changes: 101 additions & 0 deletions pennylane_lightning/core/src/bindings/BindingsCudaUtils.hpp
@@ -0,0 +1,101 @@
// Copyright 2024 Xanadu Quantum Technologies Inc.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
* @file BindingsCudaUtils.hpp
 * Defines CUDA device-specific operations to export to Python, other
 * utility functions interfacing with Pybind11, and support for device-agnostic
 * bindings.
*/

#pragma once

#include "BindingsBase.hpp"
#include "DevTag.hpp"
#include "DevicePool.hpp"
#include "cuda_helpers.hpp"

/// @cond DEV
namespace {
using namespace Pennylane;
using namespace Pennylane::Bindings;
} // namespace
/// @endcond

namespace py = pybind11;

namespace Pennylane::LightningGPU::Util {
/**
* @brief Register bindings for CUDA utils.
*
* @param m Pybind11 module.
*/
void registerCudaUtils(py::module_ &m) {
m.def("device_reset", &deviceReset, "Reset all GPU devices and contexts.");
m.def("allToAllAccess", []() {
for (int i = 0; i < static_cast<int>(getGPUCount()); i++) {
cudaDeviceEnablePeerAccess(i, 0);
}
});

m.def("is_gpu_supported", &isCuQuantumSupported,
py::arg("device_number") = 0,
"Checks if the given GPU device meets the minimum architecture "
"support for the PennyLane-Lightning-GPU device.");

m.def("get_gpu_arch", &getGPUArch, py::arg("device_number") = 0,
"Returns the given GPU major and minor GPU support.");
py::class_<DevicePool<int>>(m, "DevPool")
.def(py::init<>())
.def("getActiveDevices", &DevicePool<int>::getActiveDevices)
.def("isActive", &DevicePool<int>::isActive)
.def("isInactive", &DevicePool<int>::isInactive)
.def("acquireDevice", &DevicePool<int>::acquireDevice)
.def("releaseDevice", &DevicePool<int>::releaseDevice)
.def("syncDevice", &DevicePool<int>::syncDevice)
.def("refresh", &DevicePool<int>::refresh)
.def_static("getTotalDevices", &DevicePool<int>::getTotalDevices)
.def_static("getDeviceUIDs", &DevicePool<int>::getDeviceUIDs)
.def_static("setDeviceID", &DevicePool<int>::setDeviceIdx)
.def(py::pickle(
[]([[maybe_unused]] const DevicePool<int> &self) { // __getstate__
return py::make_tuple();
},
[](py::tuple &t) { // __setstate__
if (t.size() != 0) {
throw std::runtime_error("Invalid state!");
}
return DevicePool<int>{};
}));

py::class_<DevTag<int>>(m, "DevTag")
.def(py::init<>())
.def(py::init<int>())
.def(py::init([](int device_id, void *stream_id) {
// Note, streams must be handled externally for now.
// Binding support provided through void* conversion to cudaStream_t
return new DevTag<int>(device_id,
static_cast<cudaStream_t>(stream_id));
}))
.def(py::init<const DevTag<int> &>())
.def("getDeviceID", &DevTag<int>::getDeviceID)
.def("getStreamID",
[](DevTag<int> &dev_tag) {
// default stream points to nullptr, so just return void* as
// type
return static_cast<void *>(dev_tag.getStreamID());
})
.def("refresh", &DevTag<int>::refresh);
}

} // namespace Pennylane::LightningGPU::Util
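
Reviewer note: once a backend registers `registerCudaUtils` into its Pybind11 module, the helpers above become callable from Python. A minimal sketch of that surface follows; the module path `pennylane_lightning.lightning_gpu_ops` and the exact return types are assumptions, not confirmed by this diff.

# Hedged sketch: the module path below is an assumption, not confirmed by this diff.
from pennylane_lightning.lightning_gpu_ops import (
    DevPool,
    DevTag,
    device_reset,
    get_gpu_arch,
    is_gpu_supported,
)

if is_gpu_supported(device_number=0):      # keyword name matches py::arg("device_number")
    print("GPU arch info:", get_gpu_arch(device_number=0))

print("Total devices:", DevPool.getTotalDevices())   # static method on the bound class
print("Device UIDs:", DevPool.getDeviceUIDs())       # static method on the bound class

tag = DevTag(0)                            # device 0 with the default (null) stream
print("DevTag device id:", tag.getDeviceID())

device_reset()                             # reset all GPU devices and contexts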
@@ -20,6 +20,7 @@
#include "cuda.h"

#include "BindingsBase.hpp"
#include "BindingsCudaUtils.hpp"
#include "Constant.hpp"
#include "ConstantUtil.hpp" // lookup
#include "DevTag.hpp"
@@ -362,62 +363,7 @@ auto getBackendInfo() -> py::dict {
*/
void registerBackendSpecificInfo(py::module_ &m) {
m.def("backend_info", &getBackendInfo, "Backend-specific information.");
m.def("device_reset", &deviceReset, "Reset all GPU devices and contexts.");
m.def("allToAllAccess", []() {
for (int i = 0; i < static_cast<int>(getGPUCount()); i++) {
cudaDeviceEnablePeerAccess(i, 0);
}
});

m.def("is_gpu_supported", &isCuQuantumSupported,
py::arg("device_number") = 0,
"Checks if the given GPU device meets the minimum architecture "
"support for the PennyLane-Lightning-GPU device.");

m.def("get_gpu_arch", &getGPUArch, py::arg("device_number") = 0,
"Returns the given GPU major and minor GPU support.");
py::class_<DevicePool<int>>(m, "DevPool")
.def(py::init<>())
.def("getActiveDevices", &DevicePool<int>::getActiveDevices)
.def("isActive", &DevicePool<int>::isActive)
.def("isInactive", &DevicePool<int>::isInactive)
.def("acquireDevice", &DevicePool<int>::acquireDevice)
.def("releaseDevice", &DevicePool<int>::releaseDevice)
.def("syncDevice", &DevicePool<int>::syncDevice)
.def("refresh", &DevicePool<int>::refresh)
.def_static("getTotalDevices", &DevicePool<int>::getTotalDevices)
.def_static("getDeviceUIDs", &DevicePool<int>::getDeviceUIDs)
.def_static("setDeviceID", &DevicePool<int>::setDeviceIdx)
.def(py::pickle(
[]([[maybe_unused]] const DevicePool<int> &self) { // __getstate__
return py::make_tuple();
},
[](py::tuple &t) { // __setstate__
if (t.size() != 0) {
throw std::runtime_error("Invalid state!");
}
return DevicePool<int>{};
}));

py::class_<DevTag<int>>(m, "DevTag")
.def(py::init<>())
.def(py::init<int>())
.def(py::init([](int device_id, void *stream_id) {
// Note, streams must be handled externally for now.
// Binding support provided through void* conversion to cudaStream_t
return new DevTag<int>(device_id,
static_cast<cudaStream_t>(stream_id));
}))
.def(py::init<const DevTag<int> &>())
.def("getDeviceID", &DevTag<int>::getDeviceID)
.def("getStreamID",
[](DevTag<int> &dev_tag) {
// default stream points to nullptr, so just return void* as
// type
return static_cast<void *>(dev_tag.getStreamID());
})
.def("refresh", &DevTag<int>::refresh);
registerCudaUtils(m);
}

} // namespace Pennylane::LightningGPU
/// @endcond
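
Reviewer note: on the Lightning-GPU side, `registerBackendSpecificInfo` now only adds `backend_info` and delegates the rest to `registerCudaUtils`. Since that shared registration wires `py::pickle` into `DevPool`, a pool object should survive a pickle round trip (it carries no state). A hedged sketch; the module path and the exact return value of `acquireDevice` are assumptions.

# Hedged sketch; module path and acquireDevice's return value are assumptions.
import pickle

from pennylane_lightning.lightning_gpu_ops import DevPool, backend_info

print(backend_info())                          # backend-specific build information (dict)

pool = DevPool()
restored = pickle.loads(pickle.dumps(pool))    # round trip via the py::pickle bindings above
print("Active devices:", restored.getActiveDevices())

device_id = pool.acquireDevice()               # assumed to return an integer device index
pool.releaseDevice(device_id)                  # hand the device back to the pool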
@@ -24,6 +24,7 @@
#include "cuda.h"

#include "BindingsBase.hpp"
#include "BindingsCudaUtils.hpp"
#include "DevTag.hpp"
#include "DevicePool.hpp"
#include "Error.hpp"
@@ -35,6 +36,7 @@
namespace {
using namespace Pennylane;
using namespace Pennylane::Bindings;
using namespace Pennylane::LightningGPU::Util;
using Pennylane::LightningTensor::TNCuda::MPSTNCuda;
} // namespace
/// @endcond
@@ -88,54 +90,9 @@ auto getBackendInfo() -> py::dict {
*
* @param m Pybind11 module.
*/
// TODO Move this method to a separate module for both LGPU and LTensor usage.
void registerBackendSpecificInfo(py::module_ &m) {
m.def("backend_info", &getBackendInfo, "Backend-specific information.");
m.def("device_reset", &deviceReset, "Reset all GPU devices and contexts.");
m.def("allToAllAccess", []() {
for (int i = 0; i < static_cast<int>(getGPUCount()); i++) {
cudaDeviceEnablePeerAccess(i, 0);
}
});

m.def("is_gpu_supported", &isCuQuantumSupported,
py::arg("device_number") = 0,
"Checks if the given GPU device meets the minimum architecture "
"support for the PennyLane-Lightning-Tensor device.");

m.def("get_gpu_arch", &getGPUArch, py::arg("device_number") = 0,
"Returns the given GPU major and minor GPU support.");
py::class_<DevicePool<int>>(m, "DevPool")
.def(py::init<>())
.def("getActiveDevices", &DevicePool<int>::getActiveDevices)
.def("isActive", &DevicePool<int>::isActive)
.def("isInactive", &DevicePool<int>::isInactive)
.def("acquireDevice", &DevicePool<int>::acquireDevice)
.def("releaseDevice", &DevicePool<int>::releaseDevice)
.def("syncDevice", &DevicePool<int>::syncDevice)
.def_static("getTotalDevices", &DevicePool<int>::getTotalDevices)
.def_static("getDeviceUIDs", &DevicePool<int>::getDeviceUIDs)
.def_static("setDeviceID", &DevicePool<int>::setDeviceIdx);

py::class_<DevTag<int>>(m, "DevTag")
.def(py::init<>())
.def(py::init<int>())
.def(py::init([](int device_id, void *stream_id) {
// Note, streams must be handled externally for now.
// Binding support provided through void* conversion to cudaStream_t
return new DevTag<int>(device_id,
static_cast<cudaStream_t>(stream_id));
}))
.def(py::init<const DevTag<int> &>())
.def("getDeviceID", &DevTag<int>::getDeviceID)
.def("getStreamID",
[](DevTag<int> &dev_tag) {
// default stream points to nullptr, so just return void* as
// type
return static_cast<void *>(dev_tag.getStreamID());
})
.def("refresh", &DevTag<int>::refresh);
registerCudaUtils(m);
}

} // namespace Pennylane::LightningTensor::TNCuda
/// @endcond
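
Reviewer note: the Lightning-Tensor bindings now pick up the same CUDA utilities through `registerCudaUtils`, so a capability check before building an MPS simulation reads the same as on the Lightning-GPU side. A hedged sketch; the module path `pennylane_lightning.lightning_tensor_ops` is an assumption.

# Hedged sketch; the module path below is an assumption, not confirmed by this diff.
from pennylane_lightning.lightning_tensor_ops import DevTag, get_gpu_arch, is_gpu_supported

if not is_gpu_supported(device_number=0):
    raise RuntimeError("GPU does not meet the minimum architecture for lightning.tensor")

print("Architecture info:", get_gpu_arch(device_number=0))

tag = DevTag(0)          # tag device 0 with the default (null) stream
tag.refresh()            # re-query the tagged device/stream association
print("Using device:", tag.getDeviceID())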