Skip to content

Commit

Permalink
use_dpctl_conj_for_dpnp
Browse files Browse the repository at this point in the history
  • Loading branch information
vtavana committed Aug 16, 2023
1 parent 42e02d9 commit 3cba1ce
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 45 deletions.
78 changes: 78 additions & 0 deletions dpnp/backend/extensions/vm/conj.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//*****************************************************************************
// Copyright (c) 2023, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once

#include <CL/sycl.hpp>

#include "common.hpp"
#include "types_matrix.hpp"

namespace dpnp
{
namespace backend
{
namespace ext
{
namespace vm
{
/**
 * @brief Calls `mkl_vm::conj` on a contiguous buffer of `n` elements of
 *        type `T`.
 *
 * The input and output buffers arrive as untyped byte pointers and are
 * reinterpreted as arrays of `T` before the call.
 *
 * @tparam T Element type of both the input and the output buffer.
 *
 * @param exec_q  Queue handed to `mkl_vm::conj`.
 * @param n       Number of elements to be calculated.
 * @param in_a    Input buffer, viewed as `const T *`.
 * @param out_y   Output buffer, viewed as `T *`.
 * @param depends Events forwarded to the `mkl_vm::conj` call.
 *
 * @return The event returned by `mkl_vm::conj`.
 */
template <typename T>
sycl::event conj_contig_impl(sycl::queue exec_q,
                             const std::int64_t n,
                             const char *in_a,
                             char *out_y,
                             const std::vector<sycl::event> &depends)
{
    // NOTE(review): presumably rejects element types the queue's device
    // cannot handle -- confirm against type_utils.
    type_utils::validate_type_for_device<T>(exec_q);

    // Typed views over the raw byte buffers.
    const auto *src = reinterpret_cast<const T *>(in_a);
    auto *dst = reinterpret_cast<T *>(out_y);

    return mkl_vm::conj(exec_q, n, src, dst, depends);
}

template <typename fnT, typename T>
struct ConjContigFactory
{
fnT get()
{
if constexpr (std::is_same_v<
typename types::ConjOutputType<T>::value_type, void>)
{
return nullptr;
}
else {
return conj_contig_impl<T>;
}
}
};
} // namespace vm
} // namespace ext
} // namespace backend
} // namespace dpnp
17 changes: 17 additions & 0 deletions dpnp/backend/extensions/vm/types_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,23 @@ struct DivOutputType
dpctl_td_ns::DefaultResultEntry<void>>::result_type;
};

/**
 * @brief A factory to define pairs of supported types for which
 * MKL VM library provides support in oneapi::mkl::vm::conj<T> function.
 *
 * `value_type` is the `result_type` selected by `std::disjunction` over the
 * entries listed in the body: one entry for `std::complex<double>`, one for
 * `std::complex<float>`, and a trailing `DefaultResultEntry<void>`.
 *
 * NOTE(review): presumably `value_type` is `void` for every other `T`
 * (ConjContigFactory returns `nullptr` when it sees `void`) -- confirm
 * against the dpctl definitions of `TypeMapResultEntry` and
 * `DefaultResultEntry`.
 *
 * @tparam T Type of input vector `a` and of result vector `y`.
 */
template <typename T>
struct ConjOutputType
{
using value_type = typename std::disjunction<
dpctl_td_ns::
TypeMapResultEntry<T, std::complex<double>, std::complex<double>>,
dpctl_td_ns::
TypeMapResultEntry<T, std::complex<float>, std::complex<float>>,
dpctl_td_ns::DefaultResultEntry<void>>::result_type;
};

/**
* @brief A factory to define pairs of supported types for which
* MKL VM library provides support in oneapi::mkl::vm::cos<T> function.
Expand Down
30 changes: 30 additions & 0 deletions dpnp/backend/extensions/vm/vm_py.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <pybind11/stl.h>

#include "common.hpp"
#include "conj.hpp"
#include "cos.hpp"
#include "div.hpp"
#include "ln.hpp"
Expand All @@ -48,6 +49,7 @@ using vm_ext::unary_impl_fn_ptr_t;
static binary_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types];

static unary_impl_fn_ptr_t cos_dispatch_vector[dpctl_td_ns::num_types];
static unary_impl_fn_ptr_t conj_dispatch_vector[dpctl_td_ns::num_types];
static unary_impl_fn_ptr_t ln_dispatch_vector[dpctl_td_ns::num_types];
static unary_impl_fn_ptr_t sin_dispatch_vector[dpctl_td_ns::num_types];
static unary_impl_fn_ptr_t sqr_dispatch_vector[dpctl_td_ns::num_types];
Expand Down Expand Up @@ -116,6 +118,34 @@ PYBIND11_MODULE(_vm_impl, m)
py::arg("sycl_queue"), py::arg("src"), py::arg("dst"));
}

// UnaryUfunc: ==== Conj(x) ====
// Registers two Python-visible functions for `conj`: `_conj` (run the
// computation) and `_mkl_conj_to_call` (check whether it can be used).
{
// Fill the conj dispatch vector via ConjContigFactory.
// NOTE(review): presumably one entry per dpctl type id, with nullptr for
// unsupported types -- confirm in init_ufunc_dispatch_vector.
vm_ext::init_ufunc_dispatch_vector<unary_impl_fn_ptr_t,
vm_ext::ConjContigFactory>(
conj_dispatch_vector);

// Forwards the arguments to vm_ext::unary_ufunc together with the conj
// dispatch vector; `depends` defaults to an empty event list.
auto conj_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst,
const event_vecT &depends = {}) {
return vm_ext::unary_ufunc(exec_q, src, dst, depends,
conj_dispatch_vector);
};
m.def("_conj", conj_pyapi,
"Call `conj` function from OneMKL VM library to compute "
"conjugate of vector elements",
py::arg("sycl_queue"), py::arg("src"), py::arg("dst"),
py::arg("depends") = py::list());

// Forwards the arguments to vm_ext::need_to_call_unary_ufunc with the same
// dispatch vector; takes no `depends` argument.
auto conj_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src,
arrayT dst) {
return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst,
conj_dispatch_vector);
};
m.def("_mkl_conj_to_call", conj_need_to_call_pyapi,
"Check input arguments to answer if `conj` function from "
"OneMKL VM library can be used",
py::arg("sycl_queue"), py::arg("src"), py::arg("dst"));
}

// UnaryUfunc: ==== Ln(x) ====
{
vm_ext::init_ufunc_dispatch_vector<unary_impl_fn_ptr_t,
Expand Down
16 changes: 7 additions & 9 deletions dpnp/backend/include/dpnp_iface_fptr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,13 @@ enum class DPNPFuncName : size_t
DPNP_FN_CEIL_EXT, /**< Used in numpy.ceil() impl, requires extra parameters
*/
DPNP_FN_CHOLESKY, /**< Used in numpy.linalg.cholesky() impl */
DPNP_FN_CHOLESKY_EXT, /**< Used in numpy.linalg.cholesky() impl, requires
extra parameters */
DPNP_FN_CONJIGUATE, /**< Used in numpy.conjugate() impl */
DPNP_FN_CONJIGUATE_EXT, /**< Used in numpy.conjugate() impl, requires extra
parameters */
DPNP_FN_CHOOSE, /**< Used in numpy.choose() impl */
DPNP_FN_CHOOSE_EXT, /**< Used in numpy.choose() impl, requires extra
parameters */
DPNP_FN_COPY, /**< Used in numpy.copy() impl */
DPNP_FN_CHOLESKY_EXT, /**< Used in numpy.linalg.cholesky() impl, requires
extra parameters */
DPNP_FN_CONJUGATE, /**< Used in numpy.conjugate() impl */
DPNP_FN_CHOOSE, /**< Used in numpy.choose() impl */
DPNP_FN_CHOOSE_EXT, /**< Used in numpy.choose() impl, requires extra
parameters */
DPNP_FN_COPY, /**< Used in numpy.copy() impl */
DPNP_FN_COPY_EXT, /**< Used in numpy.copy() impl, requires extra parameters
*/
DPNP_FN_COPYSIGN, /**< Used in numpy.copysign() impl */
Expand Down
21 changes: 5 additions & 16 deletions dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,28 +1029,17 @@ constexpr auto dispatch_fmod_op(T elem1, T elem2)

static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
{
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_INT][eft_INT] = {
fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_INT][eft_INT] = {
eft_INT, (void *)dpnp_copy_c_default<int32_t>};
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_LNG][eft_LNG] = {
fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_LNG][eft_LNG] = {
eft_LNG, (void *)dpnp_copy_c_default<int64_t>};
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_FLT][eft_FLT] = {
fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_FLT][eft_FLT] = {
eft_FLT, (void *)dpnp_copy_c_default<float>};
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_DBL][eft_DBL] = {
fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_DBL][eft_DBL] = {
eft_DBL, (void *)dpnp_copy_c_default<double>};
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_C128][eft_C128] = {
fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_C128][eft_C128] = {
eft_C128, (void *)dpnp_conjugate_c_default<std::complex<double>>};

fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_INT][eft_INT] = {
eft_INT, (void *)dpnp_copy_c_ext<int32_t>};
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_LNG][eft_LNG] = {
eft_LNG, (void *)dpnp_copy_c_ext<int64_t>};
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_FLT][eft_FLT] = {
eft_FLT, (void *)dpnp_copy_c_ext<float>};
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_DBL][eft_DBL] = {
eft_DBL, (void *)dpnp_copy_c_ext<double>};
fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_C128][eft_C128] = {
eft_C128, (void *)dpnp_conjugate_c_ext<std::complex<double>>};

fmap[DPNPFuncName::DPNP_FN_COPY][eft_BLN][eft_BLN] = {
eft_BLN, (void *)dpnp_copy_c_default<bool>};
fmap[DPNPFuncName::DPNP_FN_COPY][eft_INT][eft_INT] = {
Expand Down
2 changes: 0 additions & 2 deletions dpnp/dpnp_algo/dpnp_algo.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na
DPNP_FN_CHOLESKY_EXT
DPNP_FN_CHOOSE
DPNP_FN_CHOOSE_EXT
DPNP_FN_CONJIGUATE
DPNP_FN_CONJIGUATE_EXT
DPNP_FN_COPY
DPNP_FN_COPY_EXT
DPNP_FN_COPYSIGN
Expand Down
5 changes: 0 additions & 5 deletions dpnp/dpnp_algo/dpnp_algo_mathematical.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ __all__ += [
"dpnp_arctan2",
"dpnp_around",
"dpnp_ceil",
"dpnp_conjugate",
"dpnp_copysign",
"dpnp_cross",
"dpnp_cumprod",
Expand Down Expand Up @@ -163,10 +162,6 @@ cpdef utils.dpnp_descriptor dpnp_ceil(utils.dpnp_descriptor x1, utils.dpnp_descr
return call_fptr_1in_1out_strides(DPNP_FN_CEIL_EXT, x1, dtype=None, out=out, where=True, func_name='ceil')


cpdef utils.dpnp_descriptor dpnp_conjugate(utils.dpnp_descriptor x1):
return call_fptr_1in_1out_strides(DPNP_FN_CONJIGUATE_EXT, x1)


cpdef utils.dpnp_descriptor dpnp_copysign(utils.dpnp_descriptor x1_obj,
utils.dpnp_descriptor x2_obj,
object dtype=None,
Expand Down
53 changes: 53 additions & 0 deletions dpnp/dpnp_algo/dpnp_elementwise_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"dpnp_bitwise_and",
"dpnp_bitwise_or",
"dpnp_bitwise_xor",
"dpnp_conj",
"dpnp_cos",
"dpnp_divide",
"dpnp_equal",
Expand Down Expand Up @@ -367,6 +368,58 @@ def _call_cos(src, dst, sycl_queue, depends=None):
return dpnp_array._create_from_usm_ndarray(res_usm)


# Docstring text handed to UnaryElementwiseFunc when `conj_func` is built.
_conj_docstring = """
conj(x, out=None, order='K')
Computes conjugate for each element `x_i` for input array `x`.
Args:
x (dpnp.ndarray):
Input array, expected to have numeric data type.
out ({None, dpnp.ndarray}, optional):
Output array to populate. Array must have the correct
shape and the expected data type.
order ("C","F","A","K", optional): memory layout of the new
output array, if parameter `out` is `None`.
Default: "K".
Return:
dpnp.ndarray:
An array containing the element-wise conjugate.
The returned array has the same data type as `x`.
"""


def _call_conj(src, dst, sycl_queue, depends=None):
    """
    Callback registered with the UnaryElementwiseFunc class of dpctl.tensor.

    Uses the OneMKL VM pybind11 extension (`vmi._conj`) when
    `vmi._mkl_conj_to_call` accepts the arguments, and the `ti._conj`
    implementation otherwise. A missing `depends` is replaced by an
    empty list. Returns whatever the selected implementation returns.
    """

    deps = [] if depends is None else depends

    use_mkl = vmi._mkl_conj_to_call(sycl_queue, src, dst)
    if not use_mkl:
        return ti._conj(src, dst, sycl_queue, deps)

    # call pybind11 extension for conj() function from OneMKL VM
    return vmi._conj(sycl_queue, src, dst, deps)


# Element-wise `conj` object built from the `ti._conj_result_type` helper,
# the `_call_conj` callback and the `_conj_docstring` text.
conj_func = UnaryElementwiseFunc(
"conj", ti._conj_result_type, _call_conj, _conj_docstring
)


def dpnp_conj(x, out=None, order="K"):
    """
    Compute the element-wise conjugate of `x` through `conj_func`.

    Invokes conj() function from pybind11 extension of OneMKL VM if possible.
    Otherwise fully relies on dpctl.tensor implementation for conj() function.

    `x` (and `out`, when given) are converted with `dpnp.get_usm_ndarray`
    before the call; the result is wrapped back into a dpnp array.
    """
    # dpctl.tensor only works with usm_ndarray
    usm_in = dpnp.get_usm_ndarray(x)
    usm_out = dpnp.get_usm_ndarray(out) if out is not None else None

    usm_result = conj_func(usm_in, out=usm_out, order=order)
    return dpnp_array._create_from_usm_ndarray(usm_result)


_divide_docstring_ = """
divide(x1, x2, out=None, order="K")
Expand Down
42 changes: 35 additions & 7 deletions dpnp/dpnp_iface_mathematical.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@

from .dpnp_algo import *
from .dpnp_algo.dpnp_elementwise_common import (
check_nd_call_func,
dpnp_add,
dpnp_conj,
dpnp_divide,
dpnp_floor_divide,
dpnp_multiply,
Expand Down Expand Up @@ -387,7 +389,17 @@ def ceil(x1, out=None, **kwargs):
return call_origin(numpy.ceil, x1, out=out, **kwargs)


def conjugate(x1, **kwargs):
def conjugate(
x,
/,
out=None,
*,
order="K",
where=True,
dtype=None,
subok=True,
**kwargs,
):
"""
Return the complex conjugate, element-wise.
Expand All @@ -396,6 +408,18 @@ def conjugate(x1, **kwargs):
For full documentation refer to :obj:`numpy.conjugate`.
Returns
-------
out : dpnp.ndarray
The conjugate of each element of `x`.
Limitations
-----------
Parameter `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
Parameters `where`, `dtype` and `subok` are supported only with their default values.
Otherwise the function will be executed sequentially on CPU.
Input array data types are limited by supported DPNP :ref:`Data types`.
Examples
--------
>>> import dpnp as np
Expand All @@ -409,13 +433,17 @@ def conjugate(x1, **kwargs):
"""

x1_desc = dpnp.get_dpnp_descriptor(
x1, copy_when_strides=False, copy_when_nondefault_queue=False
return check_nd_call_func(
numpy.conjugate,
dpnp_conj,
x,
out=out,
where=where,
order=order,
dtype=dtype,
subok=subok,
**kwargs,
)
if x1_desc and not kwargs:
return dpnp_conjugate(x1_desc).get_pyobj()

return call_origin(numpy.conjugate, x1, **kwargs)


conj = conjugate
Expand Down
2 changes: 0 additions & 2 deletions tests/skipped_tests_gpu.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.
tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.astype(dpnp.asarray(x), dpnp.float32)]

tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
Expand All @@ -22,7 +21,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]

tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]
tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3]
tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4]
tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5]
Expand Down
Loading

0 comments on commit 3cba1ce

Please sign in to comment.