Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.11.0 - kraoñenn #1946

Merged
merged 3 commits into from
Dec 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
2021-12-08 Serge Guelton <serge.guelton@telecom-bretagne.eu>

* Upgrade xsimd version to 8.0.5

* Performance fix on np.dot with transposed parameters

* Raise an error when no specs are given and we want to generate a native module

* Support more complex transpose expressions

* Honor quotes in config files for *FLAGS

* Add pythran option -ftime-report to print out the time spent on optimizations
* Support isinstance when second argument is a tuple

2021-09-08 Serge Guelton <serge.guelton@telecom-bretagne.eu>

* Fix performance issue with assert handling
Expand Down
2 changes: 1 addition & 1 deletion docs/CLI.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ The generated native ``.so`` module can then be used with the Python interpreter
Pythran version can be dumped through ``--version``::

$> pythran --version 2>&1
0.11.0.dev0
0.11.0

The module-level ``__pythran__`` variable indicates that the module loaded has been pythranized::

Expand Down
10 changes: 10 additions & 0 deletions pythran/pythonic/include/numpy/conjugate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@

#include <xsimd/xsimd.hpp>

// Inject an extra symbol into the xsimd namespace: the using-declaration below
// makes the std::conj overloads reachable as xsimd::conj.
// The guard enables this only for xsimd versions up to and including 8.0.5
// (any major version below 8, or exactly 8.0.x with x <= 5). It is a stop-gap
// until the missing symbol is fixed upstream.
#if XSIMD_VERSION_MAJOR < 8 || \
(XSIMD_VERSION_MAJOR == 8 && XSIMD_VERSION_MINOR == 0 && \
XSIMD_VERSION_PATCH <= 5)
namespace xsimd
{
using std::conj;
}
#endif

PYTHONIC_NS_BEGIN

namespace numpy
Expand Down
2 changes: 1 addition & 1 deletion pythran/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Package metadata: version string, project URL and one-line description.
# NOTE(review): __version__ is assigned twice in this view (this looks like the
# removed and the added line of a diff); as written, the second assignment is
# the one that takes effect.
__version__ = '0.11.0.dev0'
__version__ = '0.11.0'
__url__ = 'https://github.com/serge-sans-paille/pythran'
__descr__ = 'Ahead of Time compiler for numeric kernels'
197 changes: 112 additions & 85 deletions third_party/xsimd/arch/generic/xsimd_generic_arithmetic.hpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_GENERIC_ARITHMETIC_HPP
#define XSIMD_GENERIC_ARITHMETIC_HPP
Expand All @@ -17,85 +17,112 @@

#include "./xsimd_generic_details.hpp"

namespace xsimd {
namespace xsimd
{

namespace kernel
{

using namespace types;

// bitwise_lshift
// Generic-architecture fallback for the batch left shift. A scalar
// `lhs << rhs` functor is handed, together with both batches, to
// detail::apply, whose result is returned unchanged.
// NOTE(review): detail::apply is defined elsewhere; presumably it evaluates
// the functor on each pair of corresponding elements -- confirm.
// The unnamed third template parameter has its default commented out here,
// presumably because a declaration elsewhere supplies it -- confirm.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
{
    auto const shift_left = [](T lhs, T rhs) noexcept
    { return lhs << rhs; };
    return detail::apply(shift_left, self, other);
}

// bitwise_rshift
// Generic-architecture fallback for the batch right shift. A scalar
// `lhs >> rhs` functor is handed, together with both batches, to
// detail::apply, whose result is returned unchanged.
// NOTE(review): detail::apply is defined elsewhere; presumably it evaluates
// the functor on each pair of corresponding elements -- confirm.
// The unnamed third template parameter has its default commented out here,
// presumably because a declaration elsewhere supplies it -- confirm.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
{
    auto const shift_right = [](T lhs, T rhs) noexcept
    { return lhs >> rhs; };
    return detail::apply(shift_right, self, other);
}

// div
// Generic-architecture fallback for batch division. The enable_if on
// std::is_integral<T> restricts this overload to integral element types.
// A scalar `num / den` functor (result converted back to T) is passed with
// both batches to detail::apply. No check on the divisor is performed here.
// NOTE(review): detail::apply is defined elsewhere; presumably it evaluates
// the functor on each pair of corresponding elements -- confirm.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
{
    auto const quotient = [](T num, T den) noexcept -> T
    { return num / den; };
    return detail::apply(quotient, self, other);
}

// fma
// Generic-architecture fallback for multiply-add: evaluates x * y + z using
// the batch multiplication operator followed by the batch addition operator.
template <class A, class T>
inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
{
    auto const product = x * y;
    return product + z;
}

// Complex multiply-add x * y + z, assembled from operations on the real and
// imaginary component batches. Assuming the three-argument fma(a, b, c) and
// fms(a, b, c) called here compute a*b + c and a*b - c respectively:
//   real = x.re*y.re - (x.im*y.im - z.re)
//   imag = x.re*y.im + (x.im*y.re + z.im)
template <class A, class T>
inline batch<std::complex<T>, A> fma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
{
    auto const real_tail = fms(x.imag(), y.imag(), z.real());
    auto const real_part = fms(x.real(), y.real(), real_tail);
    auto const imag_tail = fma(x.imag(), y.real(), z.imag());
    auto const imag_part = fma(x.real(), y.imag(), imag_tail);
    return { real_part, imag_part };
}

// fms
// Generic-architecture fallback for multiply-subtract: evaluates x * y - z
// using the batch multiplication operator followed by the batch subtraction
// operator.
template <class A, class T>
inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
{
    auto const product = x * y;
    return product - z;
}

// Complex multiply-subtract x * y - z, assembled from operations on the real
// and imaginary component batches. Assuming the three-argument fma(a, b, c)
// and fms(a, b, c) called here compute a*b + c and a*b - c respectively:
//   real = x.re*y.re - (x.im*y.im + z.re)
//   imag = x.re*y.im + (x.im*y.re - z.im)
template <class A, class T>
inline batch<std::complex<T>, A> fms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
{
    auto const real_tail = fma(x.imag(), y.imag(), z.real());
    auto const real_part = fms(x.real(), y.real(), real_tail);
    auto const imag_tail = fms(x.imag(), y.real(), z.imag());
    auto const imag_part = fma(x.real(), y.imag(), imag_tail);
    return { real_part, imag_part };
}

// fnma
// Generic-architecture fallback for negated multiply-add: evaluates
// (-x) * y + z with the batch unary minus, multiplication and addition
// operators.
template <class A, class T>
inline batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
{
    auto const negated_x = -x;
    return negated_x * y + z;
}

// Complex negated multiply-add -(x * y) + z, assembled from operations on the
// real and imaginary component batches. Assuming the three-argument
// fma(a, b, c) and fms(a, b, c) called here compute a*b + c and a*b - c:
//   real = -(x.re*y.re - (x.im*y.im + z.re))
//   imag = -(x.re*y.im + (x.im*y.re - z.im))
template <class A, class T>
inline batch<std::complex<T>, A> fnma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
{
    auto const real_tail = fma(x.imag(), y.imag(), z.real());
    auto const real_part = -fms(x.real(), y.real(), real_tail);
    auto const imag_tail = fms(x.imag(), y.real(), z.imag());
    auto const imag_part = -fma(x.real(), y.imag(), imag_tail);
    return { real_part, imag_part };
}

// fnms
// Generic-architecture fallback for negated multiply-subtract: evaluates
// (-x) * y - z with the batch unary minus, multiplication and subtraction
// operators.
template <class A, class T>
inline batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
{
    auto const negated_x = -x;
    return negated_x * y - z;
}

// Complex negated multiply-subtract -(x * y) - z, assembled from operations
// on the real and imaginary component batches. Assuming the three-argument
// fma(a, b, c) and fms(a, b, c) called here compute a*b + c and a*b - c:
//   real = -(x.re*y.re - (x.im*y.im - z.re))
//   imag = -(x.re*y.im + (x.im*y.re + z.im))
template <class A, class T>
inline batch<std::complex<T>, A> fnms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
{
    auto const real_tail = fms(x.imag(), y.imag(), z.real());
    auto const real_part = -fms(x.real(), y.real(), real_tail);
    auto const imag_tail = fma(x.imag(), y.real(), z.imag());
    auto const imag_part = -fma(x.real(), y.imag(), imag_tail);
    return { real_part, imag_part };
}

// mul
// Generic-architecture fallback for batch multiplication. A scalar
// `lhs * rhs` functor (result converted back to T) is passed with both
// batches to detail::apply, whose result is returned unchanged.
// NOTE(review): detail::apply is defined elsewhere; presumably it evaluates
// the functor on each pair of corresponding elements -- confirm.
// The unnamed third template parameter has its default commented out here,
// presumably because a declaration elsewhere supplies it -- confirm.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
inline batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
{
    auto const product = [](T lhs, T rhs) noexcept -> T
    { return lhs * rhs; };
    return detail::apply(product, self, other);
}

namespace kernel {

using namespace types;

// bitwise_lshift
// Generic-architecture fallback for the batch left shift: passes a scalar
// `lhs << rhs` functor and both batches to detail::apply and returns the
// result.
// NOTE(review): detail::apply is defined elsewhere; presumably it evaluates
// the functor on each pair of corresponding elements -- confirm.
// The unnamed third template parameter has its default commented out here,
// presumably because a declaration elsewhere supplies it -- confirm.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>)
{
    auto const shift_left = [](T lhs, T rhs) { return lhs << rhs; };
    return detail::apply(shift_left, self, other);
}

// bitwise_rshift
// Generic-architecture fallback for the batch right shift: passes a scalar
// `lhs >> rhs` functor and both batches to detail::apply and returns the
// result.
// NOTE(review): detail::apply is defined elsewhere; presumably it evaluates
// the functor on each pair of corresponding elements -- confirm.
// The unnamed third template parameter has its default commented out here,
// presumably because a declaration elsewhere supplies it -- confirm.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>)
{
    auto const shift_right = [](T lhs, T rhs) { return lhs >> rhs; };
    return detail::apply(shift_right, self, other);
}

// div
// Generic-architecture fallback for batch division, restricted to integral
// element types by the enable_if on std::is_integral<T>. A scalar
// `num / den` functor (result converted back to T) is passed with both
// batches to detail::apply. No check on the divisor is performed here.
// NOTE(review): detail::apply is defined elsewhere; presumably it evaluates
// the functor on each pair of corresponding elements -- confirm.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>)
{
    auto const quotient = [](T num, T den) -> T { return num / den; };
    return detail::apply(quotient, self, other);
}

// fma
// Generic-architecture fallback for multiply-add: evaluates x * y + z using
// the batch multiplication operator followed by the batch addition operator.
template <class A, class T>
batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>)
{
    auto const product = x * y;
    return product + z;
}

// Complex multiply-add x * y + z, assembled from operations on the real and
// imaginary component batches. Assuming the three-argument fma(a, b, c) and
// fms(a, b, c) called here compute a*b + c and a*b - c respectively:
//   real = x.re*y.re - (x.im*y.im - z.re)
//   imag = x.re*y.im + (x.im*y.re + z.im)
template <class A, class T>
batch<std::complex<T>, A> fma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>)
{
    auto const real_tail = fms(x.imag(), y.imag(), z.real());
    auto const real_part = fms(x.real(), y.real(), real_tail);
    auto const imag_tail = fma(x.imag(), y.real(), z.imag());
    auto const imag_part = fma(x.real(), y.imag(), imag_tail);
    return { real_part, imag_part };
}

// fms
// Generic-architecture fallback for multiply-subtract: evaluates x * y - z
// using the batch multiplication operator followed by the batch subtraction
// operator.
template <class A, class T>
batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>)
{
    auto const product = x * y;
    return product - z;
}

// Complex multiply-subtract x * y - z, assembled from operations on the real
// and imaginary component batches. Assuming the three-argument fma(a, b, c)
// and fms(a, b, c) called here compute a*b + c and a*b - c respectively:
//   real = x.re*y.re - (x.im*y.im + z.re)
//   imag = x.re*y.im + (x.im*y.re - z.im)
template <class A, class T>
batch<std::complex<T>, A> fms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>)
{
    auto const real_tail = fma(x.imag(), y.imag(), z.real());
    auto const real_part = fms(x.real(), y.real(), real_tail);
    auto const imag_tail = fms(x.imag(), y.real(), z.imag());
    auto const imag_part = fma(x.real(), y.imag(), imag_tail);
    return { real_part, imag_part };
}

// fnma
// Generic-architecture fallback for negated multiply-add: evaluates
// (-x) * y + z with the batch unary minus, multiplication and addition
// operators.
template <class A, class T>
batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>)
{
    auto const negated_x = -x;
    return negated_x * y + z;
}

// Complex negated multiply-add -(x * y) + z, assembled from operations on the
// real and imaginary component batches. Assuming the three-argument
// fma(a, b, c) and fms(a, b, c) called here compute a*b + c and a*b - c:
//   real = -(x.re*y.re - (x.im*y.im + z.re))
//   imag = -(x.re*y.im + (x.im*y.re - z.im))
template <class A, class T>
batch<std::complex<T>, A> fnma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>)
{
    auto const real_tail = fma(x.imag(), y.imag(), z.real());
    auto const real_part = -fms(x.real(), y.real(), real_tail);
    auto const imag_tail = fms(x.imag(), y.real(), z.imag());
    auto const imag_part = -fma(x.real(), y.imag(), imag_tail);
    return { real_part, imag_part };
}

// fnms
// Generic-architecture fallback for negated multiply-subtract: evaluates
// (-x) * y - z with the batch unary minus, multiplication and subtraction
// operators.
template <class A, class T>
batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>)
{
    auto const negated_x = -x;
    return negated_x * y - z;
}

// Complex negated multiply-subtract -(x * y) - z, assembled from operations
// on the real and imaginary component batches. Assuming the three-argument
// fma(a, b, c) and fms(a, b, c) called here compute a*b + c and a*b - c:
//   real = -(x.re*y.re - (x.im*y.im - z.re))
//   imag = -(x.re*y.im + (x.im*y.re + z.im))
template <class A, class T>
batch<std::complex<T>, A> fnms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>)
{
    auto const real_tail = fms(x.imag(), y.imag(), z.real());
    auto const real_part = -fms(x.real(), y.real(), real_tail);
    auto const imag_tail = fma(x.imag(), y.real(), z.imag());
    auto const imag_part = -fma(x.real(), y.imag(), imag_tail);
    return { real_part, imag_part };
}



// mul
// Generic-architecture fallback for batch multiplication: passes a scalar
// `lhs * rhs` functor (result converted back to T) and both batches to
// detail::apply and returns the result.
// NOTE(review): detail::apply is defined elsewhere; presumably it evaluates
// the functor on each pair of corresponding elements -- confirm.
// The unnamed third template parameter has its default commented out here,
// presumably because a declaration elsewhere supplies it -- confirm.
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>)
{
    auto const product = [](T lhs, T rhs) -> T { return lhs * rhs; };
    return detail::apply(product, self, other);
}

}

}

#endif

Loading