Skip to content

Commit

Permalink
KokkosKernels: use numeric_limits<T>::quiet_NaN
Browse files Browse the repository at this point in the history
using this instead of strtod() and friends
dramatically speeds up the
Kokkos::ArithTraits<T>::nan()
function.
The performance test
packages/minitensor/test/perf_test_01.cc
went from 5.5 seconds to 0.6 seconds.

see kokkos/kokkos-kernels#35
  • Loading branch information
ibaned committed Jul 11, 2017
1 parent b14ae51 commit dde72d7
Showing 1 changed file with 3 additions and 8 deletions.
11 changes: 3 additions & 8 deletions packages/kokkos-kernels/src/Kokkos_ArithTraits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
#include <cfloat>
#include <climits>
#include <cmath>
#include <cstdlib> // strtof, strtod, strtold
#include <complex> // std::complex
#include <limits> // std::numeric_limits
#ifdef __CUDACC__
Expand Down Expand Up @@ -645,10 +644,8 @@ class ArithTraits<float> {
/// \brief Return a quiet (non-signaling) NaN of type \c float.
///
/// When compiling device code (\c __CUDA_ARCH__ defined) this returns
/// the \c CUDART_NAN_F constant; otherwise it returns
/// <tt>std::numeric_limits<float>::quiet_NaN()</tt>.
static KOKKOS_FORCEINLINE_FUNCTION float nan () {
#ifdef __CUDA_ARCH__
  return CUDART_NAN_F;
#else
  // Use the numeric_limits constant rather than parsing the string
  // "NAN()" with strtof on every call; string parsing at run time is
  // far slower and yields the same kind of value.
  return std::numeric_limits<float>::quiet_NaN();
#endif // __CUDA_ARCH__
}
static KOKKOS_FORCEINLINE_FUNCTION mag_type eps () {
Expand Down Expand Up @@ -886,10 +883,8 @@ class ArithTraits<double> {
/// \brief Return a quiet (non-signaling) NaN of type \c val_type.
///
/// When compiling device code (\c __CUDA_ARCH__ defined) this returns
/// the \c CUDART_NAN constant; otherwise it returns
/// <tt>std::numeric_limits<val_type>::quiet_NaN()</tt>.
static KOKKOS_FORCEINLINE_FUNCTION val_type nan () {
#ifdef __CUDA_ARCH__
  return CUDART_NAN;
#else
  // Use the numeric_limits constant rather than parsing the string
  // "NAN" with strtod on every call; string parsing at run time is
  // far slower and yields the same kind of value.
  return std::numeric_limits<val_type>::quiet_NaN();
#endif // __CUDA_ARCH__
}
static KOKKOS_FORCEINLINE_FUNCTION mag_type epsilon () {
Expand Down Expand Up @@ -1020,7 +1015,7 @@ class ArithTraits<long double> {
return ::log10 (x);
}
/// \brief Return a quiet (non-signaling) NaN of type \c val_type.
///
/// Uses <tt>std::numeric_limits<val_type>::quiet_NaN()</tt> instead of
/// parsing the string "NAN()" with strtold on every call.
static val_type nan () {
  return std::numeric_limits<val_type>::quiet_NaN();
}
static mag_type epsilon () {
return LDBL_EPSILON;
Expand Down

0 comments on commit dde72d7

Please sign in to comment.