Skip to content

Commit

Permalink
Fix cpu detection of sse2 on non-64 x86 (AcademySoftwareFoundation#1467)
Browse files Browse the repository at this point in the history
* Fix cpu detection of sse2 on non-64 x86

The previous code accidentally excluded i386 (i.e. non-64-bit x86
builds). This refactors and simplifies the ifdef snarl to clarify
and fix that.

Fixes AcademySoftwareFoundation#1459

Signed-off-by: Kimball Thurston <kdt3rd@gmail.com>

* Fix mismatch with windows and enable avx support check there

Signed-off-by: Kimball Thurston <kdt3rd@gmail.com>

* Disable avx detection under msvc for now

We require gcc / clang style inline asm for avx support; detecting it
properly causes problems for the dwa support, which uses that. Disable
it again.

Signed-off-by: Kimball Thurston <kdt3rd@gmail.com>

---------

Signed-off-by: Kimball Thurston <kdt3rd@gmail.com>
  • Loading branch information
kdt3rd authored and cary-ilm committed Jul 25, 2023
1 parent d394f92 commit fbffcb8
Showing 1 changed file with 54 additions and 24 deletions.
78 changes: 54 additions & 24 deletions src/lib/OpenEXRCore/internal_cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@

#include "OpenEXRConfigInternal.h"

#if defined(__x86_64__) || defined(_M_X64)
/*
 * OPENEXR_ENABLE_X86_SIMD_CHECK is 1 when compiling for 32-bit or 64-bit
 * x86 and 0 otherwise; the code below uses it to gate the x86 SIMD checks.
 *
 * MSVC identifies 32-bit x86 targets with _M_IX86 (there is no _M_X86
 * predefined macro) and 64-bit x86 targets with _M_X64.
 */
#if defined(i386) || defined(__i386__) || defined(__i386) ||                   \
    defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
#    define OPENEXR_ENABLE_X86_SIMD_CHECK 1
#else
#    define OPENEXR_ENABLE_X86_SIMD_CHECK 0
#endif

#if OPENEXR_ENABLE_X86_SIMD_CHECK
# if defined(_MSC_VER) && defined(_WIN32)
# include <intrin.h>
# else
Expand All @@ -19,30 +25,35 @@ static inline void check_for_x86_simd (int *f16c, int *avx, int *sse2)
#ifdef __e2k__
# if defined(__SSE2__)
*sse2 = 1;
# else
*sse2 = 0;
# endif
# if defined(__AVX__)
*avx = 1;
# else
*avx = 0;
# endif
# if defined(__F16C__)
*f16c = 1;
# else
*f16c = 0;
# endif

#else
# if (defined(__x86_64__) || defined(_M_X64))
#elif OPENEXR_ENABLE_X86_SIMD_CHECK

# if defined(__AVX__) && defined(__F16C__)
// shortcut if everything is turned on / compiled in
# if defined(__AVX__) && defined(__F16C__)
*f16c = 1;
*avx = 1;
*sse2 = 1;
# else
# ifdef _WIN32
# elif defined(_MSC_VER) && defined(_WIN32)
int regs[4], osxsave;

__cpuid (regs, 0);
if (regs[0] >= 1) { __cpuidex (regs, 1, 0); }
else
regs[2] = 0;
# else
# else
unsigned int regs[4], osxsave;
__get_cpuid (0, &regs[0], &regs[1], &regs[2], &regs[3]);
if (regs[0] >= 1)
Expand All @@ -51,9 +62,16 @@ static inline void check_for_x86_simd (int *f16c, int *avx, int *sse2)
}
else
regs[2] = 0;
# endif
/* AVX is indicated by bit 28, F16C by 29 of ECX (reg 2) */
# endif

/*
* linux cpuid.h for x86 has defines but not consistent cross platform
*
* see cpuid.h bit_AVX bit_F16C bit_SSE2
*/

osxsave = (regs[2] & (1 << 27)) ? 1 : 0;
/* AVX is indicated by bit 28, F16C by 29 of ECX (reg 2) */
*avx = (regs[2] & (1 << 28)) ? 1 : 0;
*f16c = (regs[2] & (1 << 29)) ? 1 : 0;
/* sse2 is in EDX bit 26 */
Expand All @@ -67,40 +85,52 @@ static inline void check_for_x86_simd (int *f16c, int *avx, int *sse2)
else
{
/* check extended control register */
# if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX) && \
(defined(_M_X64) || defined(__x86_64__))
__asm__ __volatile__("xgetbv"
: /* Output */ "=a"(regs[0]), "=d"(regs[3])
: /* Input */ "c"(0)
: /* Clobber */);
# if defined(_M_X64) || defined(__x86_64__)
# if defined(_MSC_VER)
/* TODO: remove the following disablement once we can do inline msvc */
# if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX)
regs[0] = _xgetbv(0);
# else
regs[0] = 0;
# endif
# else
__asm__ __volatile__ ("xgetbv"
: /* Output */ "=a"(regs[0]), "=d"(regs[3])
: /* Input */ "c"(0)
: /* Clobber */);
# endif
/* eax bit 1 - SSE managed, bit 2 - AVX managed */
if ((regs[0] & 6) != 6)
{
*avx = 0;
*f16c = 0;
}
# else
*avx = 0;
*f16c = 0;
# endif
}
# endif
# else
*avx = 0;
*f16c = 0;
# endif
}

#else
// not on x86
*f16c = 0;
*avx = 0;
*sse2 = 0;
# endif
#endif

}

/*
 * Reports whether native half-float support is available (going by the
 * function name; callers are not visible here).
 *
 * Returns nonzero on x86 only when check_for_x86_simd reports both AVX and
 * F16C, returns 1 unconditionally on aarch64, and 0 on every other target.
 *
 * NOTE(review): the two consecutive #if lines below look like the removed
 * and added sides of a diff (only one #endif follows) - confirm which
 * condition is the intended one.
 */
static inline int has_native_half (void)
{
#if defined(__x86_64__) || defined(_M_X64)
#if OPENEXR_ENABLE_X86_SIMD_CHECK
int sse2, avx, f16c;
/* sse2 is filled in by the check but does not affect the result */
check_for_x86_simd (&f16c, &avx, &sse2);
return avx && f16c;
#elif defined(__aarch64__)
/* aarch64 is reported as supported without any runtime check */
return 1;
#else
// TODO: add case for neon?
return 0;
#endif
}

#undef OPENEXR_ENABLE_X86_SIMD_CHECK

0 comments on commit fbffcb8

Please sign in to comment.