Skip to content

Commit

Permalink
ht_dec.c: Improve MSVC arm64 popcount performance
Browse files Browse the repository at this point in the history
Use NEON intrinsics (`neon_cnt` followed by `neon_addv8`) to compute the population count on MSVC ARM64 builds; the implementation is based on microsoft/STL#2127.

Godbolt output here: https://godbolt.org/z/q7GPTqT14
  • Loading branch information
PeterJohnson authored Sep 7, 2023
1 parent 1ee6d11 commit 9ae5a26
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/lib/openjp2/ht_dec.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@
#define OPJ_COMPILER_GNUC
#endif

#if defined(OPJ_COMPILER_MSVC) && defined(_M_ARM64)
#include <arm64_neon.h>
#endif

//************************************************************************/
/** @brief Displays the error message for disabling the decoding of SPP and
* MRP passes
Expand All @@ -71,6 +75,9 @@ OPJ_UINT32 population_count(OPJ_UINT32 val)
{
#if defined(OPJ_COMPILER_MSVC) && (defined(_M_IX86) || defined(_M_AMD64))
return (OPJ_UINT32)__popcnt(val);
#elif defined(OPJ_COMPILER_MSVC) && defined(_M_ARM64)
const __n64 _Temp = neon_cnt(__uint64ToN64_v(val));
return neon_addv8(_Temp).n8_i8[0];
#elif (defined OPJ_COMPILER_GNUC)
return (OPJ_UINT32)__builtin_popcount(val);
#else
Expand Down

0 comments on commit 9ae5a26

Please sign in to comment.