Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce XXH_SIZE_OPT and XXH_NO_STREAM #667

Merged
merged 3 commits into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,23 @@ jobs:
run: |
CFLAGS="-Wall -Wextra -Werror" make DISPATCH=1 clean default

- name: XXH_SIZE_OPT == 2
if: always()
run: |
CFLAGS="-Os -DXXH_SIZE_OPT=2 -Wall -Wextra -Werror" make clean xxhsum

- name: noxxh3test
if: always()
run: |
# check library can be compiled with XXH_NO_XXH3, resulting in no XXH3_* symbol
make clean noxxh3test

- name: nostreamtest
  if: always()
  run: |
    # check library can be compiled with XXH_NO_STREAM, resulting in no streaming symbols
    # (must invoke the nostreamtest target; noxxh3test is already covered by the previous step)
    make clean nostreamtest

- name: make avx512f
if: ${{ matrix.avx512 == 'true' }}
run: |
Expand Down
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,16 @@ noxxh3test: xxhash.c
$(NM) $(OFILE) | $(GREP) XXH3_ ; test $$? -eq 1
$(RM) $(OFILE)

# nostreamtest: compile xxhash.c with -DXXH_NO_STREAM added to CPPFLAGS, then
# list the object's symbols and grep them for "update". The target succeeds
# only when grep exits with status 1 (no match), i.e. no symbol name contains
# "update". The object file is removed afterwards.
.PHONY: nostreamtest
nostreamtest: CPPFLAGS += -DXXH_NO_STREAM
nostreamtest: CFLAGS += -Werror -pedantic -Wno-long-long # XXH64 requires long long support
nostreamtest: OFILE = xxh_nostream.o
nostreamtest: xxhash.c
@echo ---- test compilation without streaming ----
$(CC) $(FLAGS) -c $^ -o $(OFILE)
$(NM) $(OFILE) | $(GREP) update ; test $$? -eq 1
$(RM) $(OFILE)

.PHONY: nostdlibtest
nostdlibtest: CPPFLAGS += -DXXH_NO_STDLIB
nostdlibtest: CFLAGS += -Werror -pedantic -Wno-long-long # XXH64 requires long long support
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ The following macros can be set at compilation time to modify libxxhash's behavi
- `XXH_VECTOR` : manually select a vector instruction set (default: auto-selected at compilation time). Available instruction sets are `XXH_SCALAR`, `XXH_SSE2`, `XXH_AVX2`, `XXH_AVX512`, `XXH_NEON` and `XXH_VSX`. Compiler may require additional flags to ensure proper support (for example, `gcc` on linux will require `-mavx2` for AVX2, and `-mavx512f` for AVX512).
- `XXH_NO_PREFETCH` : disable prefetching. Some platforms or situations may perform better without prefetching. XXH3 only.
- `XXH_PREFETCH_DIST` : select prefetching distance. For close-to-metal adaptation to specific hardware platforms. XXH3 only.
- `XXH_NO_STREAM`: Disables the streaming API, limiting it to single shot variants only.
- `XXH_SIZE_OPT`: `0`: default, optimize for speed
`1`: default for `-Os` and `-Oz`: disables some speed hacks for size optimization
`2`: makes code as small as possible, performance may cry
- `XXH_NO_INLINE_HINTS`: By default, xxHash uses `__attribute__((always_inline))` and `__forceinline` to improve performance at the cost of code size.
Defining this macro to 1 will mark all internal functions as `static`, allowing the compiler to decide whether to inline a function or not.
This is very useful when optimizing for smallest binary size,
Expand Down
113 changes: 89 additions & 24 deletions xxhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,7 @@ typedef uint32_t XXH32_hash_t;
*/
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);

#ifndef XXH_NO_STREAM
/*!
* Streaming functions generate the xxHash value from an incremental input.
* This method is slower than single-call functions, due to state management.
Expand Down Expand Up @@ -655,6 +656,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void*
* @return The calculated xxHash32 value from that state.
*/
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
#endif /* !XXH_NO_STREAM */

/******* Canonical representation *******/

Expand Down Expand Up @@ -814,6 +816,7 @@ typedef uint64_t XXH64_hash_t;
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);

/******* Streaming *******/
#ifndef XXH_NO_STREAM
/*!
* @brief The opaque state struct for the XXH64 streaming API.
*
Expand All @@ -827,7 +830,7 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed);
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);

#endif /* !XXH_NO_STREAM */
/******* Canonical representation *******/
typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
Expand Down Expand Up @@ -949,6 +952,7 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(const void* data, s


/******* Streaming *******/
#ifndef XXH_NO_STREAM
/*
* Streaming requires state maintenance.
* This operation costs memory and CPU.
Expand Down Expand Up @@ -991,6 +995,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr

XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
#endif /* !XXH_NO_STREAM */

/* note : canonical representation of XXH3 is the same as XXH64
* since they both produce XXH64_hash_t values */
Expand Down Expand Up @@ -1035,6 +1040,7 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(const void* data, s
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);

/******* Streaming *******/
#ifndef XXH_NO_STREAM
/*
* Streaming requires state maintenance.
* This operation costs memory and CPU.
Expand All @@ -1053,6 +1059,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePt

XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
#endif /* !XXH_NO_STREAM */

/* Following helper functions make it possible to compare XXH128_hash_t values.
* Since XXH128_hash_t is a structure, this capability is not offered by the language.
Expand Down Expand Up @@ -1398,6 +1405,7 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
XXH3_128bits_withSecretandSeed(const void* input, size_t length,
const void* secret, size_t secretSize,
XXH64_hash_t seed64);
#ifndef XXH_NO_STREAM
/*! @copydoc XXH3_64bits_withSecretandSeed() */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
Expand All @@ -1408,7 +1416,7 @@ XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
const void* secret, size_t secretSize,
XXH64_hash_t seed64);

#endif /* !XXH_NO_STREAM */

#endif /* !XXH_NO_XXH3 */
#endif /* XXH_NO_LONG_LONG */
Expand Down Expand Up @@ -1520,6 +1528,34 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
*/
# define XXH_FORCE_MEMORY_ACCESS 0

/*!
* @def XXH_SIZE_OPT
* @brief Controls how much xxHash optimizes for size.
*
* xxHash, when compiled, tends to result in a rather large binary size. This
* is mostly due to heavy usage of forced inlining and constant folding of the
* @ref XXH3_family to increase performance.
*
* However, some developers prefer size over speed. This option can
* significantly reduce the size of the generated code. When using the `-Os`
* or `-Oz` options on GCC or Clang, this is defined to 1 by default,
* otherwise it is defined to 0.
*
* Most of these size optimizations can be controlled manually.
*
* This is a number from 0-2.
* - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
* comes first.
* - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
* conservative and disables hacks that increase code size. It implies the
* options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
* and @ref XXH3_NEON_LANES == 8 if they are not already defined.
* - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
* Performance may cry. For example, the single shot functions just use the
* streaming API.
*/
# define XXH_SIZE_OPT 0

/*!
* @def XXH_FORCE_ALIGN_CHECK
* @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
Expand All @@ -1541,9 +1577,11 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
*
* In these cases, the alignment check can be removed by setting this macro to 0.
* Then the code will always use unaligned memory access.
* Align check is automatically disabled on x86, x64 & arm64,
* Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
* which are platforms known to offer good unaligned memory accesses performance.
*
* It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
*
* This option does not affect XXH3 (only XXH32 and XXH64).
*/
# define XXH_FORCE_ALIGN_CHECK 0
Expand All @@ -1565,8 +1603,8 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
* XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
* compiler full control on whether to inline or not.
*
* When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
* -fno-inline with GCC or Clang, this will automatically be defined.
* When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
* @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
*/
# define XXH_NO_INLINE_HINTS 0

Expand All @@ -1591,6 +1629,17 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
*/
# define XXH_OLD_NAMES
# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */

/*!
* @def XXH_NO_STREAM
* @brief Disables the streaming API.
easyaspi314 marked this conversation as resolved.
Show resolved Hide resolved
*
* When xxHash is not inlined and the streaming functions are not used, disabling
* the streaming functions can improve code size significantly, especially with
* the @ref XXH3_family which tends to make constant folded copies of itself.
*/
# define XXH_NO_STREAM
# undef XXH_NO_STREAM /* don't actually */
#endif /* XXH_DOXYGEN */
/*!
* @}
Expand All @@ -1605,9 +1654,19 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
# endif
#endif

#if !defined(XXH_SIZE_OPT)
   /* Not set by the user: pick 1 when GCC or Clang reports a size-optimizing
    * build (-Os / -Oz define __OPTIMIZE_SIZE__), otherwise fall back to 0. */
#  if defined(__OPTIMIZE_SIZE__) && (defined(__clang__) || defined(__GNUC__))
#    define XXH_SIZE_OPT 1
#  else
#    define XXH_SIZE_OPT 0
#  endif
#endif

#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
/* don't check on x86, aarch64, or arm when unaligned access is available */
# if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
/* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
# if XXH_SIZE_OPT >= 1 || \
defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
|| defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
# define XXH_FORCE_ALIGN_CHECK 0
# else
Expand All @@ -1616,8 +1675,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
#endif

#ifndef XXH_NO_INLINE_HINTS
# if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
|| defined(__NO_INLINE__) /* -O0, -fno-inline */
# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
# define XXH_NO_INLINE_HINTS 1
# else
# define XXH_NO_INLINE_HINTS 0
Expand All @@ -1638,7 +1696,9 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
/* *************************************
* Includes & Memory related functions
***************************************/
#if defined(XXH_NO_STDLIB)
#if defined(XXH_NO_STREAM)
/* nothing */
#elif defined(XXH_NO_STDLIB)

/* When requesting to disable any mention of stdlib,
* the library loses the ability to invoke malloc / free.
Expand Down Expand Up @@ -2323,7 +2383,7 @@ XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment
/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
{
#if 0
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH32_state_t state;
XXH32_reset(&state, seed);
Expand All @@ -2342,6 +2402,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t s


/******* Hash streaming *******/
#ifndef XXH_NO_STREAM
/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
{
Expand Down Expand Up @@ -2446,7 +2507,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)

return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
}

#endif /* !XXH_NO_STREAM */

/******* Canonical representation *******/

Expand Down Expand Up @@ -2765,7 +2826,7 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
{
#if 0
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH64_state_t state;
XXH64_reset(&state, seed);
Expand All @@ -2783,7 +2844,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t s
}

/******* Hash Streaming *******/

#ifndef XXH_NO_STREAM
/*! @ingroup XXH64_family*/
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
{
Expand Down Expand Up @@ -2885,7 +2946,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)

return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
}

#endif /* !XXH_NO_STREAM */

/******* Canonical representation *******/

Expand Down Expand Up @@ -3164,7 +3225,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
*/
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
&& defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
&& defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
# pragma GCC push_options
# pragma GCC optimize("-O2")
#endif
Expand Down Expand Up @@ -3334,7 +3395,7 @@ XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
*/
# ifndef XXH3_NEON_LANES
# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
&& !defined(__OPTIMIZE_SIZE__)
&& XXH_SIZE_OPT <= 0
# define XXH3_NEON_LANES 6
# else
# define XXH3_NEON_LANES XXH_ACC_NB
Expand Down Expand Up @@ -4567,7 +4628,7 @@ XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
#if defined(__GNUC__) && !defined(__clang__) \
&& (defined(__arm__) || defined(__thumb2__)) \
&& defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
&& !defined(__OPTIMIZE_SIZE__)
&& XXH_SIZE_OPT <= 0
# pragma GCC unroll 8
#endif
for (i=0; i < XXH_ACC_NB; i++) {
Expand Down Expand Up @@ -4726,7 +4787,10 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);

#endif


/*
 * Size-optimized builds (XXH_SIZE_OPT >= 1): discard any earlier definition of
 * XXH3_initCustomSecret and point it at XXH3_initCustomSecret_scalar instead.
 */
#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
# undef XXH3_initCustomSecret
# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#endif

#ifndef XXH_PREFETCH_DIST
# ifdef __clang__
Expand Down Expand Up @@ -4948,6 +5012,7 @@ XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,

/* === Public entry point === */

/* TODO: streaming single shot XXH3? */
/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t length)
{
Expand Down Expand Up @@ -4978,7 +5043,7 @@ XXH3_64bits_withSecretandSeed(const void* input, size_t length, const void* secr


/* === XXH3 streaming === */

#ifndef XXH_NO_STREAM
/*
* Malloc's a pointer that is always aligned to align.
*
Expand Down Expand Up @@ -5348,7 +5413,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
secret, state->secretLimit + XXH_STRIPE_LEN);
}

#endif /* !XXH_NO_STREAM */


/* ==========================================
Expand Down Expand Up @@ -5765,7 +5830,7 @@ XXH128(const void* input, size_t len, XXH64_hash_t seed)


/* === XXH3 128-bit streaming === */

#ifndef XXH_NO_STREAM
/*
* All initialization and update functions are identical to 64-bit streaming variant.
* The only difference is the finalization routine.
Expand Down Expand Up @@ -5832,7 +5897,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
secret, state->secretLimit + XXH_STRIPE_LEN);
}

#endif /* !XXH_NO_STREAM */
/* 128-bit utility functions */

#include <string.h> /* memcmp, memcpy */
Expand Down Expand Up @@ -5959,7 +6024,7 @@ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
/* Pop our optimization override from above */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
&& defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
&& defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
# pragma GCC pop_options
#endif

Expand Down