Skip to content

Commit 3ab23f7

Browse files
committed
update libdivide to 5.2.0
1 parent c894f3e commit 3ab23f7

File tree

1 file changed

+53
-22
lines changed

1 file changed

+53
-22
lines changed

third_party/libdivide.h

+53-22
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,11 @@
1111
#ifndef LIBDIVIDE_H
1212
#define LIBDIVIDE_H
1313

14-
#define LIBDIVIDE_VERSION "5.1"
14+
// *** Version numbers are auto generated - do not edit ***
15+
#define LIBDIVIDE_VERSION "5.2.0"
1516
#define LIBDIVIDE_VERSION_MAJOR 5
16-
#define LIBDIVIDE_VERSION_MINOR 1
17+
#define LIBDIVIDE_VERSION_MINOR 2
18+
#define LIBDIVIDE_VERSION_PATCH 0
1719

1820
#include <stdint.h>
1921

@@ -34,8 +36,15 @@
3436
#include <arm_neon.h>
3537
#endif
3638

39+
// Clang-cl prior to Visual Studio 2022 doesn't include __umulh/__mulh intrinsics
40+
#if defined(_MSC_VER) && defined(LIBDIVIDE_X86_64) && (!defined(__clang__) || _MSC_VER>1930)
41+
#define LIBDIVIDE_X64_INTRINSICS
42+
#endif
43+
3744
#if defined(_MSC_VER)
45+
#if defined(LIBDIVIDE_X64_INTRINSICS)
3846
#include <intrin.h>
47+
#endif
3948
#pragma warning(push)
4049
// disable warning C4146: unary minus operator applied
4150
// to unsigned type, result still unsigned
@@ -238,18 +247,28 @@ static LIBDIVIDE_INLINE struct libdivide_u32_branchfree_t libdivide_u32_branchfr
238247
static LIBDIVIDE_INLINE struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d);
239248
static LIBDIVIDE_INLINE struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d);
240249

241-
static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw(int16_t numer, int16_t magic, uint8_t more);
250+
static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw(
251+
int16_t numer, int16_t magic, uint8_t more);
242252
static LIBDIVIDE_INLINE int16_t libdivide_s16_do(
243253
int16_t numer, const struct libdivide_s16_t *denom);
244-
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw(uint16_t numer, uint16_t magic, uint8_t more);
254+
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw(
255+
uint16_t numer, uint16_t magic, uint8_t more);
245256
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do(
246257
uint16_t numer, const struct libdivide_u16_t *denom);
258+
static LIBDIVIDE_INLINE int32_t libdivide_s32_do_raw(
259+
int32_t numer, int32_t magic, uint8_t more);
247260
static LIBDIVIDE_INLINE int32_t libdivide_s32_do(
248261
int32_t numer, const struct libdivide_s32_t *denom);
262+
static LIBDIVIDE_INLINE uint32_t libdivide_u32_do_raw(
263+
uint32_t numer, uint32_t magic, uint8_t more);
249264
static LIBDIVIDE_INLINE uint32_t libdivide_u32_do(
250265
uint32_t numer, const struct libdivide_u32_t *denom);
266+
static LIBDIVIDE_INLINE int64_t libdivide_s64_do_raw(
267+
int64_t numer, int64_t magic, uint8_t more);
251268
static LIBDIVIDE_INLINE int64_t libdivide_s64_do(
252269
int64_t numer, const struct libdivide_s64_t *denom);
270+
static LIBDIVIDE_INLINE uint64_t libdivide_u64_do_raw(
271+
uint64_t numer, uint64_t magic, uint8_t more);
253272
static LIBDIVIDE_INLINE uint64_t libdivide_u64_do(
254273
uint64_t numer, const struct libdivide_u64_t *denom);
255274

@@ -315,7 +334,7 @@ static LIBDIVIDE_INLINE int32_t libdivide_mullhi_s32(int32_t x, int32_t y) {
315334
}
316335

317336
static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
318-
#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64)
337+
#if defined(LIBDIVIDE_X64_INTRINSICS)
319338
return __umulh(x, y);
320339
#elif defined(HAS_INT128_T)
321340
__uint128_t xl = x, yl = y;
@@ -341,7 +360,7 @@ static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
341360
}
342361

343362
static LIBDIVIDE_INLINE int64_t libdivide_mullhi_s64(int64_t x, int64_t y) {
344-
#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64)
363+
#if defined(LIBDIVIDE_X64_INTRINSICS)
345364
return __mulh(x, y);
346365
#elif defined(HAS_INT128_T)
347366
__int128_t xl = x, yl = y;
@@ -914,12 +933,11 @@ struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) {
914933
return ret;
915934
}
916935

917-
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
918-
uint8_t more = denom->more;
919-
if (!denom->magic) {
936+
uint32_t libdivide_u32_do_raw(uint32_t numer, uint32_t magic, uint8_t more) {
937+
if (!magic) {
920938
return numer >> more;
921939
} else {
922-
uint32_t q = libdivide_mullhi_u32(denom->magic, numer);
940+
uint32_t q = libdivide_mullhi_u32(magic, numer);
923941
if (more & LIBDIVIDE_ADD_MARKER) {
924942
uint32_t t = ((numer - q) >> 1) + q;
925943
return t >> (more & LIBDIVIDE_32_SHIFT_MASK);
@@ -931,6 +949,10 @@ uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
931949
}
932950
}
933951

952+
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
953+
return libdivide_u32_do_raw(numer, denom->magic, denom->more);
954+
}
955+
934956
uint32_t libdivide_u32_branchfree_do(
935957
uint32_t numer, const struct libdivide_u32_branchfree_t *denom) {
936958
uint32_t q = libdivide_mullhi_u32(denom->magic, numer);
@@ -1074,12 +1096,11 @@ struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) {
10741096
return ret;
10751097
}
10761098

1077-
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
1078-
uint8_t more = denom->more;
1079-
if (!denom->magic) {
1099+
uint64_t libdivide_u64_do_raw(uint64_t numer, uint64_t magic, uint8_t more) {
1100+
if (!magic) {
10801101
return numer >> more;
10811102
} else {
1082-
uint64_t q = libdivide_mullhi_u64(denom->magic, numer);
1103+
uint64_t q = libdivide_mullhi_u64(magic, numer);
10831104
if (more & LIBDIVIDE_ADD_MARKER) {
10841105
uint64_t t = ((numer - q) >> 1) + q;
10851106
return t >> (more & LIBDIVIDE_64_SHIFT_MASK);
@@ -1091,6 +1112,10 @@ uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
10911112
}
10921113
}
10931114

1115+
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
1116+
return libdivide_u64_do_raw(numer, denom->magic, denom->more);
1117+
}
1118+
10941119
uint64_t libdivide_u64_branchfree_do(
10951120
uint64_t numer, const struct libdivide_u64_branchfree_t *denom) {
10961121
uint64_t q = libdivide_mullhi_u64(denom->magic, numer);
@@ -1430,11 +1455,10 @@ struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) {
14301455
return result;
14311456
}
14321457

1433-
int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
1434-
uint8_t more = denom->more;
1458+
int32_t libdivide_s32_do_raw(int32_t numer, int32_t magic, uint8_t more) {
14351459
uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
14361460

1437-
if (!denom->magic) {
1461+
if (!magic) {
14381462
uint32_t sign = (int8_t)more >> 7;
14391463
uint32_t mask = ((uint32_t)1 << shift) - 1;
14401464
uint32_t uq = numer + ((numer >> 31) & mask);
@@ -1443,7 +1467,7 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
14431467
q = (q ^ sign) - sign;
14441468
return q;
14451469
} else {
1446-
uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer);
1470+
uint32_t uq = (uint32_t)libdivide_mullhi_s32(magic, numer);
14471471
if (more & LIBDIVIDE_ADD_MARKER) {
14481472
// must be arithmetic shift and then sign extend
14491473
int32_t sign = (int8_t)more >> 7;
@@ -1458,6 +1482,10 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
14581482
}
14591483
}
14601484

1485+
int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
1486+
return libdivide_s32_do_raw(numer, denom->magic, denom->more);
1487+
}
1488+
14611489
int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) {
14621490
uint8_t more = denom->more;
14631491
uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
@@ -1599,11 +1627,10 @@ struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) {
15991627
return ret;
16001628
}
16011629

1602-
int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
1603-
uint8_t more = denom->more;
1630+
int64_t libdivide_s64_do_raw(int64_t numer, int64_t magic, uint8_t more) {
16041631
uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
16051632

1606-
if (!denom->magic) { // shift path
1633+
if (!magic) { // shift path
16071634
uint64_t mask = ((uint64_t)1 << shift) - 1;
16081635
uint64_t uq = numer + ((numer >> 63) & mask);
16091636
int64_t q = (int64_t)uq;
@@ -1613,7 +1640,7 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
16131640
q = (q ^ sign) - sign;
16141641
return q;
16151642
} else {
1616-
uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer);
1643+
uint64_t uq = (uint64_t)libdivide_mullhi_s64(magic, numer);
16171644
if (more & LIBDIVIDE_ADD_MARKER) {
16181645
// must be arithmetic shift and then sign extend
16191646
int64_t sign = (int8_t)more >> 7;
@@ -1628,6 +1655,10 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
16281655
}
16291656
}
16301657

1658+
int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
1659+
return libdivide_s64_do_raw(numer, denom->magic, denom->more);
1660+
}
1661+
16311662
int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) {
16321663
uint8_t more = denom->more;
16331664
uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;

0 commit comments

Comments
 (0)