11
11
#ifndef LIBDIVIDE_H
12
12
#define LIBDIVIDE_H
13
13
14
- #define LIBDIVIDE_VERSION "5.1"
14
+ // *** Version numbers are auto generated - do not edit ***
15
+ #define LIBDIVIDE_VERSION "5.2.0"
15
16
#define LIBDIVIDE_VERSION_MAJOR 5
16
- #define LIBDIVIDE_VERSION_MINOR 1
17
+ #define LIBDIVIDE_VERSION_MINOR 2
18
+ #define LIBDIVIDE_VERSION_PATCH 0
17
19
18
20
#include <stdint.h>
19
21
34
36
#include <arm_neon.h>
35
37
#endif
36
38
39
// LIBDIVIDE_X64_INTRINSICS is defined only when _MSC_VER and LIBDIVIDE_X86_64
// are both defined and, if __clang__ is defined, only when _MSC_VER > 1930.
// Elsewhere in this change it guards the <intrin.h> include and selects the
// __umulh / __mulh paths in libdivide_mullhi_u64 / libdivide_mullhi_s64.
// NOTE(review): LIBDIVIDE_X86_64 is not defined in any line visible above this
// test. If it is only defined further down the header, this condition is
// always false and the intrinsics are never enabled -- confirm the ordering,
// or test the compiler-provided _M_X64 here instead.
// NOTE(review): Microsoft documents _MSC_VER 1930 as Visual Studio 2022 17.0,
// so ">1930" also excludes clang-cl at 17.0 itself; confirm whether ">=" was
// intended given the "prior to Visual Studio 2022" wording below.
+ // Clang-cl prior to Visual Studio 2022 doesn't include __umulh/__mulh intrinsics
40
+ #if defined(_MSC_VER) && defined(LIBDIVIDE_X86_64) && (!defined(__clang__) || _MSC_VER>1930)
41
+ #define LIBDIVIDE_X64_INTRINSICS
42
+ #endif
43
+
37
44
#if defined(_MSC_VER)
45
+ #if defined(LIBDIVIDE_X64_INTRINSICS)
38
46
#include <intrin.h>
47
+ #endif
39
48
#pragma warning(push)
40
49
// disable warning C4146: unary minus operator applied
41
50
// to unsigned type, result still unsigned
@@ -238,18 +247,28 @@ static LIBDIVIDE_INLINE struct libdivide_u32_branchfree_t libdivide_u32_branchfr
238
247
static LIBDIVIDE_INLINE struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen (int64_t d);
239
248
static LIBDIVIDE_INLINE struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen (uint64_t d);
240
249
241
// Forward declarations of the per-width entry points. Each width has two forms:
//   *_do      takes a pointer to the divider struct (denom);
//   *_do_raw  takes the `magic` and `more` values as separate scalar arguments.
// The 16-bit *_do_raw prototypes are only re-wrapped onto two lines here
// (same parameter list on the removed and added lines).
- static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw (int16_t numer, int16_t magic, uint8_t more);
250
+ static LIBDIVIDE_INLINE int16_t libdivide_s16_do_raw (
251
+ int16_t numer, int16_t magic, uint8_t more);
242
252
static LIBDIVIDE_INLINE int16_t libdivide_s16_do (
243
253
int16_t numer, const struct libdivide_s16_t *denom);
244
- static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw (uint16_t numer, uint16_t magic, uint8_t more);
254
+ static LIBDIVIDE_INLINE uint16_t libdivide_u16_do_raw (
255
+ uint16_t numer, uint16_t magic, uint8_t more);
245
256
static LIBDIVIDE_INLINE uint16_t libdivide_u16_do (
246
257
uint16_t numer, const struct libdivide_u16_t *denom);
258
// The 32- and 64-bit *_do_raw prototypes below are newly added by this change;
// the struct-based *_do definitions later in the diff forward to them.
+ static LIBDIVIDE_INLINE int32_t libdivide_s32_do_raw (
259
+ int32_t numer, int32_t magic, uint8_t more);
247
260
static LIBDIVIDE_INLINE int32_t libdivide_s32_do (
248
261
int32_t numer, const struct libdivide_s32_t *denom);
262
+ static LIBDIVIDE_INLINE uint32_t libdivide_u32_do_raw (
263
+ uint32_t numer, uint32_t magic, uint8_t more);
249
264
static LIBDIVIDE_INLINE uint32_t libdivide_u32_do (
250
265
uint32_t numer, const struct libdivide_u32_t *denom);
266
+ static LIBDIVIDE_INLINE int64_t libdivide_s64_do_raw (
267
+ int64_t numer, int64_t magic, uint8_t more);
251
268
static LIBDIVIDE_INLINE int64_t libdivide_s64_do (
252
269
int64_t numer, const struct libdivide_s64_t *denom);
270
+ static LIBDIVIDE_INLINE uint64_t libdivide_u64_do_raw (
271
+ uint64_t numer, uint64_t magic, uint8_t more);
253
272
static LIBDIVIDE_INLINE uint64_t libdivide_u64_do (
254
273
uint64_t numer, const struct libdivide_u64_t *denom);
255
274
@@ -315,7 +334,7 @@ static LIBDIVIDE_INLINE int32_t libdivide_mullhi_s32(int32_t x, int32_t y) {
315
334
}
316
335
317
336
static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64 (uint64_t x, uint64_t y) {
318
- #if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64 )
337
+ #if defined(LIBDIVIDE_X64_INTRINSICS )
319
338
return __umulh (x, y);
320
339
#elif defined(HAS_INT128_T)
321
340
__uint128_t xl = x, yl = y;
@@ -341,7 +360,7 @@ static LIBDIVIDE_INLINE uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
341
360
}
342
361
343
362
static LIBDIVIDE_INLINE int64_t libdivide_mullhi_s64 (int64_t x, int64_t y) {
344
- #if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64 )
363
+ #if defined(LIBDIVIDE_X64_INTRINSICS )
345
364
return __mulh (x, y);
346
365
#elif defined(HAS_INT128_T)
347
366
__int128_t xl = x, yl = y;
@@ -914,12 +933,11 @@ struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) {
914
933
return ret;
915
934
}
916
935
917
- uint32_t libdivide_u32_do (uint32_t numer, const struct libdivide_u32_t *denom) {
918
- uint8_t more = denom->more ;
919
- if (!denom->magic ) {
936
+ uint32_t libdivide_u32_do_raw (uint32_t numer, uint32_t magic, uint8_t more) {
937
+ if (!magic) {
920
938
return numer >> more;
921
939
} else {
922
- uint32_t q = libdivide_mullhi_u32 (denom-> magic , numer);
940
+ uint32_t q = libdivide_mullhi_u32 (magic, numer);
923
941
if (more & LIBDIVIDE_ADD_MARKER) {
924
942
uint32_t t = ((numer - q) >> 1 ) + q;
925
943
return t >> (more & LIBDIVIDE_32_SHIFT_MASK);
@@ -931,6 +949,10 @@ uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
931
949
}
932
950
}
933
951
952
// Struct-based entry point for the unsigned 32-bit path: passes numer together
// with denom->magic and denom->more to libdivide_u32_do_raw and returns that
// result unchanged, so both entry points share one implementation.
// denom is dereferenced unconditionally and therefore must not be NULL.
+ uint32_t libdivide_u32_do (uint32_t numer, const struct libdivide_u32_t *denom) {
953
+ return libdivide_u32_do_raw (numer, denom->magic , denom->more );
954
+ }
955
+
934
956
uint32_t libdivide_u32_branchfree_do (
935
957
uint32_t numer, const struct libdivide_u32_branchfree_t *denom) {
936
958
uint32_t q = libdivide_mullhi_u32 (denom->magic , numer);
@@ -1074,12 +1096,11 @@ struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) {
1074
1096
return ret;
1075
1097
}
1076
1098
1077
- uint64_t libdivide_u64_do (uint64_t numer, const struct libdivide_u64_t *denom) {
1078
- uint8_t more = denom->more ;
1079
- if (!denom->magic ) {
1099
+ uint64_t libdivide_u64_do_raw (uint64_t numer, uint64_t magic, uint8_t more) {
1100
+ if (!magic) {
1080
1101
return numer >> more;
1081
1102
} else {
1082
- uint64_t q = libdivide_mullhi_u64 (denom-> magic , numer);
1103
+ uint64_t q = libdivide_mullhi_u64 (magic, numer);
1083
1104
if (more & LIBDIVIDE_ADD_MARKER) {
1084
1105
uint64_t t = ((numer - q) >> 1 ) + q;
1085
1106
return t >> (more & LIBDIVIDE_64_SHIFT_MASK);
@@ -1091,6 +1112,10 @@ uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
1091
1112
}
1092
1113
}
1093
1114
1115
// Struct-based entry point for the unsigned 64-bit path: passes numer together
// with denom->magic and denom->more to libdivide_u64_do_raw and returns that
// result unchanged, so both entry points share one implementation.
// denom is dereferenced unconditionally and therefore must not be NULL.
+ uint64_t libdivide_u64_do (uint64_t numer, const struct libdivide_u64_t *denom) {
1116
+ return libdivide_u64_do_raw (numer, denom->magic , denom->more );
1117
+ }
1118
+
1094
1119
uint64_t libdivide_u64_branchfree_do (
1095
1120
uint64_t numer, const struct libdivide_u64_branchfree_t *denom) {
1096
1121
uint64_t q = libdivide_mullhi_u64 (denom->magic , numer);
@@ -1430,11 +1455,10 @@ struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) {
1430
1455
return result;
1431
1456
}
1432
1457
1433
- int32_t libdivide_s32_do (int32_t numer, const struct libdivide_s32_t *denom) {
1434
- uint8_t more = denom->more ;
1458
+ int32_t libdivide_s32_do_raw (int32_t numer, int32_t magic, uint8_t more) {
1435
1459
uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
1436
1460
1437
- if (!denom-> magic ) {
1461
+ if (!magic) {
1438
1462
uint32_t sign = (int8_t )more >> 7 ;
1439
1463
uint32_t mask = ((uint32_t )1 << shift) - 1 ;
1440
1464
uint32_t uq = numer + ((numer >> 31 ) & mask);
@@ -1443,7 +1467,7 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
1443
1467
q = (q ^ sign) - sign;
1444
1468
return q;
1445
1469
} else {
1446
- uint32_t uq = (uint32_t )libdivide_mullhi_s32 (denom-> magic , numer);
1470
+ uint32_t uq = (uint32_t )libdivide_mullhi_s32 (magic, numer);
1447
1471
if (more & LIBDIVIDE_ADD_MARKER) {
1448
1472
// must be arithmetic shift and then sign extend
1449
1473
int32_t sign = (int8_t )more >> 7 ;
@@ -1458,6 +1482,10 @@ int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
1458
1482
}
1459
1483
}
1460
1484
1485
// Struct-based entry point for the signed 32-bit path: passes numer together
// with denom->magic and denom->more to libdivide_s32_do_raw and returns that
// result unchanged, so both entry points share one implementation.
// denom is dereferenced unconditionally and therefore must not be NULL.
+ int32_t libdivide_s32_do (int32_t numer, const struct libdivide_s32_t *denom) {
1486
+ return libdivide_s32_do_raw (numer, denom->magic , denom->more );
1487
+ }
1488
+
1461
1489
int32_t libdivide_s32_branchfree_do (int32_t numer, const struct libdivide_s32_branchfree_t *denom) {
1462
1490
uint8_t more = denom->more ;
1463
1491
uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
@@ -1599,11 +1627,10 @@ struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) {
1599
1627
return ret;
1600
1628
}
1601
1629
1602
- int64_t libdivide_s64_do (int64_t numer, const struct libdivide_s64_t *denom) {
1603
- uint8_t more = denom->more ;
1630
+ int64_t libdivide_s64_do_raw (int64_t numer, int64_t magic, uint8_t more) {
1604
1631
uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
1605
1632
1606
- if (!denom-> magic ) { // shift path
1633
+ if (!magic) { // shift path
1607
1634
uint64_t mask = ((uint64_t )1 << shift) - 1 ;
1608
1635
uint64_t uq = numer + ((numer >> 63 ) & mask);
1609
1636
int64_t q = (int64_t )uq;
@@ -1613,7 +1640,7 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
1613
1640
q = (q ^ sign) - sign;
1614
1641
return q;
1615
1642
} else {
1616
- uint64_t uq = (uint64_t )libdivide_mullhi_s64 (denom-> magic , numer);
1643
+ uint64_t uq = (uint64_t )libdivide_mullhi_s64 (magic, numer);
1617
1644
if (more & LIBDIVIDE_ADD_MARKER) {
1618
1645
// must be arithmetic shift and then sign extend
1619
1646
int64_t sign = (int8_t )more >> 7 ;
@@ -1628,6 +1655,10 @@ int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
1628
1655
}
1629
1656
}
1630
1657
1658
// Struct-based entry point for the signed 64-bit path: passes numer together
// with denom->magic and denom->more to libdivide_s64_do_raw and returns that
// result unchanged, so both entry points share one implementation.
// denom is dereferenced unconditionally and therefore must not be NULL.
+ int64_t libdivide_s64_do (int64_t numer, const struct libdivide_s64_t *denom) {
1659
+ return libdivide_s64_do_raw (numer, denom->magic , denom->more );
1660
+ }
1661
+
1631
1662
int64_t libdivide_s64_branchfree_do (int64_t numer, const struct libdivide_s64_branchfree_t *denom) {
1632
1663
uint8_t more = denom->more ;
1633
1664
uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
0 commit comments