Skip to content

Commit

Permalink
Update to latest libprimesieve
Browse files: browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Sep 17, 2023
1 parent 4397959 commit 30beeab
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 30 deletions.
24 changes: 9 additions & 15 deletions lib/primesieve/doc/CPP_API.md
Original file line number Diff line number Diff line change
Expand Up @@ -459,31 +459,25 @@ int main()
primesieve::iterator it;
it.generate_next_primes();

uint64_t sum = 0;
uint64_t limit = 10000000000;
__m512i sums = _mm512_setzero_si512();

while (it.primes_[it.size_ - 1] <= limit)
{
std::size_t i = 0;
__m512i vsums = _mm512_setzero_si512();

// Sum 64-bit primes using AVX512
for (; i + 8 < it.size_; i += 8) {
__m512i primes = _mm512_loadu_si512((__m512i*) &it.primes_[i]);
vsums = _mm512_add_epi64(vsums, primes);
for (std::size_t i = 0; i < it.size_; i += 8) {
__mmask8 mask = (i + 8 < it.size_) ? 0xff : 0xff >> (i + 8 - it.size_);
__m512i primes = _mm512_maskz_loadu_epi64(mask, (__m512i*) &it.primes_[i]);
sums = _mm512_add_epi64(sums, primes);
}

// Sum 8 integers in the vsums vector
sum += _mm512_reduce_add_epi64(vsums);

// Process the remaining primes (at most 7)
for (; i < it.size_; i++)
sum += it.primes_[i];

// Generate up to 2^10 new primes
it.generate_next_primes();
}

// Sum the 8 partial sums
uint64_t sum = _mm512_reduce_add_epi64(sums);

// Process the remaining primes (at most 2^10)
for (std::size_t i = 0; it.primes_[i] <= limit; i++)
sum += it.primes_[i];
Expand All @@ -499,7 +493,7 @@ int main()

```bash
# Unix-like OSes
c++ -O3 -mavx512f primesum.cpp -o primesum -lprimesieve
c++ -O3 -mavx512f -funroll-loops primesum.cpp -o primesum -lprimesieve
time ./primesum
```

Expand Down
24 changes: 9 additions & 15 deletions lib/primesieve/doc/C_API.md
Original file line number Diff line number Diff line change
Expand Up @@ -525,31 +525,25 @@ int main(void)
primesieve_init(&it);
primesieve_generate_next_primes(&it);

uint64_t sum = 0;
uint64_t limit = 10000000000;
__m512i sums = _mm512_setzero_si512();

while (it.primes[it.size - 1] <= limit)
{
size_t i = 0;
__m512i vsums = _mm512_setzero_si512();

// Sum 64-bit primes using AVX512
for (; i + 8 < it.size; i += 8) {
__m512i primes = _mm512_loadu_si512((__m512i*) &it.primes[i]);
vsums = _mm512_add_epi64(vsums, primes);
for (size_t i = 0; i < it.size; i += 8) {
__mmask8 mask = (i + 8 < it.size) ? 0xff : 0xff >> (i + 8 - it.size);
__m512i primes = _mm512_maskz_loadu_epi64(mask, (__m512i*) &it.primes[i]);
sums = _mm512_add_epi64(sums, primes);
}

// Sum 8 integers in the vsums vector
sum += _mm512_reduce_add_epi64(vsums);

// Process the remaining primes (at most 7)
for (; i < it.size; i++)
sum += it.primes[i];

// Generate up to 2^10 new primes
primesieve_generate_next_primes(&it);
}

// Sum the 8 partial sums
uint64_t sum = _mm512_reduce_add_epi64(sums);

// Process the remaining primes (at most 2^10)
for (size_t i = 0; it.primes[i] <= limit; i++)
sum += it.primes[i];
Expand All @@ -566,7 +560,7 @@ int main(void)
```bash
# Unix-like OSes
cc -O3 -mavx512f primesum.c -o primesum -lprimesieve
cc -O3 -mavx512f -funroll-loops primesum.c -o primesum -lprimesieve
time ./primesum
```

Expand Down

0 comments on commit 30beeab

Please sign in to comment.