-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement support for Intel crc32 instruction (SSE 4.2)
This change was authored by vadimskipin and submitted via google/leveldb#309. Changes were made to support iOS builds and other architectures without support for SSE 4.2. db_bench reports the original crc32c speed at: crc32c : 3.610 micros/op; 1082.0 MB/s (4K per op). With this change, performance has increased to: crc32c : 0.843 micros/op; 4633.6 MB/s (4K per op). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=148694935
- Loading branch information
Showing
6 changed files
with
186 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
// Copyright 2016 The LevelDB Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. See the AUTHORS file for names of contributors. | ||
// | ||
// An accelerated implementation of crc32c that uses the x86 SSE 4.2 crc32 | ||
// instruction, handling up to eight bytes at a time on x64 builds. | ||
// | ||
// In a separate source file to allow this accelerated CRC32C function to be | ||
// compiled with the appropriate compiler flags to enable x86 SSE 4.2 | ||
// instructions. | ||
|
||
#include <stdint.h> | ||
#include <string.h> | ||
#include "port/port.h" | ||
|
||
#if defined(LEVELDB_PLATFORM_POSIX_SSE) | ||
|
||
#if defined(_MSC_VER) | ||
#include <intrin.h> | ||
#elif defined(__GNUC__) && defined(__SSE4_2__) | ||
#include <nmmintrin.h> | ||
#include <cpuid.h> | ||
#endif | ||
|
||
#endif // defined(LEVELDB_PLATFORM_POSIX_SSE) | ||
|
||
namespace leveldb { | ||
namespace port { | ||
|
||
#if defined(LEVELDB_PLATFORM_POSIX_SSE) | ||
|
||
// Reads the four bytes starting at |p| and returns them as a 32-bit word in
// the host's byte order. The bytes are copied out with memcpy instead of
// being read through a casted pointer.
static inline uint32_t LE_LOAD32(const uint8_t *p) {
  // The SSE code path only exists on x86, which is little-endian, so a
  // host-order copy is already a little-endian load.
  uint32_t value;
  memcpy(&value, p, sizeof(uint32_t));
  return value;
}
|
||
// Reads the eight bytes starting at |p| and returns them as a 64-bit word in
// the host's byte order. The bytes are copied out with memcpy instead of
// being read through a casted pointer.
static inline uint64_t LE_LOAD64(const uint8_t *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(uint64_t));
  return value;
}
|
||
// Reports whether the CPU this code is running on advertises SSE 4.2.
//
// Queries CPUID leaf 1 and tests bit 20 of the ECX feature word. Returns
// false when the query cannot be made: on compilers other than MSVC/GCC, when
// the GCC-style helper is not available in this build, or when the CPU does
// not support the requested leaf.
static inline bool HaveSSE42() {
#if defined(_MSC_VER)
  int cpu_info[4];
  __cpuid(cpu_info, 1);
  // cpu_info[2] holds ECX.
  return (cpu_info[2] & (1 << 20)) != 0;
#elif defined(__GNUC__) && defined(__SSE4_2__)
  // Same condition as the one under which <cpuid.h> is included, so
  // __get_cpuid is only referenced when it has actually been declared.
  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
  // __get_cpuid returns 0 when the leaf is unsupported and leaves the outputs
  // untouched; do not interpret them in that case.
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    return false;
  }
  return (ecx & (1u << 20)) != 0;
#else
  return false;
#endif
}
|
||
#endif // defined(LEVELDB_PLATFORM_POSIX_SSE) | ||
|
||
// Extends |crc| with the |size| bytes at |buf| using the SSE 4.2 crc32
// instruction.
//
// Returns 0 when the build was made without LEVELDB_PLATFORM_POSIX_SSE or
// when HaveSSE42() reports that the CPU lacks the instruction; otherwise
// returns the updated checksum.
//
// For further improvements see Intel publication at:
// http://download.intel.com/design/intarch/papers/323405.pdf
uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
#if !defined(LEVELDB_PLATFORM_POSIX_SSE)
  return 0;
#else
  // The CPU feature check runs once; later calls reuse the cached answer.
  static bool have = HaveSSE42();
  if (!have) {
    return 0;
  }

  const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
  const uint8_t *e = p + size;
  // The running value is kept bit-inverted while bytes are folded in and is
  // inverted back on return.
  uint32_t l = crc ^ 0xffffffffu;

#define STEP1 do {                              \
    l = _mm_crc32_u8(l, *p++);                  \
} while (0)
#define STEP4 do {                              \
    l = _mm_crc32_u32(l, LE_LOAD32(p));         \
    p += 4;                                     \
} while (0)
#define STEP8 do {                              \
    l = static_cast<uint32_t>(                  \
        _mm_crc32_u64(l, LE_LOAD64(p)));        \
    p += 8;                                     \
} while (0)

  if (size > 16) {
    // Consume single bytes until |p| sits on an 8-byte boundary. That takes
    // (8 - addr % 8) % 8 bytes, i.e. (0 - addr) & 7 -- not addr % 8, which
    // would leave |p| misaligned for every remainder other than 0 and 4.
    // At most 7 bytes are consumed here, which size > 16 guarantees exist.
    const uintptr_t addr = reinterpret_cast<uintptr_t>(p);
    for (unsigned int i = static_cast<unsigned int>((0 - addr) & 7); i; --i) {
      STEP1;
    }

    // _mm_crc32_u64 is only available on x64.
#if defined(_M_X64) || defined(__x86_64__)
    // Process 8 bytes at a time
    while ((e - p) >= 8) {
      STEP8;
    }
    // Fewer than 8 bytes remain; take one 4-byte step if it fits.
    if ((e - p) >= 4) {
      STEP4;
    }
#else  // !(defined(_M_X64) || defined(__x86_64__))
    // Process 4 bytes at a time
    while ((e - p) >= 4) {
      STEP4;
    }
#endif  // defined(_M_X64) || defined(__x86_64__)
  }
  // Process the last few bytes (or the whole input when size <= 16).
  while (p != e) {
    STEP1;
  }
#undef STEP8
#undef STEP4
#undef STEP1
  return l ^ 0xffffffffu;
#endif  // defined(LEVELDB_PLATFORM_POSIX_SSE)
}
|
||
} // namespace port | ||
} // namespace leveldb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters