From 8a8c9674601ba00b47ef9ca4be8aeb8ef7395a49 Mon Sep 17 00:00:00 2001 From: Min Wei Date: Wed, 29 Mar 2017 17:34:36 -0700 Subject: [PATCH] Enable Fast CRC32 for Win64 Summary: Currently the fast crc32 path is not enabled on Windows. I am trying to enable it here, hopefully, with the minimum impact to the existing code structure. Closes https://github.com/facebook/rocksdb/pull/2033 Differential Revision: D4770635 Pulled By: siying fbshipit-source-id: 676f8b8 --- CMakeLists.txt | 13 +++++++++++-- util/crc32c.cc | 25 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 56b91490924..28a320e5d27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,11 @@ endif() list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/modules/") if(WIN32) + if (DEFINED AVX2) + set(USE_AVX2 ${AVX2}) + else () + set(USE_AVX2 1) + endif () include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc) else() option(WITH_JEMALLOC "build with JeMalloc" OFF) @@ -111,7 +116,11 @@ add_library(build_version OBJECT ${BUILD_VERSION_CC}) target_include_directories(build_version PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/util) if(WIN32) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue") + if (${USE_AVX2} EQUAL 1) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /arch:AVX2 /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue") + else () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue") + endif () set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W3 /wd4127 /wd4800 /wd4996 /wd4351") else() @@ -378,7 +387,7 @@ set(SOURCES util/histogram_windowing.cc util/instrumented_mutex.cc util/iostats_context.cc - + util/lru_cache.cc tools/ldb_cmd.cc tools/ldb_tool.cc diff --git a/util/crc32c.cc b/util/crc32c.cc index ce574544e15..42bedc2445a 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -16,6 +16,11 @@ #ifdef __SSE4_2__ #include #endif +#if defined(_WIN64) +#ifdef __AVX2__ +#include +#endif +#endif #include "util/coding.h" namespace rocksdb { @@ -299,6 +304,13 @@ static inline uint64_t LE_LOAD64(const uint8_t *p) { #endif #endif +#if defined(_WIN64) +#ifdef __AVX2__ +static inline uint64_t LE_LOAD64(const uint8_t *p) { + return DecodeFixed64(reinterpret_cast(p)); +} +#endif +#endif static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { uint32_t c = static_cast(*l ^ LE_LOAD32(*p)); *p += 4; @@ -326,6 +338,13 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { *l = _mm_crc32_u32(static_cast(*l), LE_LOAD32(*p)); *p += 4; #endif +#elif defined(_WIN64) +#ifdef __AVX2__ + *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); + *p += 8; +#else + Slow_CRC32(l, p); +#endif #else Slow_CRC32(l, p); #endif @@ -381,6 +400,10 @@ static bool isSSE42() { uint32_t d_; __asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx"); return c_ & (1U << 20); // copied from CpuId.h in Folly. +#elif defined(_WIN64) + int info[4]; + __cpuidex(info, 0x00000001, 0); + return (info[2] & ((int)1 << 20)) != 0; #else return false; #endif @@ -395,6 +418,8 @@ static inline Function Choose_Extend() { bool IsFastCrc32Supported() { #ifdef __SSE4_2__ return isSSE42(); +#elif defined(_WIN64) + return isSSE42(); #else return false; #endif