Skip to content

Commit

Permalink
build
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed May 23, 2024
1 parent b2da715 commit 8ef9f05
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 36 deletions.
12 changes: 6 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -476,19 +476,19 @@ else()

check_cxx_compiler_flag("/arch:AVX -mxop" NCNN_COMPILER_SUPPORT_X86_XOP)
check_cxx_compiler_flag("/arch:AVX -mf16c" NCNN_COMPILER_SUPPORT_X86_F16C)
check_cxx_compiler_flag("/arch:AVX2" NCNN_COMPILER_SUPPORT_X86_AVX2)
check_cxx_compiler_flag("/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512)
check_cxx_compiler_flag("/arch:AVX2 -mfma -mf16c" NCNN_COMPILER_SUPPORT_X86_AVX2)
check_cxx_compiler_flag("/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mavxvnni")
set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxvnni")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)

unset(CMAKE_REQUIRED_FLAGS)
Expand Down
32 changes: 16 additions & 16 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -167,38 +167,38 @@ macro(ncnn_add_layer class)
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl")
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_FMA)
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX -mfma -mf16c")
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_AVX)
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX")
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSSE3__ /D__SSE4_1__")
endif()
if(NCNN_AVX512VNNI)
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
endif()
if(NCNN_AVX512BF16)
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512bf16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__")
endif()
if(NCNN_AVX512FP16)
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512fp16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__")
endif()
if(NCNN_AVXVNNI)
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 -mavxvnni")
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 -mfma -mf16c -mavxvnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__")
endif()
if(NCNN_AVX2)
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2")
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_XOP)
ncnn_add_arch_opt_source(${class} xop "/arch:AVX -mxop")
ncnn_add_arch_opt_source(${class} xop "/arch:AVX -mxop /D__SSSE3__ /D__SSE4_1__ /D__XOP__")
endif()
if(NCNN_F16C)
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX -mf16c")
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX -mf16c /D__SSSE3__ /D__SSE4_1__ /D__F16C__")
endif()
else()
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
ncnn_add_arch_opt_layer(${class} avx512 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl")
ncnn_add_arch_opt_layer(${class} avx512 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_FMA)
ncnn_add_arch_opt_layer(${class} fma "-mavx -mfma -mf16c")
Expand All @@ -207,19 +207,19 @@ macro(ncnn_add_layer class)
ncnn_add_arch_opt_layer(${class} avx "-mavx")
endif()
if(NCNN_AVX512VNNI)
ncnn_add_arch_opt_source(${class} avx512vnni "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
ncnn_add_arch_opt_source(${class} avx512vnni "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni")
endif()
if(NCNN_AVX512BF16)
ncnn_add_arch_opt_source(${class} avx512bf16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
ncnn_add_arch_opt_source(${class} avx512bf16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512bf16")
endif()
if(NCNN_AVX512FP16)
ncnn_add_arch_opt_source(${class} avx512fp16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
ncnn_add_arch_opt_source(${class} avx512fp16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512fp16")
endif()
if(NCNN_AVXVNNI)
ncnn_add_arch_opt_source(${class} avxvnni "-mavx2 -mavxvnni")
ncnn_add_arch_opt_source(${class} avxvnni "-mavx2 -mfma -mf16c -mavxvnni")
endif()
if(NCNN_AVX2)
ncnn_add_arch_opt_source(${class} avx2 "-mavx2")
ncnn_add_arch_opt_source(${class} avx2 "-mavx2 -mfma -mf16c")
endif()
if(NCNN_XOP)
ncnn_add_arch_opt_source(${class} xop "-mavx -mxop")
Expand Down
28 changes: 14 additions & 14 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -409,18 +409,18 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
target_compile_options(ncnn PRIVATE /D__AVX512VNNI__)
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
target_compile_options(ncnn PRIVATE /arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl)
target_compile_options(ncnn PRIVATE /arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__)
if(NCNN_AVX512VNNI)
target_compile_options(ncnn PRIVATE -mavx512vnni)
target_compile_options(ncnn PRIVATE -mavx512vnni /D__AVX512VNNI__)
endif()
if(NCNN_AVX512BF16)
target_compile_options(ncnn PRIVATE -mavx512bf16)
target_compile_options(ncnn PRIVATE -mavx512bf16 /D__AVX512BF16__)
endif()
if(NCNN_AVX512FP16)
target_compile_options(ncnn PRIVATE -mavx512fp16)
target_compile_options(ncnn PRIVATE -mavx512fp16 /D__AVX512FP16__)
endif()
else()
target_compile_options(ncnn PRIVATE -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl)
target_compile_options(ncnn PRIVATE -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c)
if(NCNN_AVX512VNNI)
target_compile_options(ncnn PRIVATE -mavx512vnni)
endif()
Expand Down Expand Up @@ -448,21 +448,21 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
if(NCNN_AVX2)
target_compile_options(ncnn PRIVATE /arch:AVX2)
target_compile_options(ncnn PRIVATE /arch:AVX2 -mfma /D__SSSE3__ /D__SSE4_1__ /D__FMA__)
else()
target_compile_options(ncnn PRIVATE /arch:AVX -mfma)
target_compile_options(ncnn PRIVATE /arch:AVX -mfma /D__SSSE3__ /D__SSE4_1__ /D__FMA__)
endif()
if(NCNN_AVXVNNI)
target_compile_options(ncnn PRIVATE -mavxvnni)
target_compile_options(ncnn PRIVATE -mavxvnni /D__AVXVNNI__)
elseif(NCNN_XOP)
target_compile_options(ncnn PRIVATE -mxop)
target_compile_options(ncnn PRIVATE -mxop /D__XOP__)
endif()
if(NCNN_F16C)
target_compile_options(ncnn PRIVATE -mf16c)
target_compile_options(ncnn PRIVATE -mf16c /D__F16C__)
endif()
else()
if(NCNN_AVX2)
target_compile_options(ncnn PRIVATE -mavx2)
target_compile_options(ncnn PRIVATE -mavx2 -mfma)
else()
target_compile_options(ncnn PRIVATE -mavx -mfma)
endif()
Expand All @@ -485,12 +485,12 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
target_compile_options(ncnn PRIVATE /D__F16C__)
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
target_compile_options(ncnn PRIVATE /arch:AVX)
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSSE3__ /D__SSE4_1__)
if(NCNN_XOP)
target_compile_options(ncnn PRIVATE -mxop)
target_compile_options(ncnn PRIVATE -mxop /D__XOP__)
endif()
if(NCNN_F16C)
target_compile_options(ncnn PRIVATE -mf16c)
target_compile_options(ncnn PRIVATE -mf16c /D__F16C__)
endif()
else()
target_compile_options(ncnn PRIVATE -mavx)
Expand Down

0 comments on commit 8ef9f05

Please sign in to comment.