From 8ef9f0586f4b6e8980c849fa8a5b004dd879208b Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 23 May 2024 14:32:43 +0800 Subject: [PATCH] build --- CMakeLists.txt | 12 ++++++------ cmake/ncnn_add_layer.cmake | 32 ++++++++++++++++---------------- src/CMakeLists.txt | 28 ++++++++++++++-------------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 586ce136da3..7ff470ad9c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -476,19 +476,19 @@ else() check_cxx_compiler_flag("/arch:AVX -mxop" NCNN_COMPILER_SUPPORT_X86_XOP) check_cxx_compiler_flag("/arch:AVX -mf16c" NCNN_COMPILER_SUPPORT_X86_F16C) - check_cxx_compiler_flag("/arch:AVX2" NCNN_COMPILER_SUPPORT_X86_AVX2) - check_cxx_compiler_flag("/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512) + check_cxx_compiler_flag("/arch:AVX2 -mfma -mf16c" NCNN_COMPILER_SUPPORT_X86_AVX2) + check_cxx_compiler_flag("/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512) - set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mavxvnni") + set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxvnni") check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI) - set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni") + set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni") check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI) - set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16") + set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16") check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, 
_b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16) - set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16") + set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16") check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16) unset(CMAKE_REQUIRED_FLAGS) diff --git a/cmake/ncnn_add_layer.cmake b/cmake/ncnn_add_layer.cmake index ee64206b74e..7b6f7ba3789 100644 --- a/cmake/ncnn_add_layer.cmake +++ b/cmake/ncnn_add_layer.cmake @@ -167,38 +167,38 @@ macro(ncnn_add_layer class) endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") if(NCNN_RUNTIME_CPU AND NCNN_AVX512) - ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl") + ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() if(NCNN_RUNTIME_CPU AND NCNN_FMA) - ncnn_add_arch_opt_layer(${class} fma "/arch:AVX -mfma -mf16c") + ncnn_add_arch_opt_layer(${class} fma "/arch:AVX -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() if(NCNN_RUNTIME_CPU AND NCNN_AVX) - ncnn_add_arch_opt_layer(${class} avx "/arch:AVX") + ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSSE3__ /D__SSE4_1__") endif() if(NCNN_AVX512VNNI) - ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni") + ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__") endif() 
if(NCNN_AVX512BF16) - ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16") + ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512bf16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__") endif() if(NCNN_AVX512FP16) - ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16") + ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512fp16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__") endif() if(NCNN_AVXVNNI) - ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 -mavxvnni") + ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 -mfma -mf16c -mavxvnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__") endif() if(NCNN_AVX2) - ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2") + ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__") endif() if(NCNN_XOP) - ncnn_add_arch_opt_source(${class} xop "/arch:AVX -mxop") + ncnn_add_arch_opt_source(${class} xop "/arch:AVX -mxop /D__SSSE3__ /D__SSE4_1__ /D__XOP__") endif() if(NCNN_F16C) - ncnn_add_arch_opt_source(${class} f16c "/arch:AVX -mf16c") + ncnn_add_arch_opt_source(${class} f16c "/arch:AVX -mf16c /D__SSSE3__ /D__SSE4_1__ /D__F16C__") endif() else() if(NCNN_RUNTIME_CPU AND NCNN_AVX512) - ncnn_add_arch_opt_layer(${class} avx512 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl") + ncnn_add_arch_opt_layer(${class} avx512 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c") endif() if(NCNN_RUNTIME_CPU AND NCNN_FMA) ncnn_add_arch_opt_layer(${class} fma "-mavx -mfma -mf16c") @@ -207,19 +207,19 @@ macro(ncnn_add_layer class) ncnn_add_arch_opt_layer(${class} avx "-mavx") endif() if(NCNN_AVX512VNNI) - ncnn_add_arch_opt_source(${class} avx512vnni 
"-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni") + ncnn_add_arch_opt_source(${class} avx512vnni "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni") endif() if(NCNN_AVX512BF16) - ncnn_add_arch_opt_source(${class} avx512bf16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16") + ncnn_add_arch_opt_source(${class} avx512bf16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512bf16") endif() if(NCNN_AVX512FP16) - ncnn_add_arch_opt_source(${class} avx512fp16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16") + ncnn_add_arch_opt_source(${class} avx512fp16 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512fp16") endif() if(NCNN_AVXVNNI) - ncnn_add_arch_opt_source(${class} avxvnni "-mavx2 -mavxvnni") + ncnn_add_arch_opt_source(${class} avxvnni "-mavx2 -mfma -mf16c -mavxvnni") endif() if(NCNN_AVX2) - ncnn_add_arch_opt_source(${class} avx2 "-mavx2") + ncnn_add_arch_opt_source(${class} avx2 "-mavx2 -mfma -mf16c") endif() if(NCNN_XOP) ncnn_add_arch_opt_source(${class} xop "-mavx -mxop") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0aaa604d718..2e444fdb629 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -409,18 +409,18 @@ if(NCNN_TARGET_ARCH STREQUAL "x86") target_compile_options(ncnn PRIVATE /D__AVX512VNNI__) endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") - target_compile_options(ncnn PRIVATE /arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl) + target_compile_options(ncnn PRIVATE /arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__) if(NCNN_AVX512VNNI) - target_compile_options(ncnn PRIVATE -mavx512vnni) + target_compile_options(ncnn PRIVATE -mavx512vnni /D__AVX512VNNI__) endif() if(NCNN_AVX512BF16) - target_compile_options(ncnn PRIVATE 
-mavx512bf16) + target_compile_options(ncnn PRIVATE -mavx512bf16 /D__AVX512BF16__) endif() if(NCNN_AVX512FP16) - target_compile_options(ncnn PRIVATE -mavx512fp16) + target_compile_options(ncnn PRIVATE -mavx512fp16 /D__AVX512FP16__) endif() else() - target_compile_options(ncnn PRIVATE -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl) + target_compile_options(ncnn PRIVATE -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c) if(NCNN_AVX512VNNI) target_compile_options(ncnn PRIVATE -mavx512vnni) endif() @@ -448,21 +448,21 @@ if(NCNN_TARGET_ARCH STREQUAL "x86") endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") if(NCNN_AVX2) - target_compile_options(ncnn PRIVATE /arch:AVX2) + target_compile_options(ncnn PRIVATE /arch:AVX2 -mfma /D__SSSE3__ /D__SSE4_1__ /D__FMA__) else() - target_compile_options(ncnn PRIVATE /arch:AVX -mfma) + target_compile_options(ncnn PRIVATE /arch:AVX -mfma /D__SSSE3__ /D__SSE4_1__ /D__FMA__) endif() if(NCNN_AVXVNNI) - target_compile_options(ncnn PRIVATE -mavxvnni) + target_compile_options(ncnn PRIVATE -mavxvnni /D__AVXVNNI__) elseif(NCNN_XOP) - target_compile_options(ncnn PRIVATE -mxop) + target_compile_options(ncnn PRIVATE -mxop /D__XOP__) endif() if(NCNN_F16C) - target_compile_options(ncnn PRIVATE -mf16c) + target_compile_options(ncnn PRIVATE -mf16c /D__F16C__) endif() else() if(NCNN_AVX2) - target_compile_options(ncnn PRIVATE -mavx2) + target_compile_options(ncnn PRIVATE -mavx2 -mfma) else() target_compile_options(ncnn PRIVATE -mavx -mfma) endif() @@ -485,12 +485,12 @@ if(NCNN_TARGET_ARCH STREQUAL "x86") target_compile_options(ncnn PRIVATE /D__F16C__) endif() elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") - target_compile_options(ncnn PRIVATE /arch:AVX) + target_compile_options(ncnn PRIVATE /arch:AVX /D__SSSE3__ /D__SSE4_1__) 
if(NCNN_XOP) - target_compile_options(ncnn PRIVATE -mxop) + target_compile_options(ncnn PRIVATE -mxop /D__XOP__) endif() if(NCNN_F16C) - target_compile_options(ncnn PRIVATE -mf16c) + target_compile_options(ncnn PRIVATE -mf16c /D__F16C__) endif() else() target_compile_options(ncnn PRIVATE -mavx)