Skip to content

Commit

Permalink
dispatch for rvv and xtheadvector
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Aug 30, 2024
1 parent 7a3e9bd commit 30e79c2
Show file tree
Hide file tree
Showing 16 changed files with 322 additions and 104 deletions.
25 changes: 15 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -416,21 +416,19 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")

if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m1_t _s, _w; float _v; size_t vl; _s = __riscv_vfmacc_vf_f32m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_V)
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m8_t _s, _w; float _v; size_t vl; _s = __riscv_vfmacc_vf_f32m8(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_V)

set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zvfh")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m1_t _s, _w; __fp16 _v; size_t vl; _s = __riscv_vfmacc_vf_f16m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_ZVFH)
set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh_zvfh -D__fp16=_Float16")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m8_t _s, _w; __fp16 _v; size_t vl; _s = __riscv_vfmacc_vf_f16m8(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_ZVFH)

# if(NOT NCNN_COMPILER_SUPPORT_RVV_ZFH)
# set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16")
# check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m1_t _s, _w; __fp16 _v; size_t vl; _s = __riscv_vfmacc_vf_f16m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV_ZVFH)
# endif()
set(CMAKE_REQUIRED_FLAGS "-march=rv64gc_zfh_xtheadvector -D__fp16=_Float16")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m8_t _s, _w; __fp16 _v; size_t vl; _s = __riscv_vfmacc_vf_f16m8(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)

unset(CMAKE_REQUIRED_FLAGS)

if(NCNN_COMPILER_SUPPORT_RISCV_V)
if(NCNN_COMPILER_SUPPORT_RISCV_V OR NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
option(NCNN_RVV "optimize risc-v platform with v extension" ON)
if(NCNN_COMPILER_SUPPORT_RISCV_ZVFH)
if(NCNN_COMPILER_SUPPORT_RISCV_ZVFH OR NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
if(NCNN_RVV)
option(NCNN_ZVFH "optimize risc-v platform with zvfh extension" ON)
endif()
Expand All @@ -456,8 +454,15 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")
# add_definitions(-D__rvv_tuple)
# endif()
else()
message(WARNING "The compiler does not support risc-v v extension. NCNN_RVV will be OFF.")
message(WARNING "The compiler does not support risc-v v or xtheadvector extension. NCNN_RVV will be OFF.")
endif()

if(NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
option(NCNN_XTHEADVECTOR "optimize risc-v platform with xtheadvector extension" ON)
else()
message(WARNING "The compiler does not support risc-v xtheadvector extension. NCNN_XTHEADVECTOR will be OFF.")
endif()

endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
set(NCNN_TARGET_ARCH powerpc)
Expand Down
17 changes: 7 additions & 10 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -363,17 +363,14 @@ macro(ncnn_add_layer class)
endif()

if(NCNN_TARGET_ARCH STREQUAL "riscv" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
# if(NCNN_RUNTIME_CPU AND NCNN_RVV)
# ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv")

# if(NCNN_COMPILER_SUPPORT_RVV_ZVFH)
# ncnn_add_arch_opt_layer(${class} zvfh "-march=rv64gcv_zvfh")
# elseif(NCNN_COMPILER_SUPPORT_RVV_ZVFH)
# ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16")
# endif()
# endif()
if(NCNN_RUNTIME_CPU AND NCNN_RVV)
ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_XTHEADVECTOR)
ncnn_add_arch_opt_layer(${class} xtheadvector "-march=rv64gc_zfh_xtheadvector -D__fp16=_Float16")
endif()
if(NCNN_ZVFH)
ncnn_add_arch_opt_source(${class} zvfh "-march=rv64gcv_zvfh -D__fp16=_Float16")
ncnn_add_arch_opt_source(${class} zvfh "-march=rv64gcv_zfh_zvfh -D__fp16=_Float16")
endif()
endif()

Expand Down
14 changes: 14 additions & 0 deletions cmake/ncnn_generate_xtheadvector_source.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# must define SRC DST CLASS

# Generates the *_riscv_xtheadvector variant of a riscv layer source file.
#
# Run in CMake script mode (cmake -P) with these variables defined:
#   SRC   - path of the original <class>_riscv source file to read
#   DST   - path of the xtheadvector variant to write
#   CLASS - layer class name (e.g. AbsVal)

file(READ "${SRC}" source_data)

# Derive the include-guard (upper) and filename (lower) spellings of the class.
string(TOUPPER "${CLASS}" CLASS_UPPER)
string(TOLOWER "${CLASS}" CLASS_LOWER)

# Rewrite the include guard, the class/symbol names, and the self-include so the
# generated translation unit is independent of the plain riscv one.
# Use literal string(REPLACE): none of these patterns is meant as a regex, and
# the unescaped "." in "_riscv.h" would otherwise match any character.
string(REPLACE "LAYER_${CLASS_UPPER}_RISCV_H" "LAYER_${CLASS_UPPER}_RISCV_XTHEADVECTOR_H" source_data "${source_data}")
string(REPLACE "${CLASS}_riscv" "${CLASS}_riscv_xtheadvector" source_data "${source_data}")
string(REPLACE "#include \"${CLASS_LOWER}_riscv.h\"" "#include \"${CLASS_LOWER}_riscv_xtheadvector.h\"" source_data "${source_data}")

file(WRITE "${DST}" "${source_data}")
22 changes: 7 additions & 15 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -631,24 +631,16 @@ if(NCNN_TARGET_ARCH STREQUAL "loongarch")
endif()

if(NCNN_TARGET_ARCH STREQUAL "riscv" AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT C906)
if(NCNN_RVV)
if(NOT NCNN_RUNTIME_CPU AND NCNN_RVV)
set(RISCV_MARCH_FLAG "-march=rv64gcv")
else()
set(RISCV_MARCH_FLAG "-march=rv64gc")
endif()

if(NOT NCNN_RUNTIME_CPU AND NCNN_ZVFH)
set(RISCV_MARCH_FLAG "${RISCV_MARCH_FLAG}_zvfh")
if(NCNN_ZVFH)
set(RISCV_MARCH_FLAG "${RISCV_MARCH_FLAG}_zfh_zvfh")
target_compile_options(ncnn PRIVATE -D__fp16=_Float16)
endif()
elseif(NOT NCNN_RUNTIME_CPU AND NCNN_ZVFH)
set(RISCV_MARCH_FLAG "-march=rv64gc_zfh_xtheadvector")
target_compile_options(ncnn PRIVATE -D__fp16=_Float16)
endif()

# if(NCNN_COMPILER_SUPPORT_RVV_ZFH)
# target_compile_options(ncnn PRIVATE -march=rv64gcv_zfh)
# elseif(NCNN_COMPILER_SUPPORT_RVV_ZVFH)
# target_compile_options(ncnn PRIVATE -march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16)
# elseif(NCNN_COMPILER_SUPPORT_RVV)
# target_compile_options(ncnn PRIVATE -march=rv64gcv)
# endif()
target_compile_options(ncnn PRIVATE ${RISCV_MARCH_FLAG})
endif()

Expand Down
16 changes: 16 additions & 0 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2580,6 +2580,22 @@ int cpu_support_riscv_zvfh()
#endif
}

// Best-effort runtime probe for the T-Head xtheadvector extension.
// Returns non-zero when the Linux/Android kernel reports both the V and F
// ISA hwcaps for the current riscv CPU; returns 0 on any other platform.
int cpu_support_riscv_xtheadvector()
{
try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __riscv
// NOTE(review): there is no dedicated hwcap bit for xtheadvector; this checks
// the generic V + F hwcaps as an approximation. Confirm this cannot also
// report true on hardware that only implements standard RVV.
// v + f does not imply zfh, but how to discover zvfh properly ?
// upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
#else
// Built without riscv: the extension can never be present.
return 0;
#endif
#else
// Non-Linux/Android OS: no hwcap interface to query, report unsupported.
return 0;
#endif
}

int cpu_riscv_vlenb()
{
try_initialize_global_cpu_info();
Expand Down
2 changes: 2 additions & 0 deletions src/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ NCNN_EXPORT int cpu_support_riscv_v();
NCNN_EXPORT int cpu_support_riscv_zfh();
// zvfh = riscv vector half-precision float
NCNN_EXPORT int cpu_support_riscv_zvfh();
// xtheadvector = riscv xtheadvector
NCNN_EXPORT int cpu_support_riscv_xtheadvector();
// vlenb = riscv vector length in bytes
NCNN_EXPORT int cpu_riscv_vlenb();

Expand Down
14 changes: 14 additions & 0 deletions src/layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,20 @@ Layer* create_layer_cpu(int index)
}
else
#endif // NCNN_RUNTIME_CPU && NCNN_MSA
#if NCNN_RUNTIME_CPU && NCNN_RVV
if (ncnn::cpu_support_riscv_v())
{
layer_creator = layer_registry_rvv[index].creator;
}
else
#endif // NCNN_RUNTIME_CPU && NCNN_RVV
#if NCNN_RUNTIME_CPU && NCNN_XTHEADVECTOR
if (ncnn::cpu_support_riscv_xtheadvector())
{
layer_creator = layer_registry_xtheadvector[index].creator;
}
else
#endif // NCNN_RUNTIME_CPU && NCNN_XTHEADVECTOR
{
layer_creator = layer_registry_arch[index].creator;
}
Expand Down
67 changes: 67 additions & 0 deletions src/layer/riscv/absval_fp16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.


#if NCNN_RUNTIME_CPU && NCNN_ZVFH && __riscv_vector && !__riscv_zvfh
void absval_fp16_zvfh(Mat& bottom_top_blob, const Option& opt);
#endif

#if __riscv_zvfh
// |x| for a fp16 m8 vector: vfsgnjx injects sign(op1) XOR sign(op2); with both
// operands equal the XOR is 0, clearing the sign bit. Avoids needing a
// dedicated vfabs intrinsic.
static inline vfloat16m8_t __riscv_vfabs_v_f16m8_absval(vfloat16m8_t op1, size_t vl)
{
return __riscv_vfsgnjx_vv_f16m8(op1, op1, vl);
}
#endif // __riscv_zvfh

// In-place absolute value over an fp16 blob.
//
// Dispatch: when this translation unit is compiled for generic rvv (runtime-cpu
// build, no native zvfh, not xtheadvector) but the running CPU supports zvfh,
// delegate to the separately-compiled absval_fp16_zvfh variant. Otherwise run
// the inline __riscv_zvfh implementation below; if fp16 vectors are unavailable
// at compile time the function is a no-op stub.
static void absval_fp16(Mat& bottom_top_blob, const Option& opt)
{
#if NCNN_RUNTIME_CPU && NCNN_ZVFH && __riscv_vector && !__riscv_xtheadvector && !__riscv_zvfh
if (ncnn::cpu_support_riscv_zvfh())
{
absval_fp16_zvfh(bottom_top_blob, opt);
return;
}
#endif

#if __riscv_zvfh
// Total number of fp16 elements per channel, counting packed lanes.
const int w = bottom_top_blob.w;
const int h = bottom_top_blob.h;
const int d = bottom_top_blob.d;
const int channels = bottom_top_blob.c;
const int elempack = bottom_top_blob.elempack;
const int size = w * h * d * elempack;

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

// Strip-mined loop: vsetvl picks up to m8 lanes per iteration and
// handles the remainder automatically on the final pass.
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m8(n);

vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
_p = __riscv_vfabs_v_f16m8_absval(_p, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
}
#else
// No fp16 vector support in this build: silence unused-parameter warnings.
(void)bottom_top_blob;
(void)opt;
#endif // __riscv_zvfh
}
17 changes: 14 additions & 3 deletions src/layer/riscv/absval_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,22 @@
// specific language governing permissions and limitations under the License.

#include "absval_riscv.h"
#include "cpu.h"

#if __riscv_vector
#include <riscv_vector.h>
#endif // __riscv_vector

namespace ncnn {

#include "absval_fp16.h"

AbsVal_riscv::AbsVal_riscv()
{
#if __riscv_vector
support_packing = true;
#if NCNN_ZVFH
support_fp16_storage = cpu_support_riscv_zvfh();
#if NCNN_ZVFH || NCNN_XTHEADVECTOR
support_fp16_storage = cpu_support_riscv_zvfh() || cpu_support_riscv_xtheadvector();
#endif
#endif // __riscv_vector
}
Expand All @@ -39,7 +42,7 @@ static inline vfloat32m8_t __riscv_vfabs_v_f32m8_absval(vfloat32m8_t op1, size_t

int AbsVal_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
#if NCNN_ZVFH
#if __riscv_vector
int elembits = bottom_top_blob.elembits();

if (support_fp16_storage && opt.use_fp16_storage && elembits == 16)
Expand Down Expand Up @@ -86,4 +89,12 @@ int AbsVal_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
return 0;
}

#if __riscv_vector
// fp16-storage path: delegates to the shared absval_fp16 kernel, which handles
// runtime zvfh dispatch internally. Always reports success.
int AbsVal_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
{
absval_fp16(bottom_top_blob, opt);
return 0;
}
#endif // __riscv_vector

} // namespace ncnn
2 changes: 1 addition & 1 deletion src/layer/riscv/absval_riscv.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class AbsVal_riscv : public AbsVal
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

protected:
#if NCNN_ZVFH
#if __riscv_vector
int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
#endif
};
Expand Down
44 changes: 5 additions & 39 deletions src/layer/riscv/absval_riscv_zvfh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,50 +12,16 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "absval_riscv.h"

#if __riscv_vector
#include <riscv_vector.h>
#endif
#include "cpu.h"
#include "mat.h"

namespace ncnn {

#if __riscv_zvfh
static inline vfloat16m8_t __riscv_vfabs_v_f16m8_absval(vfloat16m8_t op1, size_t vl)
{
return __riscv_vfsgnjx_vv_f16m8(op1, op1, vl);
}
#include "absval_fp16.h"

int AbsVal_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
void absval_fp16_zvfh(Mat& bottom_top_blob, const Option& opt)
{
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int d = bottom_top_blob.d;
int channels = bottom_top_blob.c;
int elempack = bottom_top_blob.elempack;
int size = w * h * d * elempack;

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m8(n);

vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
_p = __riscv_vfabs_v_f16m8_absval(_p, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
}

return 0;
absval_fp16(bottom_top_blob, opt);
}
#endif // __riscv_zvfh

} // namespace ncnn
2 changes: 2 additions & 0 deletions src/layer/riscv/packing_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#include "riscv_usability.h"

#include "cpu.h"

namespace ncnn {

Packing_riscv::Packing_riscv()
Expand Down
Loading

0 comments on commit 30e79c2

Please sign in to comment.