Skip to content

Commit

Permalink
build: fix build error with gcc-13 (#264)
Browse files Browse the repository at this point in the history
  • Loading branch information
guocuimi authored Jul 7, 2024
1 parent 7aeb7fa commit 515417e
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 25 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ jobs:
steps:
- name: Install toolkits
run: |
sudo apt-get install -y build-essential ninja-build bison gcc-12 g++-12 libunwind-dev python3-dev libboost-all-dev ccache
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
sudo apt-get install -y build-essential ninja-build bison libunwind-dev python3-dev ccache
- name: Show gcc version
run: gcc --version
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ endif()
# Build TORCH_CUDA_ARCH_LIST
set(TORCH_CUDA_ARCH_LIST "")
foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
if(CUDA_ARCH MATCHES "^([0-9])([0-9])(-real)*$")
if(CUDA_ARCH MATCHES "^([0-9])([0-9])*$")
set(TORCH_ARCH "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}")
elseif(CUDA_ARCH STREQUAL "native")
set(TORCH_ARCH "Auto")
Expand Down
11 changes: 0 additions & 11 deletions src/kernels/activation_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,10 @@ namespace {

/* Gelu Activation */
// adapted from https://github.com/NVIDIA/FasterTransformer
// Bitwise sign transfer: reinterprets `a` and `b` as 32-bit integers, masks
// out everything but bit 31 (the sign bit) of `b`, ORs that bit into `a`, and
// reinterprets the result as a float.
//
// Because the sign is OR'ed in rather than assigned, a sign bit already set
// in `a` is never cleared; the result carries the sign of `b` only when the
// sign bit of `a` is clear (presumably what the `_pos` suffix refers to).
__forceinline__ __device__ float copysignf_pos(float a, float b) {
  return __int_as_float(__float_as_int(a) |
                        (__float_as_int(b) & 0x80000000));
}

// Single-precision tanh(x) for device code, with two compile-time variants:
//
//  * When __CUDA_ARCH__ >= 750 and CUDART_VERSION >= 11000, the result comes
//    from the PTX instruction `tanh.approx.f32` emitted through inline asm.
//  * Otherwise it is computed as (1 - e) / (e + 1) with e = __expf(-|2x|),
//    which is tanh(|x|); the sign of `x` is then OR'ed back in through
//    copysignf_pos.
__inline__ __device__ float tanh_opt(float x) {
#if (__CUDA_ARCH__ >= 750 && CUDART_VERSION >= 11000)
  float approx;
  asm("tanh.approx.f32 %0,%1; \n\t" : "=f"(approx) : "f"(x));
  return approx;
#else
  // Exponent is always <= 0, so e stays within (0, 1] for finite x.
  const float neg_abs_2x = -1.f * fabs(2 * x);
  const float e = __expf(neg_abs_2x);
  // Non-negative magnitude of the result; the sign is restored below.
  const float magnitude = (1.0f - e) / (e + 1.0f);
  return copysignf_pos(magnitude, x);
#endif
}

template <typename T>
Expand Down
12 changes: 6 additions & 6 deletions src/kernels/pos_embedding_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ struct RotaryEmbedding {
// apply rotary embedding to data on position idx
// x -> x * cos - y * sin
// y -> x * sin + y * cos
static __device__ __forceinline__ T apply(T* __restrict__ data,
const T* __restrict__ cos,
const T* __restrict__ sin,
int idx,
int n,
bool interleaved) {
static __device__ __forceinline__ void apply(T* __restrict__ data,
const T* __restrict__ cos,
const T* __restrict__ sin,
int idx,
int n,
bool interleaved) {
// interleaved: x = data[2 * idx], y = data[2 * idx + 1]
// rotated: x = data[idx], y = data[idx + rotary_dim / 2]
const int x_idx = interleaved ? 2 * idx : idx;
Expand Down
5 changes: 0 additions & 5 deletions src/scheduler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,9 @@ cc_library(
HDRS
scheduler.h
response_handler.h
scheduler_config.h
scheduler_factory.h
scheduler_policy.h
continuous_scheduler.h
SRCS
response_handler.cpp
scheduler_config.cpp
scheduler_policy.cpp
continuous_scheduler.cpp
DEPS
:request
Expand Down

0 comments on commit 515417e

Please sign in to comment.