Skip to content

Commit

Permalink
Merge branch 'upstream' into concedo_experimental
Browse files Browse the repository at this point in the history
  • Loading branch information
LostRuins committed Dec 13, 2024
2 parents ed75f8a + 64ae065 commit 46d76d9
Show file tree
Hide file tree
Showing 21 changed files with 291 additions and 267 deletions.
13 changes: 2 additions & 11 deletions ggml/src/ggml-sycl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//

#include "common.hpp"
#include "ggml-impl.h"

int get_current_device_id() {
return dpct::dev_mgr::instance().current_device_id();
Expand All @@ -28,11 +29,7 @@ void* ggml_sycl_host_malloc(size_t size) try {

if (err != 0) {
// clear the error
fprintf(
stderr,
"WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
size / 1024.0 / 1024.0,
"syclGetErrorString is not supported");
GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size / 1024.0 / 1024.0, "syclGetErrorString is not supported");
return nullptr;
}

Expand Down Expand Up @@ -66,18 +63,12 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block
void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
const ggml_tensor *src1, ggml_tensor *dst,
const ggml_sycl_op_flatten_t op) try {
const int64_t nrows0 = ggml_nrows(src0);

const bool use_src1 = src1 != nullptr;
const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;

GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);

ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr;
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;

// dd = data device
float * src0_ddf = (float *) src0->data;
float * src1_ddf = use_src1 ? (float *) src1->data : nullptr;
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-sycl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ struct bin_bcast_sycl {
});
}
}
GGML_UNUSED(ctx);
}
};

Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-sycl/concat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ static void concat_f32_dim1(const float *x, const float *y, float *dst,
// operation
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
if (item_ct1.get_group(1) < ne01) { // src0
if (item_ct1.get_group(1) < (size_t) ne01) { // src0
int offset_src =
nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01;
dst[offset_dst] = x[offset_src];
Expand All @@ -70,7 +70,7 @@ static void concat_f32_dim2(const float *x, const float *y, float *dst,
// operation
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
if (item_ct1.get_group(0) < ne02) { // src0
if (item_ct1.get_group(0) < (size_t) ne02) { // src0
int offset_src = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
dst[offset_dst] = x[offset_src];
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-sycl/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ static void convert_unary(const void * __restrict__ vx, dst_t * __restrict__ y,
const int64_t global_id = item_ct1.get_local_id(2) + work_group_size * item_ct1.get_group(2);

// make each work-item deal with more elements since sycl global range can not exceed max int
const src_t * x = (src_t *) vx;
const src_t * x = (const src_t *) vx;
for (int64_t i = global_id; i < k; i += work_group_size * item_ct1.get_group_range(2)) {
y[i] = x[i];
}
Expand Down
10 changes: 5 additions & 5 deletions ggml/src/ggml-sycl/dmmv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1015,9 +1015,9 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
break;
}

(void) src1;
(void) dst;
(void) src1_ddq_i;
(void) src1_ncols;
(void) src1_padded_row_size;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_ddq_i);
GGML_UNUSED(src1_ncols);
GGML_UNUSED(src1_padded_row_size);
}
2 changes: 1 addition & 1 deletion ggml/src/ggml-sycl/dpct/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1237,7 +1237,7 @@ namespace dpct
std::map<byte_t *, allocation>::iterator get_map_iterator(const void *ptr)
{
auto it = m_map.upper_bound((byte_t *)ptr);
auto it = m_map.upper_bound(const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr)));
if (it == m_map.end())
{
// Not a virtual pointer.
Expand Down
141 changes: 80 additions & 61 deletions ggml/src/ggml-sycl/element_wise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ void upscale_f32(const float *x, float *dst, const int nb00, const int nb01,
int i02 = i12 / sf2;
int i03 = i13 / sf3;

dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
dst[index] = *(const float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
}

void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const int ne01, const int ne02,
Expand All @@ -251,8 +251,7 @@ void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const i
// operation
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
if (nidx < ne00 && item_ct1.get_group(1) < ne01 &&
item_ct1.get_group(0) < ne02) {
if (nidx < ne00 && item_ct1.get_group(1) < (size_t) ne01 && item_ct1.get_group(0) < (size_t) ne02) {
int offset_src = nidx + item_ct1.get_group(1) * ne00 +
item_ct1.get_group(0) * ne00 * ne01;
dst[offset_dst] = x[offset_src];
Expand Down Expand Up @@ -520,9 +519,10 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor

silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
Expand All @@ -535,9 +535,10 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor

gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
const ggml_tensor *src1, ggml_tensor *dst,
Expand All @@ -550,9 +551,10 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_

gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
Expand All @@ -564,9 +566,10 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor
GGML_ASSERT( dst->type == GGML_TYPE_F32);
tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
Expand All @@ -579,9 +582,10 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor

relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -595,9 +599,10 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml

hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -610,9 +615,10 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_t

hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -625,9 +631,10 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor

exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -640,9 +647,10 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor

log_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -655,9 +663,10 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_ten

sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -670,9 +679,10 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor

sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -685,9 +695,10 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor

sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -700,9 +711,10 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor

cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -715,9 +727,10 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor

step_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -730,9 +743,10 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor

neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -749,9 +763,10 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_

leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), negative_slope, main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
Expand All @@ -764,9 +779,10 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor

sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
Expand All @@ -787,9 +803,10 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_ten
dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
Expand All @@ -805,9 +822,10 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor
src0->ne[0], src0->ne[1], src0->ne[2],
dst->ne[0], dst->ne[1], dst->ne[2], main_stream);

(void) src1;
(void) dst;
(void) src1_dd;
GGML_UNUSED(src1);
GGML_UNUSED(dst);
GGML_UNUSED(src1_dd);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
Expand All @@ -827,7 +845,8 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor

acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);

(void) dst;
GGML_UNUSED(dst);
GGML_UNUSED(ctx);
}

inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
Expand Down
Loading

0 comments on commit 46d76d9

Please sign in to comment.