use cast op instead of convert
chraac committed Feb 27, 2025
1 parent ccc71c0 commit e662fc2
Showing 2 changed files with 9 additions and 21 deletions.
21 changes: 7 additions & 14 deletions ggml/src/ggml-qnn/backend-ops.cpp
@@ -401,8 +401,9 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context * ctx, const ggml_
case GGML_TYPE_Q8_0:
case GGML_TYPE_Q4_0:
if (!(ctx->supported_types & (uint64_t(1) << tensor->type))) {
QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x\n", qnn::get_backend_name(ctx->device),
ggml_type_name(tensor->type), (unsigned int) ctx->supported_types);
QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x\n",
qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type),
(unsigned int) ctx->supported_types);
return false;
}
break;
@@ -455,16 +456,8 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context * ctx, const gg
QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]tensor size is too large\n");
return false;
}
// fall through, from test here, the convert op is super slow on NPU:
// https://github.com/usefulsensors/qc_npu_benchmark
case QNN_BACKEND_GPU:
if (src0->type != src1->type || src0->type != op->type) {
// there's no convert op for GPU.
QNN_LOG_DEBUG("[qnn-gpu][MUL_MAT]type src0(%s), src1(%s) and op(%s) are not equal\n",
ggml_type_name(src0->type), ggml_type_name(src1->type), ggml_type_name(op->type));
return false;
}
break;
case QNN_BACKEND_GPU:
default:
break;
}
@@ -503,9 +496,9 @@ bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor
#ifndef NDEBUG
std::string tensor_dims;
append_tensor_dimensions(op, tensor_dims);
QNN_LOG_DEBUG("[%s][%s]unsupported tensor(%s), support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device),
ggml_op_name(op->op), tensor_dims.c_str(), ctx->supported_op_count.load(),
ctx->unsupported_op_count.load());
QNN_LOG_DEBUG("[%s][%s]unsupported tensor(%s), support/unsupported: %d/%d\n",
qnn::get_backend_name(ctx->device), ggml_op_name(op->op), tensor_dims.c_str(),
ctx->supported_op_count.load(), ctx->unsupported_op_count.load());
#endif
return false;
}
9 changes: 2 additions & 7 deletions ggml/src/ggml-qnn/op-config-impl.cpp
@@ -300,11 +300,6 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic
bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
qnn_tensor_array_t & tensor_inputs,
qnn_tensor_array_t & tensor_outputs) {
if (device == QNN_BACKEND_GPU) {
// there's no convert op for GPU, so we should create matmul nodes directly.
return true;
}

// create tensors for convert node
auto tensor_type = get_tensor_type(tensor_inputs);
QNN_LOG_DEBUG("input tensor type: %s\n", qnn_datatype_to_string(tensor_type));
@@ -321,7 +316,7 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_Grap
convert_in->get_dimensions(), tensor_type, rank, device,
graph_handle, _qnn_instance);
auto convert = std::make_shared<ggml_qnn_single_op_config>(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
QNN_OP_CONVERT, _qnn_instance);
QNN_OP_CAST, _qnn_instance);
convert->set_input_tensors({ convert_in });
convert->set_output_tensors({ convert_out });
tensor_inputs[i] = convert_out;
@@ -336,7 +331,7 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_Grap
convert_out->get_dimensions(), tensor_type, rank, device,
graph_handle, _qnn_instance);
auto output_convert = std::make_shared<ggml_qnn_single_op_config>(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
QNN_OP_CONVERT, _qnn_instance);
QNN_OP_CAST, _qnn_instance);
output_convert->set_input_tensors({ convert_in });
output_convert->set_output_tensors({ convert_out });
tensor_outputs.front() = convert_in;
