diff --git a/deps/Project.toml b/deps/Project.toml index 27e54c22..bb872904 100644 --- a/deps/Project.toml +++ b/deps/Project.toml @@ -9,3 +9,7 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Scratch = "6c6a2e73-6563-6170-7368-637461726353" oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d" +oneAPI_Support_Headers_jll = "24f86df5-245d-5634-a4cc-32433d9800b3" + +[compat] +oneAPI_Support_Headers_jll = "=2023.0.0" diff --git a/deps/generate_interfaces.jl b/deps/generate_interfaces.jl new file mode 100644 index 00000000..ab99631f --- /dev/null +++ b/deps/generate_interfaces.jl @@ -0,0 +1,414 @@ +using oneAPI_Support_Headers_jll + +blas = joinpath(oneAPI_Support_Headers_jll.artifact_dir, "include", "oneapi", "mkl", "blas", "buffer_decls.hpp") +lapack = joinpath(oneAPI_Support_Headers_jll.artifact_dir, "include", "oneapi", "mkl", "lapack.hpp") +sparse = joinpath(oneAPI_Support_Headers_jll.artifact_dir, "include", "oneapi", "mkl", "spblas.hpp") + +dict_version = Dict{Int, Char}(1 => 'S', 2 => 'D', 3 => 'C', 4 => 'Z') + +version_types = Dict{Char, String}('S' => "float", + 'D' => "double", + 'C' => "std::complex", + 'Z' => "std::complex", + 'I' => "int32_t", + 'L' => "int64_t") + +version_types_header = Dict{Char, String}('S' => "float", + 'D' => "double", + 'C' => "float _Complex", + 'Z' => "double _Complex", + 'I' => "int32_t", + 'L' => "int64_t") + +function generate_headers(library::String, filename::String, output::String) + routines = Dict{String,Int}() + signatures = [] + signatures2 = [] + cpp_headers = read(filename, String) + headers = "" + + # Remove comments + for header in split(cpp_headers, '\n') + mapreduce(x -> !startswith(header, x) && !occursin("\"", header), &, ["/*", "*", "//", "[[deprecated", "#undef", "#define", "ONEMKL_DECLARE_BUF_"]) && (headers *= header) + end + + # Analyse each header + headers = split(headers, ';') + for (i, header) in enumerate(headers) + # We only 
generate C interfaces for exported symbols + !occursin("DLL_EXPORT", header) && !occursin("_scratchpad_size", header) && continue + + # We don't want to interface routines with the following types, parameters or names + occursin("class", header) && continue + occursin("sycl::half", header) && continue + occursin("bfloat16", header) && continue + occursin("::int8_t", header) && continue + occursin("sycl::event", header) && continue # USM API + occursin("group_count", header) && occursin("group_sizes", header) && continue # USM API + occursin("gemm_bias", header) && continue # BLAS routine + occursin("heevx", header) && continue # LAPACK routine + occursin("hegvx", header) && continue # LAPACK routine + occursin("(matrix_handle_t handle", header) && continue # SPARSE routine + occursin("gemvdot", header) && continue # SPARSE routine + occursin("matmat", header) && continue # SPARSE routine + + # Check if the routine is a template + template = occursin("template", header) + if template + header = replace(header, "template = nullptr>" => "") + header = replace(header, "template = nullptr>" => "") + header = replace(header, "template = nullptr>" => "") + header = replace(header, "template = nullptr>" => "") + header = replace(header, "template = nullptr>" => "") + header = replace(header, "template = nullptr>" => "") + end + + # Replace the types + header = replace(header, "void onemkl" => "int onemkl") + header = replace(header, "sycl::queue &queue" => "syclQueue_t device_queue") + header = replace(header, "std::int32_t" => "int32_t") + header = replace(header, "std::int64_t" => "int64_t") + + if library == "blas" + header = replace(header, "compute_mode mode = MKL_BLAS_COMPUTE_MODE" => "") + + header = replace(header, "sycl::buffer &" => "Ta *") + header = replace(header, "sycl::buffer &" => "Tb *") + header = replace(header, "sycl::buffer &" => "Tc *") + header = replace(header, "sycl::buffer &" => "Td *") + header = replace(header, "sycl::buffer &" => "Treal *") + 
header = replace(header, "sycl::buffer &" => "Tres *") + header = replace(header, "sycl::buffer &" => "T *") + + header = replace(header, "sycl::buffer &" => "Ta *") + header = replace(header, "sycl::buffer &" => "Tb *") + header = replace(header, "sycl::buffer &" => "Tc *") + header = replace(header, "sycl::buffer &" => "Td *") + header = replace(header, "sycl::buffer &" => "Treal *") + header = replace(header, "sycl::buffer &" => "Tres *") + header = replace(header, "sycl::buffer &" => "T *") + header = replace(header, "sycl::buffer &" => "T *") + end + + header = replace(header, "sycl::buffer &" => "float *") + header = replace(header, "sycl::buffer &" => "float *") + header = replace(header, "sycl::buffer &" => "double *") + header = replace(header, "sycl::buffer> &" => "float _Complex *") + header = replace(header, "sycl::buffer> &" => "float _Complex *") + header = replace(header, "sycl::buffer> &" => "double _Complex *") + header = replace(header, "sycl::buffer &" => "int32_t *") + header = replace(header, "sycl::buffer &" => "int64_t *") + + header = replace(header, "sycl::buffer &" => "float *") + header = replace(header, "sycl::buffer &" => "double *") + header = replace(header, "sycl::buffer, 1> &" => "float _Complex *") + header = replace(header, "sycl::buffer, 1> &" => "double _Complex *") + header = replace(header, "sycl::buffer &" => "int32_t *") + header = replace(header, "sycl::buffer &" => "int64_t *") + + header = replace(header, "template <>\n" => "") + header = replace(header, ">" => "") + header = replace(header, ">" => "") + header = replace(header, "" => "") + header = replace(header, "" => "") + + header = replace(header, "oneapi::mkl::transpose" => "onemklTranspose") + header = replace(header, "oneapi::mkl::uplo" => "onemklUplo") + header = replace(header, "oneapi::mkl::diag" => "onemklDiag") + header = replace(header, "oneapi::mkl::side" => "onemklSide") + header = replace(header, "oneapi::mkl::offset" => "onemklOffset") + header = 
replace(header, "oneapi::mkl::job" => "onemklJob") + header = replace(header, "oneapi::mkl::generate" => "onemklGenerate") + header = replace(header, "oneapi::mkl::compz" => "onemklCompz") + header = replace(header, "oneapi::mkl::direct" => "onemklDirect") + header = replace(header, "oneapi::mkl::storev" => "onemklStorev") + header = replace(header, "oneapi::mkl::rangev" => "onemklRangev") + header = replace(header, "oneapi::mkl::order" => "onemklOrder") + header = replace(header, "oneapi::mkl::jobsvd" => "onemklJobsvd") + header = replace(header, "oneapi::mkl::layout" => "onemklLayout") + header = replace(header, "oneapi::mkl::index" => "onemklIndex") + header = replace(header, "oneapi::mkl::property" => "onemklProperty") + + # Sanitize the header + header = replace(header, " \\" => "") + header = replace(header, "\n" => "") + header = replace(header, "DLL_EXPORT " => "") + header = replace(header, "const " => "") + for i = 1:20 + header = replace(header, " " => " ") + end + header = replace(header, "( " => "(") + header = replace(header, ", )" => ")") + occursin("voidgemm", header) && continue # Bug with SPARSE routine + + ind1 = findfirst(' ', header) + ind2 = findfirst('(', header) + name_routine = header[ind1+1:ind2-1] + !haskey(routines, name_routine) && (routines[name_routine] = 0) + routines[name_routine] += 1 + + # They use template for BLAS routines + list_parameters = () + list_types = [] + list_versions = String[] + if library == "blas" + occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))(T)", cpp_headers) && (list_parameters = ("T")) + occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))(T, Ts)", cpp_headers) && (list_parameters = ("T", "Ts")) + occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))(Ta, Tb, Tc, Ts)", cpp_headers) && (list_parameters = ("Ta", "Tb", "Tc", "Ts")) + occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))(T, Tres)", cpp_headers) && (list_parameters = ("T", "Tres")) + 
occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))(T, Treal)", cpp_headers) && (list_parameters = ("T", "Treal")) + occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))(T, Tc, Ts)", cpp_headers) && (list_parameters = ("T", "Tc", "Ts")) + occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))(T, Tc)", cpp_headers) && (list_parameters = ("T", "Tc")) + (list_parameters == ()) && @warn("Unable to determine the parametric parameters of $(name_routine).") + for (type, version) in [(("float",), "S"), + (("double",), "D"), + (("std::complex",), "C"), + (("std::complex",), "Z")] + if occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))($(type[1]))", cpp_headers) + push!(list_types, type) + push!(list_versions, version) + end + end + for (type, version) in [(("float","float"), "S"), + (("double","double"), "D"), + (("std::complex","float"), "CS"), + (("std::complex","double"), "ZD"), + (("std::complex","std::complex"), "C"), + (("std::complex","std::complex"), "Z")] + if occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))($(type[1]), $(type[2]))", cpp_headers) + push!(list_types, type) + push!(list_versions, version) + end + end + for (type, version) in [(("float","float","float"), "S"), + (("double","double","double"), "D"), + (("std::complex","float","float"), "CS"), + (("std::complex","float", "std::complex"), "C"), + (("std::complex","double","double"), "ZD"), + (("std::complex","double","std::complex"), "Z")] + if occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))($(type[1]), $(type[2]), $(type[3]))", cpp_headers) + push!(list_types, type) + push!(list_versions, version) + end + end + for (type, version) in [(("float","float","float","float"), "S"), + (("double","double","double","double"), "D"), + (("std::complex","std::complex","std::complex","std::complex"), "C"), + (("std::complex","std::complex","std::complex","std::complex"), "Z")] + if occursin("ONEMKL_DECLARE_BUF_$(uppercase(name_routine))($(type[1]), $(type[2]), $(type[3]), $(type[4]))", 
cpp_headers) + push!(list_types, type) + push!(list_versions, version) + end + end + end + version = 'X' + if library == "sparse" + version = occursin("int32_t", header) ? 'I' : version + version = occursin("int64_t", header) ? 'L' : version + end + version = occursin("double", header) ? 'D' : version + version = occursin("float", header) ? 'S' : version + version = occursin("float _Complex", header) ? 'C' : version + version = occursin("double _Complex", header) ? 'Z' : version + version = occursin("_scratchpad_size", header) ? 'W' : version + + if version == 'W' + # The version 'W' is used for routines with suffix "_scratchpad_size" + versions = ('S', 'D', 'C', 'Z') + mapreduce(x -> startswith(name_routine, x), |, ["or", "sy"]) && !startswith(name_routine, "sytrf") && (versions = ('S', 'D')) + mapreduce(x -> startswith(name_routine, x), |, ["un", "he"]) && (versions = ('C', 'Z')) + routines[name_routine] = routines[name_routine] - 1 + length(versions) + for blas_version in versions + copy_header = header + copy_header = replace(copy_header, "typename fp_type::value_type" => version_types_header[blas_version]) + copy_header = replace(copy_header, "fp_type" => version_types_header[blas_version]) + copy_header = replace(copy_header, name_routine => "onemkl$(blas_version)$(name_routine)") + copy_header = replace(copy_header, "void onemkl" => "int onemkl") + push!(signatures, (copy_header, name_routine, blas_version, template)) + end + else + if isempty(list_versions) + if name_routine == "set_csr_data" + occursin("int32_t", header) && (version = "I" * version) + occursin("int64_t", header) && (version = "L" * version) + end + header = replace(header, name_routine => "onemkl$(version)$(name_routine)") + header = replace(header, "void onemkl" => "int onemkl") + if library == "sparse" + if occursin("std::complex", header) + (version == 'C') && (header = replace(header, "std::complex " => "float _Complex ")) + (version == 'Z') && (header = replace(header, "std::complex " 
=> "double _Complex ")) + end + header = replace(header, "transpose " => "onemklTranspose ") + header = replace(header, "uplo " => "onemklUplo ") + header = replace(header, "diag " => "onemklDiag ") + header = replace(header, "side " => "onemklSide ") + header = replace(header, "layout " => "onemklLayout ") + header = replace(header, "index_base " => "onemklIndex ") + header = replace(header, "property " => "onemklProperty ") + header = replace(header, name_routine => "sparse_" * name_routine) + end + push!(signatures, (header, name_routine, version, template)) + else + + n = length(list_parameters) + for (i, type) in enumerate(list_types) + version = list_versions[i] + version = (name_routine ∈ ("her", "herk", "her2k", "rotg", "nrm2", "asum", "hpr")) && (version == "CS") ? "C" : version + version = (name_routine ∈ ("her", "herk", "her2k", "rotg", "nrm2", "asum", "hpr")) && (version == "ZD") ? "Z" : version + + copy_header = header + for (j, parameter) in enumerate(reverse(list_parameters)) + k = n-j+1 + copy_header = replace(copy_header, parameter => type[k]) + end + copy_header = replace(copy_header, "transpose " => "onemklTranspose ") + copy_header = replace(copy_header, "uplo " => "onemklUplo ") + copy_header = replace(copy_header, "diag " => "onemklDiag ") + copy_header = replace(copy_header, "side " => "onemklSide ") + copy_header = replace(copy_header, "std::complex" => "float _Complex") + copy_header = replace(copy_header, "std::complex" => "double _Complex") + copy_header = replace(copy_header, name_routine => "onemkl$(version)$(name_routine)") + copy_header = replace(copy_header, "void onemkl" => "int onemkl") + push!(signatures, (copy_header, name_routine, version, template)) + end + end + end + end + + # Check the number of methods + blacklist = String[] + for name_routine in keys(routines) + if (routines[name_routine] > 5) && (library != "sparse") + @warn "The routine $(name_routine) has more than 4 methods and will not be interfaced." 
+ push!(blacklist, name_routine) + end + end + + path_oneapi_headers = joinpath(@__DIR__, output) + oneapi_headers = open(path_oneapi_headers, "w") + # write(oneapi_headers, header) + for (header, name_routine, version, template) in signatures + # Blacklist + (name_routine in blacklist) && continue + + # Don't wrap just a "_scratchpad_size" + name_routine2 = replace(name_routine, "_scratchpad_size" => "") + !haskey(routines, name_routine2) && continue + push!(signatures2, (header, name_routine, version, template)) + + pos = findfirst('(', header) + fun = split(header, " ") + len = 0 + for (i, part) in enumerate(fun) + len += length(part) + if len ≤ 90 + (i ≠ 1) && write(oneapi_headers, " ") + write(oneapi_headers, part) + else + write(oneapi_headers, "\n") + for i = 1:pos + write(oneapi_headers, " ") + end + write(oneapi_headers, part) + len = pos + length(part) + end + end + write(oneapi_headers, ";\n\n") + end + close(oneapi_headers) + return signatures2 +end + +function generate_cpp(library::String, filename::String, output::String) + signatures = generate_headers(library, filename, output) + path_oneapi_cpp = joinpath(@__DIR__, output) + oneapi_cpp = open(path_oneapi_cpp, "w") + for (header, name, version, template) in signatures + parameters = split(header, "(")[2] + parameters = split(parameters, ")")[1] + parameters = replace(parameters, "syclQueue_t device_queue" => "device_queue->val") + parameters = replace(parameters, "int32_t " => "") + parameters = replace(parameters, "int64_t " => "") + parameters = replace(parameters, "matrix_handle_t " => "") + parameters = replace(parameters, "float _Complex *" => "reinterpret_cast *>") + parameters = replace(parameters, "double _Complex *" => "reinterpret_cast *>") + parameters = replace(parameters, "float _Complex " => "static_cast >") + parameters = replace(parameters, "double _Complex " => "static_cast >") + parameters = replace(parameters, ", float *" => ", ") + parameters = replace(parameters, ", double *" => 
", ") + parameters = replace(parameters, ", float " => ", ") + parameters = replace(parameters, ", double " => ", ") + parameters = replace(parameters, ", *" => ", ") + + for type in ("onemklTranspose", "onemklSide", "onemklUplo", "onemklDiag", "onemklGenerate", + "onemklJob", "onemklJobsvd", "onemklCompz", "onemklRangev", "onemklIndex", "onemklProperty") + parameters = replace(parameters, Regex("$type ([a-z_]+),") => SubstitutionString("convert(\\1),")) + end + parameters = replace(parameters, r" >([a-z]+)" => s" >(\1)") + parameters = replace(parameters, r" \*>([a-z]+)" => s"*>(\1)") + + variant = "" + if library == "blas" + variant = "column_major::" + end + + write(oneapi_cpp, "extern \"C\" $header {\n") + if template + type = version_types[version] + !occursin("scratchpad_size", name) && write(oneapi_cpp, " auto status = oneapi::mkl::$library::$variant$name<$type>($parameters);\n") + occursin("scratchpad_size", name) && write(oneapi_cpp, " int64_t scratchpad_size = oneapi::mkl::$library::$variant$name<$type>($parameters);\n") + else + write(oneapi_cpp, " auto status = oneapi::mkl::$library::$variant$name($parameters);\n") + end + if occursin("scratchpad_size", name) + write(oneapi_cpp, " return scratchpad_size;\n") + else + write(oneapi_cpp, " __FORCE_MKL_FLUSH__(status);\n") + write(oneapi_cpp, " return 0;\n") + end + write(oneapi_cpp, "}") + write(oneapi_cpp, "\n\n") + end + close(oneapi_cpp) +end + +generate_headers("lapack", lapack, "onemkl_lapack.h") +generate_headers("blas", blas, "onemkl_blas.h") +generate_headers("sparse", sparse, "onemkl_sparse.h") + +io = open("src/onemkl.h", "w") +headers_prologue = read("onemkl_prologue.h", String) +write(io, headers_prologue) +headers_blas = read("onemkl_blas.h", String) +write(io, "// BLAS\n") +write(io, headers_blas) +headers_lapack = read("onemkl_lapack.h", String) +write(io, "// LAPACK\n") +write(io, headers_lapack) +headers_sparse = read("onemkl_sparse.h", String) +write(io, "// SPARSE\n") +write(io, 
headers_sparse) +headers_epilogue = read("onemkl_epilogue.h", String) +write(io, headers_epilogue) +close(io) + +generate_cpp("lapack", lapack, "onemkl_lapack.cpp") +generate_cpp("blas", blas, "onemkl_blas.cpp") +generate_cpp("sparse", sparse, "onemkl_sparse.cpp") + +io = open("src/onemkl.cpp", "w") +cpp_prologue = read("onemkl_prologue.cpp", String) +write(io, cpp_prologue) +cpp_blas = read("onemkl_blas.cpp", String) +write(io, "// BLAS\n") +write(io, cpp_blas) +cpp_lapack = read("onemkl_lapack.cpp", String) +write(io, "// LAPACK\n") +write(io, cpp_lapack) +cpp_sparse = read("onemkl_sparse.cpp", String) +write(io, "// SPARSE\n") +write(io, cpp_sparse) +cpp_epilogue = read("onemkl_epilogue.cpp", String) +write(io, cpp_epilogue) +close(io) diff --git a/deps/onemkl_epilogue.cpp b/deps/onemkl_epilogue.cpp new file mode 100644 index 00000000..469f920e --- /dev/null +++ b/deps/onemkl_epilogue.cpp @@ -0,0 +1,17 @@ +// other + +// oneMKL keeps a cache of SYCL queues and tries to destroy them when unloading the library. +// that is incompatible with oneAPI.jl destroying queues before that, so expose a function +// to manually wipe the device cache when we're destroying queues. + +namespace oneapi { +namespace mkl { +namespace gpu { +int clean_gpu_caches(); +} +} +} + +extern "C" void onemklDestroy() { + oneapi::mkl::gpu::clean_gpu_caches(); +} diff --git a/deps/onemkl_epilogue.h b/deps/onemkl_epilogue.h new file mode 100644 index 00000000..f78fd3d7 --- /dev/null +++ b/deps/onemkl_epilogue.h @@ -0,0 +1,4 @@ +void onemklDestroy(void); +#ifdef __cplusplus +} +#endif diff --git a/deps/onemkl_prologue.cpp b/deps/onemkl_prologue.cpp new file mode 100644 index 00000000..1f175a44 --- /dev/null +++ b/deps/onemkl_prologue.cpp @@ -0,0 +1,586 @@ +#include "onemkl.h" +#include "sycl.hpp" +#include +#include +#include +#include + +// This is a workaround to flush MKL submissions into Level-zero queue, using +// unspecified but guaranteed behavior of intel-sycl runtime. 
Once SYCL standard +// committee approves sycl::queue::flush() we will change the macro to use that +#define __FORCE_MKL_FLUSH__(cmd) \ + sycl::get_native(cmd) + +oneapi::mkl::transpose convert(onemklTranspose val) { + switch (val) { + case ONEMKL_TRANSPOSE_NONTRANS: + return oneapi::mkl::transpose::nontrans; + case ONEMKL_TRANSPOSE_TRANS: + return oneapi::mkl::transpose::trans; + case ONEMLK_TRANSPOSE_CONJTRANS: + return oneapi::mkl::transpose::conjtrans; + } +} + +oneapi::mkl::uplo convert(onemklUplo val) { + switch(val) { + case ONEMKL_UPLO_UPPER: + return oneapi::mkl::uplo::upper; + case ONEMKL_UPLO_LOWER: + return oneapi::mkl::uplo::lower; + } +} + +oneapi::mkl::diag convert(onemklDiag val) { + switch(val) { + case ONEMKL_DIAG_NONUNIT: + return oneapi::mkl::diag::nonunit; + case ONEMKL_DIAG_UNIT: + return oneapi::mkl::diag::unit; + } +} + +oneapi::mkl::side convert(onemklSide val) { + switch (val) { + case ONEMKL_SIDE_LEFT: + return oneapi::mkl::side::left; + case ONEMKL_SIDE_RIGHT: + return oneapi::mkl::side::right; + } +} + +oneapi::mkl::offset convert(onemklOffset val) { + switch (val) { + case ONEMKL_OFFSET_ROW: + return oneapi::mkl::offset::row; + case ONEMKL_OFFSET_COL: + return oneapi::mkl::offset::column; + case ONEMKL_OFFSET_FIX: + return oneapi::mkl::offset::fix; + } +} + +oneapi::mkl::job convert(onemklJob val) { + switch (val) { + case ONEMKL_JOB_N: + return oneapi::mkl::job::N; + case ONEMKL_JOB_V: + return oneapi::mkl::job::V; + case ONEMKL_JOB_U: + return oneapi::mkl::job::U; + case ONEMKL_JOB_A: + return oneapi::mkl::job::A; + case ONEMKL_JOB_S: + return oneapi::mkl::job::S; + case ONEMKL_JOB_O: + return oneapi::mkl::job::O; + } +} + +oneapi::mkl::generate convert(onemklGenerate val) { + switch (val) { + case ONEMKL_GENERATE_Q: + return oneapi::mkl::generate::Q; + case ONEMKL_GENERATE_P: + return oneapi::mkl::generate::P; + case ONEMKL_GENERATE_N: + return oneapi::mkl::generate::N; + case ONEMKL_GENERATE_V: + return oneapi::mkl::generate::V; + 
} +} + +oneapi::mkl::compz convert(onemklCompz val) { + switch (val) { + case ONEMKL_COMPZ_N: + return oneapi::mkl::compz::N; + case ONEMKL_COMPZ_V: + return oneapi::mkl::compz::V; + case ONEMKL_COMPZ_I: + return oneapi::mkl::compz::I; + } +} + +oneapi::mkl::direct convert(onemklDirect val) { + switch (val) { + case ONEMKL_DIRECT_F: + return oneapi::mkl::direct::F; + case ONEMKL_DIRECT_B: + return oneapi::mkl::direct::B; + } +} + +oneapi::mkl::storev convert(onemklStorev val) { + switch (val) { + case ONEMKL_STOREV_C: + return oneapi::mkl::storev::C; + case ONEMKL_STOREV_R: + return oneapi::mkl::storev::R; + } +} + +oneapi::mkl::rangev convert(onemklRangev val) { + switch (val) { + case ONEMKL_RANGEV_A: + return oneapi::mkl::rangev::A; + case ONEMKL_RANGEV_V: + return oneapi::mkl::rangev::V; + case ONEMKL_RANGEV_I: + return oneapi::mkl::rangev::I; + } +} + +oneapi::mkl::order convert(onemklOrder val) { + switch (val) { + case ONEMKL_ORDER_B: + return oneapi::mkl::order::B; + case ONEMKL_ORDER_E: + return oneapi::mkl::order::E; + } +} + +oneapi::mkl::jobsvd convert(onemklJobsvd val) { + switch (val) { + case ONEMKL_JOBSVD_N: + return oneapi::mkl::jobsvd::N; + case ONEMKL_JOBSVD_A: + return oneapi::mkl::jobsvd::A; + case ONEMKL_JOBSVD_O: + return oneapi::mkl::jobsvd::O; + case ONEMKL_JOBSVD_S: + return oneapi::mkl::jobsvd::S; + } +} + +oneapi::mkl::layout convert(onemklLayout val) { + switch (val) { + case ONEMKL_LAYOUT_ROW: + return oneapi::mkl::layout::row_major; + case ONEMKL_LAYOUT_COL: + return oneapi::mkl::layout::col_major; + } +} + +oneapi::mkl::index_base convert(onemklIndex val) { + switch (val) { + case ONEMKL_INDEX_ZERO: + return oneapi::mkl::index_base::zero; + case ONEMKL_INDEX_ONE: + return oneapi::mkl::index_base::one; + } +} + +oneapi::mkl::sparse::property convert(onemklProperty val) { + switch (val) { + case ONEMKL_PROPERTY_SYMMETRIC: + return oneapi::mkl::sparse::property::symmetric; + case ONEMKL_PROPERTY_SORTED: + return 
oneapi::mkl::sparse::property::sorted; + } +} + +// gemm +// https://spec.oneapi.io/versions/1.0-rev-1/elements/oneMKL/source/domains/blas/gemm.html +class gemmBatchInfo { + public: + oneapi::mkl::transpose *m_transa = nullptr; + oneapi::mkl::transpose *m_transb = nullptr; + sycl::device m_device; + sycl::context m_context; + oneapi::mkl::transpose m_ta; + oneapi::mkl::transpose m_tb; + // Constructor + gemmBatchInfo(syclQueue_t device_queue, + int64_t group_count, + onemklTranspose transa, + onemklTranspose transb) { + // Get device and context info from device_queue + auto main_queue = device_queue->val; + m_device = main_queue.get_device(); + m_context = main_queue.get_context(); + + // Allocate transpose shared buffers + try { + m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), + m_device, m_context); + m_transb = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), + m_device, m_context); + m_ta = convert(transa); + m_tb = convert(transb); + } catch(const std::bad_alloc& e) { + std::cerr << "Error: " << e.what() << std::endl; + } + + // Initialize + for (int i = 0; i < group_count; i++) { + m_transa[i] = m_ta; + m_transb[i] = m_tb; + } + }; + + // Destructor + ~gemmBatchInfo() { + free(m_transa, m_context); + free(m_transb, m_context); + } +}; + +class trsmBatchInfo { + public: + oneapi::mkl::transpose *m_transa = nullptr; + oneapi::mkl::side *m_leftright = nullptr; + oneapi::mkl::uplo *m_upperlower = nullptr; + oneapi::mkl::diag *m_unitdiag = nullptr; + sycl::device m_device; + sycl::context m_context; + oneapi::mkl::transpose m_ta; + oneapi::mkl::side m_side; + oneapi::mkl::uplo m_uplo; + oneapi::mkl::diag m_diag; + + // Constructor + trsmBatchInfo(syclQueue_t device_queue, + onemklSide left_right, + onemklUplo upper_lower, + onemklTranspose transa, + onemklDiag unit_diag, + int64_t group_count) { + // Get device and context info from device_queue + auto main_queue = 
device_queue->val; + m_device = main_queue.get_device(); + m_context = main_queue.get_context(); + try { + // Allocate uniform arrays of group_size and transpose_a, transpose_b supporting oneMKL + // gemm_batch API + m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), + m_device, m_context); + m_leftright = (oneapi::mkl::side *) malloc_shared(group_count * sizeof(oneapi::mkl::side), + m_device, m_context); + m_upperlower = (oneapi::mkl::uplo *) malloc_shared(group_count * sizeof(oneapi::mkl::uplo), + m_device, m_context); + m_unitdiag = (oneapi::mkl::diag *) malloc_shared(group_count * sizeof(oneapi::mkl::diag), + m_device, m_context); + m_ta = convert(transa); + m_side = convert(left_right); + m_uplo = convert(upper_lower); + m_diag = convert(unit_diag); + } catch(const std::bad_alloc& e) { + std::cerr << "Error: " << e.what() << std::endl; + } + // Initialize + for (int i = 0; i < group_count; i++) { + m_transa[i] = m_ta; + m_leftright[i] = m_side; + m_upperlower[i] = m_uplo; + m_unitdiag[i] = m_diag; + } + }; + + // Destructor + ~trsmBatchInfo() { + free(m_transa, m_context); + free(m_upperlower, m_context); + free(m_unitdiag, m_context); + free(m_leftright, m_context); + } +}; + +extern "C" int onemklHgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, uint16_t *alpha, + const short **a, int64_t *lda, const short **b, + int64_t *ldb, uint16_t *beta, short **c, + int64_t *ldc, int64_t group_count, int64_t *group_size) { + gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, + &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], + m, n, k, reinterpret_cast(alpha), + reinterpret_cast(&a[0]), lda, + reinterpret_cast(&b[0]), ldb, + reinterpret_cast(beta), reinterpret_cast(&c[0]), + ldc, group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + 
+extern "C" int onemklSgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, float *alpha, + const float **a, int64_t *lda, const float **b, + int64_t *ldb, float *beta, float **c, + int64_t *ldc, int64_t group_count, int64_t *group_size) { + gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, + &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], + m, n, k, alpha, + (const float **)&a[0], lda, + (const float **)&b[0], ldb, + beta, &c[0], ldc, + group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, double *alpha, + const double **a, int64_t *lda, const double **b, + int64_t *ldb, double *beta, double **c, + int64_t *ldc, int64_t group_count, int64_t *group_size) { + gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, + &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], + m, n, k, alpha, + (const double **)&a[0], lda, + (const double **)&b[0], ldb, + beta, &c[0], ldc, + group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, float _Complex *alpha, + const float _Complex **a, int64_t *lda, + const float _Complex **b, + int64_t *ldb, float _Complex *beta, float _Complex **c, + int64_t *ldc, int64_t group_count, int64_t *group_size) { + gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, + &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], + m, n, k, reinterpret_cast *>(alpha), + reinterpret_cast **>(&a[0]), 
+ lda, + reinterpret_cast **>(&b[0]), + ldb, + reinterpret_cast *>(beta), + reinterpret_cast **>(&c[0]), ldc, + group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, double _Complex *alpha, + const double _Complex **a, int64_t *lda, + const double _Complex **b, + int64_t *ldb, double _Complex *beta, + double _Complex **c, + int64_t *ldc, int64_t group_count, int64_t *group_size) { + gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, + &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], + m, n, k, reinterpret_cast *>(alpha), + reinterpret_cast **>(&a[0]), + lda, + reinterpret_cast **>(&b[0]), + ldb, + reinterpret_cast *>(beta), + reinterpret_cast **>(&c[0]), ldc, + group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha, + const float **a, int64_t *lda, float **b, int64_t *ldb, + int64_t group_count, int64_t *group_size) { + trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, + unit_diag, group_count); + + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, + &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], + &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], + m, n, alpha, (const float **)&a[0], lda, + &b[0], ldb, group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + double *alpha, const double **a, int64_t *lda, + double **b, int64_t *ldb, int64_t 
group_count, + int64_t *group_size) { + trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, + unit_diag, group_count); + + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, + &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], + &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], + m, n, alpha, (const double **)&a[0], lda, &b[0], + ldb, group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + float _Complex *alpha, const float _Complex **a, + int64_t *lda, float _Complex **b, int64_t *ldb, + int64_t group_count, int64_t *group_size) { + trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, + unit_diag, group_count); + + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, + &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], + &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], + m, n, reinterpret_cast *>(alpha), + reinterpret_cast **>(&a[0]), + lda, reinterpret_cast **>(&b[0]), + ldb, group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + double _Complex *alpha, const double _Complex **a, + int64_t *lda, double _Complex **b, int64_t *ldb, + int64_t group_count, int64_t *group_size) { + trsmBatchInfo trsmInfo(device_queue, left_right, + upper_lower, transa, unit_diag, group_count); + + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, + &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], + &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], + m, n, reinterpret_cast *>(alpha), + reinterpret_cast **>(&a[0]), + lda, reinterpret_cast **>(&b[0]), + ldb, group_count, 
group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklHgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + uint16_t alpha, const short *a, int64_t lda, int64_t stridea, + const short *b, int64_t ldb, int64_t strideb, uint16_t beta, + short *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), + convert(transb), m, n, k, sycl::bit_cast(alpha), + reinterpret_cast(a), lda, stridea, + reinterpret_cast(b), ldb, strideb, + sycl::bit_cast(beta), + reinterpret_cast(c), ldc, stridec, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + float alpha, const float *a, int64_t lda, int64_t stridea, + const float *b, int64_t ldb, int64_t strideb, float beta, + float *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), + convert(transb), m, n, k, alpha, a, lda, stridea, + b, ldb, strideb, beta, c, ldc, stridec, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + double alpha, const double *a, int64_t lda, int64_t stridea, + const double *b, int64_t ldb, int64_t strideb, double beta, + double *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), + convert(transb), m, n, k, alpha, a, lda, stridea, + b, ldb, strideb, beta, c, ldc, stridec, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgemmBatchStrided(syclQueue_t device_queue, 
onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + float _Complex alpha, const float _Complex *a, int64_t lda, int64_t stridea, + const float _Complex *b, int64_t ldb, int64_t strideb, float _Complex beta, + float _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), + convert(transb), m, n, k, alpha, + reinterpret_cast *>(a), + lda, stridea, + reinterpret_cast *>(b), + ldb, strideb, beta, + reinterpret_cast *>(c), + ldc, stridec, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + double _Complex alpha, const double _Complex *a, int64_t lda, int64_t stridea, + const double _Complex *b, int64_t ldb, int64_t strideb, double _Complex beta, + double _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), + convert(transb), m, n, k, alpha, + reinterpret_cast *>(a), + lda, stridea, + reinterpret_cast *>(b), + ldb, strideb, beta, + reinterpret_cast *>(c), + ldc, stridec, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklHgemm(syclQueue_t device_queue, onemklTranspose transA, + onemklTranspose transB, int64_t m, int64_t n, + int64_t k, uint16_t alpha, const short *A, int64_t lda, + const short *B, int64_t ldb, uint16_t beta, short *C, + int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transA), + convert(transB), m, n, k, sycl::bit_cast(alpha), + reinterpret_cast(A), lda, + reinterpret_cast(B), ldb, + sycl::bit_cast(beta), + reinterpret_cast(C), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklHdot(syclQueue_t device_queue, int64_t n, + const short *x, int64_t incx, 
const short *y, + int64_t incy, short *result) { + auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, + reinterpret_cast(x), + incx, reinterpret_cast(y), + incy, reinterpret_cast(result)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklHaxpy(syclQueue_t device_queue, int64_t n, uint16_t alpha, + const short *x, std::int64_t incx, short *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, + sycl::bit_cast(alpha), + reinterpret_cast(x), + incx, reinterpret_cast(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklHscal(syclQueue_t device_queue, int64_t n, uint16_t alpha, + short *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, sycl::bit_cast(alpha), + reinterpret_cast(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklHnrm2(syclQueue_t device_queue, int64_t n, const short *x, + int64_t incx, short *result) { + auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, + reinterpret_cast(x), incx, + reinterpret_cast(result)); + __FORCE_MKL_FLUSH__(status); + return 0; +} diff --git a/deps/onemkl_prologue.h b/deps/onemkl_prologue.h new file mode 100644 index 00000000..eae1c82d --- /dev/null +++ b/deps/onemkl_prologue.h @@ -0,0 +1,225 @@ +#pragma once + +#include "sycl.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// BLAS types +typedef enum { + ONEMKL_TRANSPOSE_NONTRANS, + ONEMKL_TRANSPOSE_TRANS, + ONEMLK_TRANSPOSE_CONJTRANS +} onemklTranspose; + +typedef enum { + ONEMKL_UPLO_UPPER, + ONEMKL_UPLO_LOWER +} onemklUplo; + +typedef enum { + ONEMKL_DIAG_NONUNIT, + ONEMKL_DIAG_UNIT + } onemklDiag; + +typedef enum { + ONEMKL_SIDE_LEFT, + ONEMKL_SIDE_RIGHT +} onemklSide; + +typedef enum { + ONEMKL_OFFSET_ROW, + ONEMKL_OFFSET_COL, + ONEMKL_OFFSET_FIX, +} onemklOffset; + +// LAPACK types +typedef enum { + ONEMKL_JOB_N, + ONEMKL_JOB_V, + 
ONEMKL_JOB_U, + ONEMKL_JOB_A, + ONEMKL_JOB_S, + ONEMKL_JOB_O +} onemklJob; + +typedef enum { + ONEMKL_GENERATE_Q, + ONEMKL_GENERATE_P, + ONEMKL_GENERATE_N, + ONEMKL_GENERATE_V +} onemklGenerate; + +typedef enum { + ONEMKL_COMPZ_N, + ONEMKL_COMPZ_V, + ONEMKL_COMPZ_I +} onemklCompz; + +typedef enum { + ONEMKL_DIRECT_F, + ONEMKL_DIRECT_B +} onemklDirect; + +typedef enum { + ONEMKL_STOREV_C, + ONEMKL_STOREV_R +} onemklStorev; + +typedef enum { + ONEMKL_RANGEV_A, + ONEMKL_RANGEV_V, + ONEMKL_RANGEV_I +} onemklRangev; + +typedef enum { + ONEMKL_ORDER_B, + ONEMKL_ORDER_E +} onemklOrder; + +typedef enum { + ONEMKL_JOBSVD_N, + ONEMKL_JOBSVD_A, + ONEMKL_JOBSVD_O, + ONEMKL_JOBSVD_S +} onemklJobsvd; + +typedef enum { + ONEMKL_LAYOUT_ROW, + ONEMKL_LAYOUT_COL, +} onemklLayout; + +typedef enum { + ONEMKL_INDEX_ZERO, + ONEMKL_INDEX_ONE, +} onemklIndex; + +// SPARSE types +typedef enum { + ONEMKL_PROPERTY_SYMMETRIC, + ONEMKL_PROPERTY_SORTED, +} onemklProperty; + +// I need help :( +typedef struct MatrixHandle_st *MatrixHandle_t; + +int onemklHgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, uint16_t *alpha, + const short **a, int64_t *lda, const short **b, + int64_t *ldb, uint16_t *beta, short **c, + int64_t *ldc, int64_t group_count, int64_t *group_size); + +int onemklSgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, float *alpha, + const float **a, int64_t *lda, const float **b, + int64_t *ldb, float *beta, float **c, + int64_t *ldc, int64_t group_count, int64_t *group_size); + +int onemklDgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, double *alpha, + const double **a, int64_t *lda, const double **b, + int64_t *ldb, double *beta, double **c, + int64_t *ldc, int64_t group_count, int64_t *group_size); + +int onemklCgemmBatched(syclQueue_t device_queue, 
onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, float _Complex *alpha, + const float _Complex **a, int64_t *lda, + const float _Complex **b, + int64_t *ldb, float _Complex *beta, + float _Complex **c, int64_t *ldc, + int64_t group_count, int64_t *group_size); + +int onemklZgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, double _Complex *alpha, + const double _Complex **a, int64_t *lda, + const double _Complex **b, + int64_t *ldb, double _Complex *beta, + double _Complex **c, int64_t *ldc, + int64_t group_count, int64_t *group_size); + +int onemklStrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + float *alpha, const float **a, int64_t *lda, + float **b, int64_t *ldb, int64_t group_count, + int64_t *group_size); + +int onemklDtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + double *alpha, const double **a, int64_t *lda, + double **b, int64_t *ldb, int64_t group_count, + int64_t *group_size); + +int onemklCtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + float _Complex *alpha, const float _Complex **a, int64_t *lda, + float _Complex **b, int64_t *ldb, int64_t group_count, + int64_t *group_size); + +int onemklZtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + double _Complex *alpha, const double _Complex **a, int64_t *lda, + double _Complex **b, int64_t *ldb, int64_t group_count, + int64_t *group_size); + +int onemklHgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t 
m, int64_t n, int64_t k, + uint16_t alpha, const short *a, int64_t lda, int64_t stridea, + const short *b, int64_t ldb, int64_t strideb, uint16_t beta, + short *c, int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklSgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + float alpha, const float *a, int64_t lda, int64_t stridea, + const float *b, int64_t ldb, int64_t strideb, float beta, + float *c, int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklDgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + double alpha, const double *a, int64_t lda, int64_t stridea, + const double *b, int64_t ldb, int64_t strideb, double beta, + double *c, int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklCgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + float _Complex alpha, const float _Complex *a, int64_t lda, + int64_t stridea, const float _Complex *b, int64_t ldb, + int64_t strideb, float _Complex beta, float _Complex *c, + int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklZgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + double _Complex alpha, const double _Complex *a, int64_t lda, + int64_t stridea, const double _Complex *b, int64_t ldb, + int64_t strideb, double _Complex beta, double _Complex *c, + int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklHgemm(syclQueue_t device_queue, onemklTranspose transA, + onemklTranspose transB, int64_t m, int64_t n, + int64_t k, uint16_t alpha, const short *A, int64_t lda, + const short *B, int64_t ldb, uint16_t beta, short *C, + int64_t ldc); + +int onemklHaxpy(syclQueue_t device_queue, int64_t n, uint16_t alpha, const short *x, + int64_t incx, short *y, int64_t incy); + +int 
onemklHscal(syclQueue_t device_queue, int64_t n, uint16_t alpha, + short *x, int64_t incx); + +int onemklHnrm2(syclQueue_t device_queue, int64_t n, const short *x, + int64_t incx, short *result); + +int onemklHdot(syclQueue_t device_queue, int64_t n, const short *x, + int64_t incx, const short *y, int64_t incy, short *result); diff --git a/deps/src/onemkl.cpp b/deps/src/onemkl.cpp index 964a4a50..51404a16 100644 --- a/deps/src/onemkl.cpp +++ b/deps/src/onemkl.cpp @@ -11,10 +11,6 @@ #define __FORCE_MKL_FLUSH__(cmd) \ sycl::get_native(cmd) -// gemm - -// https://spec.oneapi.io/versions/1.0-rev-1/elements/oneMKL/source/domains/blas/gemm.html - oneapi::mkl::transpose convert(onemklTranspose val) { switch (val) { case ONEMKL_TRANSPOSE_NONTRANS: @@ -53,6 +49,138 @@ oneapi::mkl::side convert(onemklSide val) { } } +oneapi::mkl::offset convert(onemklOffset val) { + switch (val) { + case ONEMKL_OFFSET_ROW: + return oneapi::mkl::offset::row; + case ONEMKL_OFFSET_COL: + return oneapi::mkl::offset::column; + case ONEMKL_OFFSET_FIX: + return oneapi::mkl::offset::fix; + } +} + +oneapi::mkl::job convert(onemklJob val) { + switch (val) { + case ONEMKL_JOB_N: + return oneapi::mkl::job::N; + case ONEMKL_JOB_V: + return oneapi::mkl::job::V; + case ONEMKL_JOB_U: + return oneapi::mkl::job::U; + case ONEMKL_JOB_A: + return oneapi::mkl::job::A; + case ONEMKL_JOB_S: + return oneapi::mkl::job::S; + case ONEMKL_JOB_O: + return oneapi::mkl::job::O; + } +} + +oneapi::mkl::generate convert(onemklGenerate val) { + switch (val) { + case ONEMKL_GENERATE_Q: + return oneapi::mkl::generate::Q; + case ONEMKL_GENERATE_P: + return oneapi::mkl::generate::P; + case ONEMKL_GENERATE_N: + return oneapi::mkl::generate::N; + case ONEMKL_GENERATE_V: + return oneapi::mkl::generate::V; + } +} + +oneapi::mkl::compz convert(onemklCompz val) { + switch (val) { + case ONEMKL_COMPZ_N: + return oneapi::mkl::compz::N; + case ONEMKL_COMPZ_V: + return oneapi::mkl::compz::V; + case ONEMKL_COMPZ_I: + return 
oneapi::mkl::compz::I; + } +} + +oneapi::mkl::direct convert(onemklDirect val) { + switch (val) { + case ONEMKL_DIRECT_F: + return oneapi::mkl::direct::F; + case ONEMKL_DIRECT_B: + return oneapi::mkl::direct::B; + } +} + +oneapi::mkl::storev convert(onemklStorev val) { + switch (val) { + case ONEMKL_STOREV_C: + return oneapi::mkl::storev::C; + case ONEMKL_STOREV_R: + return oneapi::mkl::storev::R; + } +} + +oneapi::mkl::rangev convert(onemklRangev val) { + switch (val) { + case ONEMKL_RANGEV_A: + return oneapi::mkl::rangev::A; + case ONEMKL_RANGEV_V: + return oneapi::mkl::rangev::V; + case ONEMKL_RANGEV_I: + return oneapi::mkl::rangev::I; + } +} + +oneapi::mkl::order convert(onemklOrder val) { + switch (val) { + case ONEMKL_ORDER_B: + return oneapi::mkl::order::B; + case ONEMKL_ORDER_E: + return oneapi::mkl::order::E; + } +} + +oneapi::mkl::jobsvd convert(onemklJobsvd val) { + switch (val) { + case ONEMKL_JOBSVD_N: + return oneapi::mkl::jobsvd::N; + case ONEMKL_JOBSVD_A: + return oneapi::mkl::jobsvd::A; + case ONEMKL_JOBSVD_O: + return oneapi::mkl::jobsvd::O; + case ONEMKL_JOBSVD_S: + return oneapi::mkl::jobsvd::S; + } +} + +oneapi::mkl::layout convert(onemklLayout val) { + switch (val) { + case ONEMKL_LAYOUT_ROW: + return oneapi::mkl::layout::row_major; + case ONEMKL_LAYOUT_COL: + return oneapi::mkl::layout::col_major; + } +} + +oneapi::mkl::index_base convert(onemklIndex val) { + switch (val) { + case ONEMKL_INDEX_ZERO: + return oneapi::mkl::index_base::zero; + case ONEMKL_INDEX_ONE: + return oneapi::mkl::index_base::one; + } +} + +oneapi::mkl::sparse::property convert(onemklProperty val) { + switch (val) { + case ONEMKL_PROPERTY_SYMMETRIC: + return oneapi::mkl::sparse::property::symmetric; + case ONEMKL_PROPERTY_SORTED: + return oneapi::mkl::sparse::property::sorted; + } +} + +// gemm +// https://spec.oneapi.io/versions/1.0-rev-1/elements/oneMKL/source/domains/blas/gemm.html class gemmBatchInfo { public: oneapi::mkl::transpose *m_transa = nullptr; @@ -157,79 
+285,7 @@ class trsmBatchInfo { } }; - -extern "C" int onemklHgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, - int64_t k, uint16_t alpha, const short *A, int64_t lda, - const short *B, int64_t ldb, uint16_t beta, short *C, - int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transA), - convert(transB), m, n, k, sycl::bit_cast(alpha), - reinterpret_cast(A), lda, - reinterpret_cast(B), ldb, - sycl::bit_cast(beta), - reinterpret_cast(C), ldc); - __FORCE_MKL_FLUSH__(status); - return 0; -} - -extern "C" int onemklSgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, - int64_t k, float alpha, const float *A, int64_t lda, - const float *B, int64_t ldb, float beta, float *C, - int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transA), - convert(transB), m, n, k, alpha, A, - lda, B, ldb, beta, C, ldc); - __FORCE_MKL_FLUSH__(status); - return 0; -} - -extern "C" int onemklDgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, - int64_t k, double alpha, const double *A, - int64_t lda, const double *B, int64_t ldb, - double beta, double *C, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transA), - convert(transB), m, n, k, alpha, A, - lda, B, ldb, beta, C, ldc); - __FORCE_MKL_FLUSH__(status); - return 0; -} - -extern "C" int onemklCgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, - int64_t k, float _Complex alpha, - const float _Complex *A, int64_t lda, - const float _Complex *B, int64_t ldb, - float _Complex beta, float _Complex *C, - int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::gemm( - device_queue->val, convert(transA), convert(transB), m, n, k, alpha, - reinterpret_cast *>(A), lda, - reinterpret_cast *>(B), 
ldb, beta, - reinterpret_cast *>(C), ldc); - __FORCE_MKL_FLUSH__(status); - return 0; -} - -extern "C" int onemklZgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, - int64_t k, double _Complex alpha, - const double _Complex *A, int64_t lda, - const double _Complex *B, int64_t ldb, - double _Complex beta, double _Complex *C, - int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::gemm( - device_queue->val, convert(transA), convert(transB), m, n, k, alpha, - reinterpret_cast *>(A), lda, - reinterpret_cast *>(B), ldb, beta, - reinterpret_cast *>(C), ldc); - __FORCE_MKL_FLUSH__(status); - return 0; -} - -extern "C" void onemklHgemmBatched(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklHgemmBatched(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, uint16_t *alpha, const short **a, int64_t *lda, const short **b, @@ -243,12 +299,11 @@ extern "C" void onemklHgemmBatched(syclQueue_t device_queue, onemklTranspose tra reinterpret_cast(&b[0]), ldb, reinterpret_cast(beta), reinterpret_cast(&c[0]), ldc, group_count, group_size); - __FORCE_MKL_FLUSH__(status); - + return 0; } -extern "C" void onemklSgemmBatched(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklSgemmBatched(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, const float **b, @@ -263,9 +318,10 @@ extern "C" void onemklSgemmBatched(syclQueue_t device_queue, onemklTranspose tra beta, &c[0], ldc, group_count, group_size); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDgemmBatched(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklDgemmBatched(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, const double 
**b, @@ -280,9 +336,10 @@ extern "C" void onemklDgemmBatched(syclQueue_t device_queue, onemklTranspose tra beta, &c[0], ldc, group_count, group_size); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCgemmBatched(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklCgemmBatched(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float _Complex *alpha, const float _Complex **a, int64_t *lda, @@ -301,9 +358,10 @@ extern "C" void onemklCgemmBatched(syclQueue_t device_queue, onemklTranspose tra reinterpret_cast **>(&c[0]), ldc, group_count, group_size); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZgemmBatched(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklZgemmBatched(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double _Complex *alpha, const double _Complex **a, int64_t *lda, @@ -323,9 +381,85 @@ extern "C" void onemklZgemmBatched(syclQueue_t device_queue, onemklTranspose tra reinterpret_cast **>(&c[0]), ldc, group_count, group_size); __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha, + const float **a, int64_t *lda, float **b, int64_t *ldb, + int64_t group_count, int64_t *group_size) { + trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, + unit_diag, group_count); + + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, + &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], + &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], + m, n, alpha, (const float **)&a[0], lda, + &b[0], ldb, group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + 
onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + double *alpha, const double **a, int64_t *lda, + double **b, int64_t *ldb, int64_t group_count, + int64_t *group_size) { + trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, + unit_diag, group_count); + + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, + &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], + &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], + m, n, alpha, (const double **)&a[0], lda, &b[0], + ldb, group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + float _Complex *alpha, const float _Complex **a, + int64_t *lda, float _Complex **b, int64_t *ldb, + int64_t group_count, int64_t *group_size) { + trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, + unit_diag, group_count); + + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, + &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], + &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], + m, n, reinterpret_cast *>(alpha), + reinterpret_cast **>(&a[0]), + lda, reinterpret_cast **>(&b[0]), + ldb, group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + double _Complex *alpha, const double _Complex **a, + int64_t *lda, double _Complex **b, int64_t *ldb, + int64_t group_count, int64_t *group_size) { + trsmBatchInfo trsmInfo(device_queue, left_right, + upper_lower, transa, unit_diag, group_count); + + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, + &trsmInfo.m_leftright[0], 
&trsmInfo.m_upperlower[0], + &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], + m, n, reinterpret_cast *>(alpha), + reinterpret_cast **>(&a[0]), + lda, reinterpret_cast **>(&b[0]), + ldb, group_count, group_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklHgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklHgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, uint16_t alpha, const short *a, int64_t lda, int64_t stridea, const short *b, int64_t ldb, int64_t strideb, uint16_t beta, @@ -337,9 +471,10 @@ extern "C" void onemklHgemmBatchStrided(syclQueue_t device_queue, onemklTranspos sycl::bit_cast(beta), reinterpret_cast(c), ldc, stridec, batch_size); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklSgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float alpha, const float *a, int64_t lda, int64_t stridea, const float *b, int64_t ldb, int64_t strideb, float beta, @@ -348,9 +483,10 @@ extern "C" void onemklSgemmBatchStrided(syclQueue_t device_queue, onemklTranspos convert(transb), m, n, k, alpha, a, lda, stridea, b, ldb, strideb, beta, c, ldc, stridec, batch_size); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklDgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double alpha, const double *a, int64_t lda, int64_t stridea, const double *b, int64_t ldb, int64_t strideb, double beta, @@ -359,15 +495,16 @@ extern "C" void onemklDgemmBatchStrided(syclQueue_t device_queue, onemklTranspos convert(transb), m, n, k, alpha, a, lda, stridea, b, ldb, strideb, beta, c, ldc, stridec, 
batch_size); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklCgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float _Complex alpha, const float _Complex *a, int64_t lda, int64_t stridea, const float _Complex *b, int64_t ldb, int64_t strideb, float _Complex beta, float _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size) { auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), - convert(transb), m, n, k, alpha, + convert(transb), m, n, k, alpha, reinterpret_cast *>(a), lda, stridea, reinterpret_cast *>(b), @@ -375,9 +512,10 @@ extern "C" void onemklCgemmBatchStrided(syclQueue_t device_queue, onemklTranspos reinterpret_cast *>(c), ldc, stridec, batch_size); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, +extern "C" int onemklZgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double _Complex alpha, const double _Complex *a, int64_t lda, int64_t stridea, const double _Complex *b, int64_t ldb, int64_t strideb, double _Complex beta, @@ -391,1154 +529,2841 @@ extern "C" void onemklZgemmBatchStrided(syclQueue_t device_queue, onemklTranspos reinterpret_cast *>(c), ldc, stridec, batch_size); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSsymm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, - convert(left_right), convert(upper_lower), - m, n, alpha, a, lda, b, ldb, beta, c, ldc); +extern "C" int onemklHgemm(syclQueue_t device_queue, 
onemklTranspose transA, + onemklTranspose transB, int64_t m, int64_t n, + int64_t k, uint16_t alpha, const short *A, int64_t lda, + const short *B, int64_t ldb, uint16_t beta, short *C, + int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transA), + convert(transB), m, n, k, sycl::bit_cast(alpha), + reinterpret_cast(A), lda, + reinterpret_cast(B), ldb, + sycl::bit_cast(beta), + reinterpret_cast(C), ldc); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDsymm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), - convert(upper_lower), m, n, alpha, a, lda, b, - ldb, beta, c, ldc); +extern "C" int onemklHdot(syclQueue_t device_queue, int64_t n, + const short *x, int64_t incx, const short *y, + int64_t incy, short *result) { + auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, + reinterpret_cast(x), + incx, reinterpret_cast(y), + incy, reinterpret_cast(result)); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCsymm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - float _Complex alpha, const float _Complex *a, int64_t lda, - const float _Complex *b, int64_t ldb, float _Complex beta, - float _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), - convert(upper_lower), m, n, - static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(b), - ldb, beta, reinterpret_cast *>(c), ldc); +extern "C" int onemklHaxpy(syclQueue_t device_queue, int64_t n, uint16_t alpha, + const short *x, std::int64_t incx, short *y, int64_t incy) { + auto status = 
oneapi::mkl::blas::column_major::axpy(device_queue->val, n, + sycl::bit_cast(alpha), + reinterpret_cast(x), + incx, reinterpret_cast(y), incy); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZsymm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - double _Complex alpha, const double _Complex *a, int64_t lda, - const double _Complex *b, int64_t ldb, double _Complex beta, - double _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), - convert(upper_lower), m, n, - static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb, - static_cast >(beta), - reinterpret_cast *>(c), ldc); +extern "C" int onemklHscal(syclQueue_t device_queue, int64_t n, uint16_t alpha, + short *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, sycl::bit_cast(alpha), + reinterpret_cast(x), incx); __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSsyrk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, float beta, float *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), - convert(trans), n, k, alpha, a, lda, beta, c, ldc); +extern "C" int onemklHnrm2(syclQueue_t device_queue, int64_t n, const short *x, + int64_t incx, short *result) { + auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, + reinterpret_cast(x), incx, + reinterpret_cast(result)); __FORCE_MKL_FLUSH__(status); + return 0; +} +// BLAS +extern "C" int onemklSgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float alpha, float *a, int64_t lda, float *b, int64_t ldb, float beta, float *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, 
convert(transa), convert(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDsyrk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, double beta, double *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), - convert(trans), n, k, alpha, a, lda, beta, c, ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, double *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transa), convert(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCsyrk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, - float _Complex alpha, const float _Complex *a, - int64_t lda, float _Complex beta, - float _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), - convert(trans), n, k, static_cast >(alpha), - reinterpret_cast *>(a), lda, - static_cast >(beta), - reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transa), convert(transb), m, n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + 
__FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZsyrk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, - double _Complex alpha, const double _Complex *a, - int64_t lda, double _Complex beta, - double _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), - convert(trans), n, k, static_cast >(alpha), - reinterpret_cast *>(a), lda, - static_cast >(beta), - reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transa), convert(transb), m, n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - const float *b, int64_t ldb, float beta, float *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), - convert(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float alpha, float *a, int64_t lda, float *b, int64_t ldb, float beta, float *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), convert(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void 
onemklDsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, - const double *b, int64_t ldb, double beta, double *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), - convert(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, double *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), convert(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, - int64_t n, int64_t k, float _Complex alpha, const float _Complex *a, - int64_t lda, const float _Complex *b, int64_t ldb, float _Complex beta, - float _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), - convert(trans), n, k, static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb, - static_cast >(beta), - reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), convert(upper_lower), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void 
onemklZsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, - int64_t n, int64_t k, double _Complex alpha, const double _Complex *a, - int64_t lda, const double _Complex *b, int64_t ldb, double _Complex beta, - double _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), - convert(trans), n, k, static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb, - static_cast >(beta), - reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), convert(upper_lower), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklStrmm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo uppler_lower, onemklTranspose trans, - onemklDiag diag, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb) { - auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), - convert(uppler_lower), convert(trans), - convert(diag), m, n, alpha, a, lda, b, ldb); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklChemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::hemm(device_queue->val, convert(left_right), convert(upper_lower), m, n, static_cast >(alpha), 
reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDtrmm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo uppler_lower, onemklTranspose trans, - onemklDiag diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb) { - auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), - convert(uppler_lower), convert(trans), - convert(diag), m, n, alpha, a, lda, b, ldb); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZhemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::hemm(device_queue->val, convert(left_right), convert(upper_lower), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCtrmm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo uppler_lower, onemklTranspose trans, - onemklDiag diag, int64_t m, int64_t n, float _Complex alpha, - const float _Complex *a, int64_t lda, float _Complex *b, - int64_t ldb) { - auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), - convert(uppler_lower), convert(trans), - convert(diag), m, n, static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float alpha, float *a, int64_t lda, float beta, float *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, 
convert(upper_lower), convert(trans), n, k, alpha, a, lda, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZtrmm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo uppler_lower, onemklTranspose trans, - onemklDiag diag, int64_t m, int64_t n, double _Complex alpha, - const double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) { - auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), - convert(uppler_lower), convert(trans), - convert(diag), m, n, static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double beta, double *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), convert(trans), n, k, alpha, a, lda, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklStrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, - onemklTranspose transa, onemklDiag unit_diag, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float *b, int64_t ldb) { - auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), - convert(upper_lower), convert(transa), convert(unit_diag), - m, n, alpha, a, lda, b, ldb); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex beta, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), convert(trans), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + 
return 0; } -extern "C" void onemklDtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, - onemklTranspose transa, onemklDiag unit_diag, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double *b, int64_t ldb) { - auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), - convert(upper_lower), convert(transa), convert(unit_diag), - m, n, alpha, a, lda, b, ldb); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex beta, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), convert(trans), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, - onemklTranspose transa, onemklDiag unit_diag, int64_t m, int64_t n, - float _Complex alpha, const float _Complex *a, int64_t lda, float _Complex *b, - int64_t ldb) { - auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), - convert(upper_lower), convert(transa), convert(unit_diag), - m, n, static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float alpha, float _Complex *a, int64_t lda, float beta, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::herk(device_queue->val, convert(upper_lower), convert(trans), n, k, alpha, reinterpret_cast*>(a), lda, beta, reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void 
onemklZtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, - onemklTranspose transa, onemklDiag unit_diag, int64_t m, int64_t n, - double _Complex alpha, const double _Complex *a, int64_t lda, - double _Complex *b, int64_t ldb) { - auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), - convert(upper_lower), convert(transa), convert(unit_diag), - m, n, static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double alpha, double _Complex *a, int64_t lda, double beta, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::herk(device_queue->val, convert(upper_lower), convert(trans), n, k, alpha, reinterpret_cast*>(a), lda, beta, reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklStrsmBatched(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, onemklTranspose transa, - onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *group_size) { - trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, - unit_diag, group_count); +extern "C" int onemklSsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float alpha, float *a, int64_t lda, float *b, int64_t ldb, float beta, float *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), convert(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} - auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, - &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], - &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], - m, n, 
alpha, (const float **)&a[0], lda, - &b[0], ldb, group_count, group_size); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, double *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), convert(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDtrsmBatched(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, onemklTranspose transa, - onemklDiag unit_diag, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, - double **b, int64_t *ldb, int64_t group_count, - int64_t *group_size) { - trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, - unit_diag, group_count); +extern "C" int onemklCsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), convert(trans), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} - auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, - &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], - &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], - m, n, alpha, (const double **)&a[0], lda, &b[0], - ldb, group_count, group_size); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, 
int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), convert(trans), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCtrsmBatched(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, onemklTranspose transa, - onemklDiag unit_diag, int64_t *m, int64_t *n, - float _Complex *alpha, const float _Complex **a, - int64_t *lda, float _Complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size) { - trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, - unit_diag, group_count); +extern "C" int onemklCher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float beta, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::her2k(device_queue->val, convert(upper_lower), convert(trans), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, beta, reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double beta, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::her2k(device_queue->val, convert(upper_lower), convert(trans), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, beta, reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag 
unit_diag, int64_t m, int64_t n, float alpha, float *a, int64_t lda, float *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, alpha, a, lda, b, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double alpha, double *a, int64_t lda, double *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, alpha, a, lda, b, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, 
onemklDiag unit_diag, int64_t m, int64_t n, float alpha, float *a, int64_t lda, float *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, alpha, a, lda, b, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double alpha, double *a, int64_t lda, double *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, alpha, a, lda, b, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float *a, int64_t 
lda, float *x, int64_t incx, float *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::dgmm(device_queue->val, convert(left_right), m, n, a, lda, x, incx, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double *a, int64_t lda, double *x, int64_t incx, double *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::dgmm(device_queue->val, convert(left_right), m, n, a, lda, x, incx, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::dgmm(device_queue->val, convert(left_right), m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::dgmm(device_queue->val, convert(left_right), m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float alpha, float *a, int64_t lda, float *x, int64_t incx, float beta, float *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, alpha, a, lda, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double alpha, double *a, int64_t lda, double *x, int64_t incx, double beta, 
double *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, alpha, a, lda, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, float alpha, float *a, int64_t lda, float *x, int64_t incx, float beta, float *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, double alpha, double *a, int64_t lda, double *x, int64_t incx, double beta, double *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, n, kl, ku, alpha, a, 
lda, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, n, kl, ku, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, n, kl, ku, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSger(syclQueue_t device_queue, int64_t m, int64_t n, float alpha, float *x, int64_t incx, float *y, int64_t incy, float *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::ger(device_queue->val, m, n, alpha, x, incx, y, incy, a, lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDger(syclQueue_t device_queue, int64_t m, int64_t n, double alpha, double *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::ger(device_queue->val, m, n, alpha, x, incx, y, incy, a, lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgerc(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float 
_Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::gerc(device_queue->val, m, n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgerc(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::gerc(device_queue->val, m, n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgeru(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::geru(device_queue->val, m, n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgeru(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::geru(device_queue->val, m, n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklChbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::hbmv(device_queue->val, convert(upper_lower), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, 
static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZhbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::hbmv(device_queue->val, convert(upper_lower), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklChemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::hemv(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZhemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::hemv(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float _Complex *x, int64_t incx, float _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::her(device_queue->val, convert(upper_lower), n, alpha, reinterpret_cast*>(x), incx, reinterpret_cast*>(a), lda); + 
__FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double _Complex *x, int64_t incx, double _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::her(device_queue->val, convert(upper_lower), n, alpha, reinterpret_cast*>(x), incx, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::her2(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::her2(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklChpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, float _Complex *a, float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::hpmv(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(a), reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZhpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, double _Complex *a, double 
_Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::hpmv(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(a), reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklChpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float _Complex *x, int64_t incx, float _Complex *a) { + auto status = oneapi::mkl::blas::column_major::hpr(device_queue->val, convert(upper_lower), n, alpha, reinterpret_cast*>(x), incx, reinterpret_cast*>(a)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZhpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double _Complex *x, int64_t incx, double _Complex *a) { + auto status = oneapi::mkl::blas::column_major::hpr(device_queue->val, convert(upper_lower), n, alpha, reinterpret_cast*>(x), incx, reinterpret_cast*>(a)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklChpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a) { + auto status = oneapi::mkl::blas::column_major::hpr2(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZhpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a) { + auto status = oneapi::mkl::blas::column_major::hpr2(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int 
onemklSsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float alpha, float *a, int64_t lda, float *x, int64_t incx, float beta, float *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::sbmv(device_queue->val, convert(upper_lower), n, k, alpha, a, lda, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *x, int64_t incx, double beta, double *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::sbmv(device_queue->val, convert(upper_lower), n, k, alpha, a, lda, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *a, int64_t lda, float *x, int64_t incx, float beta, float *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(upper_lower), n, alpha, a, lda, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double *a, int64_t lda, double *x, int64_t incx, double beta, double *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(upper_lower), n, alpha, a, lda, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + 
__FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *x, int64_t incx, float *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(upper_lower), n, alpha, x, incx, a, lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double *x, int64_t incx, double *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(upper_lower), n, alpha, x, incx, a, lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, float _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, double _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + 
return 0; +} + +extern "C" int onemklSsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *x, int64_t incx, float *y, int64_t incy, float *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::syr2(device_queue->val, convert(upper_lower), n, alpha, x, incx, y, incy, a, lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::syr2(device_queue->val, convert(upper_lower), n, alpha, x, incx, y, incy, a, lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::syr2(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) { + auto status = oneapi::mkl::blas::column_major::syr2(device_queue->val, convert(upper_lower), n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *a, float *x, int64_t incx, float beta, float *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::spmv(device_queue->val, convert(upper_lower), n, alpha, a, x, incx, beta, y, incy); + 
__FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double *a, double *x, int64_t incx, double beta, double *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::spmv(device_queue->val, convert(upper_lower), n, alpha, a, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *x, int64_t incx, float *a) { + auto status = oneapi::mkl::blas::column_major::spr(device_queue->val, convert(upper_lower), n, alpha, x, incx, a); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double *x, int64_t incx, double *a) { + auto status = oneapi::mkl::blas::column_major::spr(device_queue->val, convert(upper_lower), n, alpha, x, incx, a); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *x, int64_t incx, float *y, int64_t incy, float *a) { + auto status = oneapi::mkl::blas::column_major::spr2(device_queue->val, convert(upper_lower), n, alpha, x, incx, y, incy, a); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double *x, int64_t incx, double *y, int64_t incy, double *a) { + auto status = oneapi::mkl::blas::column_major::spr2(device_queue->val, convert(upper_lower), n, alpha, x, incx, y, incy, a); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(upper_lower), convert(trans), 
convert(unit_diag), n, k, a, lda, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tbsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t incx) { + 
auto status = oneapi::mkl::blas::column_major::tbsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tbsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tbsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tpmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tpmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, 
float _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tpmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tpmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tpsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tpsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, float _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tpsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex 
*a, double _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::tpsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStrsv(syclQueue_t device_queue, onemklUplo 
upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCdotc(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *result) { + auto status = oneapi::mkl::blas::column_major::dotc(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(result)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZdotc(syclQueue_t 
device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *result) { + auto status = oneapi::mkl::blas::column_major::dotc(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(result)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCdotu(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *result) { + auto status = oneapi::mkl::blas::column_major::dotu(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(result)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZdotu(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *result) { + auto status = oneapi::mkl::blas::column_major::dotu(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(result)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSiamax(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result) { + auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDiamax(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result) { + auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCiamax(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t *result) { + auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast*>(x), incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZiamax(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t *result) { + auto status = 
oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast*>(x), incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSiamin(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result) { + auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDiamin(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result) { + auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCiamin(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t *result) { + auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast*>(x), incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZiamin(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t *result) { + auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast*>(x), incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSasum(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *result) { + auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, x, incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDasum(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *result) { + auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, x, incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCasum(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float *result) { + auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, reinterpret_cast*>(x), incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern 
"C" int onemklZasum(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double *result) { + auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, reinterpret_cast*>(x), incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSaxpy(syclQueue_t device_queue, int64_t n, float alpha, float *x, int64_t incx, float *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, alpha, x, incx, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDaxpy(syclQueue_t device_queue, int64_t n, double alpha, double *x, int64_t incx, double *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, alpha, x, incx, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCaxpy(syclQueue_t device_queue, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZaxpy(syclQueue_t device_queue, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, static_cast >(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSaxpby(syclQueue_t device_queue, int64_t n, float alpha, float *x, int64_t incx, float beta, float *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, alpha, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDaxpby(syclQueue_t device_queue, int64_t n, double alpha, double *x, int64_t incx, double beta, double *y, int64_t incy) { + 
auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, alpha, x, incx, beta, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCaxpby(syclQueue_t device_queue, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, static_cast >(alpha), reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZaxpby(syclQueue_t device_queue, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, static_cast >(alpha), reinterpret_cast*>(x), incx, static_cast >(beta), reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklScopy(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, x, incx, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDcopy(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, x, incx, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCcopy(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZcopy(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy) { + auto status = 
oneapi::mkl::blas::column_major::copy(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSdot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float *result) { + auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, x, incx, y, incy, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDdot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double *result) { + auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, x, incx, y, incy, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSsdsdot(syclQueue_t device_queue, int64_t n, float sb, float *x, int64_t incx, float *y, int64_t incy, float *result) { + auto status = oneapi::mkl::blas::column_major::sdsdot(device_queue->val, n, sb, x, incx, y, incy, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSnrm2(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *result) { + auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, x, incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDnrm2(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *result) { + auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, x, incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCnrm2(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float *result) { + auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, reinterpret_cast*>(x), incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZnrm2(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double *result) { + auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, 
reinterpret_cast*>(x), incx, result); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSrot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float c, float s) { + auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, x, incx, y, incy, c, s); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDrot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double c, double s) { + auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, x, incx, y, incy, c, s); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCSrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float c, float s) { + auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, c, s); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float c, float _Complex s) { + auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, c, static_cast >(s)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZDrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double c, double s) { + auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, c, s); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double c, double _Complex s) { + auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, c, 
static_cast >(s)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSrotg(syclQueue_t device_queue, float *a, float *b, float *c, float *s) { + auto status = oneapi::mkl::blas::column_major::rotg(device_queue->val, a, b, c, s); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDrotg(syclQueue_t device_queue, double *a, double *b, double *c, double *s) { + auto status = oneapi::mkl::blas::column_major::rotg(device_queue->val, a, b, c, s); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCrotg(syclQueue_t device_queue, float _Complex *a, float _Complex *b, float *c, float _Complex *s) { + auto status = oneapi::mkl::blas::column_major::rotg(device_queue->val, reinterpret_cast*>(a), reinterpret_cast*>(b), c, reinterpret_cast*>(s)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZrotg(syclQueue_t device_queue, double _Complex *a, double _Complex *b, double *c, double _Complex *s) { + auto status = oneapi::mkl::blas::column_major::rotg(device_queue->val, reinterpret_cast*>(a), reinterpret_cast*>(b), c, reinterpret_cast*>(s)); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSrotm(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float *param) { + auto status = oneapi::mkl::blas::column_major::rotm(device_queue->val, n, x, incx, y, incy, param); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDrotm(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double *param) { + auto status = oneapi::mkl::blas::column_major::rotm(device_queue->val, n, x, incx, y, incy, param); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSrotmg(syclQueue_t device_queue, float *d1, float *d2, float *x1, float y1, float *param) { + auto status = oneapi::mkl::blas::column_major::rotmg(device_queue->val, d1, d2, x1, y1, param); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern 
"C" int onemklDrotmg(syclQueue_t device_queue, double *d1, double *d2, double *x1, double y1, double *param) { + auto status = oneapi::mkl::blas::column_major::rotmg(device_queue->val, d1, d2, x1, y1, param); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSscal(syclQueue_t device_queue, int64_t n, float alpha, float *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, alpha, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDscal(syclQueue_t device_queue, int64_t n, double alpha, double *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, alpha, x, incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCSscal(syclQueue_t device_queue, int64_t n, float alpha, float _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, alpha, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZDscal(syclQueue_t device_queue, int64_t n, double alpha, double _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, alpha, reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCscal(syclQueue_t device_queue, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, static_cast >(alpha), reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZscal(syclQueue_t device_queue, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx) { + auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, static_cast >(alpha), reinterpret_cast*>(x), incx); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSswap(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, 
int64_t incy) { + auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, x, incx, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDswap(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, x, incx, y, incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCswap(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZswap(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy) { + auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, float beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, double beta, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = 
oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, float _Complex beta, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, static_cast >(beta), reinterpret_cast*>(c), ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, double _Complex beta, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, static_cast >(beta), reinterpret_cast*>(c), ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSsyrk_batch(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float alpha, float *a, int64_t lda, int64_t stride_a, float beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::syrk_batch(device_queue->val, convert(upper_lower), 
convert(trans), n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDsyrk_batch(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double alpha, double *a, int64_t lda, int64_t stride_a, double beta, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::syrk_batch(device_queue->val, convert(upper_lower), convert(trans), n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCsyrk_batch(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex beta, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::syrk_batch(device_queue->val, convert(upper_lower), convert(trans), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, static_cast >(beta), reinterpret_cast*>(c), ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZsyrk_batch(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex beta, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::syrk_batch(device_queue->val, convert(upper_lower), convert(trans), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, static_cast >(beta), reinterpret_cast*>(c), ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, 
float alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, 
convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSgemv_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float alpha, float *a, int64_t lda, int64_t stridea, float *x, int64_t incx, int64_t stridex, float beta, float *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemv_batch(device_queue->val, convert(trans), m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgemv_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double alpha, double *a, int64_t lda, int64_t stridea, double *x, int64_t incx, int64_t stridex, double beta, double *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemv_batch(device_queue->val, convert(trans), m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgemv_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, int64_t stridea, float _Complex *x, int64_t incx, int64_t stridex, float _Complex beta, float _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemv_batch(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(x), incx, stridex, static_cast >(beta), reinterpret_cast*>(y), incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgemv_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, 
double _Complex alpha, double _Complex *a, int64_t lda, int64_t stridea, double _Complex *x, int64_t incx, int64_t stridex, double _Complex beta, double _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::gemv_batch(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(x), incx, stridex, static_cast >(beta), reinterpret_cast*>(y), incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSdgmm_batch(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float *a, int64_t lda, int64_t stridea, float *x, int64_t incx, int64_t stridex, float *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::dgmm_batch(device_queue->val, convert(left_right), m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDdgmm_batch(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double *a, int64_t lda, int64_t stridea, double *x, int64_t incx, int64_t stridex, double *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::dgmm_batch(device_queue->val, convert(left_right), m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCdgmm_batch(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stridea, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::dgmm_batch(device_queue->val, convert(left_right), m, n, reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(c), ldc, stridec, batch_size); + 
__FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZdgmm_batch(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stridea, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::dgmm_batch(device_queue->val, convert(left_right), m, n, reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(c), ldc, stridec, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSaxpy_batch(syclQueue_t device_queue, int64_t n, float alpha, float *x, int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::axpy_batch(device_queue->val, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDaxpy_batch(syclQueue_t device_queue, int64_t n, double alpha, double *x, int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::axpy_batch(device_queue->val, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCaxpy_batch(syclQueue_t device_queue, int64_t n, float _Complex alpha, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::axpy_batch(device_queue->val, n, static_cast >(alpha), reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(y), incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZaxpy_batch(syclQueue_t device_queue, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *y, int64_t incy, int64_t 
stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::axpy_batch(device_queue->val, n, static_cast >(alpha), reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(y), incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklScopy_batch(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::copy_batch(device_queue->val, n, x, incx, stridex, y, incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDcopy_batch(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::copy_batch(device_queue->val, n, x, incx, stridex, y, incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCcopy_batch(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::copy_batch(device_queue->val, n, reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(y), incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZcopy_batch(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::copy_batch(device_queue->val, n, reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(y), incy, stridey, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, float alpha, float *a, int64_t lda, float 
*b, int64_t ldb, float beta, float *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemmt(device_queue->val, convert(upper_lower), convert(transa), convert(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, double *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemmt(device_queue->val, convert(upper_lower), convert(transa), convert(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemmt(device_queue->val, convert(upper_lower), convert(transa), convert(transb), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::gemmt(device_queue->val, convert(upper_lower), convert(transa), convert(transb), n, k, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, static_cast >(beta), reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int 
onemklSimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float alpha, float *ab, int64_t lda, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::imatcopy(device_queue->val, convert(trans), m, n, alpha, ab, lda, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double alpha, double *ab, int64_t lda, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::imatcopy(device_queue->val, convert(trans), m, n, alpha, ab, lda, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex alpha, float _Complex *ab, int64_t lda, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::imatcopy(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(ab), lda, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex alpha, double _Complex *ab, int64_t lda, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::imatcopy(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(ab), lda, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float alpha, float *a, int64_t lda, float *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::omatcopy(device_queue->val, convert(trans), m, n, alpha, a, lda, b, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double alpha, double *a, int64_t lda, double *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::omatcopy(device_queue->val, convert(trans), m, n, alpha, 
a, lda, b, ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklComatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::omatcopy(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) { + auto status = oneapi::mkl::blas::column_major::omatcopy(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float alpha, float *a, int64_t lda, float beta, float *b, int64_t ldb, float *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::omatadd(device_queue->val, convert(transa), convert(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double alpha, double *a, int64_t lda, double beta, double *b, int64_t ldb, double *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::omatadd(device_queue->val, convert(transa), convert(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklComatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex beta, float _Complex *b, int64_t ldb, float 
_Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::omatadd(device_queue->val, convert(transa), convert(transb), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, static_cast >(beta), reinterpret_cast*>(b), ldb, reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex beta, double _Complex *b, int64_t ldb, double _Complex *c, int64_t ldc) { + auto status = oneapi::mkl::blas::column_major::omatadd(device_queue->val, convert(transa), convert(transb), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, static_cast >(beta), reinterpret_cast*>(b), ldb, reinterpret_cast*>(c), ldc); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSimatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float alpha, float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::imatcopy_batch(device_queue->val, convert(trans), m, n, alpha, ab, lda, ldb, stride, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDimatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double alpha, double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::imatcopy_batch(device_queue->val, convert(trans), m, n, alpha, ab, lda, ldb, stride, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCimatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex alpha, float _Complex *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::imatcopy_batch(device_queue->val, convert(trans), m, n, 
static_cast >(alpha), reinterpret_cast*>(ab), lda, ldb, stride, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZimatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex alpha, double _Complex *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::imatcopy_batch(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(ab), lda, ldb, stride, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSomatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::omatcopy_batch(device_queue->val, convert(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDomatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::omatcopy_batch(device_queue->val, convert(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklComatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::omatcopy_batch(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int 
onemklZomatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::omatcopy_batch(device_queue->val, convert(trans), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSomatadd_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float alpha, float *a, int64_t lda, int64_t stride_a, float beta, float *b, int64_t ldb, int64_t stride_b, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::omatadd_batch(device_queue->val, convert(transa), convert(transb), m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDomatadd_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double alpha, double *a, int64_t lda, int64_t stride_a, double beta, double *b, int64_t ldb, int64_t stride_b, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::omatadd_batch(device_queue->val, convert(transa), convert(transb), m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklComatadd_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex beta, float _Complex *b, int64_t ldb, int64_t stride_b, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = 
oneapi::mkl::blas::column_major::omatadd_batch(device_queue->val, convert(transa), convert(transb), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, static_cast >(beta), reinterpret_cast*>(b), ldb, stride_b, reinterpret_cast*>(c), ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZomatadd_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex beta, double _Complex *b, int64_t ldb, int64_t stride_b, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + auto status = oneapi::mkl::blas::column_major::omatadd_batch(device_queue->val, convert(transa), convert(transb), m, n, static_cast >(alpha), reinterpret_cast*>(a), lda, stride_a, static_cast >(beta), reinterpret_cast*>(b), ldb, stride_b, reinterpret_cast*>(c), ldc, stride_c, batch_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +// LAPACK +extern "C" int onemklSpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrf(device_queue->val, convert(uplo), n, a, lda, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrf(device_queue->val, convert(uplo), n, a, lda, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size); + 
__FORCE_MKL_FLUSH__(status); + return 0; +} +
+// Generated extern "C" entry points over oneapi::mkl::lapack (potrf / potrs / potri).
+// Pattern shared by every wrapper in this group:
+//   * `onemkl<X><name>` forwards its arguments to the same-named oneMKL routine on
+//     `device_queue->val`, passes the returned value to __FORCE_MKL_FLUSH__, and
+//     always returns 0.
+//   * `onemkl<X><name>_scratchpad_size` returns the result of the matching
+//     `<name>_scratchpad_size<T>` query; T follows the S/D/C/Z prefix
+//     (float, double, std::complex<float>, std::complex<double>).
+//   * C/Z variants accept C `_Complex` pointers and reinterpret them as
+//     std::complex<T>* before the call.
+// ---- potrf ----
+extern "C" int onemklZpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potrf(device_queue->val, convert(uplo), n, reinterpret_cast<std::complex<double>*>(a), lda, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int64_t onemklSpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size<float>(device_queue->val, convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size<double>(device_queue->val, convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size<std::complex<float>>(device_queue->val, convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size<std::complex<double>>(device_queue->val, convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+// ---- potrs ----
+extern "C" int onemklSpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potrs(device_queue->val, convert(uplo), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potrs(device_queue->val, convert(uplo), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklCpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potrs(device_queue->val, convert(uplo), n, nrhs, reinterpret_cast<std::complex<float>*>(a), lda, reinterpret_cast<std::complex<float>*>(b), ldb, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potrs(device_queue->val, convert(uplo), n, nrhs, reinterpret_cast<std::complex<double>*>(a), lda, reinterpret_cast<std::complex<double>*>(b), ldb, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int64_t onemklSpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size<float>(device_queue->val, convert(uplo), n, nrhs, lda, ldb);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size<double>(device_queue->val, convert(uplo), n, nrhs, lda, ldb);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size<std::complex<float>>(device_queue->val, convert(uplo), n, nrhs, lda, ldb);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size<std::complex<double>>(device_queue->val, convert(uplo), n, nrhs, lda, ldb);
+    return scratchpad_size;
+}
+
+// ---- potri ----
+extern "C" int onemklSpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potri(device_queue->val, convert(uplo), n, a, lda, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potri(device_queue->val, convert(uplo), n, a, lda, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklCpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potri(device_queue->val, convert(uplo), n, reinterpret_cast<std::complex<float>*>(a), lda, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::potri(device_queue->val, convert(uplo), n, reinterpret_cast<std::complex<double>*>(a), lda, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int64_t onemklSpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size<float>(device_queue->val, convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+// Generated extern "C" entry points over oneapi::mkl::lapack
+// (potri sizes, gebrd, geqrf, gesvd, getrf, getri, getrs, their *_batch forms, heev, heevd).
+// Pattern shared by every wrapper in this group:
+//   * `onemkl<X><name>` forwards its arguments to the same-named oneMKL routine on
+//     `device_queue->val` (enum arguments go through `convert`), passes the returned
+//     value to __FORCE_MKL_FLUSH__, and always returns 0.
+//   * `onemkl<X><name>_scratchpad_size` returns the result of the matching
+//     `<name>_scratchpad_size<T>` query; T follows the S/D/C/Z prefix
+//     (float, double, std::complex<float>, std::complex<double>).
+//   * C/Z variants accept C `_Complex` pointers and reinterpret them as
+//     std::complex<T>* before the call; real-valued outputs (d, e, s, w) and
+//     `ipiv` are passed through unchanged.
+// ---- potri (scratchpad sizes, continued) ----
+extern "C" int64_t onemklDpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size<double>(device_queue->val, convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size<std::complex<float>>(device_queue->val, convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size<std::complex<double>>(device_queue->val, convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+// ---- gebrd ----
+extern "C" int64_t onemklSgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size<float>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size<double>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size<std::complex<float>>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size<std::complex<double>>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float *d, float *e, float _Complex *tauq, float _Complex *taup, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::gebrd(device_queue->val, m, n, reinterpret_cast<std::complex<float>*>(a), lda, d, e, reinterpret_cast<std::complex<float>*>(tauq), reinterpret_cast<std::complex<float>*>(taup), reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *d, double *e, double *tauq, double *taup, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::gebrd(device_queue->val, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *d, float *e, float *tauq, float *taup, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::gebrd(device_queue->val, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double *d, double *e, double _Complex *tauq, double _Complex *taup, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::gebrd(device_queue->val, m, n, reinterpret_cast<std::complex<double>*>(a), lda, d, e, reinterpret_cast<std::complex<double>*>(tauq), reinterpret_cast<std::complex<double>*>(taup), reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- geqrf ----
+extern "C" int64_t onemklSgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size<float>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size<double>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size<std::complex<float>>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size<std::complex<double>>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::geqrf(device_queue->val, m, n, reinterpret_cast<std::complex<float>*>(a), lda, reinterpret_cast<std::complex<float>*>(tau), reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::geqrf(device_queue->val, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::geqrf(device_queue->val, m, n, a, lda, tau, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::geqrf(device_queue->val, m, n, reinterpret_cast<std::complex<double>*>(a), lda, reinterpret_cast<std::complex<double>*>(tau), reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- gesvd ----
+extern "C" int onemklCgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, float _Complex *a, int64_t lda, float *s, float _Complex *u, int64_t ldu, float _Complex *vt, int64_t ldvt, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::gesvd(device_queue->val, convert(jobu), convert(jobvt), m, n, reinterpret_cast<std::complex<float>*>(a), lda, s, reinterpret_cast<std::complex<float>*>(u), ldu, reinterpret_cast<std::complex<float>*>(vt), ldvt, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, double _Complex *a, int64_t lda, double *s, double _Complex *u, int64_t ldu, double _Complex *vt, int64_t ldvt, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::gesvd(device_queue->val, convert(jobu), convert(jobvt), m, n, reinterpret_cast<std::complex<double>*>(a), lda, s, reinterpret_cast<std::complex<double>*>(u), ldu, reinterpret_cast<std::complex<double>*>(vt), ldvt, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, double *a, int64_t lda, double *s, double *u, int64_t ldu, double *vt, int64_t ldvt, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::gesvd(device_queue->val, convert(jobu), convert(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, float *a, int64_t lda, float *s, float *u, int64_t ldu, float *vt, int64_t ldvt, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::gesvd(device_queue->val, convert(jobu), convert(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- getrf ----
+extern "C" int64_t onemklSgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<float>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<double>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<std::complex<float>>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<std::complex<double>>(device_queue->val, m, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrf(device_queue->val, m, n, reinterpret_cast<std::complex<float>*>(a), lda, ipiv, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrf(device_queue->val, m, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrf(device_queue->val, m, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrf(device_queue->val, m, n, reinterpret_cast<std::complex<double>*>(a), lda, ipiv, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- getrf_batch ----
+extern "C" int64_t onemklSgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<float>(device_queue->val, m, n, lda, stride_a, stride_ipiv, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<double>(device_queue->val, m, n, lda, stride_a, stride_ipiv, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<float>>(device_queue->val, m, n, lda, stride_a, stride_ipiv, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size<std::complex<double>>(device_queue->val, m, n, lda, stride_a, stride_ipiv, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgetrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, reinterpret_cast<std::complex<float>*>(a), lda, stride_a, ipiv, stride_ipiv, batch_size, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgetrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgetrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgetrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, reinterpret_cast<std::complex<double>*>(a), lda, stride_a, ipiv, stride_ipiv, batch_size, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- getrfnp_batch ----
+extern "C" int64_t onemklSgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size<float>(device_queue->val, m, n, lda, stride_a, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size<double>(device_queue->val, m, n, lda, stride_a, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size<std::complex<float>>(device_queue->val, m, n, lda, stride_a, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size<std::complex<double>>(device_queue->val, m, n, lda, stride_a, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgetrfnp_batch(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, reinterpret_cast<std::complex<float>*>(a), lda, stride_a, batch_size, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgetrfnp_batch(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgetrfnp_batch(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgetrfnp_batch(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, reinterpret_cast<std::complex<double>*>(a), lda, stride_a, batch_size, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- getri ----
+extern "C" int64_t onemklSgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size<float>(device_queue->val, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size<double>(device_queue->val, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size<std::complex<float>>(device_queue->val, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size<std::complex<double>>(device_queue->val, n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgetri(syclQueue_t device_queue, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getri(device_queue->val, n, reinterpret_cast<std::complex<float>*>(a), lda, ipiv, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgetri(syclQueue_t device_queue, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getri(device_queue->val, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgetri(syclQueue_t device_queue, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getri(device_queue->val, n, a, lda, ipiv, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgetri(syclQueue_t device_queue, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getri(device_queue->val, n, reinterpret_cast<std::complex<double>*>(a), lda, ipiv, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- getrs ----
+extern "C" int64_t onemklSgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(device_queue->val, convert(trans), n, nrhs, lda, ldb);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<double>(device_queue->val, convert(trans), n, nrhs, lda, ldb);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<std::complex<float>>(device_queue->val, convert(trans), n, nrhs, lda, ldb);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<std::complex<double>>(device_queue->val, convert(trans), n, nrhs, lda, ldb);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrs(device_queue->val, convert(trans), n, nrhs, reinterpret_cast<std::complex<float>*>(a), lda, ipiv, reinterpret_cast<std::complex<float>*>(b), ldb, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t *ipiv, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrs(device_queue->val, convert(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t *ipiv, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrs(device_queue->val, convert(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrs(device_queue->val, convert(trans), n, nrhs, reinterpret_cast<std::complex<double>*>(a), lda, ipiv, reinterpret_cast<std::complex<double>*>(b), ldb, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- getrs_batch ----
+extern "C" int64_t onemklSgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<float>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<double>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<float>>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size<std::complex<double>>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgetrs_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans), n, nrhs, reinterpret_cast<std::complex<float>*>(a), lda, stride_a, ipiv, stride_ipiv, reinterpret_cast<std::complex<float>*>(b), ldb, stride_b, batch_size, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgetrs_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgetrs_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgetrs_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans), n, nrhs, reinterpret_cast<std::complex<double>*>(a), lda, stride_a, ipiv, stride_ipiv, reinterpret_cast<std::complex<double>*>(b), ldb, stride_b, batch_size, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- getrsnp_batch ----
+extern "C" int64_t onemklSgetrsnp_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrsnp_batch_scratchpad_size<float>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklDgetrsnp_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrsnp_batch_scratchpad_size<double>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklCgetrsnp_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrsnp_batch_scratchpad_size<std::complex<float>>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZgetrsnp_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::getrsnp_batch_scratchpad_size<std::complex<double>>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, ldb, stride_b, batch_size);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCgetrsnp_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrsnp_batch(device_queue->val, convert(trans), n, nrhs, reinterpret_cast<std::complex<float>*>(a), lda, stride_a, reinterpret_cast<std::complex<float>*>(b), ldb, stride_b, batch_size, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklDgetrsnp_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrsnp_batch(device_queue->val, convert(trans), n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklSgetrsnp_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrsnp_batch(device_queue->val, convert(trans), n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZgetrsnp_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::getrsnp_batch(device_queue->val, convert(trans), n, nrhs, reinterpret_cast<std::complex<double>*>(a), lda, stride_a, reinterpret_cast<std::complex<double>*>(b), ldb, stride_b, batch_size, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- heev ----
+extern "C" int64_t onemklCheev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::heev_scratchpad_size<std::complex<float>>(device_queue->val, convert(jobz), convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZheev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::heev_scratchpad_size<std::complex<double>>(device_queue->val, convert(jobz), convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *w, float _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::heev(device_queue->val, convert(jobz), convert(uplo), n, reinterpret_cast<std::complex<float>*>(a), lda, w, reinterpret_cast<std::complex<float>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+extern "C" int onemklZheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *w, double _Complex *scratchpad, int64_t scratchpad_size) {
+    auto status = oneapi::mkl::lapack::heev(device_queue->val, convert(jobz), convert(uplo), n, reinterpret_cast<std::complex<double>*>(a), lda, w, reinterpret_cast<std::complex<double>*>(scratchpad), scratchpad_size);
+    __FORCE_MKL_FLUSH__(status);
+    return 0;
+}
+
+// ---- heevd ----
+extern "C" int64_t onemklCheevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::heevd_scratchpad_size<std::complex<float>>(device_queue->val, convert(jobz), convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int64_t onemklZheevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda) {
+    int64_t scratchpad_size = oneapi::mkl::lapack::heevd_scratchpad_size<std::complex<double>>(device_queue->val, convert(jobz), convert(uplo), n, lda);
+    return scratchpad_size;
+}
+
+extern "C" int onemklCheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *w, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::heevd(device_queue->val, convert(jobz), convert(uplo), n,
reinterpret_cast*>(a), lda, w, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *w, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::heevd(device_queue->val, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, w, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int64_t onemklChegvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb) { + int64_t scratchpad_size = oneapi::mkl::lapack::hegvd_scratchpad_size>(device_queue->val, itype, convert(jobz), convert(uplo), n, lda, ldb); + return scratchpad_size; +} - auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, - &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], - &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], - m, n, reinterpret_cast *>(alpha), - reinterpret_cast **>(&a[0]), - lda, reinterpret_cast **>(&b[0]), - ldb, group_count, group_size); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZhegvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb) { + int64_t scratchpad_size = oneapi::mkl::lapack::hegvd_scratchpad_size>(device_queue->val, itype, convert(jobz), convert(uplo), n, lda, ldb); + return scratchpad_size; } -extern "C" void onemklZtrsmBatched(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, onemklTranspose transa, - onemklDiag unit_diag, int64_t *m, int64_t *n, - double _Complex *alpha, const double _Complex **a, - int64_t *lda, double _Complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size) { - trsmBatchInfo trsmInfo(device_queue, left_right, - upper_lower, transa, unit_diag, group_count); 
+extern "C" int onemklChegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *w, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::hegvd(device_queue->val, itype, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, w, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} - auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, - &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], - &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], - m, n, reinterpret_cast *>(alpha), - reinterpret_cast **>(&a[0]), - lda, reinterpret_cast **>(&b[0]), - ldb, group_count, group_size); - __FORCE_MKL_FLUSH__(status); -} - -extern "C" void onemklChemm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - float _Complex alpha, const float _Complex *a, - int64_t lda, const float _Complex *b, int64_t ldb, - float _Complex beta, float _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::hemm(device_queue->val, convert(left_right), - convert(upper_lower), m, n, - static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(b), - ldb, static_cast >(beta), - reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZhegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *w, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::hegvd(device_queue->val, itype, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, w, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZhemm(syclQueue_t device_queue, 
onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - double _Complex alpha, const double _Complex *a, - int64_t lda, const double _Complex *b, int64_t ldb, - double _Complex beta, double _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::hemm(device_queue->val, convert(left_right), - convert(upper_lower), m, n, - static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(b), - ldb, static_cast >(beta), - reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklChetrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::hetrd_scratchpad_size>(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklCherk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, float alpha, - const float _Complex *a, int64_t lda, float beta, - float _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::herk(device_queue->val, convert(upper_lower), - convert(trans), n, k, alpha, - reinterpret_cast *>(a), - lda, beta, reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZhetrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::hetrd_scratchpad_size>(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklZherk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, double alpha, - const double _Complex *a, int64_t lda, double beta, - double _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::herk(device_queue->val, convert(upper_lower), - convert(trans), n, k, alpha, - reinterpret_cast *>(a), - lda, beta, reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int 
onemklChetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *d, float *e, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::hetrd(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, d, e, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCher2k(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, - float _Complex alpha, const float _Complex *a, - int64_t lda, const float _Complex *b, int64_t ldb, - float beta, float _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::her2k(device_queue->val, convert(upper_lower), - convert(trans), n, k, static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb, - beta, reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZhetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *d, double *e, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::hetrd(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, d, e, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZher2k(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, - double _Complex alpha, const double _Complex *a, - int64_t lda, const double _Complex *b, int64_t ldb, - double beta, double _Complex *c, int64_t ldc) { - auto status = oneapi::mkl::blas::column_major::her2k(device_queue->val, convert(upper_lower), - convert(trans), n, k, static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(b), ldb, - beta, reinterpret_cast *>(c), ldc); - __FORCE_MKL_FLUSH__(status); +extern "C" int 
onemklChetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::hetrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSgbmv(syclQueue_t device_queue, onemklTranspose trans, - int64_t m, int64_t n, int64_t kl, int64_t ku, - float alpha, const float *a, int64_t lda, - const float *x, int64_t incx, float beta, float *y, - int64_t incy) { - auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, - convert(trans), m, n, kl, ku, alpha, a, lda, x, - incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZhetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::hetrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDgbmv(syclQueue_t device_queue, onemklTranspose trans, - int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, const double *a, int64_t lda, - const double *x, int64_t incx, double beta, double *y, - int64_t incy) { - auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), - m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklChetrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::hetrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklCgbmv(syclQueue_t device_queue, onemklTranspose trans, - int64_t m, int64_t n, int64_t kl, int64_t 
ku, - float _Complex alpha, const float _Complex *a, int64_t lda, - const float _Complex *x, int64_t incx, float _Complex beta, - float _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), - m, n, kl, ku, static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), - incx, static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZhetrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::hetrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklZgbmv(syclQueue_t device_queue, onemklTranspose trans, - int64_t m, int64_t n, int64_t kl, int64_t ku, - double _Complex alpha, const double _Complex *a, int64_t lda, - const double _Complex *x, int64_t incx, double _Complex beta, - double _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, - n, kl, ku, static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx, - static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::orgbr(device_queue->val, convert(vec), m, n, k, a, lda, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklHdot(syclQueue_t device_queue, int64_t n, - const short *x, int64_t incx, const short *y, - int64_t incy, short *result) { - auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, - reinterpret_cast(x), - incx, reinterpret_cast(y), - incy, reinterpret_cast(result)); - __FORCE_MKL_FLUSH__(status); +extern "C" int 
onemklDorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::orgbr(device_queue->val, convert(vec), m, n, k, a, lda, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSdot(syclQueue_t device_queue, int64_t n, - const float *x, int64_t incx, const float *y, - int64_t incy, float *result) { - auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, x, - incx, y, incy, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSorgbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::orgbr_scratchpad_size(device_queue->val, convert(vect), m, n, k, lda); + return scratchpad_size; } -extern "C" void onemklDdot(syclQueue_t device_queue, int64_t n, - const double *x, int64_t incx, const double *y, - int64_t incy, double *result) { - auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, x, - incx, y, incy, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDorgbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::orgbr_scratchpad_size(device_queue->val, convert(vect), m, n, k, lda); + return scratchpad_size; } -extern "C" void onemklCdotc(syclQueue_t device_queue, int64_t n, - const float _Complex *x, int64_t incx, const float _Complex *y, - int64_t incy, float _Complex *result) { - auto status = oneapi::mkl::blas::column_major::dotc(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, - reinterpret_cast *>(result)); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda) { + 
int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_scratchpad_size(device_queue->val, m, n, k, lda); + return scratchpad_size; } -extern "C" void onemklZdotc(syclQueue_t device_queue, int64_t n, - const double _Complex *x, int64_t incx, const double _Complex *y, - int64_t incy, double _Complex *result) { - auto status = oneapi::mkl::blas::column_major::dotc(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, - reinterpret_cast *>(result)); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_scratchpad_size(device_queue->val, m, n, k, lda); + return scratchpad_size; } -extern "C" void onemklCdotu(syclQueue_t device_queue, int64_t n, - const float _Complex *x, int64_t incx, const float _Complex *y, - int64_t incy, float _Complex *result) { - auto status = oneapi::mkl::blas::column_major::dotu(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, - reinterpret_cast *>(result)); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::orgqr(device_queue->val, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZdotu(syclQueue_t device_queue, int64_t n, - const double _Complex *x, int64_t incx, const double _Complex *y, - int64_t incy, double _Complex *result) { - auto status = oneapi::mkl::blas::column_major::dotu(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, - reinterpret_cast *>(result)); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *scratchpad, 
int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::orgqr(device_queue->val, m, n, k, a, lda, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSasum(syclQueue_t device_queue, int64_t n, - const float *x, int64_t incx, - float *result) { - auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, x, - incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSormqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::ormqr_scratchpad_size(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); + return scratchpad_size; } -extern "C" void onemklDasum(syclQueue_t device_queue, int64_t n, - const double *x, int64_t incx, - double *result) { - auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, x, - incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDormqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::ormqr_scratchpad_size(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); + return scratchpad_size; } -extern "C" void onemklCasum(syclQueue_t device_queue, int64_t n, - const float _Complex *x, int64_t incx, - float *result) { - auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, - reinterpret_cast *>(x), - incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDormqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *c, int64_t ldc, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ormqr(device_queue->val, convert(side), convert(trans), m, n, k, a, lda, tau, c, 
ldc, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZasum(syclQueue_t device_queue, int64_t n, - const double _Complex *x, int64_t incx, - double *result) { - auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, - reinterpret_cast *>(x), - incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSormqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ormqr(device_queue->val, convert(side), convert(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklHaxpy(syclQueue_t device_queue, int64_t n, uint16_t alpha, - const short *x, std::int64_t incx, short *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, - sycl::bit_cast(alpha), - reinterpret_cast(x), - incx, reinterpret_cast(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz) { + int64_t scratchpad_size = oneapi::mkl::lapack::steqr_scratchpad_size(device_queue->val, convert(compz), n, ldz); + return scratchpad_size; } -extern "C" void onemklSaxpy(syclQueue_t device_queue, int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, alpha, x, - incx, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz) { + int64_t scratchpad_size = oneapi::mkl::lapack::steqr_scratchpad_size(device_queue->val, convert(compz), n, ldz); + return scratchpad_size; } -extern "C" void onemklDaxpy(syclQueue_t device_queue, int64_t n, double 
alpha, - const double *x, std::int64_t incx, double *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, alpha, x, - incx, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklCsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz) { + int64_t scratchpad_size = oneapi::mkl::lapack::steqr_scratchpad_size>(device_queue->val, convert(compz), n, ldz); + return scratchpad_size; } -extern "C" void onemklCaxpy(syclQueue_t device_queue, int64_t n, float _Complex alpha, - const float _Complex *x, std::int64_t incx, float _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, static_cast >(alpha), - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz) { + int64_t scratchpad_size = oneapi::mkl::lapack::steqr_scratchpad_size>(device_queue->val, convert(compz), n, ldz); + return scratchpad_size; } -extern "C" void onemklZaxpy(syclQueue_t device_queue, int64_t n, double _Complex alpha, - const double _Complex *x, std::int64_t incx, double _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, static_cast >(alpha), - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, float *d, float *e, float _Complex *z, int64_t ldz, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::steqr(device_queue->val, convert(compz), n, d, e, reinterpret_cast*>(z), ldz, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSaxpby(syclQueue_t device_queue, int64_t n, float alpha, - const float *x, int64_t incx, float beta, float *y, 
int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, alpha, x, - incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, double *d, double *e, double *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::steqr(device_queue->val, convert(compz), n, d, e, z, ldz, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDaxpby(syclQueue_t device_queue, int64_t n, double alpha, - const double *x, int64_t incx, double beta, double *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, alpha, x, - incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, float *d, float *e, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::steqr(device_queue->val, convert(compz), n, d, e, z, ldz, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCaxpby(syclQueue_t device_queue, int64_t n, float _Complex alpha, - const float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, static_cast >(alpha), - reinterpret_cast *>(x), incx, static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, double *d, double *e, double _Complex *z, int64_t ldz, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::steqr(device_queue->val, convert(compz), n, d, e, reinterpret_cast*>(z), ldz, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZaxpby(syclQueue_t 
device_queue, int64_t n, double _Complex alpha, - const double _Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, static_cast >(alpha), - reinterpret_cast *>(x), incx, static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSsyev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::syev_scratchpad_size(device_queue->val, convert(jobz), convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklSrot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float c, float s) { - auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, x, incx, y, incy, c, s); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDsyev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::syev_scratchpad_size(device_queue->val, convert(jobz), convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklDrot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double c, double s) { - auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, x, incx, y, incy, c, s); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *w, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::syev(device_queue->val, convert(jobz), convert(uplo), n, a, lda, w, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float c, float _Complex s) { 
- auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, c, static_cast >(s)); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *w, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::syev(device_queue->val, convert(jobz), convert(uplo), n, a, lda, w, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double c, double _Complex s) { - auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, c, static_cast >(s)); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::syevd_scratchpad_size(device_queue->val, convert(jobz), convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklCsrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float c, float s) { - auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, c, s); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::syevd_scratchpad_size(device_queue->val, convert(jobz), convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklZdrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double c, double s) { - auto status = 
oneapi::mkl::blas::column_major::rot(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, c, s); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *w, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::syevd(device_queue->val, convert(jobz), convert(uplo), n, a, lda, w, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -// Support Level-1: SCAL primitive -extern "C" void onemklDscal(syclQueue_t device_queue, int64_t n, double alpha, - double *x, int64_t incx) { - auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, alpha, - x, incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *w, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::syevd(device_queue->val, convert(jobz), convert(uplo), n, a, lda, w, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} +extern "C" int64_t onemklSsyevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, float vl, float vu, int64_t il, int64_t iu, float abstol, int64_t ldz) { + int64_t scratchpad_size = oneapi::mkl::lapack::syevx_scratchpad_size(device_queue->val, convert(jobz), convert(range), convert(uplo), n, lda, vl, vu, il, iu, abstol, ldz); + return scratchpad_size; } -extern "C" void onemklHscal(syclQueue_t device_queue, int64_t n, uint16_t alpha, - short *x, int64_t incx) { - auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, sycl::bit_cast(alpha), - reinterpret_cast(x), incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDsyevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, 
int64_t lda, double vl, double vu, int64_t il, int64_t iu, double abstol, int64_t ldz) { + int64_t scratchpad_size = oneapi::mkl::lapack::syevx_scratchpad_size(device_queue->val, convert(jobz), convert(range), convert(uplo), n, lda, vl, vu, il, iu, abstol, ldz); + return scratchpad_size; } -extern "C" void onemklSscal(syclQueue_t device_queue, int64_t n, float alpha, - float *x, int64_t incx) { - auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, alpha, - x, incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double *a, int64_t lda, double vl, double vu, int64_t il, int64_t iu, double abstol, int64_t *m, double *w, double *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::syevx(device_queue->val, convert(jobz), convert(range), convert(uplo), n, a, lda, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCscal(syclQueue_t device_queue, int64_t n, - float _Complex alpha, float _Complex *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, - static_cast >(alpha), - reinterpret_cast *>(x),incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, float *a, int64_t lda, float vl, float vu, int64_t il, int64_t iu, float abstol, int64_t *m, float *w, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::syevx(device_queue->val, convert(jobz), convert(range), convert(uplo), n, a, lda, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCsscal(syclQueue_t device_queue, int64_t n, - float alpha, float _Complex *x, - int64_t incx) { - auto 
status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, alpha, - reinterpret_cast *>(x),incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSsygvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb) { + int64_t scratchpad_size = oneapi::mkl::lapack::sygvd_scratchpad_size(device_queue->val, itype, convert(jobz), convert(uplo), n, lda, ldb); + return scratchpad_size; } -extern "C" void onemklZscal(syclQueue_t device_queue, int64_t n, - double _Complex alpha, double _Complex *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, - static_cast >(alpha), - reinterpret_cast *>(x),incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDsygvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb) { + int64_t scratchpad_size = oneapi::mkl::lapack::sygvd_scratchpad_size(device_queue->val, itype, convert(jobz), convert(uplo), n, lda, ldb); + return scratchpad_size; } -extern "C" void onemklZdscal(syclQueue_t device_queue, int64_t n, - double alpha, double _Complex *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, alpha, - reinterpret_cast *>(x),incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *b, int64_t ldb, double *w, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sygvd(device_queue->val, itype, convert(jobz), convert(uplo), n, a, lda, b, ldb, w, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSgemv(syclQueue_t device_queue, onemklTranspose trans, - int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy) { - auto 
status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), - m, n, alpha, a, lda, x, incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *b, int64_t ldb, float *w, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sygvd(device_queue->val, itype, convert(jobz), convert(uplo), n, a, lda, b, ldb, w, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDgemv(syclQueue_t device_queue, onemklTranspose trans, - int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), - m, n, alpha, a, lda, x, incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSsygvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, float vl, float vu, int64_t il, int64_t iu, float abstol, int64_t ldz) { + int64_t scratchpad_size = oneapi::mkl::lapack::sygvx_scratchpad_size(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, lda, ldb, vl, vu, il, iu, abstol, ldz); + return scratchpad_size; } -extern "C" void onemklCgemv(syclQueue_t device_queue, onemklTranspose trans, - int64_t m, int64_t n, float _Complex alpha, - const float _Complex *a, int64_t lda, - const float _Complex *x, int64_t incx, - float _Complex beta, float _Complex *y, - int64_t incy) { - auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, - static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(x), incx, - static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t 
onemklDsygvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, double vl, double vu, int64_t il, int64_t iu, double abstol, int64_t ldz) { + int64_t scratchpad_size = oneapi::mkl::lapack::sygvx_scratchpad_size(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, lda, ldb, vl, vu, il, iu, abstol, ldz); + return scratchpad_size; } -extern "C" void onemklZgemv(syclQueue_t device_queue, onemklTranspose trans, - int64_t m, int64_t n, double _Complex alpha, - const double _Complex *a, int64_t lda, - const double _Complex *x, int64_t incx, - double _Complex beta, double _Complex *y, - int64_t incy) { - auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, - static_cast >(alpha), - reinterpret_cast *>(a), lda, - reinterpret_cast *>(x), incx, - static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *b, int64_t ldb, double vl, double vu, int64_t il, int64_t iu, double abstol, int64_t *m, double *w, double *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sygvx(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, a, lda, b, ldb, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSger(syclQueue_t device_queue, int64_t m, int64_t n, float alpha, - const float *x, int64_t incx, const float *y, int64_t incy, - float *a, int64_t lda) { - auto status = oneapi::mkl::blas::column_major::ger(device_queue->val, m, n, alpha, x, - incx, y, incy, a, lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, 
onemklRangev range, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *b, int64_t ldb, float vl, float vu, int64_t il, int64_t iu, float abstol, int64_t *m, float *w, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sygvx(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, a, lda, b, ldb, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDger(syclQueue_t device_queue, int64_t m, int64_t n, double alpha, - const double *x, int64_t incx, const double *y, int64_t incy, - double *a, int64_t lda) { - auto status = oneapi::mkl::blas::column_major::ger(device_queue->val, m, n, alpha, x, - incx, y, incy, a, lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSsytrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::sytrd_scratchpad_size(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklCgerc(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex alpha, - const float _Complex *x, int64_t incx, const float _Complex *y, int64_t incy, - float _Complex *a, int64_t lda) { - auto status = oneapi::mkl::blas::column_major::gerc(device_queue->val, m, n, - static_cast >(alpha), - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, - reinterpret_cast *>(a), lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDsytrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::sytrd_scratchpad_size(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklZgerc(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex alpha, - const double _Complex *x, int64_t incx, const double _Complex *y, int64_t incy, - double _Complex *a, int64_t lda) 
{ - auto status = oneapi::mkl::blas::column_major::gerc(device_queue->val, m, n, - static_cast >(alpha), - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, - reinterpret_cast *>(a), lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *d, double *e, double *tau, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sytrd(device_queue->val, convert(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklChemv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - float _Complex alpha, const float _Complex *a, int64_t lda, - const float _Complex *x, int64_t incx, float _Complex beta, - float _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::hemv(device_queue->val, convert(uplo), n, - static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx, - static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *d, float *e, float *tau, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sytrd(device_queue->val, convert(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZhemv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - double _Complex alpha, const double _Complex *a, int64_t lda, - const double _Complex *x, int64_t incx, double _Complex beta, - double _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::hemv(device_queue->val, convert(uplo), n, - static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx, - static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t 
onemklStrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { + int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, lda, ldb); + return scratchpad_size; } -extern "C" void onemklChbmv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - int64_t k, float _Complex alpha, const float _Complex *a, - int64_t lda, const float _Complex *x, int64_t incx, float _Complex beta, - float _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::hbmv(device_queue->val, convert(uplo), n, - k, static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), - incx, static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { + int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, lda, ldb); + return scratchpad_size; } -extern "C" void onemklZhbmv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - int64_t k, double _Complex alpha, const double _Complex *a, - int64_t lda, const double _Complex *x, int64_t incx, double _Complex beta, - double _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::hbmv(device_queue->val, convert(uplo), n, - k, static_cast >(alpha), - reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), - incx, static_cast >(beta), - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklCtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { + int64_t scratchpad_size = 
oneapi::mkl::lapack::trtrs_scratchpad_size>(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, lda, ldb); + return scratchpad_size; } -extern "C" void onemklCher(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float alpha, - const float _Complex *x, int64_t incx, float _Complex *a, - int64_t lda) { - auto status = oneapi::mkl::blas::column_major::her(device_queue->val, convert(uplo), n, alpha, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(a), lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { + int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size>(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, lda, ldb); + return scratchpad_size; } -extern "C" void onemklZher(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double alpha, - const double _Complex *x, int64_t incx, double _Complex *a, - int64_t lda) { - auto status = oneapi::mkl::blas::column_major::her(device_queue->val, convert(uplo), n, alpha, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(a), lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::trtrs(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCher2(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex alpha, - const float _Complex *x, int64_t incx, const float _Complex *y, int64_t incy, - float _Complex *a, int64_t 
lda) { - auto status = oneapi::mkl::blas::column_major::her2(device_queue->val, convert(uplo), n, - static_cast >(alpha), - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, - reinterpret_cast *>(a), lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::trtrs(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZher2(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex alpha, - const double _Complex *x, int64_t incx, const double _Complex *y, int64_t incy, - double _Complex *a, int64_t lda) { - auto status = oneapi::mkl::blas::column_major::her2(device_queue->val, convert(uplo), n, - static_cast >(alpha), - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy, - reinterpret_cast *>(a), lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklStrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::trtrs(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSsbmv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, const float *x, - int64_t incx, float beta, float *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::sbmv(device_queue->val, convert(uplo), n, k, - alpha, a, lda, x, incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int 
onemklZtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::trtrs(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDsbmv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, const double *x, - int64_t incx, double beta, double *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::sbmv(device_queue->val, convert(uplo), n, k, - alpha, a, lda, x, incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ungbr(device_queue->val, convert(vec), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSsymv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(uplo), n, alpha, - a, lda, x, incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ungbr(device_queue->val, convert(vec), m, 
n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDsymv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double alpha, - const double *a, int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(uplo), n, alpha, - a, lda, x, incx, beta, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklCungbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::ungbr_scratchpad_size>(device_queue->val, convert(vect), m, n, k, lda); + return scratchpad_size; } -extern "C" void onemklSsyr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float alpha, - const float *x, int64_t incx, float *a, int64_t lda) { - auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(uplo), n, alpha, - x, incx, a, lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZungbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::ungbr_scratchpad_size>(device_queue->val, convert(vect), m, n, k, lda); + return scratchpad_size; } -extern "C" void onemklDsyr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double alpha, - const double *x, int64_t incx, double *a, int64_t lda) { - auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(uplo), n, alpha, - x, incx, a, lda); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklCungqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_scratchpad_size>(device_queue->val, m, n, k, lda); + return scratchpad_size; } -extern "C" void 
onemklStbmv(syclQueue_t device_queue, onemklUplo uplo, - onemklTranspose trans, onemklDiag diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx) { - auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, k, a, lda, x, incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZungqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_scratchpad_size>(device_queue->val, m, n, k, lda); + return scratchpad_size; } -extern "C" void onemklDtbmv(syclQueue_t device_queue, onemklUplo uplo, - onemklTranspose trans, onemklDiag diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx) { - auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, k, a, lda, x, incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ungqr(device_queue->val, m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCtbmv(syclQueue_t device_queue, onemklUplo uplo, - onemklTranspose trans, onemklDiag diag, int64_t n, - int64_t k, const float _Complex *a, int64_t lda, float _Complex *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, k, reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, 
int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ungqr(device_queue->val, m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZtbmv(syclQueue_t device_queue, onemklUplo uplo, - onemklTranspose trans, onemklDiag diag, int64_t n, - int64_t k, const double _Complex *a, int64_t lda, double _Complex *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, k, reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklCunmqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::unmqr_scratchpad_size>(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); + return scratchpad_size; } -// trmv - level2 -extern "C" void onemklStrmv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const float *a, int64_t lda, float *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, a, lda, x, incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZunmqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::unmqr_scratchpad_size>(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); + return scratchpad_size; } -extern "C" void onemklDtrmv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const double *a, int64_t lda, double *x, - int64_t incx) { - auto status = 
oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, a, lda, x, incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCunmqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::unmqr(device_queue->val, convert(side), convert(trans), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCtrmv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const float _Complex *a, int64_t lda, float _Complex *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZunmqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::unmqr(device_queue->val, convert(side), convert(trans), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZtrmv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const double _Complex *a, int64_t lda, double _Complex *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, 
reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::gerqf(device_queue->val, m, n, a, lda, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -// trsv -extern "C" void onemklStrsv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const float *a, int64_t lda, float *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, a, lda, x, incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::gerqf(device_queue->val, m, n, a, lda, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDtrsv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const double *a, int64_t lda, double *x, - int64_t incx) { - auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, a, lda, x, incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::gerqf(device_queue->val, m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCtrsv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const float _Complex *a, 
int64_t lda, - float _Complex *x, int64_t incx) { - auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::gerqf(device_queue->val, m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZtrsv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const double _Complex *a, int64_t lda, - double _Complex *x, int64_t incx) { - auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(uplo), convert(trans), - convert(diag), n, reinterpret_cast *>(a), - lda, reinterpret_cast *>(x), incx); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size(device_queue->val, m, n, lda); + return scratchpad_size; } -extern "C" void onemklHnrm2(syclQueue_t device_queue, int64_t n, const short *x, - int64_t incx, short *result) { - auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, - reinterpret_cast(x), incx, - reinterpret_cast(result)); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size(device_queue->val, m, n, lda); + return scratchpad_size; } -extern "C" void onemklDnrm2(syclQueue_t device_queue, int64_t n, const double *x, - int64_t incx, double *result) { - auto status = 
oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, x, incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklCgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size>(device_queue->val, m, n, lda); + return scratchpad_size; } -extern "C" void onemklSnrm2(syclQueue_t device_queue, int64_t n, const float *x, - int64_t incx, float *result) { - auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, x, incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size>(device_queue->val, m, n, lda); + return scratchpad_size; } -extern "C" void onemklCnrm2(syclQueue_t device_queue, int64_t n, const float _Complex *x, - int64_t incx, float *result) { - auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, - reinterpret_cast *>(x), incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSormrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ormrq(device_queue->val, convert(side), convert(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZnrm2(syclQueue_t device_queue, int64_t n, const double _Complex *x, - int64_t incx, double *result) { - auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, - reinterpret_cast *>(x), incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDormrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *c, int64_t ldc, 
double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ormrq(device_queue->val, convert(side), convert(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDcopy(syclQueue_t device_queue, int64_t n, const double *x, - int64_t incx, double *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, x, incx, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklSormrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::ormrq_scratchpad_size(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); + return scratchpad_size; } -extern "C" void onemklScopy(syclQueue_t device_queue, int64_t n, const float *x, - int64_t incx, float *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, x, incx, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklDormrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::ormrq_scratchpad_size(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); + return scratchpad_size; } -extern "C" void onemklZcopy(syclQueue_t device_queue, int64_t n, const double _Complex *x, - int64_t incx, double _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCunmrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t ldc, float _Complex *scratchpad, 
int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::unmrq(device_queue->val, convert(side), convert(trans), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCcopy(syclQueue_t device_queue, int64_t n, const float _Complex *x, - int64_t incx, float _Complex *y, int64_t incy) { - auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklZunmrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::unmrq(device_queue->val, convert(side), convert(trans), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDamax(syclQueue_t device_queue, int64_t n, const double *x, - int64_t incx, int64_t *result){ - auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklCunmrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::unmrq_scratchpad_size>(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); + return scratchpad_size; } -extern "C" void onemklSamax(syclQueue_t device_queue, int64_t n, const float *x, - int64_t incx, int64_t *result){ - auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result); - __FORCE_MKL_FLUSH__(status); 
+ +extern "C" int64_t onemklZunmrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::unmrq_scratchpad_size>(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); + return scratchpad_size; } -extern "C" void onemklZamax(syclQueue_t device_queue, int64_t n, const double _Complex *x, - int64_t incx, int64_t *result){ - auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, - reinterpret_cast *>(x), incx, result); - __FORCE_MKL_FLUSH__(status); + +extern "C" int onemklSsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sytrf(device_queue->val, convert(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklCamax(syclQueue_t device_queue, int64_t n, const float _Complex *x, - int64_t incx, int64_t *result){ - auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, - reinterpret_cast *>(x), incx, result); - __FORCE_MKL_FLUSH__(status); + +extern "C" int onemklDsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sytrf(device_queue->val, convert(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklDamin(syclQueue_t device_queue, int64_t n, const double *x, - int64_t incx, int64_t *result){ - auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklCsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t 
scratchpad_size) { + auto status = oneapi::mkl::lapack::sytrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklSamin(syclQueue_t device_queue, int64_t n, const float *x, - int64_t incx, int64_t *result){ - auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result); - __FORCE_MKL_FLUSH__(status); + +extern "C" int onemklZsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::sytrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZamin(syclQueue_t device_queue, int64_t n, const double _Complex *x, - int64_t incx, int64_t *result){ - auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, - reinterpret_cast *>(x), incx, result); - __FORCE_MKL_FLUSH__(status); + +extern "C" int64_t onemklSsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklCamin(syclQueue_t device_queue, int64_t n, const float _Complex *x, - int64_t incx, int64_t *result){ - auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, - reinterpret_cast *>(x), incx, result); - __FORCE_MKL_FLUSH__(status); + +extern "C" int64_t onemklDsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklSswap(syclQueue_t device_queue, 
int64_t n, float *x, int64_t incx,\ - float *y, int64_t incy){ - auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, x, incx, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklCsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklDswap(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, - double *y, int64_t incy){ - auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, x, incx, y, incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int64_t onemklZsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; } -extern "C" void onemklCswap(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, - float _Complex *y, int64_t incy){ - auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklSorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::orgtr(device_queue->val, convert(uplo), n, a, lda, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; } -extern "C" void onemklZswap(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, - double _Complex *y, int64_t incy){ - auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, - reinterpret_cast *>(x), incx, - reinterpret_cast *>(y), incy); - __FORCE_MKL_FLUSH__(status); +extern "C" int onemklDorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, 
int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::orgtr(device_queue->val, convert(uplo), n, a, lda, tau, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int64_t onemklSorgtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::orgtr_scratchpad_size(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; +} + +extern "C" int64_t onemklDorgtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::orgtr_scratchpad_size(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; +} + +extern "C" int onemklCungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ungtr(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ungtr(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int64_t onemklCungtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { + int64_t scratchpad_size = oneapi::mkl::lapack::ungtr_scratchpad_size>(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; +} + +extern "C" int64_t onemklZungtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { 
+ int64_t scratchpad_size = oneapi::mkl::lapack::ungtr_scratchpad_size>(device_queue->val, convert(uplo), n, lda); + return scratchpad_size; +} + +extern "C" int onemklSormtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ormtr(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDormtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *c, int64_t ldc, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ormtr(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int64_t onemklSormtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::ormtr_scratchpad_size(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, lda, ldc); + return scratchpad_size; +} + +extern "C" int64_t onemklDormtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::ormtr_scratchpad_size(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, lda, ldc); + return scratchpad_size; +} + +extern "C" int onemklCunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t 
ldc, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::unmtr(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::unmtr(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int64_t onemklCunmtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::unmtr_scratchpad_size>(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, lda, ldc); + return scratchpad_size; +} + +extern "C" int64_t onemklZunmtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc) { + int64_t scratchpad_size = oneapi::mkl::lapack::unmtr_scratchpad_size>(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, lda, ldc); + return scratchpad_size; +} + +extern "C" int onemklSpotrf_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo), n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); 
+ __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDpotrf_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo), n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCpotrf_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, stride_a, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZpotrf_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, stride_a, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSpotrs_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDpotrs_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, 
int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCpotrs_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo), n, nrhs, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZpotrs_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo), n, nrhs, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSgeqrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, float *tau, int64_t stride_tau, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgeqrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, double *tau, int64_t stride_tau, int64_t batch_size, 
double *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgeqrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *tau, int64_t stride_tau, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(tau), stride_tau, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgeqrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *tau, int64_t stride_tau, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(tau), stride_tau, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSorgqr_batch(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, int64_t stride_a, float *tau, int64_t stride_tau, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::orgqr_batch(device_queue->val, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDorgqr_batch(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, int64_t stride_a, double *tau, int64_t stride_tau, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { + auto status = 
oneapi::mkl::lapack::orgqr_batch(device_queue->val, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCungqr_batch(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *tau, int64_t stride_tau, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ungqr_batch(device_queue->val, m, n, k, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(tau), stride_tau, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZungqr_batch(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *tau, int64_t stride_tau, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::ungqr_batch(device_queue->val, m, n, k, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(tau), stride_tau, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklSgels_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stridea, float *b, int64_t ldb, int64_t strideb, int64_t batchsize, float *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans), m, n, nrhs, a, lda, stridea, b, ldb, strideb, batchsize, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklDgels_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stridea, double *b, int64_t ldb, int64_t strideb, int64_t batchsize, double *scratchpad, int64_t scratchpad_size) { + auto status = 
oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans), m, n, nrhs, a, lda, stridea, b, ldb, strideb, batchsize, scratchpad, scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklCgels_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stridea, float _Complex *b, int64_t ldb, int64_t strideb, int64_t batchsize, float _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans), m, n, nrhs, reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(b), ldb, strideb, batchsize, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int onemklZgels_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stridea, double _Complex *b, int64_t ldb, int64_t strideb, int64_t batchsize, double _Complex *scratchpad, int64_t scratchpad_size) { + auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans), m, n, nrhs, reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(b), ldb, strideb, batchsize, reinterpret_cast*>(scratchpad), scratchpad_size); + __FORCE_MKL_FLUSH__(status); + return 0; +} + +extern "C" int64_t onemklSpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size(device_queue->val, convert(uplo), n, lda, stride_a, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklDpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size(device_queue->val, convert(uplo), n, lda, stride_a, batch_size); + return 
scratchpad_size; +} + +extern "C" int64_t onemklCpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size>(device_queue->val, convert(uplo), n, lda, stride_a, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklZpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size>(device_queue->val, convert(uplo), n, lda, stride_a, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklSpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size(device_queue->val, convert(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklDpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size(device_queue->val, convert(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklCpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size>(device_queue->val, convert(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklZpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, 
int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size>(device_queue->val, convert(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklSgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, stride_tau, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklDgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, stride_tau, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklCgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, stride_tau, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklZgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, stride_tau, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklSorgqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size(device_queue->val, m, n, k, lda, 
stride_a, stride_tau, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklDorgqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size(device_queue->val, m, n, k, lda, stride_a, stride_tau, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklCungqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size>(device_queue->val, m, n, k, lda, stride_a, stride_tau, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklZungqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size>(device_queue->val, m, n, k, lda, stride_a, stride_tau, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklSgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size(device_queue->val, convert(trans), m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklDgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size(device_queue->val, convert(trans), m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); + return scratchpad_size; +} + 
+extern "C" int64_t onemklCgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size>(device_queue->val, convert(trans), m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); + return scratchpad_size; +} + +extern "C" int64_t onemklZgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { + int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size>(device_queue->val, convert(trans), m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); + return scratchpad_size; } // other diff --git a/deps/src/onemkl.h b/deps/src/onemkl.h index 4859edb1..37b8087d 100644 --- a/deps/src/onemkl.h +++ b/deps/src/onemkl.h @@ -9,6 +9,7 @@ extern "C" { #endif +// BLAS types typedef enum { ONEMKL_TRANSPOSE_NONTRANS, ONEMKL_TRANSPOSE_TRANS, @@ -30,508 +31,1917 @@ typedef enum { ONEMKL_SIDE_RIGHT } onemklSide; -// XXX: how to expose half in C? 
-// int onemklHgemm(syclQueue_t device_queue, onemklTranspose transA, -// onemklTranspose transB, int64_t m, int64_t n, int64_t k, -// half alpha, const half *A, int64_t lda, const half *B, -// int64_t ldb, half beta, half *C, int64_t ldc); -int onemklSgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, int64_t k, - float alpha, const float *A, int64_t lda, const float *B, - int64_t ldb, float beta, float *C, int64_t ldc); -int onemklDgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, int64_t k, - double alpha, const double *A, int64_t lda, const double *B, - int64_t ldb, double beta, double *C, int64_t ldc); -int onemklCgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, int64_t k, - float _Complex alpha, const float _Complex *A, int64_t lda, - const float _Complex *B, int64_t ldb, float _Complex beta, - float _Complex *C, int64_t ldc); -int onemklZgemm(syclQueue_t device_queue, onemklTranspose transA, - onemklTranspose transB, int64_t m, int64_t n, int64_t k, - double _Complex alpha, const double _Complex *A, int64_t lda, - const double _Complex *B, int64_t ldb, double _Complex beta, - double _Complex *C, int64_t ldc); +typedef enum { + ONEMKL_OFFSET_ROW, + ONEMKL_OFFSET_COL, + ONEMKL_OFFSET_FIX, +} onemklOffset; + +// LAPACK types +typedef enum { + ONEMKL_JOB_N, + ONEMKL_JOB_V, + ONEMKL_JOB_U, + ONEMKL_JOB_A, + ONEMKL_JOB_S, + ONEMKL_JOB_O +} onemklJob; + +typedef enum { + ONEMKL_GENERATE_Q, + ONEMKL_GENERATE_P, + ONEMKL_GENERATE_N, + ONEMKL_GENERATE_V +} onemklGenerate; + +typedef enum { + ONEMKL_COMPZ_N, + ONEMKL_COMPZ_V, + ONEMKL_COMPZ_I +} onemklCompz; + +typedef enum { + ONEMKL_DIRECT_F, + ONEMKL_DIRECT_B +} onemklDirect; + +typedef enum { + ONEMKL_STOREV_C, + ONEMKL_STOREV_R +} onemklStorev; + +typedef enum { + ONEMKL_RANGEV_A, + ONEMKL_RANGEV_V, + ONEMKL_RANGEV_I +} onemklRangev; + +typedef enum { + 
ONEMKL_ORDER_B, + ONEMKL_ORDER_E +} onemklOrder; + +typedef enum { + ONEMKL_JOBSVD_N, + ONEMKL_JOBSVD_A, + ONEMKL_JOBSVD_O, + ONEMKL_JOBSVD_S +} onemklJobsvd; + +typedef enum { + ONEMKL_LAYOUT_ROW, + ONEMKL_LAYOUT_COL, +} onemklLayout; + +typedef enum { + ONEMKL_INDEX_ZERO, + ONEMKL_INDEX_ONE, +} onemklIndex; + +// SPARSE types +typedef enum { + ONEMKL_PROPERTY_SYMMETRIC, + ONEMKL_PROPERTY_SORTED, +} onemklProperty; + +// Opaque pointer type for a sparse matrix handle. TODO(review): confirm how it maps onto oneMKL's matrix_handle_t. +typedef struct MatrixHandle_st *MatrixHandle_t; + +int onemklHgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, uint16_t *alpha, + const short **a, int64_t *lda, const short **b, + int64_t *ldb, uint16_t *beta, short **c, + int64_t *ldc, int64_t group_count, int64_t *group_size); + +int onemklSgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, float *alpha, + const float **a, int64_t *lda, const float **b, + int64_t *ldb, float *beta, float **c, + int64_t *ldc, int64_t group_count, int64_t *group_size); + +int onemklDgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, double *alpha, + const double **a, int64_t *lda, const double **b, + int64_t *ldb, double *beta, double **c, + int64_t *ldc, int64_t group_count, int64_t *group_size); + +int onemklCgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, float _Complex *alpha, + const float _Complex **a, int64_t *lda, + const float _Complex **b, + int64_t *ldb, float _Complex *beta, + float _Complex **c, int64_t *ldc, + int64_t group_count, int64_t *group_size); + +int onemklZgemmBatched(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t *m, + int64_t *n, int64_t *k, double _Complex *alpha, + const double _Complex **a, int64_t *lda, + const double _Complex **b, + 
int64_t *ldb, double _Complex *beta, + double _Complex **c, int64_t *ldc, + int64_t group_count, int64_t *group_size); + +int onemklStrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + float *alpha, const float **a, int64_t *lda, + float **b, int64_t *ldb, int64_t group_count, + int64_t *group_size); + +int onemklDtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + double *alpha, const double **a, int64_t *lda, + double **b, int64_t *ldb, int64_t group_count, + int64_t *group_size); + +int onemklCtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + float _Complex *alpha, const float _Complex **a, int64_t *lda, + float _Complex **b, int64_t *ldb, int64_t group_count, + int64_t *group_size); + +int onemklZtrsmBatched(syclQueue_t device_queue, onemklSide left_right, + onemklUplo upper_lower, onemklTranspose transa, + onemklDiag unit_diag, int64_t *m, int64_t *n, + double _Complex *alpha, const double _Complex **a, int64_t *lda, + double _Complex **b, int64_t *ldb, int64_t group_count, + int64_t *group_size); + +int onemklHgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + uint16_t alpha, const short *a, int64_t lda, int64_t stridea, + const short *b, int64_t ldb, int64_t strideb, uint16_t beta, + short *c, int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklSgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + float alpha, const float *a, int64_t lda, int64_t stridea, + const float *b, int64_t ldb, int64_t strideb, float beta, + float *c, int64_t ldc, int64_t stridec, int64_t 
batch_size); + +int onemklDgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + double alpha, const double *a, int64_t lda, int64_t stridea, + const double *b, int64_t ldb, int64_t strideb, double beta, + double *c, int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklCgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + float _Complex alpha, const float _Complex *a, int64_t lda, + int64_t stridea, const float _Complex *b, int64_t ldb, + int64_t strideb, float _Complex beta, float _Complex *c, + int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklZgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, + onemklTranspose transb, int64_t m, int64_t n, int64_t k, + double _Complex alpha, const double _Complex *a, int64_t lda, + int64_t stridea, const double _Complex *b, int64_t ldb, + int64_t strideb, double _Complex beta, double _Complex *c, + int64_t ldc, int64_t stridec, int64_t batch_size); + int onemklHgemm(syclQueue_t device_queue, onemklTranspose transA, onemklTranspose transB, int64_t m, int64_t n, int64_t k, uint16_t alpha, const short *A, int64_t lda, const short *B, int64_t ldb, uint16_t beta, short *C, int64_t ldc); -void onemklHgemmBatched(syclQueue_t device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t *m, - int64_t *n, int64_t *k, uint16_t *alpha, - const short **a, int64_t *lda, const short **b, - int64_t *ldb, uint16_t *beta, short **c, - int64_t *ldc, int64_t group_count, int64_t *group_size); - -void onemklSgemmBatched(syclQueue_t device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, - const float **a, int64_t *lda, const float **b, - int64_t *ldb, float *beta, float **c, - int64_t *ldc, int64_t group_count, int64_t *group_size); - -void onemklDgemmBatched(syclQueue_t device_queue, 
onemklTranspose transa, - onemklTranspose transb, int64_t *m, - int64_t *n, int64_t *k, double *alpha, - const double **a, int64_t *lda, const double **b, - int64_t *ldb, double *beta, double **c, - int64_t *ldc, int64_t group_count, int64_t *group_size); - -void onemklCgemmBatched(syclQueue_t device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t *m, - int64_t *n, int64_t *k, float _Complex *alpha, - const float _Complex **a, int64_t *lda, - const float _Complex **b, - int64_t *ldb, float _Complex *beta, - float _Complex **c, int64_t *ldc, - int64_t group_count, int64_t *group_size); - -void onemklZgemmBatched(syclQueue_t device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t *m, - int64_t *n, int64_t *k, double _Complex *alpha, - const double _Complex **a, int64_t *lda, - const double _Complex **b, - int64_t *ldb, double _Complex *beta, - double _Complex **c, int64_t *ldc, - int64_t group_count, int64_t *group_size); - -void onemklHgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t m, int64_t n, int64_t k, - uint16_t alpha, const short *a, int64_t lda, int64_t stridea, - const short *b, int64_t ldb, int64_t strideb, uint16_t beta, - short *c, int64_t ldc, int64_t stridec, int64_t batch_size); -void onemklSgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t m, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stridea, - const float *b, int64_t ldb, int64_t strideb, float beta, - float *c, int64_t ldc, int64_t stridec, int64_t batch_size); -void onemklDgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t m, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stridea, - const double *b, int64_t ldb, int64_t strideb, double beta, - double *c, int64_t ldc, int64_t stridec, int64_t batch_size); -void onemklCgemmBatchStrided(syclQueue_t 
device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t m, int64_t n, int64_t k, - float _Complex alpha, const float _Complex *a, int64_t lda, - int64_t stridea, const float _Complex *b, int64_t ldb, - int64_t strideb, float _Complex beta, float _Complex *c, - int64_t ldc, int64_t stridec, int64_t batch_size); -void onemklZgemmBatchStrided(syclQueue_t device_queue, onemklTranspose transa, - onemklTranspose transb, int64_t m, int64_t n, int64_t k, - double _Complex alpha, const double _Complex *a, int64_t lda, - int64_t stridea, const double _Complex *b, int64_t ldb, - int64_t strideb, double _Complex beta, double _Complex *c, - int64_t ldc, int64_t stridec, int64_t batch_size); - -void onemklSsymm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc); -void onemklDsymm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc); -void onemklCsymm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - float _Complex alpha, const float _Complex *a, int64_t lda, - const float _Complex *b, int64_t ldb, float _Complex beta, - float _Complex *c, int64_t ldc); -void onemklZsymm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - double _Complex alpha, const double _Complex *a, int64_t lda, - const double _Complex *b, int64_t ldb, double _Complex beta, - double _Complex *c, int64_t ldc); - -void onemklSsyrk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, float beta, float *c, int64_t ldc); -void onemklDsyrk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose 
trans, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, double beta, double *c, int64_t ldc); -void onemklCsyrk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, float _Complex alpha, - const float _Complex *a, int64_t lda, float _Complex beta, float _Complex *c, - int64_t ldc); -void onemklZsyrk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, double _Complex alpha, - const double _Complex *a, int64_t lda, double _Complex beta, double _Complex *c, - int64_t ldc); - -void onemklSsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - const float *b, int64_t ldb, float beta, float *c, int64_t ldc); -void onemklDsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, - const double *b, int64_t ldb, double beta, double *c, int64_t ldc); -void onemklCsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, - int64_t n, int64_t k, float _Complex alpha, const float _Complex *a, - int64_t lda, const float _Complex *b, int64_t ldb, float _Complex beta, - float _Complex *c, int64_t ldc); -void onemklZsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, - int64_t n, int64_t k, double _Complex alpha, const double _Complex *a, - int64_t lda, const double _Complex *b, int64_t ldb, double _Complex beta, - double _Complex *c, int64_t ldc); - -void onemklStrmm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo uppler_lower, onemklTranspose trans, - onemklDiag diag, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb); -void onemklDtrmm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo uppler_lower, onemklTranspose trans, - onemklDiag diag, int64_t m, int64_t n, double alpha, - const 
double *a, int64_t lda, double *b, int64_t ldb); -void onemklCtrmm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo uppler_lower, onemklTranspose trans, - onemklDiag diag, int64_t m, int64_t n, float _Complex alpha, - const float _Complex *a, int64_t lda, float _Complex *b, - int64_t ldb); -void onemklZtrmm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo uppler_lower, onemklTranspose trans, - onemklDiag diag, int64_t m, int64_t n, double _Complex alpha, - const double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb); - -void onemklStrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, - onemklTranspose transa, onemklDiag unit_diag, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float *b, int64_t ldb); -void onemklDtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, - onemklTranspose transa, onemklDiag unit_diag, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double *b, int64_t ldb); -void onemklCtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, - onemklTranspose transa, onemklDiag unit_diag, int64_t m, int64_t n, - float _Complex alpha, const float _Complex *a, int64_t lda, float _Complex *b, - int64_t ldb); -void onemklZtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, - onemklTranspose transa, onemklDiag unit_diag, int64_t m, int64_t n, - double _Complex alpha, const double _Complex *a, int64_t lda, double _Complex *b, - int64_t ldb); - -void onemklStrsmBatched(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, onemklTranspose transa, - onemklDiag unit_diag, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, - float **b, int64_t *ldb, int64_t group_count, - int64_t *group_size); - -void onemklDtrsmBatched(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, onemklTranspose transa, - onemklDiag unit_diag, int64_t *m, 
int64_t *n, - double *alpha, const double **a, int64_t *lda, - double **b, int64_t *ldb, int64_t group_count, - int64_t *group_size); - -void onemklCtrsmBatched(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, onemklTranspose transa, - onemklDiag unit_diag, int64_t *m, int64_t *n, - float _Complex *alpha, const float _Complex **a, int64_t *lda, - float _Complex **b, int64_t *ldb, int64_t group_count, - int64_t *group_size); - -void onemklZtrsmBatched(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, onemklTranspose transa, - onemklDiag unit_diag, int64_t *m, int64_t *n, - double _Complex *alpha, const double _Complex **a, int64_t *lda, - double _Complex **b, int64_t *ldb, int64_t group_count, - int64_t *group_size); - -void onemklChemm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - float _Complex alpha, const float _Complex *a, - int64_t lda, const float _Complex *b, int64_t ldb, - float _Complex beta, float _Complex *c, int64_t ldc); -void onemklZhemm(syclQueue_t device_queue, onemklSide left_right, - onemklUplo upper_lower, int64_t m, int64_t n, - double _Complex alpha, const double _Complex *a, - int64_t lda, const double _Complex *b, int64_t ldb, - double _Complex beta, double _Complex *c, int64_t ldc); - -void onemklCherk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, float alpha, - const float _Complex *a, int64_t lda, float beta, - float _Complex *c, int64_t ldc); -void onemklZherk(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, double alpha, - const double _Complex *a, int64_t lda, double beta, - double _Complex *c, int64_t ldc); - -void onemklCher2k(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, - float _Complex alpha, const float _Complex *a, - int64_t lda, const float _Complex *b, int64_t ldb, - float beta, 
float _Complex *c, int64_t ldc); -void onemklZher2k(syclQueue_t device_queue, onemklUplo upper_lower, - onemklTranspose trans, int64_t n, int64_t k, - double _Complex alpha, const double _Complex *a, - int64_t lda, const double _Complex *b, int64_t ldb, - double beta, double _Complex *c, int64_t ldc); - -void onemklSgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, - int64_t n, int64_t kl, int64_t ku, float alpha, const float *a, - int64_t lda, const float *x, int64_t incx, float beta, float *y, - int64_t incy); -void onemklDgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, - int64_t n, int64_t kl, int64_t ku, double alpha, const double *a, - int64_t lda, const double *x, int64_t incx, double beta, double *y, - int64_t incy); -void onemklCgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, - int64_t n, int64_t kl, int64_t ku, float _Complex alpha, const float - _Complex *a, int64_t lda, const float _Complex *x, int64_t incx, - float _Complex beta, float _Complex *y, int64_t incy); -void onemklZgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, - int64_t n, int64_t kl, int64_t ku, double _Complex alpha, - const double _Complex *a, int64_t lda, const double _Complex *x, - int64_t incx, double _Complex beta, double _Complex *y, int64_t incy); - -void onemklSgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, - const float *x, int64_t incx, float beta, float *y, int64_t incy); -void onemklDgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, - const double *x, int64_t incx, double beta, double *y, int64_t incy); -void onemklCgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, - int64_t n, float _Complex alpha, const float _Complex *a, int64_t lda, - const float _Complex *x, int64_t incx, float _Complex beta, - float _Complex *y, int64_t incy); -void 
onemklZgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, - int64_t n, double _Complex alpha, const double _Complex *a, int64_t lda, - const double _Complex *x, int64_t incx, double _Complex beta, - double _Complex *y, int64_t incy); - -void onemklSger(syclQueue_t device_queue, int64_t m, int64_t n, float alpha, - const float *x, int64_t incx, const float *y, int64_t incy, - float *a, int64_t lda); -void onemklDger(syclQueue_t device_queue, int64_t m, int64_t n, double alpha, - const double *x, int64_t incx, const double *y, int64_t incy, - double *a, int64_t lda); -void onemklCgerc(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex alpha, - const float _Complex *x, int64_t incx, const float _Complex *y, int64_t incy, - float _Complex *a, int64_t lda); -void onemklZgerc(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex alpha, - const double _Complex *x, int64_t incx, const double _Complex *y, int64_t incy, - double _Complex *a, int64_t lda); - -void onemklSasum(syclQueue_t device_queue, int64_t n, - const float *x, int64_t incx, float *result); -void onemklDasum(syclQueue_t device_queue, int64_t n, - const double *x, int64_t incx, double *result); -void onemklCasum(syclQueue_t device_queue, int64_t n, - const float _Complex *x, int64_t incx, float *result); -void onemklZasum(syclQueue_t device_queue, int64_t n, - const double _Complex *x, int64_t incx, double *result); - -void onemklSaxpy(syclQueue_t device_queue, int64_t n, float alpha, const float *x, - int64_t incx, float *y, int64_t incy); -void onemklDaxpy(syclQueue_t device_queue, int64_t n, double alpha, const double *x, - int64_t incx, double *y, int64_t incy); -void onemklCaxpy(syclQueue_t device_queue, int64_t n, float _Complex alpha, - const float _Complex *x, int64_t incx, float _Complex *y, int64_t incy); -void onemklZaxpy(syclQueue_t device_queue, int64_t n, double _Complex alpha, - const double _Complex *x, int64_t incx, double _Complex *y, int64_t incy); -void 
onemklHaxpy(syclQueue_t device_queue, int64_t n, uint16_t alpha, const short *x, +int onemklHaxpy(syclQueue_t device_queue, int64_t n, uint16_t alpha, const short *x, int64_t incx, short *y, int64_t incy); -void onemklSaxpby(syclQueue_t device_queue, int64_t n, float alpha, const float *x, - int64_t incx, float beta, float *y, int64_t incy); -void onemklDaxpby(syclQueue_t device_queue, int64_t n, double alpha, const double *x, - int64_t incx, double beta, double *y, int64_t incy); -void onemklCaxpby(syclQueue_t device_queue, int64_t n, float _Complex alpha, - const float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy); -void onemklZaxpby(syclQueue_t device_queue, int64_t n, double _Complex alpha, - const double _Complex *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy); - -void onemklSrot(syclQueue_t device_queue, int64_t n, float *x, - int64_t incx, float *y, int64_t incy, float c, float s); -void onemklDrot(syclQueue_t device_queue, int64_t n, double *x, - int64_t incx, double *y, int64_t incy, double c, double s); -void onemklCrot(syclQueue_t device_queue, int64_t n, float _Complex *x, - int64_t incx, float _Complex *y, int64_t incy, float c, float _Complex s); -void onemklZrot(syclQueue_t device_queue, int64_t n, double _Complex *x, - int64_t incx, double _Complex *y, int64_t incy, double c, double _Complex s); -void onemklCsrot(syclQueue_t device_queue, int64_t n, float _Complex *x, - int64_t incx, float _Complex *y, int64_t incy, float c, float s); -void onemklZdrot(syclQueue_t device_queue, int64_t n, double _Complex *x, - int64_t incx, double _Complex *y, int64_t incy, double c, double s); - -// Level-1: scal oneMKL -void onemklDscal(syclQueue_t device_queue, int64_t n, double alpha, - double *x, int64_t incx); -void onemklSscal(syclQueue_t device_queue, int64_t n, float alpha, - float *x, int64_t incx); -void onemklCscal(syclQueue_t device_queue, int64_t n, float _Complex alpha, - float _Complex *x, 
int64_t incx); -void onemklCsscal(syclQueue_t device_queue, int64_t n, float alpha, - float _Complex *x, int64_t incx); -void onemklZscal(syclQueue_t device_queue, int64_t n, double _Complex alpha, - double _Complex *x, int64_t incx); -void onemklZdscal(syclQueue_t device_queue, int64_t n, double alpha, - double _Complex *x, int64_t incx); -void onemklHscal(syclQueue_t device_queue, int64_t n, uint16_t alpha, +int onemklHscal(syclQueue_t device_queue, int64_t n, uint16_t alpha, short *x, int64_t incx); -void onemklChemv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - float _Complex alpha, const float _Complex *a, int64_t lda, - const float _Complex *x, int64_t incx, float _Complex beta, - float _Complex *y, int64_t incy); -void onemklZhemv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - double _Complex alpha, const double _Complex *a, int64_t lda, - const double _Complex *x, int64_t incx, double _Complex beta, - double _Complex *y, int64_t incy); -void onemklChbmv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - int64_t k, float _Complex alpha, const float _Complex *a, - int64_t lda, const float _Complex *x, int64_t incx, float _Complex beta, +int onemklHnrm2(syclQueue_t device_queue, int64_t n, const short *x, + int64_t incx, short *result); + +int onemklHdot(syclQueue_t device_queue, int64_t n, const short *x, + int64_t incx, const short *y, int64_t incy, short *result); +// BLAS +int onemklSgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t + m, int64_t n, int64_t k, float alpha, float *a, int64_t lda, float *b, int64_t ldb, float + beta, float *c, int64_t ldc); + +int onemklDgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t + m, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, + double beta, double *c, int64_t ldc); + +int onemklCgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t + m, int64_t n, 
int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float + _Complex *b, int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc); + +int onemklZgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t + m, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double + _Complex *b, int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc); + +int onemklSsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t + m, int64_t n, float alpha, float *a, int64_t lda, float *b, int64_t ldb, float beta, + float *c, int64_t ldc); + +int onemklDsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t + m, int64_t n, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, + double *c, int64_t ldc); + +int onemklCsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t + m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, + int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc); + +int onemklZsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t + m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex + *b, int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc); + +int onemklChemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t + m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, + int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc); + +int onemklZhemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t + m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex + *b, int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc); + +int onemklSsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, 
int64_t + n, int64_t k, float alpha, float *a, int64_t lda, float beta, float *c, int64_t ldc); + +int onemklDsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, double alpha, double *a, int64_t lda, double beta, double *c, int64_t ldc); + +int onemklCsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex + beta, float _Complex *c, int64_t ldc); + +int onemklZsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex + beta, double _Complex *c, int64_t ldc); + +int onemklCherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, float alpha, float _Complex *a, int64_t lda, float beta, float _Complex + *c, int64_t ldc); + +int onemklZherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, double alpha, double _Complex *a, int64_t lda, double beta, double + _Complex *c, int64_t ldc); + +int onemklSsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, float alpha, float *a, int64_t lda, float *b, int64_t ldb, float beta, + float *c, int64_t ldc); + +int onemklDsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double + beta, double *c, int64_t ldc); + +int onemklCsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, + int64_t ldb, float _Complex beta, float _Complex *c, int64_t ldc); + +int onemklZsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, double _Complex alpha, 
double _Complex *a, int64_t lda, double _Complex + *b, int64_t ldb, double _Complex beta, double _Complex *c, int64_t ldc); + +int onemklCher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, + int64_t ldb, float beta, float _Complex *c, int64_t ldc); + +int onemklZher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t + n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex + *b, int64_t ldb, double beta, double _Complex *c, int64_t ldc); + +int onemklStrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float alpha, + float *a, int64_t lda, float *b, int64_t ldb); + +int onemklDtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double alpha, + double *a, int64_t lda, double *b, int64_t ldb); + +int onemklCtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex + alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb); + +int onemklZtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex + alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb); + +int onemklStrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float alpha, + float *a, int64_t lda, float *b, int64_t ldb); + +int onemklDtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double alpha, + double *a, int64_t 
lda, double *b, int64_t ldb); + +int onemklCtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex + alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb); + +int onemklZtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex + alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb); + +int onemklSdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float *a, + int64_t lda, float *x, int64_t incx, float *c, int64_t ldc); + +int onemklDdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double *a, + int64_t lda, double *x, int64_t incx, double *c, int64_t ldc); + +int onemklCdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float + _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *c, int64_t + ldc); + +int onemklZdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double + _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *c, + int64_t ldc); + +int onemklSgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float alpha, + float *a, int64_t lda, float *x, int64_t incx, float beta, float *y, int64_t incy); + +int onemklDgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double alpha, + double *a, int64_t lda, double *x, int64_t incx, double beta, double *y, int64_t incy); + +int onemklCgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float + _Complex alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float + _Complex beta, float _Complex *y, int64_t incy); + +int onemklZgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double + _Complex alpha, double _Complex 
*a, int64_t lda, double _Complex *x, int64_t incx, + double _Complex beta, double _Complex *y, int64_t incy); + +int onemklSgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, float alpha, float *a, int64_t lda, float *x, int64_t incx, float beta, + float *y, int64_t incy); + +int onemklDgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, double alpha, double *a, int64_t lda, double *x, int64_t incx, double beta, + double *y, int64_t incy); + +int onemklCgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, float _Complex alpha, float _Complex *a, int64_t lda, float _Complex *x, + int64_t incx, float _Complex beta, float _Complex *y, int64_t incy); + +int onemklZgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, double _Complex alpha, double _Complex *a, int64_t lda, double _Complex + *x, int64_t incx, double _Complex beta, double _Complex *y, int64_t incy); + +int onemklSger(syclQueue_t device_queue, int64_t m, int64_t n, float alpha, float *x, int64_t incx, + float *y, int64_t incy, float *a, int64_t lda); + +int onemklDger(syclQueue_t device_queue, int64_t m, int64_t n, double alpha, double *x, int64_t incx, + double *y, int64_t incy, double *a, int64_t lda); + +int onemklCgerc(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex alpha, float _Complex + *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda); + +int onemklZgerc(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex alpha, double + _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, + int64_t lda); + +int onemklCgeru(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex alpha, float _Complex + *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda); + +int onemklZgeru(syclQueue_t 
device_queue, int64_t m, int64_t n, double _Complex alpha, double + _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, + int64_t lda); + +int onemklChbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float + _Complex alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float + _Complex beta, float _Complex *y, int64_t incy); + +int onemklZhbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double + _Complex alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, + double _Complex beta, double _Complex *y, int64_t incy); + +int onemklChemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, + float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex beta, float _Complex *y, int64_t incy); -void onemklZhbmv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - int64_t k, double _Complex alpha, const double _Complex *a, - int64_t lda, const double _Complex *x, int64_t incx, double _Complex beta, - double _Complex *y, int64_t incy); -void onemklCher(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float alpha, - const float _Complex *x, int64_t incx, float _Complex *a, + +int onemklZhemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, + double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex + beta, double _Complex *y, int64_t incy); + +int onemklCher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float + _Complex *x, int64_t incx, float _Complex *a, int64_t lda); + +int onemklZher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double + _Complex *x, int64_t incx, double _Complex *a, int64_t lda); + +int onemklCher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, + float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex 
*a, int64_t lda); -void onemklZher(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double alpha, - const double _Complex *x, int64_t incx, double _Complex *a, + +int onemklZher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, + double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex + *a, int64_t lda); + +int onemklChpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, + float _Complex *a, float _Complex *x, int64_t incx, float _Complex beta, float + _Complex *y, int64_t incy); + +int onemklZhpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, + double _Complex *a, double _Complex *x, int64_t incx, double _Complex beta, double + _Complex *y, int64_t incy); + +int onemklChpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float + _Complex *x, int64_t incx, float _Complex *a); + +int onemklZhpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double + _Complex *x, int64_t incx, double _Complex *a); + +int onemklChpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, + float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a); + +int onemklZhpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, + double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex + *a); + +int onemklSsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float alpha, + float *a, int64_t lda, float *x, int64_t incx, float beta, float *y, int64_t incy); + +int onemklDsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double + alpha, double *a, int64_t lda, double *x, int64_t incx, double beta, double *y, int64_t + incy); + +int onemklSsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *a, + int64_t lda, float 
*x, int64_t incx, float beta, float *y, int64_t incy); + +int onemklDsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double + *a, int64_t lda, double *x, int64_t incx, double beta, double *y, int64_t incy); + +int onemklCsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, + float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex beta, + float _Complex *y, int64_t incy); + +int onemklZsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, + double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex + beta, double _Complex *y, int64_t incy); + +int onemklSsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *x, + int64_t incx, float *a, int64_t lda); + +int onemklDsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double *x, + int64_t incx, double *a, int64_t lda); + +int onemklCsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, + float _Complex *x, int64_t incx, float _Complex *a, int64_t lda); + +int onemklZsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, + double _Complex *x, int64_t incx, double _Complex *a, int64_t lda); + +int onemklSsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *x, + int64_t incx, float *y, int64_t incy, float *a, int64_t lda); + +int onemklDsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double + *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda); + +int onemklCsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex alpha, + float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda); -void onemklCher2(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex alpha, - const float _Complex *x, int64_t incx, 
const float _Complex *y, int64_t incy, - float _Complex *a, int64_t lda); -void onemklZher2(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex alpha, - const double _Complex *x, int64_t incx, const double _Complex *y, int64_t incy, - double _Complex *a, int64_t lda); - -void onemklSsbmv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, const float *x, - int64_t incx, float beta, float *y, int64_t incy); -void onemklDsbmv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, const double *x, - int64_t incx, double beta, double *y, int64_t incy); -void onemklSsymv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy); -void onemklDsymv(syclQueue_t device_queue, onemklUplo uplo, int64_t n, - double alpha, const double *a, int64_t lda, const double *x, - int64_t incx, double beta, double *y, int64_t incy); -void onemklSsyr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float alpha, - const float *x, int64_t incx, float *a, int64_t lda); -void onemklDsyr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double alpha, - const double *x, int64_t incx, double *a, int64_t lda); -void onemklStbmv(syclQueue_t device_queue, onemklUplo uplo, - onemklTranspose trans, onemklDiag diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx); - -void onemklDtbmv(syclQueue_t device_queue, onemklUplo uplo, - onemklTranspose trans, onemklDiag diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx); - -void onemklCtbmv(syclQueue_t device_queue, onemklUplo uplo, - onemklTranspose trans, onemklDiag diag, int64_t n, - int64_t k, const float _Complex *a, int64_t lda, float _Complex *x, - int64_t incx); -void onemklZtbmv(syclQueue_t device_queue, onemklUplo uplo, - onemklTranspose trans, 
onemklDiag diag, int64_t n, - int64_t k, const double _Complex *a, int64_t lda, double _Complex *x, - int64_t incx); +int onemklZsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex alpha, + double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex + *a, int64_t lda); -void onemklStrmv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const float *a, int64_t lda, float *x, - int64_t incx); +int onemklSspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *a, + float *x, int64_t incx, float beta, float *y, int64_t incy); -void onemklDtrmv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const double *a, int64_t lda, double *x, - int64_t incx); +int onemklDspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double + *a, double *x, int64_t incx, double beta, double *y, int64_t incy); -void onemklCtrmv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const float _Complex *a, int64_t lda, float _Complex *x, - int64_t incx); +int onemklSspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *x, + int64_t incx, float *a); + +int onemklDspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double *x, + int64_t incx, double *a); + +int onemklSspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float alpha, float *x, + int64_t incx, float *y, int64_t incy, float *a); + +int onemklDspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double alpha, double + *x, int64_t incx, double *y, int64_t incy, double *a); + +int onemklStbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t + incx); + +int onemklDtbmv(syclQueue_t device_queue, 
onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t + incx); + +int onemklCtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float + _Complex *x, int64_t incx); + +int onemklZtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double + _Complex *x, int64_t incx); + +int onemklStbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t + incx); + +int onemklDtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t + incx); + +int onemklCtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float + _Complex *x, int64_t incx); + +int onemklZtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double + _Complex *x, int64_t incx); + +int onemklStpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx); -void onemklZtrmv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const double _Complex *a, int64_t lda, double _Complex *x, +int onemklDtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx); + +int onemklCtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, float _Complex *a, float 
_Complex *x, int64_t incx); + +int onemklZtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t + incx); + +int onemklStpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx); + +int onemklDtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx); + +int onemklCtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, float _Complex *a, float _Complex *x, int64_t incx); + +int onemklZtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t + incx); + +int onemklStrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx); + +int onemklDtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx); + +int onemklCtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx); -// trsv -void onemklStrsv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const float *a, int64_t lda, float *x, +int onemklZtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx); -void onemklDtrsv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const double *a, int64_t lda, double *x, +int onemklStrsv(syclQueue_t device_queue, 
onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx); + +int onemklDtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx); + +int onemklCtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx); -void onemklCtrsv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const float _Complex *a, int64_t lda, float _Complex *x, +int onemklZtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx); -void onemklZtrsv(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, - onemklDiag diag, int64_t n, const double _Complex *a, int64_t lda, double _Complex *x, +int onemklCdotc(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex + *y, int64_t incy, float _Complex *result); + +int onemklZdotc(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double + _Complex *y, int64_t incy, double _Complex *result); + +int onemklCdotu(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex + *y, int64_t incy, float _Complex *result); + +int onemklZdotu(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double + _Complex *y, int64_t incy, double _Complex *result); + +int onemklSiamax(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result); + +int onemklDiamax(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result); + +int onemklCiamax(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t + *result); + +int onemklZiamax(syclQueue_t device_queue, int64_t 
n, double _Complex *x, int64_t incx, int64_t + *result); + +int onemklSiamin(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result); + +int onemklDiamin(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result); + +int onemklCiamin(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t + *result); + +int onemklZiamin(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t + *result); + +int onemklSasum(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *result); + +int onemklDasum(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *result); + +int onemklCasum(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float *result); + +int onemklZasum(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double + *result); + +int onemklSaxpy(syclQueue_t device_queue, int64_t n, float alpha, float *x, int64_t incx, float *y, + int64_t incy); + +int onemklDaxpy(syclQueue_t device_queue, int64_t n, double alpha, double *x, int64_t incx, double *y, + int64_t incy); + +int onemklCaxpy(syclQueue_t device_queue, int64_t n, float _Complex alpha, float _Complex *x, int64_t + incx, float _Complex *y, int64_t incy); + +int onemklZaxpy(syclQueue_t device_queue, int64_t n, double _Complex alpha, double _Complex *x, + int64_t incx, double _Complex *y, int64_t incy); + +int onemklSaxpby(syclQueue_t device_queue, int64_t n, float alpha, float *x, int64_t incx, float beta, + float *y, int64_t incy); + +int onemklDaxpby(syclQueue_t device_queue, int64_t n, double alpha, double *x, int64_t incx, double + beta, double *y, int64_t incy); + +int onemklCaxpby(syclQueue_t device_queue, int64_t n, float _Complex alpha, float _Complex *x, + int64_t incx, float _Complex beta, float _Complex *y, int64_t incy); + +int onemklZaxpby(syclQueue_t device_queue, int64_t n, double _Complex alpha, double _Complex *x, + int64_t incx, double _Complex 
beta, double _Complex *y, int64_t incy); + +int onemklScopy(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy); + +int onemklDcopy(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy); + +int onemklCcopy(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex + *y, int64_t incy); + +int onemklZcopy(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double + _Complex *y, int64_t incy); + +int onemklSdot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, + float *result); + +int onemklDdot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, + double *result); + +int onemklSsdsdot(syclQueue_t device_queue, int64_t n, float sb, float *x, int64_t incx, float *y, + int64_t incy, float *result); + +int onemklSnrm2(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *result); + +int onemklDnrm2(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *result); + +int onemklCnrm2(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float *result); + +int onemklZnrm2(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double + *result); + +int onemklSrot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, + float c, float s); + +int onemklDrot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, + double c, double s); + +int onemklCSrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex + *y, int64_t incy, float c, float s); + +int onemklCrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex + *y, int64_t incy, float c, float _Complex s); + +int onemklZDrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double + _Complex *y, int64_t incy, double c, double s); + +int 
onemklZrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex + *y, int64_t incy, double c, double _Complex s); + +int onemklSrotg(syclQueue_t device_queue, float *a, float *b, float *c, float *s); + +int onemklDrotg(syclQueue_t device_queue, double *a, double *b, double *c, double *s); + +int onemklCrotg(syclQueue_t device_queue, float _Complex *a, float _Complex *b, float *c, float + _Complex *s); + +int onemklZrotg(syclQueue_t device_queue, double _Complex *a, double _Complex *b, double *c, double + _Complex *s); + +int onemklSrotm(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, + float *param); + +int onemklDrotm(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, + double *param); + +int onemklSrotmg(syclQueue_t device_queue, float *d1, float *d2, float *x1, float y1, float *param); + +int onemklDrotmg(syclQueue_t device_queue, double *d1, double *d2, double *x1, double y1, double + *param); + +int onemklSscal(syclQueue_t device_queue, int64_t n, float alpha, float *x, int64_t incx); + +int onemklDscal(syclQueue_t device_queue, int64_t n, double alpha, double *x, int64_t incx); + +int onemklCSscal(syclQueue_t device_queue, int64_t n, float alpha, float _Complex *x, int64_t incx); + +int onemklZDscal(syclQueue_t device_queue, int64_t n, double alpha, double _Complex *x, int64_t incx); + +int onemklCscal(syclQueue_t device_queue, int64_t n, float _Complex alpha, float _Complex *x, int64_t + incx); + +int onemklZscal(syclQueue_t device_queue, int64_t n, double _Complex alpha, double _Complex *x, int64_t incx); -// Supported Level-1: Nrm2 -void onemklDnrm2(syclQueue_t device_queue, int64_t n, const double *x, - int64_t incx, double *result); -void onemklSnrm2(syclQueue_t device_queue, int64_t n, const float *x, - int64_t incx, float *result); -void onemklCnrm2(syclQueue_t device_queue, int64_t n, const float _Complex *x, - int64_t incx, float *result); -void 
onemklZnrm2(syclQueue_t device_queue, int64_t n, const double _Complex *x, - int64_t incx, double *result); -void onemklHnrm2(syclQueue_t device_queue, int64_t n, const short *x, - int64_t incx, short *result); - -void onemklSdot(syclQueue_t device_queue, int64_t n, const float *x, - int64_t incx, const float *y, int64_t incy, float *result); -void onemklDdot(syclQueue_t device_queue, int64_t n, const double *x, - int64_t incx, const double *y, int64_t incy, double *result); -void onemklCdotc(syclQueue_t device_queue, int64_t n, const float _Complex *x, - int64_t incx, const float _Complex *y, int64_t incy, - float _Complex *result); -void onemklZdotc(syclQueue_t device_queue, int64_t n, const double _Complex *x, - int64_t incx, const double _Complex *y, int64_t incy, - double _Complex *result); -void onemklCdotu(syclQueue_t device_queue, int64_t n, const float _Complex *x, - int64_t incx, const float _Complex *y, int64_t incy, - float _Complex *result); -void onemklZdotu(syclQueue_t device_queue, int64_t n, const double _Complex *x, - int64_t incx, const double _Complex *y, int64_t incy, - double _Complex *result); -void onemklHdot(syclQueue_t device_queue, int64_t n, const short *x, - int64_t incx, const short *y, int64_t incy, short *result); - -void onemklDcopy(syclQueue_t device_queue, int64_t n, const double *x, - int64_t incx, double *y, int64_t incy); -void onemklScopy(syclQueue_t device_queue, int64_t n, const float *x, - int64_t incx, float *y, int64_t incy); -void onemklZcopy(syclQueue_t device_queue, int64_t n, const double _Complex *x, - int64_t incx, double _Complex *y, int64_t incy); -void onemklCcopy(syclQueue_t device_queue, int64_t n, const float _Complex *x, - int64_t incx, float _Complex *y, int64_t incy); - -void onemklDamax(syclQueue_t device_queue, int64_t n, const double *x, int64_t incx, - int64_t *result); -void onemklSamax(syclQueue_t device_queue, int64_t n, const float *x, int64_t incx, - int64_t *result); -void onemklZamax(syclQueue_t 
device_queue, int64_t n, const double _Complex *x, int64_t incx, - int64_t *result); -void onemklCamax(syclQueue_t device_queue, int64_t n, const float _Complex *x, int64_t incx, - int64_t *result); - -void onemklDamin(syclQueue_t device_queue, int64_t n, const double *x, int64_t incx, - int64_t *result); -void onemklSamin(syclQueue_t device_queue, int64_t n, const float *x, int64_t incx, - int64_t *result); -void onemklZamin(syclQueue_t device_queue, int64_t n, const double _Complex *x, int64_t incx, - int64_t *result); -void onemklCamin(syclQueue_t device_queue, int64_t n, const float _Complex *x, int64_t incx, - int64_t *result); - -void onemklSswap(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, - float *y, int64_t incy); -void onemklDswap(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, - double *y, int64_t incy); -void onemklCswap(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, - float _Complex *y, int64_t incy); -void onemklZswap(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, - double _Complex *y, int64_t incy); +int onemklSswap(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy); + +int onemklDswap(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy); + +int onemklCswap(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex + *y, int64_t incy); + +int onemklZswap(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double + _Complex *y, int64_t incy); + +int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, + int64_t m, int64_t n, int64_t k, float alpha, float *a, int64_t lda, int64_t + stride_a, float *b, int64_t ldb, int64_t stride_b, float beta, float *c, + int64_t ldc, int64_t stride_c, int64_t batch_size); + +int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, + int64_t m, int64_t n, 
int64_t k, double alpha, double *a, int64_t lda, int64_t + stride_a, double *b, int64_t ldb, int64_t stride_b, double beta, double *c, + int64_t ldc, int64_t stride_c, int64_t batch_size); + +int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, + int64_t m, int64_t n, int64_t k, float _Complex alpha, float _Complex *a, + int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t + stride_b, float _Complex beta, float _Complex *c, int64_t ldc, int64_t + stride_c, int64_t batch_size); + +int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, + int64_t m, int64_t n, int64_t k, double _Complex alpha, double _Complex *a, + int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t + stride_b, double _Complex beta, double _Complex *c, int64_t ldc, int64_t + stride_c, int64_t batch_size); + +int onemklSsyrk_batch(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + int64_t n, int64_t k, float alpha, float *a, int64_t lda, int64_t stride_a, + float beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size); + +int onemklDsyrk_batch(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + int64_t n, int64_t k, double alpha, double *a, int64_t lda, int64_t stride_a, + double beta, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size); + +int onemklCsyrk_batch(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + int64_t n, int64_t k, float _Complex alpha, float _Complex *a, int64_t lda, + int64_t stride_a, float _Complex beta, float _Complex *c, int64_t ldc, int64_t + stride_c, int64_t batch_size); + +int onemklZsyrk_batch(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, + int64_t n, int64_t k, double _Complex alpha, double _Complex *a, int64_t lda, + int64_t stride_a, double _Complex beta, double _Complex *c, int64_t ldc, + int64_t stride_c, int64_t batch_size); + 
+int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float + alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t + stride_b, int64_t batch_size); + +int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double + alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, + int64_t stride_b, int64_t batch_size); + +int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float + _Complex alpha, float _Complex *a, int64_t lda, int64_t stride_a, float + _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size); + +int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, + onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double + _Complex alpha, double _Complex *a, int64_t lda, int64_t stride_a, double + _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size); + +int onemklSgemv_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float + alpha, float *a, int64_t lda, int64_t stridea, float *x, int64_t incx, int64_t + stridex, float beta, float *y, int64_t incy, int64_t stridey, int64_t + batch_size); + +int onemklDgemv_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double + alpha, double *a, int64_t lda, int64_t stridea, double *x, int64_t incx, + int64_t stridex, double beta, double *y, int64_t incy, int64_t stridey, + int64_t batch_size); + +int onemklCgemv_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float + _Complex alpha, float _Complex *a, int64_t lda, int64_t stridea, float + _Complex *x, int64_t incx, int64_t stridex, float _Complex beta, float 
+ _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size); + +int onemklZgemv_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double + _Complex alpha, double _Complex *a, int64_t lda, int64_t stridea, double + _Complex *x, int64_t incx, int64_t stridex, double _Complex beta, double + _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size); + +int onemklSdgmm_batch(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float + *a, int64_t lda, int64_t stridea, float *x, int64_t incx, int64_t stridex, + float *c, int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklDdgmm_batch(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double + *a, int64_t lda, int64_t stridea, double *x, int64_t incx, int64_t stridex, + double *c, int64_t ldc, int64_t stridec, int64_t batch_size); + +int onemklCdgmm_batch(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float + _Complex *a, int64_t lda, int64_t stridea, float _Complex *x, int64_t incx, + int64_t stridex, float _Complex *c, int64_t ldc, int64_t stridec, int64_t + batch_size); + +int onemklZdgmm_batch(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double + _Complex *a, int64_t lda, int64_t stridea, double _Complex *x, int64_t incx, + int64_t stridex, double _Complex *c, int64_t ldc, int64_t stridec, int64_t + batch_size); + +int onemklSaxpy_batch(syclQueue_t device_queue, int64_t n, float alpha, float *x, int64_t incx, + int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size); + +int onemklDaxpy_batch(syclQueue_t device_queue, int64_t n, double alpha, double *x, int64_t incx, + int64_t stridex, double *y, int64_t incy, int64_t stridey, int64_t + batch_size); + +int onemklCaxpy_batch(syclQueue_t device_queue, int64_t n, float _Complex alpha, float _Complex *x, + int64_t incx, int64_t stridex, float _Complex *y, int64_t incy, int64_t + stridey, int64_t batch_size); + +int 
onemklZaxpy_batch(syclQueue_t device_queue, int64_t n, double _Complex alpha, double _Complex + *x, int64_t incx, int64_t stridex, double _Complex *y, int64_t incy, int64_t + stridey, int64_t batch_size); + +int onemklScopy_batch(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t stridex, + float *y, int64_t incy, int64_t stridey, int64_t batch_size); + +int onemklDcopy_batch(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t stridex, + double *y, int64_t incy, int64_t stridey, int64_t batch_size); + +int onemklCcopy_batch(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t + stridex, float _Complex *y, int64_t incy, int64_t stridey, int64_t + batch_size); + +int onemklZcopy_batch(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t + stridex, double _Complex *y, int64_t incy, int64_t stridey, int64_t + batch_size); + +int onemklSgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, + onemklTranspose transb, int64_t n, int64_t k, float alpha, float *a, int64_t lda, + float *b, int64_t ldb, float beta, float *c, int64_t ldc); + +int onemklDgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, + onemklTranspose transb, int64_t n, int64_t k, double alpha, double *a, int64_t lda, + double *b, int64_t ldb, double beta, double *c, int64_t ldc); + +int onemklCgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, + onemklTranspose transb, int64_t n, int64_t k, float _Complex alpha, float _Complex + *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex beta, float _Complex + *c, int64_t ldc); + +int onemklZgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, + onemklTranspose transb, int64_t n, int64_t k, double _Complex alpha, double + _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex beta, + double _Complex *c, int64_t ldc); + +int 
onemklSimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float + alpha, float *ab, int64_t lda, int64_t ldb); + +int onemklDimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double + alpha, double *ab, int64_t lda, int64_t ldb); + +int onemklCimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float + _Complex alpha, float _Complex *ab, int64_t lda, int64_t ldb); + +int onemklZimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double + _Complex alpha, double _Complex *ab, int64_t lda, int64_t ldb); + +int onemklSomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float + alpha, float *a, int64_t lda, float *b, int64_t ldb); + +int onemklDomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double + alpha, double *a, int64_t lda, double *b, int64_t ldb); + +int onemklComatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float + _Complex alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb); + +int onemklZomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double + _Complex alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb); + +int onemklSomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, + int64_t m, int64_t n, float alpha, float *a, int64_t lda, float beta, float *b, + int64_t ldb, float *c, int64_t ldc); + +int onemklDomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, + int64_t m, int64_t n, double alpha, double *a, int64_t lda, double beta, double *b, + int64_t ldb, double *c, int64_t ldc); + +int onemklComatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, + int64_t m, int64_t n, float _Complex alpha, float _Complex *a, int64_t lda, float + _Complex beta, float _Complex *b, int64_t ldb, float _Complex *c, int64_t 
ldc); + +int onemklZomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, + int64_t m, int64_t n, double _Complex alpha, double _Complex *a, int64_t lda, + double _Complex beta, double _Complex *b, int64_t ldb, double _Complex *c, int64_t + ldc); + +int onemklSimatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + float alpha, float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t + batch_size); + +int onemklDimatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + double alpha, double *ab, int64_t lda, int64_t ldb, int64_t stride, + int64_t batch_size); + +int onemklCimatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + float _Complex alpha, float _Complex *ab, int64_t lda, int64_t ldb, + int64_t stride, int64_t batch_size); + +int onemklZimatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + double _Complex alpha, double _Complex *ab, int64_t lda, int64_t ldb, + int64_t stride, int64_t batch_size); + +int onemklSomatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + float alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t + ldb, int64_t stride_b, int64_t batch_size); + +int onemklDomatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + double alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t + ldb, int64_t stride_b, int64_t batch_size); + +int onemklComatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + float _Complex alpha, float _Complex *a, int64_t lda, int64_t stride_a, + float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size); + +int onemklZomatcopy_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + double _Complex alpha, double _Complex *a, int64_t lda, int64_t stride_a, + double _Complex *b, int64_t ldb, int64_t stride_b, 
int64_t batch_size); + +int onemklSomatadd_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose + transb, int64_t m, int64_t n, float alpha, float *a, int64_t lda, int64_t + stride_a, float beta, float *b, int64_t ldb, int64_t stride_b, float *c, + int64_t ldc, int64_t stride_c, int64_t batch_size); + +int onemklDomatadd_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose + transb, int64_t m, int64_t n, double alpha, double *a, int64_t lda, int64_t + stride_a, double beta, double *b, int64_t ldb, int64_t stride_b, double *c, + int64_t ldc, int64_t stride_c, int64_t batch_size); + +int onemklComatadd_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose + transb, int64_t m, int64_t n, float _Complex alpha, float _Complex *a, + int64_t lda, int64_t stride_a, float _Complex beta, float _Complex *b, + int64_t ldb, int64_t stride_b, float _Complex *c, int64_t ldc, int64_t + stride_c, int64_t batch_size); + +int onemklZomatadd_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose + transb, int64_t m, int64_t n, double _Complex alpha, double _Complex *a, + int64_t lda, int64_t stride_a, double _Complex beta, double _Complex *b, + int64_t ldb, int64_t stride_b, double _Complex *c, int64_t ldc, int64_t + stride_c, int64_t batch_size); + +// LAPACK +int onemklSpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float + *scratchpad, int64_t scratchpad_size); + +int onemklDpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double + *scratchpad, int64_t scratchpad_size); + +int onemklCpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t + lda, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t + lda, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSpotrf_scratchpad_size(syclQueue_t 
device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklDpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklCpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklZpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int onemklSpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float *a, + int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size); + +int onemklDpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double *a, + int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size); + +int onemklCpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float _Complex + *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklZpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double _Complex + *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, + int64_t scratchpad_size); + +int64_t onemklSpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t nrhs, int64_t lda, int64_t ldb); + +int64_t onemklDpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t nrhs, int64_t lda, int64_t ldb); + +int64_t onemklCpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t nrhs, int64_t lda, int64_t ldb); + +int64_t onemklZpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t nrhs, int64_t lda, int64_t ldb); + +int onemklSpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float + *scratchpad, int64_t scratchpad_size); + +int onemklDpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double + *scratchpad, int64_t 
scratchpad_size); + +int onemklCpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t + lda, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t + lda, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklDpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklCpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklZpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklSgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklDgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklCgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklZgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int onemklCgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float + *d, float *e, float _Complex *tauq, float _Complex *taup, float _Complex + *scratchpad, int64_t scratchpad_size); + +int onemklDgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *d, + double *e, double *tauq, double *taup, double *scratchpad, int64_t + scratchpad_size); + +int onemklSgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *d, float + *e, float *tauq, float *taup, float *scratchpad, int64_t scratchpad_size); + +int onemklZgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, + double *d, double *e, double _Complex *tauq, double _Complex *taup, double _Complex + *scratchpad, int64_t 
scratchpad_size); + +int64_t onemklSgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklDgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklCgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklZgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int onemklCgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float + _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklDgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *tau, + double *scratchpad, int64_t scratchpad_size); + +int onemklSgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *tau, + float *scratchpad, int64_t scratchpad_size); + +int onemklZgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, + double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); + +int onemklCgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, + int64_t n, float _Complex *a, int64_t lda, float *s, float _Complex *u, int64_t ldu, + float _Complex *vt, int64_t ldvt, float _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklZgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, + int64_t n, double _Complex *a, int64_t lda, double *s, double _Complex *u, int64_t + ldu, double _Complex *vt, int64_t ldvt, double _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklDgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, + int64_t n, double *a, int64_t lda, double *s, double *u, int64_t ldu, double *vt, + int64_t ldvt, double *scratchpad, int64_t scratchpad_size); + +int onemklSgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, + int64_t n, float 
*a, int64_t lda, float *s, float *u, int64_t ldu, float *vt, int64_t + ldvt, float *scratchpad, int64_t scratchpad_size); + +int64_t onemklSgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklDgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklCgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklZgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int onemklCgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, + int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklDgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t + *ipiv, double *scratchpad, int64_t scratchpad_size); + +int onemklSgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t *ipiv, + float *scratchpad, int64_t scratchpad_size); + +int onemklZgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, + int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t stride_ipiv, + int64_t batch_size); + +int64_t onemklDgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t stride_ipiv, + int64_t batch_size); + +int64_t onemklCgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t stride_ipiv, + int64_t batch_size); + +int64_t onemklZgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t stride_ipiv, + int64_t batch_size); + +int onemklCgetrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t + lda, int64_t stride_a, int64_t 
*ipiv, int64_t stride_ipiv, int64_t + batch_size, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklDgetrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, + int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, + double *scratchpad, int64_t scratchpad_size); + +int onemklSgetrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t + stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float + *scratchpad, int64_t scratchpad_size); + +int onemklZgetrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t + lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t + batch_size, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t batch_size); + +int64_t onemklDgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t batch_size); + +int64_t onemklCgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t batch_size); + +int64_t onemklZgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t batch_size); + +int onemklCgetrfnp_batch(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t + lda, int64_t stride_a, int64_t batch_size, float _Complex *scratchpad, + int64_t scratchpad_size); + +int onemklDgetrfnp_batch(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, + int64_t stride_a, int64_t batch_size, double *scratchpad, int64_t + scratchpad_size); + +int onemklSgetrfnp_batch(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, + int64_t stride_a, int64_t batch_size, float *scratchpad, int64_t + scratchpad_size); + +int 
onemklZgetrfnp_batch(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t + lda, int64_t stride_a, int64_t batch_size, double _Complex *scratchpad, + int64_t scratchpad_size); + +int64_t onemklSgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda); + +int64_t onemklDgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda); + +int64_t onemklCgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda); + +int64_t onemklZgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda); + +int onemklCgetri(syclQueue_t device_queue, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, + float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklDgetri(syclQueue_t device_queue, int64_t n, double *a, int64_t lda, int64_t *ipiv, double + *scratchpad, int64_t scratchpad_size); + +int onemklSgetri(syclQueue_t device_queue, int64_t n, float *a, int64_t lda, int64_t *ipiv, float + *scratchpad, int64_t scratchpad_size); + +int onemklZgetri(syclQueue_t device_queue, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, + double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, + int64_t nrhs, int64_t lda, int64_t ldb); + +int64_t onemklDgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, + int64_t nrhs, int64_t lda, int64_t ldb); + +int64_t onemklCgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, + int64_t nrhs, int64_t lda, int64_t ldb); + +int64_t onemklZgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, + int64_t nrhs, int64_t lda, int64_t ldb); + +int onemklCgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float + _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *b, int64_t ldb, float + _Complex *scratchpad, int64_t scratchpad_size); + +int 
onemklDgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double + *a, int64_t lda, int64_t *ipiv, double *b, int64_t ldb, double *scratchpad, int64_t + scratchpad_size); + +int onemklSgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, + int64_t lda, int64_t *ipiv, float *b, int64_t ldb, float *scratchpad, int64_t + scratchpad_size); + +int onemklZgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double + _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *b, int64_t ldb, double + _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t stride_ipiv, int64_t ldb, int64_t + stride_b, int64_t batch_size); + +int64_t onemklDgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t stride_ipiv, int64_t ldb, int64_t + stride_b, int64_t batch_size); + +int64_t onemklCgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t stride_ipiv, int64_t ldb, int64_t + stride_b, int64_t batch_size); + +int64_t onemklZgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t stride_ipiv, int64_t ldb, int64_t + stride_b, int64_t batch_size); + +int onemklCgetrs_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, + float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t + stride_ipiv, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t + batch_size, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklDgetrs_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, + double *a, int64_t lda, 
int64_t stride_a, int64_t *ipiv, int64_t + stride_ipiv, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, + double *scratchpad, int64_t scratchpad_size); + +int onemklSgetrs_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, + float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, + float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float + *scratchpad, int64_t scratchpad_size); + +int onemklZgetrs_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, + double _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t + stride_ipiv, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t + batch_size, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSgetrsnp_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t ldb, int64_t stride_b, int64_t + batch_size); + +int64_t onemklDgetrsnp_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t ldb, int64_t stride_b, int64_t + batch_size); + +int64_t onemklCgetrsnp_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t ldb, int64_t stride_b, int64_t + batch_size); + +int64_t onemklZgetrsnp_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t ldb, int64_t stride_b, int64_t + batch_size); + +int onemklCgetrsnp_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, + float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, + int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex + *scratchpad, int64_t scratchpad_size); + +int onemklDgetrsnp_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t 
nrhs, + double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t + stride_b, int64_t batch_size, double *scratchpad, int64_t + scratchpad_size); + +int onemklSgetrsnp_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, + float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t + stride_b, int64_t batch_size, float *scratchpad, int64_t + scratchpad_size); + +int onemklZgetrsnp_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, + double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, + int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex + *scratchpad, int64_t scratchpad_size); + +int64_t onemklCheev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, + int64_t n, int64_t lda); + +int64_t onemklZheev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, + int64_t n, int64_t lda); + +int onemklCheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, float + _Complex *a, int64_t lda, float *w, float _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklZheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, double + _Complex *a, int64_t lda, double *w, double _Complex *scratchpad, int64_t + scratchpad_size); + +int64_t onemklCheevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, + int64_t n, int64_t lda); + +int64_t onemklZheevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, + int64_t n, int64_t lda); + +int onemklCheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, float + _Complex *a, int64_t lda, float *w, float _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklZheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, double + _Complex *a, int64_t lda, double *w, double _Complex *scratchpad, int64_t + scratchpad_size); + +int64_t 
onemklChegvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, + onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb); + +int64_t onemklZhegvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, + onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb); + +int onemklChegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t + n, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *w, float + _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZhegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t + n, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *w, double + _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklChetrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklZhetrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int onemklChetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t + lda, float *d, float *e, float _Complex *tau, float _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklZhetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t + lda, double *d, double *e, double _Complex *tau, double _Complex *scratchpad, + int64_t scratchpad_size); + +int onemklChetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t + lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZhetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t + lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklChetrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklZhetrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int 
onemklSorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, float + *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size); + +int onemklDorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, + double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size); + +int64_t onemklSorgbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, + int64_t n, int64_t k, int64_t lda); + +int64_t onemklDorgbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, + int64_t n, int64_t k, int64_t lda); + +int64_t onemklSorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, + int64_t lda); + +int64_t onemklDorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, + int64_t lda); + +int onemklDorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, + double *tau, double *scratchpad, int64_t scratchpad_size); + +int onemklSorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, + float *tau, float *scratchpad, int64_t scratchpad_size); + +int64_t onemklSormqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose + trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t + ldc); + +int64_t onemklDormqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose + trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t + ldc); + +int onemklDormqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, + int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *c, int64_t ldc, + double *scratchpad, int64_t scratchpad_size); + +int onemklSormqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, + int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float + *scratchpad, int64_t scratchpad_size); + +int64_t 
onemklSsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, + int64_t ldz); + +int64_t onemklDsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, + int64_t ldz); + +int64_t onemklCsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, + int64_t ldz); + +int64_t onemklZsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, + int64_t ldz); + +int onemklCsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, float *d, float *e, float + _Complex *z, int64_t ldz, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklDsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, double *d, double *e, double + *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size); + +int onemklSsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, float *d, float *e, float *z, + int64_t ldz, float *scratchpad, int64_t scratchpad_size); + +int onemklZsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, double *d, double *e, double + _Complex *z, int64_t ldz, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSsyev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, + int64_t n, int64_t lda); + +int64_t onemklDsyev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, + int64_t n, int64_t lda); + +int onemklDsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, double *a, + int64_t lda, double *w, double *scratchpad, int64_t scratchpad_size); + +int onemklSsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, float *a, + int64_t lda, float *w, float *scratchpad, int64_t scratchpad_size); + +int64_t onemklSsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, + int64_t n, int64_t lda); + +int64_t onemklDsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, + int64_t n, int64_t lda); + 
+int onemklDsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, double *a, + int64_t lda, double *w, double *scratchpad, int64_t scratchpad_size); + +int onemklSsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, float *a, + int64_t lda, float *w, float *scratchpad, int64_t scratchpad_size); + +int64_t onemklSsyevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev + range, onemklUplo uplo, int64_t n, int64_t lda, float vl, + float vu, int64_t il, int64_t iu, float abstol, int64_t ldz); + +int64_t onemklDsyevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev + range, onemklUplo uplo, int64_t n, int64_t lda, double vl, + double vu, int64_t il, int64_t iu, double abstol, int64_t ldz); + +int onemklDsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, + int64_t n, double *a, int64_t lda, double vl, double vu, int64_t il, int64_t iu, double + abstol, int64_t *m, double *w, double *z, int64_t ldz, double *scratchpad, int64_t + scratchpad_size); + +int onemklSsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, + int64_t n, float *a, int64_t lda, float vl, float vu, int64_t il, int64_t iu, float + abstol, int64_t *m, float *w, float *z, int64_t ldz, float *scratchpad, int64_t + scratchpad_size); + +int64_t onemklSsygvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, + onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb); + +int64_t onemklDsygvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, + onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb); + +int onemklDsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t + n, double *a, int64_t lda, double *b, int64_t ldb, double *w, double *scratchpad, + int64_t scratchpad_size); + +int onemklSsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t + n, 
float *a, int64_t lda, float *b, int64_t ldb, float *w, float *scratchpad, int64_t + scratchpad_size); + +int64_t onemklSsygvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, + onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, + int64_t ldb, float vl, float vu, int64_t il, int64_t iu, float + abstol, int64_t ldz); + +int64_t onemklDsygvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, + onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, + int64_t ldb, double vl, double vu, int64_t il, int64_t iu, + double abstol, int64_t ldz); + +int onemklDsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, + onemklUplo uplo, int64_t n, double *a, int64_t lda, double *b, int64_t ldb, double vl, + double vu, int64_t il, int64_t iu, double abstol, int64_t *m, double *w, double *z, + int64_t ldz, double *scratchpad, int64_t scratchpad_size); + +int onemklSsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, + onemklUplo uplo, int64_t n, float *a, int64_t lda, float *b, int64_t ldb, float vl, + float vu, int64_t il, int64_t iu, float abstol, int64_t *m, float *w, float *z, int64_t + ldz, float *scratchpad, int64_t scratchpad_size); + +int64_t onemklSsytrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklDsytrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int onemklDsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double + *d, double *e, double *tau, double *scratchpad, int64_t scratchpad_size); + +int onemklSsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float + *d, float *e, float *tau, float *scratchpad, int64_t scratchpad_size); + +int64_t onemklStrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose + trans, onemklDiag diag, int64_t n, int64_t nrhs, 
int64_t lda, + int64_t ldb); + +int64_t onemklDtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose + trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, + int64_t ldb); + +int64_t onemklCtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose + trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, + int64_t ldb); + +int64_t onemklZtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose + trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, + int64_t ldb); + +int onemklCtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag + diag, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, + int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklDtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag + diag, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double + *scratchpad, int64_t scratchpad_size); + +int onemklStrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag + diag, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float + *scratchpad, int64_t scratchpad_size); + +int onemklZtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag + diag, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, + int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size); + +int onemklCungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, float + _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklZungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, + double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, + int64_t scratchpad_size); + +int64_t onemklCungbr_scratchpad_size(syclQueue_t 
device_queue, onemklGenerate vect, int64_t m, + int64_t n, int64_t k, int64_t lda); + +int64_t onemklZungbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, + int64_t n, int64_t k, int64_t lda); + +int64_t onemklCungqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, + int64_t lda); + +int64_t onemklZungqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, + int64_t lda); + +int onemklCungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t + lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double _Complex *a, + int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t + scratchpad_size); + +int64_t onemklCunmqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose + trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t + ldc); + +int64_t onemklZunmqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose + trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t + ldc); + +int onemklCunmqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, + int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float + _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZunmqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, + int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double + _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size); + +int onemklSgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *tau, + float *scratchpad, int64_t scratchpad_size); + +int onemklDgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *tau, + double *scratchpad, int64_t 
scratchpad_size); + +int onemklCgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float + _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, + double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklDgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklCgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int64_t onemklZgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); + +int onemklSormrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, + int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float + *scratchpad, int64_t scratchpad_size); + +int onemklDormrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, + int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *c, int64_t ldc, + double *scratchpad, int64_t scratchpad_size); + +int64_t onemklSormrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose + trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t + ldc); + +int64_t onemklDormrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose + trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t + ldc); + +int onemklCunmrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, + int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float + _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZunmrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, + int64_t n, int64_t k, double _Complex *a, int64_t lda, 
double _Complex *tau, double + _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklCunmrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose + trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t + ldc); + +int64_t onemklZunmrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose + trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t + ldc); + +int onemklSsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, int64_t + *ipiv, float *scratchpad, int64_t scratchpad_size); + +int onemklDsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, + int64_t *ipiv, double *scratchpad, int64_t scratchpad_size); + +int onemklCsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t + lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t + lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklSsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklDsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklCsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklZsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int onemklSorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float + *tau, float *scratchpad, int64_t scratchpad_size); + +int onemklDorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double + *tau, double *scratchpad, int64_t scratchpad_size); + +int64_t onemklSorgtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + 
+int64_t onemklDorgtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int onemklCungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t + lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t + lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); + +int64_t onemklCungtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int64_t onemklZungtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda); + +int onemklSormtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose + trans, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *c, int64_t ldc, + float *scratchpad, int64_t scratchpad_size); + +int onemklDormtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose + trans, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *c, int64_t + ldc, double *scratchpad, int64_t scratchpad_size); + +int64_t onemklSormtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, + onemklTranspose trans, int64_t m, int64_t n, int64_t lda, + int64_t ldc); + +int64_t onemklDormtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, + onemklTranspose trans, int64_t m, int64_t n, int64_t lda, + int64_t ldc); + +int onemklCunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose + trans, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, + float _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklZunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose + trans, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, + double _Complex *c, int64_t 
ldc, double _Complex *scratchpad, int64_t + scratchpad_size); + +int64_t onemklCunmtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, + onemklTranspose trans, int64_t m, int64_t n, int64_t lda, + int64_t ldc); + +int64_t onemklZunmtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, + onemklTranspose trans, int64_t m, int64_t n, int64_t lda, + int64_t ldc); + +int onemklSpotrf_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, + int64_t stride_a, int64_t batch_size, float *scratchpad, int64_t + scratchpad_size); + +int onemklDpotrf_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, + int64_t stride_a, int64_t batch_size, double *scratchpad, int64_t + scratchpad_size); + +int onemklCpotrf_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, + int64_t lda, int64_t stride_a, int64_t batch_size, float _Complex + *scratchpad, int64_t scratchpad_size); + +int onemklZpotrf_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, + int64_t lda, int64_t stride_a, int64_t batch_size, double _Complex + *scratchpad, int64_t scratchpad_size); + +int onemklSpotrs_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float *a, + int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, + int64_t batch_size, float *scratchpad, int64_t scratchpad_size); + +int onemklDpotrs_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double + *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, + int64_t batch_size, double *scratchpad, int64_t scratchpad_size); + +int onemklCpotrs_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float + _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, + int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t + scratchpad_size); + +int 
onemklZpotrs_batch(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double + _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, + int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t + scratchpad_size); + +int onemklSgeqrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t + stride_a, float *tau, int64_t stride_tau, int64_t batch_size, float + *scratchpad, int64_t scratchpad_size); + +int onemklDgeqrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, + int64_t stride_a, double *tau, int64_t stride_tau, int64_t batch_size, + double *scratchpad, int64_t scratchpad_size); + +int onemklCgeqrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t + lda, int64_t stride_a, float _Complex *tau, int64_t stride_tau, int64_t + batch_size, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZgeqrf_batch(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t + lda, int64_t stride_a, double _Complex *tau, int64_t stride_tau, int64_t + batch_size, double _Complex *scratchpad, int64_t scratchpad_size); + +int onemklSorgqr_batch(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float *a, int64_t + lda, int64_t stride_a, float *tau, int64_t stride_tau, int64_t batch_size, + float *scratchpad, int64_t scratchpad_size); + +int onemklDorgqr_batch(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double *a, int64_t + lda, int64_t stride_a, double *tau, int64_t stride_tau, int64_t batch_size, + double *scratchpad, int64_t scratchpad_size); + +int onemklCungqr_batch(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float _Complex *a, + int64_t lda, int64_t stride_a, float _Complex *tau, int64_t stride_tau, + int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); + +int onemklZungqr_batch(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double 
_Complex *a, + int64_t lda, int64_t stride_a, double _Complex *tau, int64_t stride_tau, + int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); + +int onemklSgels_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + int64_t nrhs, float *a, int64_t lda, int64_t stridea, float *b, int64_t ldb, + int64_t strideb, int64_t batchsize, float *scratchpad, int64_t + scratchpad_size); + +int onemklDgels_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + int64_t nrhs, double *a, int64_t lda, int64_t stridea, double *b, int64_t ldb, + int64_t strideb, int64_t batchsize, double *scratchpad, int64_t + scratchpad_size); + +int onemklCgels_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + int64_t nrhs, float _Complex *a, int64_t lda, int64_t stridea, float _Complex + *b, int64_t ldb, int64_t strideb, int64_t batchsize, float _Complex + *scratchpad, int64_t scratchpad_size); + +int onemklZgels_batch(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, + int64_t nrhs, double _Complex *a, int64_t lda, int64_t stridea, double + _Complex *b, int64_t ldb, int64_t strideb, int64_t batchsize, double _Complex + *scratchpad, int64_t scratchpad_size); + +int64_t onemklSpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda, int64_t stride_a, int64_t batch_size); + +int64_t onemklDpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda, int64_t stride_a, int64_t batch_size); + +int64_t onemklCpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda, int64_t stride_a, int64_t batch_size); + +int64_t onemklZpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t lda, int64_t stride_a, int64_t batch_size); + +int64_t onemklSpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t nrhs, 
int64_t lda, int64_t stride_a, int64_t + ldb, int64_t stride_b, int64_t batch_size); + +int64_t onemklDpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t nrhs, int64_t lda, int64_t stride_a, int64_t + ldb, int64_t stride_b, int64_t batch_size); + +int64_t onemklCpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t nrhs, int64_t lda, int64_t stride_a, int64_t + ldb, int64_t stride_b, int64_t batch_size); + +int64_t onemklZpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, + int64_t nrhs, int64_t lda, int64_t stride_a, int64_t + ldb, int64_t stride_b, int64_t batch_size); + +int64_t onemklSgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t stride_tau, + int64_t batch_size); + +int64_t onemklDgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t stride_tau, + int64_t batch_size); + +int64_t onemklCgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t stride_tau, + int64_t batch_size); + +int64_t onemklZgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t lda, int64_t stride_a, int64_t stride_tau, + int64_t batch_size); + +int64_t onemklSorgqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t k, int64_t lda, int64_t stride_a, int64_t + stride_tau, int64_t batch_size); + +int64_t onemklDorgqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t k, int64_t lda, int64_t stride_a, int64_t + stride_tau, int64_t batch_size); + +int64_t onemklCungqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, + int64_t k, int64_t lda, int64_t stride_a, int64_t + stride_tau, int64_t batch_size); + +int64_t onemklZungqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t m, 
int64_t n, + int64_t k, int64_t lda, int64_t stride_a, int64_t + stride_tau, int64_t batch_size); + +int64_t onemklSgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t ldb, int64_t stride_b, int64_t + batch_size); + +int64_t onemklDgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t ldb, int64_t stride_b, int64_t + batch_size); + +int64_t onemklCgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t ldb, int64_t stride_b, int64_t + batch_size); + +int64_t onemklZgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, + int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t + stride_a, int64_t ldb, int64_t stride_b, int64_t + batch_size); void onemklDestroy(void); #ifdef __cplusplus diff --git a/lib/mkl/wrappers.jl b/lib/mkl/wrappers.jl index c5e5388e..bbc9d844 100644 --- a/lib/mkl/wrappers.jl +++ b/lib/mkl/wrappers.jl @@ -572,8 +572,8 @@ for (fname, elty, cty, sty, supty) in ((:onemklSrot,:Float32,:Float32,:Float32,: (:onemklDrot,:Float64,:Float64,:Float64,:Number), (:onemklCrot,:ComplexF32,:Float32,:ComplexF32,:Number), (:onemklZrot,:ComplexF64,:Float64,:ComplexF64,:Number), - (:onemklCsrot,:ComplexF32,:Float32,:Float32,:Real), - (:onemklZdrot,:ComplexF64,:Float64,:Float64,:Real)) + (:onemklCSrot,:ComplexF32,:Float32,:Float32,:Real), + (:onemklZDrot,:ComplexF64,:Float64,:Float64,:Real)) @eval begin function rot!(n::Integer, x::oneStridedArray{$elty}, @@ -722,8 +722,8 @@ function sbmv(uplo::Char, k::Integer, a::oneStridedArray{T}, sbmv(uplo, k, one(T), a, x) end -for (fname, elty, celty) in ((:onemklCsscal, :Float32, :ComplexF32), - (:onemklZdscal, :Float64, :ComplexF64)) +for (fname, elty, celty) in ((:onemklCSscal, :Float32, :ComplexF32), + 
(:onemklZDscal, :Float64, :ComplexF64)) @eval begin function scal!(n::Integer, alpha::$elty, @@ -851,10 +851,10 @@ end ## iamax for (fname, elty) in - ((:onemklDamax,:Float64), - (:onemklSamax,:Float32), - (:onemklZamax,:ComplexF64), - (:onemklCamax,:ComplexF32)) + ((:onemklDiamax,:Float64), + (:onemklSiamax,:Float32), + (:onemklZiamax,:ComplexF64), + (:onemklCiamax,:ComplexF32)) @eval begin function iamax(x::oneStridedArray{$elty}) n = length(x) @@ -868,10 +868,10 @@ end ## iamin for (fname, elty) in - ((:onemklDamin,:Float64), - (:onemklSamin,:Float32), - (:onemklZamin,:ComplexF64), - (:onemklCamin,:ComplexF32)) + ((:onemklDiamin,:Float64), + (:onemklSiamin,:Float32), + (:onemklZiamin,:ComplexF64), + (:onemklCiamin,:ComplexF32)) @eval begin function iamin(x::StridedArray{$elty}) n = length(x) @@ -885,9 +885,9 @@ end ## swap for (fname, elty) in ((:onemklSswap,:Float32), - (:onemklDswap,:Float64), - (:onemklCswap,:ComplexF32), - (:onemklZswap,:ComplexF64)) + (:onemklDswap,:Float64), + (:onemklCswap,:ComplexF32), + (:onemklZswap,:ComplexF64)) @eval begin function swap!(n::Integer, x::oneStridedArray{$elty}, diff --git a/lib/support/liboneapi_support.jl b/lib/support/liboneapi_support.jl index e3d8ca25..e56717ad 100644 --- a/lib/support/liboneapi_support.jl +++ b/lib/support/liboneapi_support.jl @@ -93,53 +93,81 @@ end ONEMKL_SIDE_RIGHT = 1 end -function onemklSgemm(device_queue, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, - ldc) - @ccall liboneapi_support.onemklSgemm(device_queue::syclQueue_t, transA::onemklTranspose, - transB::onemklTranspose, m::Int64, n::Int64, - k::Int64, alpha::Cfloat, A::ZePtr{Cfloat}, - lda::Int64, B::ZePtr{Cfloat}, ldb::Int64, - beta::Cfloat, C::ZePtr{Cfloat}, ldc::Int64)::Cint +@cenum onemklOffset::UInt32 begin + ONEMKL_OFFSET_ROW = 0 + ONEMKL_OFFSET_COL = 1 + ONEMKL_OFFSET_FIX = 2 end -function onemklDgemm(device_queue, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, - ldc) - @ccall 
liboneapi_support.onemklDgemm(device_queue::syclQueue_t, transA::onemklTranspose, - transB::onemklTranspose, m::Int64, n::Int64, - k::Int64, alpha::Cdouble, A::ZePtr{Cdouble}, - lda::Int64, B::ZePtr{Cdouble}, ldb::Int64, - beta::Cdouble, C::ZePtr{Cdouble}, ldc::Int64)::Cint +@cenum onemklJob::UInt32 begin + ONEMKL_JOB_N = 0 + ONEMKL_JOB_V = 1 + ONEMKL_JOB_U = 2 + ONEMKL_JOB_A = 3 + ONEMKL_JOB_S = 4 + ONEMKL_JOB_O = 5 end -function onemklCgemm(device_queue, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, - ldc) - @ccall liboneapi_support.onemklCgemm(device_queue::syclQueue_t, transA::onemklTranspose, - transB::onemklTranspose, m::Int64, n::Int64, - k::Int64, alpha::ComplexF32, A::ZePtr{ComplexF32}, - lda::Int64, B::ZePtr{ComplexF32}, ldb::Int64, - beta::ComplexF32, C::ZePtr{ComplexF32}, - ldc::Int64)::Cint +@cenum onemklGenerate::UInt32 begin + ONEMKL_GENERATE_Q = 0 + ONEMKL_GENERATE_P = 1 + ONEMKL_GENERATE_N = 2 + ONEMKL_GENERATE_V = 3 end -function onemklZgemm(device_queue, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, - ldc) - @ccall liboneapi_support.onemklZgemm(device_queue::syclQueue_t, transA::onemklTranspose, - transB::onemklTranspose, m::Int64, n::Int64, - k::Int64, alpha::ComplexF64, A::ZePtr{ComplexF64}, - lda::Int64, B::ZePtr{ComplexF64}, ldb::Int64, - beta::ComplexF64, C::ZePtr{ComplexF64}, - ldc::Int64)::Cint +@cenum onemklCompz::UInt32 begin + ONEMKL_COMPZ_N = 0 + ONEMKL_COMPZ_V = 1 + ONEMKL_COMPZ_I = 2 end -function onemklHgemm(device_queue, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, - ldc) - @ccall liboneapi_support.onemklHgemm(device_queue::syclQueue_t, transA::onemklTranspose, - transB::onemklTranspose, m::Int64, n::Int64, - k::Int64, alpha::Float16, A::ZePtr{Float16}, - lda::Int64, B::ZePtr{Float16}, ldb::Int64, - beta::Float16, C::ZePtr{Float16}, ldc::Int64)::Cint +@cenum onemklDirect::UInt32 begin + ONEMKL_DIRECT_F = 0 + ONEMKL_DIRECT_B = 1 +end + +@cenum onemklStorev::UInt32 begin + ONEMKL_STOREV_C = 0 + 
ONEMKL_STOREV_R = 1 +end + +@cenum onemklRangev::UInt32 begin + ONEMKL_RANGEV_A = 0 + ONEMKL_RANGEV_V = 1 + ONEMKL_RANGEV_I = 2 +end + +@cenum onemklOrder::UInt32 begin + ONEMKL_ORDER_B = 0 + ONEMKL_ORDER_E = 1 +end + +@cenum onemklJobsvd::UInt32 begin + ONEMKL_JOBSVD_N = 0 + ONEMKL_JOBSVD_A = 1 + ONEMKL_JOBSVD_O = 2 + ONEMKL_JOBSVD_S = 3 +end + +@cenum onemklLayout::UInt32 begin + ONEMKL_LAYOUT_ROW = 0 + ONEMKL_LAYOUT_COL = 1 +end + +@cenum onemklIndex::UInt32 begin + ONEMKL_INDEX_ZERO = 0 + ONEMKL_INDEX_ONE = 1 +end + +@cenum onemklProperty::UInt32 begin + ONEMKL_PROPERTY_SYMMETRIC = 0 + ONEMKL_PROPERTY_SORTED = 1 end +mutable struct MatrixHandle_st end + +const MatrixHandle_t = Ptr{MatrixHandle_st} + function onemklHgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size) @ccall liboneapi_support.onemklHgemmBatched(device_queue::syclQueue_t, @@ -152,7 +180,7 @@ function onemklHgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda beta::ZePtr{Float16}, c::ZePtr{Ptr{Float16}}, ldc::ZePtr{Int64}, group_count::Int64, - group_size::ZePtr{Int64})::Cvoid + group_size::ZePtr{Int64})::Cint end function onemklSgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, @@ -166,7 +194,7 @@ function onemklSgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda ldb::ZePtr{Int64}, beta::ZePtr{Cfloat}, c::ZePtr{Ptr{Cfloat}}, ldc::ZePtr{Int64}, group_count::Int64, - group_size::ZePtr{Int64})::Cvoid + group_size::ZePtr{Int64})::Cint end function onemklDgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, @@ -181,7 +209,7 @@ function onemklDgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda beta::ZePtr{Cdouble}, c::ZePtr{Ptr{Cdouble}}, ldc::ZePtr{Int64}, group_count::Int64, - group_size::ZePtr{Int64})::Cvoid + group_size::ZePtr{Int64})::Cint end function onemklCgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, @@ -197,7 +225,7 @@ 
function onemklCgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda ldb::ZePtr{Int64}, beta::ZePtr{ComplexF32}, c::ZePtr{Ptr{ComplexF32}}, ldc::ZePtr{Int64}, group_count::Int64, - group_size::ZePtr{Int64})::Cvoid + group_size::ZePtr{Int64})::Cint end function onemklZgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, @@ -213,7 +241,65 @@ function onemklZgemmBatched(device_queue, transa, transb, m, n, k, alpha, a, lda ldb::ZePtr{Int64}, beta::ZePtr{ComplexF64}, c::ZePtr{Ptr{ComplexF64}}, ldc::ZePtr{Int64}, group_count::Int64, - group_size::ZePtr{Int64})::Cvoid + group_size::ZePtr{Int64})::Cint +end + +function onemklStrsmBatched(device_queue, left_right, upper_lower, transa, unit_diag, m, n, + alpha, a, lda, b, ldb, group_count, group_size) + @ccall liboneapi_support.onemklStrsmBatched(device_queue::syclQueue_t, + left_right::onemklSide, + upper_lower::onemklUplo, + transa::onemklTranspose, + unit_diag::onemklDiag, m::ZePtr{Int64}, + n::ZePtr{Int64}, alpha::ZePtr{Cfloat}, + a::ZePtr{Ptr{Cfloat}}, lda::ZePtr{Int64}, + b::ZePtr{Ptr{Cfloat}}, ldb::ZePtr{Int64}, + group_count::Int64, + group_size::ZePtr{Int64})::Cint +end + +function onemklDtrsmBatched(device_queue, left_right, upper_lower, transa, unit_diag, m, n, + alpha, a, lda, b, ldb, group_count, group_size) + @ccall liboneapi_support.onemklDtrsmBatched(device_queue::syclQueue_t, + left_right::onemklSide, + upper_lower::onemklUplo, + transa::onemklTranspose, + unit_diag::onemklDiag, m::ZePtr{Int64}, + n::ZePtr{Int64}, alpha::ZePtr{Cdouble}, + a::ZePtr{Ptr{Cdouble}}, lda::ZePtr{Int64}, + b::ZePtr{Ptr{Cdouble}}, ldb::ZePtr{Int64}, + group_count::Int64, + group_size::ZePtr{Int64})::Cint +end + +function onemklCtrsmBatched(device_queue, left_right, upper_lower, transa, unit_diag, m, n, + alpha, a, lda, b, ldb, group_count, group_size) + @ccall liboneapi_support.onemklCtrsmBatched(device_queue::syclQueue_t, + left_right::onemklSide, + upper_lower::onemklUplo, + 
transa::onemklTranspose, + unit_diag::onemklDiag, m::ZePtr{Int64}, + n::ZePtr{Int64}, alpha::ZePtr{ComplexF32}, + a::ZePtr{Ptr{ComplexF32}}, + lda::ZePtr{Int64}, + b::ZePtr{Ptr{ComplexF32}}, + ldb::ZePtr{Int64}, group_count::Int64, + group_size::ZePtr{Int64})::Cint +end + +function onemklZtrsmBatched(device_queue, left_right, upper_lower, transa, unit_diag, m, n, + alpha, a, lda, b, ldb, group_count, group_size) + @ccall liboneapi_support.onemklZtrsmBatched(device_queue::syclQueue_t, + left_right::onemklSide, + upper_lower::onemklUplo, + transa::onemklTranspose, + unit_diag::onemklDiag, m::ZePtr{Int64}, + n::ZePtr{Int64}, alpha::ZePtr{ComplexF64}, + a::ZePtr{Ptr{ComplexF64}}, + lda::ZePtr{Int64}, + b::ZePtr{Ptr{ComplexF64}}, + ldb::ZePtr{Int64}, group_count::Int64, + group_size::ZePtr{Int64})::Cint end function onemklHgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a, lda, @@ -228,7 +314,7 @@ function onemklHgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a ldb::Int64, strideb::Int64, beta::Float16, c::ZePtr{Float16}, ldc::Int64, stridec::Int64, - batch_size::Int64)::Cvoid + batch_size::Int64)::Cint end function onemklSgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a, lda, @@ -243,7 +329,7 @@ function onemklSgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a ldb::Int64, strideb::Int64, beta::Cfloat, c::ZePtr{Cfloat}, ldc::Int64, stridec::Int64, - batch_size::Int64)::Cvoid + batch_size::Int64)::Cint end function onemklDgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a, lda, @@ -258,7 +344,7 @@ function onemklDgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a ldb::Int64, strideb::Int64, beta::Cdouble, c::ZePtr{Cdouble}, ldc::Int64, stridec::Int64, - batch_size::Int64)::Cvoid + batch_size::Int64)::Cint end function onemklCgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a, lda, @@ -273,7 +359,7 @@ function onemklCgemmBatchStrided(device_queue, transa, transb, m, n, 
k, alpha, a ldb::Int64, strideb::Int64, beta::ComplexF32, c::ZePtr{ComplexF32}, ldc::Int64, stridec::Int64, - batch_size::Int64)::Cvoid + batch_size::Int64)::Cint end function onemklZgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a, lda, @@ -288,7 +374,78 @@ function onemklZgemmBatchStrided(device_queue, transa, transb, m, n, k, alpha, a ldb::Int64, strideb::Int64, beta::ComplexF64, c::ZePtr{ComplexF64}, ldc::Int64, stridec::Int64, - batch_size::Int64)::Cvoid + batch_size::Int64)::Cint +end + +function onemklHgemm(device_queue, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, + ldc) + @ccall liboneapi_support.onemklHgemm(device_queue::syclQueue_t, transA::onemklTranspose, + transB::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::Float16, A::ZePtr{Float16}, + lda::Int64, B::ZePtr{Float16}, ldb::Int64, + beta::Float16, C::ZePtr{Float16}, ldc::Int64)::Cint +end + +function onemklHaxpy(device_queue, n, alpha, x, incx, y, incy) + @ccall liboneapi_support.onemklHaxpy(device_queue::syclQueue_t, n::Int64, + alpha::Float16, x::ZePtr{Float16}, incx::Int64, + y::ZePtr{Float16}, incy::Int64)::Cint +end + +function onemklHscal(device_queue, n, alpha, x, incx) + @ccall liboneapi_support.onemklHscal(device_queue::syclQueue_t, n::Int64, + alpha::Float16, x::ZePtr{Float16}, + incx::Int64)::Cint +end + +function onemklHnrm2(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklHnrm2(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Float16}, incx::Int64, + result::RefOrZeRef{Float16})::Cint +end + +function onemklHdot(device_queue, n, x, incx, y, incy, result) + @ccall liboneapi_support.onemklHdot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Float16}, incx::Int64, y::ZePtr{Float16}, + incy::Int64, result::RefOrZeRef{Float16})::Cint +end + +function onemklSgemm(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, + ldc) + @ccall liboneapi_support.onemklSgemm(device_queue::syclQueue_t, transa::onemklTranspose, + 
transb::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::Cfloat, a::ZePtr{Cfloat}, + lda::Int64, b::ZePtr{Cfloat}, ldb::Int64, + beta::Cfloat, c::ZePtr{Cfloat}, ldc::Int64)::Cint +end + +function onemklDgemm(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, + ldc) + @ccall liboneapi_support.onemklDgemm(device_queue::syclQueue_t, transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::Cdouble, a::ZePtr{Cdouble}, + lda::Int64, b::ZePtr{Cdouble}, ldb::Int64, + beta::Cdouble, c::ZePtr{Cdouble}, ldc::Int64)::Cint +end + +function onemklCgemm(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, + ldc) + @ccall liboneapi_support.onemklCgemm(device_queue::syclQueue_t, transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::ComplexF32, a::ZePtr{ComplexF32}, + lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64, + beta::ComplexF32, c::ZePtr{ComplexF32}, + ldc::Int64)::Cint +end + +function onemklZgemm(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, + ldc) + @ccall liboneapi_support.onemklZgemm(device_queue::syclQueue_t, transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::ComplexF64, a::ZePtr{ComplexF64}, + lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64, + beta::ComplexF64, c::ZePtr{ComplexF64}, + ldc::Int64)::Cint end function onemklSsymm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, @@ -297,7 +454,7 @@ function onemklSsymm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, upper_lower::onemklUplo, m::Int64, n::Int64, alpha::Cfloat, a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64, beta::Cfloat, - c::ZePtr{Cfloat}, ldc::Int64)::Cvoid + c::ZePtr{Cfloat}, ldc::Int64)::Cint end function onemklDsymm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, @@ -306,7 +463,7 @@ function onemklDsymm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, 
upper_lower::onemklUplo, m::Int64, n::Int64, alpha::Cdouble, a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64, beta::Cdouble, - c::ZePtr{Cdouble}, ldc::Int64)::Cvoid + c::ZePtr{Cdouble}, ldc::Int64)::Cint end function onemklCsymm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, @@ -316,7 +473,7 @@ function onemklCsymm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, alpha::ComplexF32, a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64, beta::ComplexF32, c::ZePtr{ComplexF32}, - ldc::Int64)::Cvoid + ldc::Int64)::Cint end function onemklZsymm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, @@ -326,22 +483,41 @@ function onemklZsymm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, alpha::ComplexF64, a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64, beta::ComplexF64, c::ZePtr{ComplexF64}, - ldc::Int64)::Cvoid + ldc::Int64)::Cint +end + +function onemklChemm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc) + @ccall liboneapi_support.onemklChemm(device_queue::syclQueue_t, left_right::onemklSide, + upper_lower::onemklUplo, m::Int64, n::Int64, + alpha::ComplexF32, a::ZePtr{ComplexF32}, + lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64, + beta::ComplexF32, c::ZePtr{ComplexF32}, + ldc::Int64)::Cint +end + +function onemklZhemm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc) + @ccall liboneapi_support.onemklZhemm(device_queue::syclQueue_t, left_right::onemklSide, + upper_lower::onemklUplo, m::Int64, n::Int64, + alpha::ComplexF64, a::ZePtr{ComplexF64}, + lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64, + beta::ComplexF64, c::ZePtr{ComplexF64}, + ldc::Int64)::Cint end function onemklSsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc) @ccall liboneapi_support.onemklSsyrk(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, n::Int64, k::Int64, alpha::Cfloat, 
a::ZePtr{Cfloat}, lda::Int64, - beta::Cfloat, c::ZePtr{Cfloat}, ldc::Int64)::Cvoid + beta::Cfloat, c::ZePtr{Cfloat}, ldc::Int64)::Cint end function onemklDsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc) @ccall liboneapi_support.onemklDsyrk(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, n::Int64, k::Int64, alpha::Cdouble, a::ZePtr{Cdouble}, lda::Int64, - beta::Cdouble, c::ZePtr{Cdouble}, - ldc::Int64)::Cvoid + beta::Cdouble, c::ZePtr{Cdouble}, ldc::Int64)::Cint end function onemklCsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc) @@ -349,7 +525,7 @@ function onemklCsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta trans::onemklTranspose, n::Int64, k::Int64, alpha::ComplexF32, a::ZePtr{ComplexF32}, lda::Int64, beta::ComplexF32, c::ZePtr{ComplexF32}, - ldc::Int64)::Cvoid + ldc::Int64)::Cint end function onemklZsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc) @@ -357,7 +533,23 @@ function onemklZsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta trans::onemklTranspose, n::Int64, k::Int64, alpha::ComplexF64, a::ZePtr{ComplexF64}, lda::Int64, beta::ComplexF64, c::ZePtr{ComplexF64}, - ldc::Int64)::Cvoid + ldc::Int64)::Cint +end + +function onemklCherk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc) + @ccall liboneapi_support.onemklCherk(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, n::Int64, k::Int64, + alpha::ComplexF32, a::ZePtr{ComplexF32}, + lda::Int64, beta::ComplexF32, c::ZePtr{ComplexF32}, + ldc::Int64)::Cint +end + +function onemklZherk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc) + @ccall liboneapi_support.onemklZherk(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, n::Int64, k::Int64, + alpha::ComplexF64, a::ZePtr{ComplexF64}, + lda::Int64, beta::ComplexF64, c::ZePtr{ComplexF64}, + ldc::Int64)::Cint end function 
onemklSsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, @@ -367,7 +559,7 @@ function onemklSsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, n::Int64, k::Int64, alpha::Cfloat, a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64, beta::Cfloat, c::ZePtr{Cfloat}, - ldc::Int64)::Cvoid + ldc::Int64)::Cint end function onemklDsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, @@ -377,7 +569,7 @@ function onemklDsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, n::Int64, k::Int64, alpha::Cdouble, a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64, beta::Cdouble, c::ZePtr{Cdouble}, - ldc::Int64)::Cvoid + ldc::Int64)::Cint end function onemklCsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, @@ -388,7 +580,7 @@ function onemklCsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64, beta::ComplexF32, c::ZePtr{ComplexF32}, - ldc::Int64)::Cvoid + ldc::Int64)::Cint end function onemklZsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, @@ -399,199 +591,163 @@ function onemklZsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64, beta::ComplexF64, c::ZePtr{ComplexF64}, - ldc::Int64)::Cvoid + ldc::Int64)::Cint +end + +function onemklCher2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, + c, ldc) + @ccall liboneapi_support.onemklCher2k(device_queue::syclQueue_t, + upper_lower::onemklUplo, trans::onemklTranspose, + n::Int64, k::Int64, alpha::ComplexF32, + a::ZePtr{ComplexF32}, lda::Int64, + b::ZePtr{ComplexF32}, ldb::Int64, + beta::ComplexF32, c::ZePtr{ComplexF32}, + ldc::Int64)::Cint +end + +function onemklZher2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, + c, ldc) + @ccall liboneapi_support.onemklZher2k(device_queue::syclQueue_t, + 
upper_lower::onemklUplo, trans::onemklTranspose, + n::Int64, k::Int64, alpha::ComplexF64, + a::ZePtr{ComplexF64}, lda::Int64, + b::ZePtr{ComplexF64}, ldb::Int64, + beta::ComplexF64, c::ZePtr{ComplexF64}, + ldc::Int64)::Cint end -function onemklStrmm(device_queue, left_right, uppler_lower, trans, diag, m, n, alpha, a, - lda, b, ldb) +function onemklStrmm(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb) @ccall liboneapi_support.onemklStrmm(device_queue::syclQueue_t, left_right::onemklSide, - uppler_lower::onemklUplo, trans::onemklTranspose, - diag::onemklDiag, m::Int64, n::Int64, + upper_lower::onemklUplo, trans::onemklTranspose, + unit_diag::onemklDiag, m::Int64, n::Int64, alpha::Cfloat, a::ZePtr{Cfloat}, lda::Int64, - b::ZePtr{Cfloat}, ldb::Int64)::Cvoid + b::ZePtr{Cfloat}, ldb::Int64)::Cint end -function onemklDtrmm(device_queue, left_right, uppler_lower, trans, diag, m, n, alpha, a, - lda, b, ldb) +function onemklDtrmm(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb) @ccall liboneapi_support.onemklDtrmm(device_queue::syclQueue_t, left_right::onemklSide, - uppler_lower::onemklUplo, trans::onemklTranspose, - diag::onemklDiag, m::Int64, n::Int64, + upper_lower::onemklUplo, trans::onemklTranspose, + unit_diag::onemklDiag, m::Int64, n::Int64, alpha::Cdouble, a::ZePtr{Cdouble}, lda::Int64, - b::ZePtr{Cdouble}, ldb::Int64)::Cvoid + b::ZePtr{Cdouble}, ldb::Int64)::Cint end -function onemklCtrmm(device_queue, left_right, uppler_lower, trans, diag, m, n, alpha, a, - lda, b, ldb) +function onemklCtrmm(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb) @ccall liboneapi_support.onemklCtrmm(device_queue::syclQueue_t, left_right::onemklSide, - uppler_lower::onemklUplo, trans::onemklTranspose, - diag::onemklDiag, m::Int64, n::Int64, + upper_lower::onemklUplo, trans::onemklTranspose, + unit_diag::onemklDiag, m::Int64, n::Int64, alpha::ComplexF32, 
a::ZePtr{ComplexF32}, - lda::Int64, b::ZePtr{ComplexF32}, - ldb::Int64)::Cvoid + lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64)::Cint end -function onemklZtrmm(device_queue, left_right, uppler_lower, trans, diag, m, n, alpha, a, - lda, b, ldb) +function onemklZtrmm(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb) @ccall liboneapi_support.onemklZtrmm(device_queue::syclQueue_t, left_right::onemklSide, - uppler_lower::onemklUplo, trans::onemklTranspose, - diag::onemklDiag, m::Int64, n::Int64, + upper_lower::onemklUplo, trans::onemklTranspose, + unit_diag::onemklDiag, m::Int64, n::Int64, alpha::ComplexF64, a::ZePtr{ComplexF64}, - lda::Int64, b::ZePtr{ComplexF64}, - ldb::Int64)::Cvoid + lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64)::Cint end -function onemklStrsm(device_queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, +function onemklStrsm(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb) @ccall liboneapi_support.onemklStrsm(device_queue::syclQueue_t, left_right::onemklSide, - upper_lower::onemklUplo, transa::onemklTranspose, + upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, m::Int64, n::Int64, alpha::Cfloat, a::ZePtr{Cfloat}, lda::Int64, - b::ZePtr{Cfloat}, ldb::Int64)::Cvoid + b::ZePtr{Cfloat}, ldb::Int64)::Cint end -function onemklDtrsm(device_queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, +function onemklDtrsm(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb) @ccall liboneapi_support.onemklDtrsm(device_queue::syclQueue_t, left_right::onemklSide, - upper_lower::onemklUplo, transa::onemklTranspose, + upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, m::Int64, n::Int64, alpha::Cdouble, a::ZePtr{Cdouble}, lda::Int64, - b::ZePtr{Cdouble}, ldb::Int64)::Cvoid + b::ZePtr{Cdouble}, ldb::Int64)::Cint end -function onemklCtrsm(device_queue, left_right, upper_lower, transa, unit_diag, m, n, 
alpha, +function onemklCtrsm(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb) @ccall liboneapi_support.onemklCtrsm(device_queue::syclQueue_t, left_right::onemklSide, - upper_lower::onemklUplo, transa::onemklTranspose, + upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, m::Int64, n::Int64, alpha::ComplexF32, a::ZePtr{ComplexF32}, - lda::Int64, b::ZePtr{ComplexF32}, - ldb::Int64)::Cvoid + lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64)::Cint end -function onemklZtrsm(device_queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, +function onemklZtrsm(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb) @ccall liboneapi_support.onemklZtrsm(device_queue::syclQueue_t, left_right::onemklSide, - upper_lower::onemklUplo, transa::onemklTranspose, + upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, m::Int64, n::Int64, alpha::ComplexF64, a::ZePtr{ComplexF64}, - lda::Int64, b::ZePtr{ComplexF64}, - ldb::Int64)::Cvoid + lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64)::Cint end -function onemklStrsmBatched(device_queue, left_right, upper_lower, transa, unit_diag, m, n, - alpha, a, lda, b, ldb, group_count, group_size) - @ccall liboneapi_support.onemklStrsmBatched(device_queue::syclQueue_t, - left_right::onemklSide, - upper_lower::onemklUplo, - transa::onemklTranspose, - unit_diag::onemklDiag, m::ZePtr{Int64}, - n::ZePtr{Int64}, alpha::ZePtr{Cfloat}, - a::ZePtr{Ptr{Cfloat}}, lda::ZePtr{Int64}, - b::ZePtr{Ptr{Cfloat}}, ldb::ZePtr{Int64}, - group_count::Int64, - group_size::ZePtr{Int64})::Cvoid +function onemklSdgmm(device_queue, left_right, m, n, a, lda, x, incx, c, ldc) + @ccall liboneapi_support.onemklSdgmm(device_queue::syclQueue_t, left_right::onemklSide, + m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64, + x::Ptr{Cfloat}, incx::Int64, c::Ptr{Cfloat}, + ldc::Int64)::Cint end -function onemklDtrsmBatched(device_queue, left_right, upper_lower, transa, unit_diag, m, 
n, - alpha, a, lda, b, ldb, group_count, group_size) - @ccall liboneapi_support.onemklDtrsmBatched(device_queue::syclQueue_t, - left_right::onemklSide, - upper_lower::onemklUplo, - transa::onemklTranspose, - unit_diag::onemklDiag, m::ZePtr{Int64}, - n::ZePtr{Int64}, alpha::ZePtr{Cdouble}, - a::ZePtr{Ptr{Cdouble}}, lda::ZePtr{Int64}, - b::ZePtr{Ptr{Cdouble}}, ldb::ZePtr{Int64}, - group_count::Int64, - group_size::ZePtr{Int64})::Cvoid +function onemklDdgmm(device_queue, left_right, m, n, a, lda, x, incx, c, ldc) + @ccall liboneapi_support.onemklDdgmm(device_queue::syclQueue_t, left_right::onemklSide, + m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64, + x::Ptr{Cdouble}, incx::Int64, c::Ptr{Cdouble}, + ldc::Int64)::Cint end -function onemklCtrsmBatched(device_queue, left_right, upper_lower, transa, unit_diag, m, n, - alpha, a, lda, b, ldb, group_count, group_size) - @ccall liboneapi_support.onemklCtrsmBatched(device_queue::syclQueue_t, - left_right::onemklSide, - upper_lower::onemklUplo, - transa::onemklTranspose, - unit_diag::onemklDiag, m::ZePtr{Int64}, - n::ZePtr{Int64}, alpha::ZePtr{ComplexF32}, - a::ZePtr{Ptr{ComplexF32}}, - lda::ZePtr{Int64}, - b::ZePtr{Ptr{ComplexF32}}, - ldb::ZePtr{Int64}, group_count::Int64, - group_size::ZePtr{Int64})::Cvoid +function onemklCdgmm(device_queue, left_right, m, n, a, lda, x, incx, c, ldc) + @ccall liboneapi_support.onemklCdgmm(device_queue::syclQueue_t, left_right::onemklSide, + m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64, + x::Ptr{ComplexF32}, incx::Int64, + c::Ptr{ComplexF32}, ldc::Int64)::Cint end -function onemklZtrsmBatched(device_queue, left_right, upper_lower, transa, unit_diag, m, n, - alpha, a, lda, b, ldb, group_count, group_size) - @ccall liboneapi_support.onemklZtrsmBatched(device_queue::syclQueue_t, - left_right::onemklSide, - upper_lower::onemklUplo, - transa::onemklTranspose, - unit_diag::onemklDiag, m::ZePtr{Int64}, - n::ZePtr{Int64}, alpha::ZePtr{ComplexF64}, - a::ZePtr{Ptr{ComplexF64}}, - 
lda::ZePtr{Int64}, - b::ZePtr{Ptr{ComplexF64}}, - ldb::ZePtr{Int64}, group_count::Int64, - group_size::ZePtr{Int64})::Cvoid +function onemklZdgmm(device_queue, left_right, m, n, a, lda, x, incx, c, ldc) + @ccall liboneapi_support.onemklZdgmm(device_queue::syclQueue_t, left_right::onemklSide, + m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64, + x::Ptr{ComplexF32}, incx::Int64, + c::Ptr{ComplexF32}, ldc::Int64)::Cint end -function onemklChemm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc) - @ccall liboneapi_support.onemklChemm(device_queue::syclQueue_t, left_right::onemklSide, - upper_lower::onemklUplo, m::Int64, n::Int64, - alpha::ComplexF32, a::ZePtr{ComplexF32}, - lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64, - beta::ComplexF32, c::ZePtr{ComplexF32}, - ldc::Int64)::Cvoid -end - -function onemklZhemm(device_queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc) - @ccall liboneapi_support.onemklZhemm(device_queue::syclQueue_t, left_right::onemklSide, - upper_lower::onemklUplo, m::Int64, n::Int64, - alpha::ComplexF64, a::ZePtr{ComplexF64}, - lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64, - beta::ComplexF64, c::ZePtr{ComplexF64}, - ldc::Int64)::Cvoid -end - -function onemklCherk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc) - @ccall liboneapi_support.onemklCherk(device_queue::syclQueue_t, upper_lower::onemklUplo, - trans::onemklTranspose, n::Int64, k::Int64, - alpha::ComplexF32, a::ZePtr{ComplexF32}, - lda::Int64, beta::ComplexF32, c::ZePtr{ComplexF32}, - ldc::Int64)::Cvoid +function onemklSgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklSgemv(device_queue::syclQueue_t, trans::onemklTranspose, + m::Int64, n::Int64, alpha::Cfloat, + a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, + incx::Int64, beta::Cfloat, y::ZePtr{Cfloat}, + incy::Int64)::Cint end -function onemklZherk(device_queue, upper_lower, trans, n, k, alpha, a, lda, beta, 
c, ldc) - @ccall liboneapi_support.onemklZherk(device_queue::syclQueue_t, upper_lower::onemklUplo, - trans::onemklTranspose, n::Int64, k::Int64, - alpha::ComplexF64, a::ZePtr{ComplexF64}, - lda::Int64, beta::ComplexF64, c::ZePtr{ComplexF64}, - ldc::Int64)::Cvoid +function onemklDgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklDgemv(device_queue::syclQueue_t, trans::onemklTranspose, + m::Int64, n::Int64, alpha::Cdouble, + a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, + incx::Int64, beta::Cdouble, y::ZePtr{Cdouble}, + incy::Int64)::Cint end -function onemklCher2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, - c, ldc) - @ccall liboneapi_support.onemklCher2k(device_queue::syclQueue_t, - upper_lower::onemklUplo, trans::onemklTranspose, - n::Int64, k::Int64, alpha::ComplexF32, - a::ZePtr{ComplexF32}, lda::Int64, - b::ZePtr{ComplexF32}, ldb::Int64, - beta::ComplexF32, c::ZePtr{ComplexF32}, - ldc::Int64)::Cvoid +function onemklCgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklCgemv(device_queue::syclQueue_t, trans::onemklTranspose, + m::Int64, n::Int64, alpha::ComplexF32, + a::ZePtr{ComplexF32}, lda::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + beta::ComplexF32, y::ZePtr{ComplexF32}, + incy::Int64)::Cint end -function onemklZher2k(device_queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, - c, ldc) - @ccall liboneapi_support.onemklZher2k(device_queue::syclQueue_t, - upper_lower::onemklUplo, trans::onemklTranspose, - n::Int64, k::Int64, alpha::ComplexF64, - a::ZePtr{ComplexF64}, lda::Int64, - b::ZePtr{ComplexF64}, ldb::Int64, - beta::ComplexF64, c::ZePtr{ComplexF64}, - ldc::Int64)::Cvoid +function onemklZgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklZgemv(device_queue::syclQueue_t, trans::onemklTranspose, + m::Int64, n::Int64, alpha::ComplexF64, + a::ZePtr{ComplexF64}, 
lda::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + beta::ComplexF64, y::ZePtr{ComplexF64}, + incy::Int64)::Cint end function onemklSgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, @@ -600,624 +756,3636 @@ function onemklSgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, m::Int64, n::Int64, kl::Int64, ku::Int64, alpha::Cfloat, a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, incx::Int64, beta::Cfloat, - y::ZePtr{Cfloat}, incy::Int64)::Cvoid + y::ZePtr{Cfloat}, incy::Int64)::Cint +end + +function onemklDgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, + incy) + @ccall liboneapi_support.onemklDgbmv(device_queue::syclQueue_t, trans::onemklTranspose, + m::Int64, n::Int64, kl::Int64, ku::Int64, + alpha::Cdouble, a::ZePtr{Cdouble}, lda::Int64, + x::ZePtr{Cdouble}, incx::Int64, beta::Cdouble, + y::ZePtr{Cdouble}, incy::Int64)::Cint +end + +function onemklCgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, + incy) + @ccall liboneapi_support.onemklCgbmv(device_queue::syclQueue_t, trans::onemklTranspose, + m::Int64, n::Int64, kl::Int64, ku::Int64, + alpha::ComplexF32, a::ZePtr{ComplexF32}, + lda::Int64, x::ZePtr{ComplexF32}, incx::Int64, + beta::ComplexF32, y::ZePtr{ComplexF32}, + incy::Int64)::Cint +end + +function onemklZgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, + incy) + @ccall liboneapi_support.onemklZgbmv(device_queue::syclQueue_t, trans::onemklTranspose, + m::Int64, n::Int64, kl::Int64, ku::Int64, + alpha::ComplexF64, a::ZePtr{ComplexF64}, + lda::Int64, x::ZePtr{ComplexF64}, incx::Int64, + beta::ComplexF64, y::ZePtr{ComplexF64}, + incy::Int64)::Cint +end + +function onemklSger(device_queue, m, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklSger(device_queue::syclQueue_t, m::Int64, n::Int64, + alpha::Cfloat, x::ZePtr{Cfloat}, incx::Int64, + y::ZePtr{Cfloat}, incy::Int64, a::ZePtr{Cfloat}, + lda::Int64)::Cint +end + +function 
onemklDger(device_queue, m, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklDger(device_queue::syclQueue_t, m::Int64, n::Int64, + alpha::Cdouble, x::ZePtr{Cdouble}, incx::Int64, + y::ZePtr{Cdouble}, incy::Int64, a::ZePtr{Cdouble}, + lda::Int64)::Cint +end + +function onemklCgerc(device_queue, m, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklCgerc(device_queue::syclQueue_t, m::Int64, n::Int64, + alpha::ComplexF32, x::ZePtr{ComplexF32}, + incx::Int64, y::ZePtr{ComplexF32}, incy::Int64, + a::ZePtr{ComplexF32}, lda::Int64)::Cint +end + +function onemklZgerc(device_queue, m, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklZgerc(device_queue::syclQueue_t, m::Int64, n::Int64, + alpha::ComplexF64, x::ZePtr{ComplexF64}, + incx::Int64, y::ZePtr{ComplexF64}, incy::Int64, + a::ZePtr{ComplexF64}, lda::Int64)::Cint +end + +function onemklCgeru(device_queue, m, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklCgeru(device_queue::syclQueue_t, m::Int64, n::Int64, + alpha::ComplexF32, x::Ptr{ComplexF32}, incx::Int64, + y::Ptr{ComplexF32}, incy::Int64, + a::Ptr{ComplexF32}, lda::Int64)::Cint +end + +function onemklZgeru(device_queue, m, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklZgeru(device_queue::syclQueue_t, m::Int64, n::Int64, + alpha::ComplexF32, x::Ptr{ComplexF32}, incx::Int64, + y::Ptr{ComplexF32}, incy::Int64, + a::Ptr{ComplexF32}, lda::Int64)::Cint +end + +function onemklChbmv(device_queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklChbmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, k::Int64, alpha::ComplexF32, + a::ZePtr{ComplexF32}, lda::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + beta::ComplexF32, y::ZePtr{ComplexF32}, + incy::Int64)::Cint +end + +function onemklZhbmv(device_queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy) + @ccall 
liboneapi_support.onemklZhbmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, k::Int64, alpha::ComplexF64, + a::ZePtr{ComplexF64}, lda::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + beta::ComplexF64, y::ZePtr{ComplexF64}, + incy::Int64)::Cint +end + +function onemklChemv(device_queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklChemv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, a::ZePtr{ComplexF32}, + lda::Int64, x::ZePtr{ComplexF32}, incx::Int64, + beta::ComplexF32, y::ZePtr{ComplexF32}, + incy::Int64)::Cint +end + +function onemklZhemv(device_queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklZhemv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF64, a::ZePtr{ComplexF64}, + lda::Int64, x::ZePtr{ComplexF64}, incx::Int64, + beta::ComplexF64, y::ZePtr{ComplexF64}, + incy::Int64)::Cint +end + +function onemklCher(device_queue, upper_lower, n, alpha, x, incx, a, lda) + @ccall liboneapi_support.onemklCher(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, x::ZePtr{ComplexF32}, + incx::Int64, a::ZePtr{ComplexF32}, lda::Int64)::Cint +end + +function onemklZher(device_queue, upper_lower, n, alpha, x, incx, a, lda) + @ccall liboneapi_support.onemklZher(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF64, x::ZePtr{ComplexF64}, + incx::Int64, a::ZePtr{ComplexF64}, lda::Int64)::Cint +end + +function onemklCher2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklCher2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, x::ZePtr{ComplexF32}, + incx::Int64, y::ZePtr{ComplexF32}, incy::Int64, + a::ZePtr{ComplexF32}, lda::Int64)::Cint +end + +function onemklZher2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a, lda) + @ccall 
liboneapi_support.onemklZher2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF64, x::ZePtr{ComplexF64}, + incx::Int64, y::ZePtr{ComplexF64}, incy::Int64, + a::ZePtr{ComplexF64}, lda::Int64)::Cint +end + +function onemklChpmv(device_queue, upper_lower, n, alpha, a, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklChpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, a::Ptr{ComplexF32}, + x::Ptr{ComplexF32}, incx::Int64, beta::ComplexF32, + y::Ptr{ComplexF32}, incy::Int64)::Cint +end + +function onemklZhpmv(device_queue, upper_lower, n, alpha, a, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklZhpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, a::Ptr{ComplexF32}, + x::Ptr{ComplexF32}, incx::Int64, beta::ComplexF32, + y::Ptr{ComplexF32}, incy::Int64)::Cint +end + +function onemklChpr(device_queue, upper_lower, n, alpha, x, incx, a) + @ccall liboneapi_support.onemklChpr(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cfloat, x::Ptr{ComplexF32}, + incx::Int64, a::Ptr{ComplexF32})::Cint +end + +function onemklZhpr(device_queue, upper_lower, n, alpha, x, incx, a) + @ccall liboneapi_support.onemklZhpr(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cdouble, x::Ptr{ComplexF32}, + incx::Int64, a::Ptr{ComplexF32})::Cint +end + +function onemklChpr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a) + @ccall liboneapi_support.onemklChpr2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, x::Ptr{ComplexF32}, + incx::Int64, y::Ptr{ComplexF32}, incy::Int64, + a::Ptr{ComplexF32})::Cint +end + +function onemklZhpr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a) + @ccall liboneapi_support.onemklZhpr2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, x::Ptr{ComplexF32}, + incx::Int64, y::Ptr{ComplexF32}, incy::Int64, + 
a::Ptr{ComplexF32})::Cint +end + +function onemklSsbmv(device_queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklSsbmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, k::Int64, alpha::Cfloat, + a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, + incx::Int64, beta::Cfloat, y::ZePtr{Cfloat}, + incy::Int64)::Cint +end + +function onemklDsbmv(device_queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklDsbmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, k::Int64, alpha::Cdouble, + a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, + incx::Int64, beta::Cdouble, y::ZePtr{Cdouble}, + incy::Int64)::Cint +end + +function onemklSsymv(device_queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklSsymv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cfloat, a::ZePtr{Cfloat}, + lda::Int64, x::ZePtr{Cfloat}, incx::Int64, + beta::Cfloat, y::ZePtr{Cfloat}, incy::Int64)::Cint +end + +function onemklDsymv(device_queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklDsymv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cdouble, a::ZePtr{Cdouble}, + lda::Int64, x::ZePtr{Cdouble}, incx::Int64, + beta::Cdouble, y::ZePtr{Cdouble}, + incy::Int64)::Cint +end + +function onemklCsymv(device_queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklCsymv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, a::ZePtr{ComplexF32}, + lda::Int64, x::ZePtr{ComplexF32}, incx::Int64, + beta::ComplexF32, y::ZePtr{ComplexF32}, + incy::Int64)::Cint +end + +function onemklZsymv(device_queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklZsymv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF64, a::ZePtr{ComplexF64}, + 
lda::Int64, x::ZePtr{ComplexF64}, incx::Int64, + beta::ComplexF64, y::ZePtr{ComplexF64}, + incy::Int64)::Cint +end + +function onemklSsyr(device_queue, upper_lower, n, alpha, x, incx, a, lda) + @ccall liboneapi_support.onemklSsyr(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cfloat, x::ZePtr{Cfloat}, + incx::Int64, a::ZePtr{Cfloat}, lda::Int64)::Cint +end + +function onemklDsyr(device_queue, upper_lower, n, alpha, x, incx, a, lda) + @ccall liboneapi_support.onemklDsyr(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cdouble, x::ZePtr{Cdouble}, + incx::Int64, a::ZePtr{Cdouble}, lda::Int64)::Cint +end + +function onemklCsyr(device_queue, upper_lower, n, alpha, x, incx, a, lda) + @ccall liboneapi_support.onemklCsyr(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, x::ZePtr{ComplexF32}, + incx::Int64, a::ZePtr{ComplexF32}, lda::Int64)::Cint +end + +function onemklZsyr(device_queue, upper_lower, n, alpha, x, incx, a, lda) + @ccall liboneapi_support.onemklZsyr(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF64, x::ZePtr{ComplexF64}, + incx::Int64, a::ZePtr{ComplexF64}, lda::Int64)::Cint +end + +function onemklSsyr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklSsyr2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cfloat, x::Ptr{Cfloat}, + incx::Int64, y::Ptr{Cfloat}, incy::Int64, + a::Ptr{Cfloat}, lda::Int64)::Cint +end + +function onemklDsyr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklDsyr2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cdouble, x::Ptr{Cdouble}, + incx::Int64, y::Ptr{Cdouble}, incy::Int64, + a::Ptr{Cdouble}, lda::Int64)::Cint +end + +function onemklCsyr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklCsyr2(device_queue::syclQueue_t, 
upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, x::Ptr{ComplexF32}, + incx::Int64, y::Ptr{ComplexF32}, incy::Int64, + a::Ptr{ComplexF32}, lda::Int64)::Cint +end + +function onemklZsyr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a, lda) + @ccall liboneapi_support.onemklZsyr2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::ComplexF32, x::Ptr{ComplexF32}, + incx::Int64, y::Ptr{ComplexF32}, incy::Int64, + a::Ptr{ComplexF32}, lda::Int64)::Cint +end + +function onemklSspmv(device_queue, upper_lower, n, alpha, a, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklSspmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cfloat, a::Ptr{Cfloat}, + x::Ptr{Cfloat}, incx::Int64, beta::Cfloat, + y::Ptr{Cfloat}, incy::Int64)::Cint +end + +function onemklDspmv(device_queue, upper_lower, n, alpha, a, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklDspmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cdouble, a::Ptr{Cdouble}, + x::Ptr{Cdouble}, incx::Int64, beta::Cdouble, + y::Ptr{Cdouble}, incy::Int64)::Cint +end + +function onemklSspr(device_queue, upper_lower, n, alpha, x, incx, a) + @ccall liboneapi_support.onemklSspr(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cfloat, x::Ptr{Cfloat}, + incx::Int64, a::Ptr{Cfloat})::Cint +end + +function onemklDspr(device_queue, upper_lower, n, alpha, x, incx, a) + @ccall liboneapi_support.onemklDspr(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cdouble, x::Ptr{Cdouble}, + incx::Int64, a::Ptr{Cdouble})::Cint +end + +function onemklSspr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a) + @ccall liboneapi_support.onemklSspr2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cfloat, x::Ptr{Cfloat}, + incx::Int64, y::Ptr{Cfloat}, incy::Int64, + a::Ptr{Cfloat})::Cint +end + +function onemklDspr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a) + @ccall 
liboneapi_support.onemklDspr2(device_queue::syclQueue_t, upper_lower::onemklUplo, + n::Int64, alpha::Cdouble, x::Ptr{Cdouble}, + incx::Int64, y::Ptr{Cdouble}, incy::Int64, + a::Ptr{Cdouble})::Cint +end + +function onemklStbmv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) + @ccall liboneapi_support.onemklStbmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, k::Int64, a::ZePtr{Cfloat}, lda::Int64, + x::ZePtr{Cfloat}, incx::Int64)::Cint +end + +function onemklDtbmv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) + @ccall liboneapi_support.onemklDtbmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, k::Int64, a::ZePtr{Cdouble}, lda::Int64, + x::ZePtr{Cdouble}, incx::Int64)::Cint +end + +function onemklCtbmv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) + @ccall liboneapi_support.onemklCtbmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, k::Int64, a::ZePtr{ComplexF32}, + lda::Int64, x::ZePtr{ComplexF32}, + incx::Int64)::Cint +end + +function onemklZtbmv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) + @ccall liboneapi_support.onemklZtbmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, k::Int64, a::ZePtr{ComplexF64}, + lda::Int64, x::ZePtr{ComplexF64}, + incx::Int64)::Cint +end + +function onemklStbsv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) + @ccall liboneapi_support.onemklStbsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, k::Int64, a::Ptr{Cfloat}, lda::Int64, + x::Ptr{Cfloat}, incx::Int64)::Cint +end + +function onemklDtbsv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) + @ccall 
liboneapi_support.onemklDtbsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, k::Int64, a::Ptr{Cdouble}, lda::Int64, + x::Ptr{Cdouble}, incx::Int64)::Cint +end + +function onemklCtbsv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) + @ccall liboneapi_support.onemklCtbsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, k::Int64, a::Ptr{ComplexF32}, lda::Int64, + x::Ptr{ComplexF32}, incx::Int64)::Cint +end + +function onemklZtbsv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) + @ccall liboneapi_support.onemklZtbsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, k::Int64, a::Ptr{ComplexF32}, lda::Int64, + x::Ptr{ComplexF32}, incx::Int64)::Cint +end + +function onemklStpmv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) + @ccall liboneapi_support.onemklStpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::Ptr{Cfloat}, x::Ptr{Cfloat}, + incx::Int64)::Cint +end + +function onemklDtpmv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) + @ccall liboneapi_support.onemklDtpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::Ptr{Cdouble}, x::Ptr{Cdouble}, + incx::Int64)::Cint +end + +function onemklCtpmv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) + @ccall liboneapi_support.onemklCtpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::Ptr{ComplexF32}, x::Ptr{ComplexF32}, + incx::Int64)::Cint +end + +function onemklZtpmv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) + @ccall liboneapi_support.onemklZtpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, 
unit_diag::onemklDiag, + n::Int64, a::Ptr{ComplexF32}, x::Ptr{ComplexF32}, + incx::Int64)::Cint +end + +function onemklStpsv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) + @ccall liboneapi_support.onemklStpsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::Ptr{Cfloat}, x::Ptr{Cfloat}, + incx::Int64)::Cint +end + +function onemklDtpsv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) + @ccall liboneapi_support.onemklDtpsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::Ptr{Cdouble}, x::Ptr{Cdouble}, + incx::Int64)::Cint +end + +function onemklCtpsv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) + @ccall liboneapi_support.onemklCtpsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::Ptr{ComplexF32}, x::Ptr{ComplexF32}, + incx::Int64)::Cint +end + +function onemklZtpsv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) + @ccall liboneapi_support.onemklZtpsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::Ptr{ComplexF32}, x::Ptr{ComplexF32}, + incx::Int64)::Cint +end + +function onemklStrmv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) + @ccall liboneapi_support.onemklStrmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::ZePtr{Cfloat}, lda::Int64, + x::ZePtr{Cfloat}, incx::Int64)::Cint +end + +function onemklDtrmv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) + @ccall liboneapi_support.onemklDtrmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::ZePtr{Cdouble}, lda::Int64, + x::ZePtr{Cdouble}, incx::Int64)::Cint +end + +function onemklCtrmv(device_queue, upper_lower, trans, unit_diag, n, 
a, lda, x, incx) + @ccall liboneapi_support.onemklCtrmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::ZePtr{ComplexF32}, lda::Int64, + x::ZePtr{ComplexF32}, incx::Int64)::Cint +end + +function onemklZtrmv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) + @ccall liboneapi_support.onemklZtrmv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::ZePtr{ComplexF64}, lda::Int64, + x::ZePtr{ComplexF64}, incx::Int64)::Cint +end + +function onemklStrsv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) + @ccall liboneapi_support.onemklStrsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::ZePtr{Cfloat}, lda::Int64, + x::ZePtr{Cfloat}, incx::Int64)::Cint +end + +function onemklDtrsv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) + @ccall liboneapi_support.onemklDtrsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::ZePtr{Cdouble}, lda::Int64, + x::ZePtr{Cdouble}, incx::Int64)::Cint +end + +function onemklCtrsv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) + @ccall liboneapi_support.onemklCtrsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::ZePtr{ComplexF32}, lda::Int64, + x::ZePtr{ComplexF32}, incx::Int64)::Cint +end + +function onemklZtrsv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) + @ccall liboneapi_support.onemklZtrsv(device_queue::syclQueue_t, upper_lower::onemklUplo, + trans::onemklTranspose, unit_diag::onemklDiag, + n::Int64, a::ZePtr{ComplexF64}, lda::Int64, + x::ZePtr{ComplexF64}, incx::Int64)::Cint +end + +function onemklCdotc(device_queue, n, x, incx, y, incy, result) + @ccall liboneapi_support.onemklCdotc(device_queue::syclQueue_t, n::Int64, + 
x::ZePtr{ComplexF32}, incx::Int64, + y::ZePtr{ComplexF32}, incy::Int64, + result::RefOrZeRef{ComplexF32})::Cint +end + +function onemklZdotc(device_queue, n, x, incx, y, incy, result) + @ccall liboneapi_support.onemklZdotc(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + y::ZePtr{ComplexF64}, incy::Int64, + result::RefOrZeRef{ComplexF64})::Cint +end + +function onemklCdotu(device_queue, n, x, incx, y, incy, result) + @ccall liboneapi_support.onemklCdotu(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + y::ZePtr{ComplexF32}, incy::Int64, + result::RefOrZeRef{ComplexF32})::Cint +end + +function onemklZdotu(device_queue, n, x, incx, y, incy, result) + @ccall liboneapi_support.onemklZdotu(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + y::ZePtr{ComplexF64}, incy::Int64, + result::RefOrZeRef{ComplexF64})::Cint +end + +function onemklSiamax(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklSiamax(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cfloat}, incx::Int64, + result::ZePtr{Int64})::Cint +end + +function onemklDiamax(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklDiamax(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cdouble}, incx::Int64, + result::ZePtr{Int64})::Cint +end + +function onemklCiamax(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklCiamax(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + result::ZePtr{Int64})::Cint +end + +function onemklZiamax(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklZiamax(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + result::ZePtr{Int64})::Cint +end + +function onemklSiamin(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklSiamin(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cfloat}, incx::Int64, + result::ZePtr{Int64})::Cint +end + +function onemklDiamin(device_queue, n, x, incx, result) + @ccall 
liboneapi_support.onemklDiamin(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cdouble}, incx::Int64, + result::ZePtr{Int64})::Cint +end + +function onemklCiamin(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklCiamin(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + result::ZePtr{Int64})::Cint +end + +function onemklZiamin(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklZiamin(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + result::ZePtr{Int64})::Cint +end + +function onemklSasum(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklSasum(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cfloat}, incx::Int64, + result::ZePtr{Cfloat})::Cint +end + +function onemklDasum(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklDasum(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cdouble}, incx::Int64, + result::ZePtr{Cdouble})::Cint +end + +function onemklCasum(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklCasum(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + result::ZePtr{Cfloat})::Cint +end + +function onemklZasum(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklZasum(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + result::ZePtr{Float64})::Cint +end + +function onemklSaxpy(device_queue, n, alpha, x, incx, y, incy) + @ccall liboneapi_support.onemklSaxpy(device_queue::syclQueue_t, n::Int64, alpha::Cfloat, + x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, + incy::Int64)::Cint +end + +function onemklDaxpy(device_queue, n, alpha, x, incx, y, incy) + @ccall liboneapi_support.onemklDaxpy(device_queue::syclQueue_t, n::Int64, + alpha::Cdouble, x::ZePtr{Cdouble}, incx::Int64, + y::ZePtr{Cdouble}, incy::Int64)::Cint +end + +function onemklCaxpy(device_queue, n, alpha, x, incx, y, incy) + @ccall liboneapi_support.onemklCaxpy(device_queue::syclQueue_t, n::Int64, + 
alpha::ComplexF32, x::ZePtr{ComplexF32}, + incx::Int64, y::ZePtr{ComplexF32}, + incy::Int64)::Cint +end + +function onemklZaxpy(device_queue, n, alpha, x, incx, y, incy) + @ccall liboneapi_support.onemklZaxpy(device_queue::syclQueue_t, n::Int64, + alpha::ComplexF64, x::ZePtr{ComplexF64}, + incx::Int64, y::ZePtr{ComplexF64}, + incy::Int64)::Cint +end + +function onemklSaxpby(device_queue, n, alpha, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklSaxpby(device_queue::syclQueue_t, n::Int64, + alpha::Cfloat, x::ZePtr{Cfloat}, incx::Int64, + beta::Cfloat, y::ZePtr{Cfloat}, incy::Int64)::Cint +end + +function onemklDaxpby(device_queue, n, alpha, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklDaxpby(device_queue::syclQueue_t, n::Int64, + alpha::Cdouble, x::ZePtr{Cdouble}, incx::Int64, + beta::Cdouble, y::ZePtr{Cdouble}, + incy::Int64)::Cint +end + +function onemklCaxpby(device_queue, n, alpha, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklCaxpby(device_queue::syclQueue_t, n::Int64, + alpha::ComplexF32, x::ZePtr{ComplexF32}, + incx::Int64, beta::ComplexF32, + y::ZePtr{ComplexF32}, incy::Int64)::Cint +end + +function onemklZaxpby(device_queue, n, alpha, x, incx, beta, y, incy) + @ccall liboneapi_support.onemklZaxpby(device_queue::syclQueue_t, n::Int64, + alpha::ComplexF64, x::ZePtr{ComplexF64}, + incx::Int64, beta::ComplexF64, + y::ZePtr{ComplexF64}, incy::Int64)::Cint +end + +function onemklScopy(device_queue, n, x, incx, y, incy) + @ccall liboneapi_support.onemklScopy(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, + incy::Int64)::Cint +end + +function onemklDcopy(device_queue, n, x, incx, y, incy) + @ccall liboneapi_support.onemklDcopy(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, + incy::Int64)::Cint +end + +function onemklCcopy(device_queue, n, x, incx, y, incy) + @ccall liboneapi_support.onemklCcopy(device_queue::syclQueue_t, n::Int64, + 
x::ZePtr{ComplexF32}, incx::Int64, + y::ZePtr{ComplexF32}, incy::Int64)::Cint +end + +function onemklZcopy(device_queue, n, x, incx, y, incy) + @ccall liboneapi_support.onemklZcopy(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + y::ZePtr{ComplexF64}, incy::Int64)::Cint +end + +function onemklSdot(device_queue, n, x, incx, y, incy, result) + @ccall liboneapi_support.onemklSdot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, + incy::Int64, result::RefOrZeRef{Cfloat})::Cint +end + +function onemklDdot(device_queue, n, x, incx, y, incy, result) + @ccall liboneapi_support.onemklDdot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, + incy::Int64, result::RefOrZeRef{Cdouble})::Cint +end + +function onemklSsdsdot(device_queue, n, sb, x, incx, y, incy, result) + @ccall liboneapi_support.onemklSsdsdot(device_queue::syclQueue_t, n::Int64, sb::Cfloat, + x::Ptr{Cfloat}, incx::Int64, y::Ptr{Cfloat}, + incy::Int64, result::Ptr{Cfloat})::Cint +end + +function onemklSnrm2(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklSnrm2(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cfloat}, incx::Int64, + result::RefOrZeRef{Cfloat})::Cint +end + +function onemklDnrm2(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklDnrm2(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cdouble}, incx::Int64, + result::RefOrZeRef{Cdouble})::Cint +end + +function onemklCnrm2(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklCnrm2(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + result::RefOrZeRef{Cfloat})::Cint +end + +function onemklZnrm2(device_queue, n, x, incx, result) + @ccall liboneapi_support.onemklZnrm2(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + result::RefOrZeRef{Cdouble})::Cint +end + +function onemklSrot(device_queue, n, x, incx, y, incy, c, s) + @ccall 
liboneapi_support.onemklSrot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, + incy::Int64, c::Cfloat, s::Cfloat)::Cint +end + +function onemklDrot(device_queue, n, x, incx, y, incy, c, s) + @ccall liboneapi_support.onemklDrot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, + incy::Int64, c::Cdouble, s::Cdouble)::Cint +end + +function onemklCSrot(device_queue, n, x, incx, y, incy, c, s) + @ccall liboneapi_support.onemklCSrot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + y::ZePtr{ComplexF32}, incy::Int64, c::Cfloat, + s::Cfloat)::Cint +end + +function onemklCrot(device_queue, n, x, incx, y, incy, c, s) + @ccall liboneapi_support.onemklCrot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + y::ZePtr{ComplexF32}, incy::Int64, c::Cfloat, + s::ComplexF32)::Cint +end + +function onemklZDrot(device_queue, n, x, incx, y, incy, c, s) + @ccall liboneapi_support.onemklZDrot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + y::ZePtr{ComplexF64}, incy::Int64, c::Cdouble, + s::Cdouble)::Cint +end + +function onemklZrot(device_queue, n, x, incx, y, incy, c, s) + @ccall liboneapi_support.onemklZrot(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + y::ZePtr{ComplexF64}, incy::Int64, c::Cdouble, + s::ComplexF32)::Cint +end + +function onemklSrotg(device_queue, a, b, c, s) + @ccall liboneapi_support.onemklSrotg(device_queue::syclQueue_t, a::Ptr{Cfloat}, + b::Ptr{Cfloat}, c::Ptr{Cfloat}, + s::Ptr{Cfloat})::Cint +end + +function onemklDrotg(device_queue, a, b, c, s) + @ccall liboneapi_support.onemklDrotg(device_queue::syclQueue_t, a::Ptr{Cdouble}, + b::Ptr{Cdouble}, c::Ptr{Cdouble}, + s::Ptr{Cdouble})::Cint +end + +function onemklCrotg(device_queue, a, b, c, s) + @ccall liboneapi_support.onemklCrotg(device_queue::syclQueue_t, a::Ptr{ComplexF32}, + b::Ptr{ComplexF32}, c::Ptr{Cfloat}, + 
s::Ptr{ComplexF32})::Cint +end + +function onemklZrotg(device_queue, a, b, c, s) + @ccall liboneapi_support.onemklZrotg(device_queue::syclQueue_t, a::Ptr{ComplexF32}, + b::Ptr{ComplexF32}, c::Ptr{Cdouble}, + s::Ptr{ComplexF32})::Cint +end + +function onemklSrotm(device_queue, n, x, incx, y, incy, param) + @ccall liboneapi_support.onemklSrotm(device_queue::syclQueue_t, n::Int64, + x::Ptr{Cfloat}, incx::Int64, y::Ptr{Cfloat}, + incy::Int64, param::Ptr{Cfloat})::Cint +end + +function onemklDrotm(device_queue, n, x, incx, y, incy, param) + @ccall liboneapi_support.onemklDrotm(device_queue::syclQueue_t, n::Int64, + x::Ptr{Cdouble}, incx::Int64, y::Ptr{Cdouble}, + incy::Int64, param::Ptr{Cdouble})::Cint +end + +function onemklSrotmg(device_queue, d1, d2, x1, y1, param) + @ccall liboneapi_support.onemklSrotmg(device_queue::syclQueue_t, d1::Ptr{Cfloat}, + d2::Ptr{Cfloat}, x1::Ptr{Cfloat}, y1::Cfloat, + param::Ptr{Cfloat})::Cint +end + +function onemklDrotmg(device_queue, d1, d2, x1, y1, param) + @ccall liboneapi_support.onemklDrotmg(device_queue::syclQueue_t, d1::Ptr{Cdouble}, + d2::Ptr{Cdouble}, x1::Ptr{Cdouble}, y1::Cdouble, + param::Ptr{Cdouble})::Cint +end + +function onemklSscal(device_queue, n, alpha, x, incx) + @ccall liboneapi_support.onemklSscal(device_queue::syclQueue_t, n::Int64, alpha::Cfloat, + x::ZePtr{Cfloat}, incx::Int64)::Cint +end + +function onemklDscal(device_queue, n, alpha, x, incx) + @ccall liboneapi_support.onemklDscal(device_queue::syclQueue_t, n::Int64, + alpha::Cdouble, x::ZePtr{Cdouble}, + incx::Int64)::Cint +end + +function onemklCSscal(device_queue, n, alpha, x, incx) + @ccall liboneapi_support.onemklCSscal(device_queue::syclQueue_t, n::Int64, + alpha::Cfloat, x::ZePtr{ComplexF32}, + incx::Int64)::Cint +end + +function onemklZDscal(device_queue, n, alpha, x, incx) + @ccall liboneapi_support.onemklZDscal(device_queue::syclQueue_t, n::Int64, + alpha::Cdouble, x::ZePtr{ComplexF64}, + incx::Int64)::Cint +end + +function onemklCscal(device_queue, 
n, alpha, x, incx) + @ccall liboneapi_support.onemklCscal(device_queue::syclQueue_t, n::Int64, + alpha::ComplexF32, x::ZePtr{ComplexF32}, + incx::Int64)::Cint +end + +function onemklZscal(device_queue, n, alpha, x, incx) + @ccall liboneapi_support.onemklZscal(device_queue::syclQueue_t, n::Int64, + alpha::ComplexF64, x::ZePtr{ComplexF64}, + incx::Int64)::Cint +end + +function onemklSswap(device_queue, n, x, incx, y, incy) + @ccall liboneapi_support.onemklSswap(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, + incy::Int64)::Cint +end + +function onemklDswap(device_queue, n, x, incx, y, incy) + @ccall liboneapi_support.onemklDswap(device_queue::syclQueue_t, n::Int64, + x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, + incy::Int64)::Cint +end + +function onemklCswap(device_queue, n, x, incx, y, incy) + @ccall liboneapi_support.onemklCswap(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF32}, incx::Int64, + y::ZePtr{ComplexF32}, incy::Int64)::Cint +end + +function onemklZswap(device_queue, n, x, incx, y, incy) + @ccall liboneapi_support.onemklZswap(device_queue::syclQueue_t, n::Int64, + x::ZePtr{ComplexF64}, incx::Int64, + y::ZePtr{ComplexF64}, incy::Int64)::Cint +end + +function onemklSgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, stride_a, + b, ldb, stride_b, beta, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklSgemm_batch(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::Cfloat, a::Ptr{Cfloat}, + lda::Int64, stride_a::Int64, b::Ptr{Cfloat}, + ldb::Int64, stride_b::Int64, beta::Cfloat, + c::Ptr{Cfloat}, ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklDgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, stride_a, + b, ldb, stride_b, beta, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklDgemm_batch(device_queue::syclQueue_t, + transa::onemklTranspose, + 
transb::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::Cdouble, a::Ptr{Cdouble}, + lda::Int64, stride_a::Int64, b::Ptr{Cdouble}, + ldb::Int64, stride_b::Int64, beta::Cdouble, + c::Ptr{Cdouble}, ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklCgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, stride_a, + b, ldb, stride_b, beta, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklCgemm_batch(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::ComplexF32, + a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, b::Ptr{ComplexF32}, + ldb::Int64, stride_b::Int64, + beta::ComplexF32, c::Ptr{ComplexF32}, + ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklZgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, stride_a, + b, ldb, stride_b, beta, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklZgemm_batch(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + k::Int64, alpha::ComplexF32, + a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, b::Ptr{ComplexF32}, + ldb::Int64, stride_b::Int64, + beta::ComplexF32, c::Ptr{ComplexF32}, + ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklSsyrk_batch(device_queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, + beta, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklSsyrk_batch(device_queue::syclQueue_t, + upper_lower::onemklUplo, + trans::onemklTranspose, n::Int64, k::Int64, + alpha::Cfloat, a::Ptr{Cfloat}, lda::Int64, + stride_a::Int64, beta::Cfloat, + c::Ptr{Cfloat}, ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklDsyrk_batch(device_queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, + beta, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklDsyrk_batch(device_queue::syclQueue_t, + upper_lower::onemklUplo, 
+ trans::onemklTranspose, n::Int64, k::Int64, + alpha::Cdouble, a::Ptr{Cdouble}, lda::Int64, + stride_a::Int64, beta::Cdouble, + c::Ptr{Cdouble}, ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklCsyrk_batch(device_queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, + beta, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklCsyrk_batch(device_queue::syclQueue_t, + upper_lower::onemklUplo, + trans::onemklTranspose, n::Int64, k::Int64, + alpha::ComplexF32, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + beta::ComplexF32, c::Ptr{ComplexF32}, + ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklZsyrk_batch(device_queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, + beta, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklZsyrk_batch(device_queue::syclQueue_t, + upper_lower::onemklUplo, + trans::onemklTranspose, n::Int64, k::Int64, + alpha::ComplexF32, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + beta::ComplexF32, c::Ptr{ComplexF32}, + ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklStrsm_batch(device_queue, left_right, upper_lower, trans, unit_diag, m, n, + alpha, a, lda, stride_a, b, ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklStrsm_batch(device_queue::syclQueue_t, + left_right::onemklSide, + upper_lower::onemklUplo, + trans::onemklTranspose, + unit_diag::onemklDiag, m::Int64, n::Int64, + alpha::Cfloat, a::Ptr{Cfloat}, lda::Int64, + stride_a::Int64, b::Ptr{Cfloat}, ldb::Int64, + stride_b::Int64, batch_size::Int64)::Cint +end + +function onemklDtrsm_batch(device_queue, left_right, upper_lower, trans, unit_diag, m, n, + alpha, a, lda, stride_a, b, ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklDtrsm_batch(device_queue::syclQueue_t, + left_right::onemklSide, + upper_lower::onemklUplo, + trans::onemklTranspose, + unit_diag::onemklDiag, m::Int64, n::Int64, + alpha::Cdouble, a::Ptr{Cdouble}, lda::Int64, + 
stride_a::Int64, b::Ptr{Cdouble}, ldb::Int64, + stride_b::Int64, batch_size::Int64)::Cint +end + +function onemklCtrsm_batch(device_queue, left_right, upper_lower, trans, unit_diag, m, n, + alpha, a, lda, stride_a, b, ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklCtrsm_batch(device_queue::syclQueue_t, + left_right::onemklSide, + upper_lower::onemklUplo, + trans::onemklTranspose, + unit_diag::onemklDiag, m::Int64, n::Int64, + alpha::ComplexF32, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + b::Ptr{ComplexF32}, ldb::Int64, + stride_b::Int64, batch_size::Int64)::Cint +end + +function onemklZtrsm_batch(device_queue, left_right, upper_lower, trans, unit_diag, m, n, + alpha, a, lda, stride_a, b, ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklZtrsm_batch(device_queue::syclQueue_t, + left_right::onemklSide, + upper_lower::onemklUplo, + trans::onemklTranspose, + unit_diag::onemklDiag, m::Int64, n::Int64, + alpha::ComplexF32, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + b::Ptr{ComplexF32}, ldb::Int64, + stride_b::Int64, batch_size::Int64)::Cint +end + +function onemklSgemv_batch(device_queue, trans, m, n, alpha, a, lda, stridea, x, incx, + stridex, beta, y, incy, stridey, batch_size) + @ccall liboneapi_support.onemklSgemv_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::Cfloat, a::Ptr{Cfloat}, lda::Int64, + stridea::Int64, x::Ptr{Cfloat}, incx::Int64, + stridex::Int64, beta::Cfloat, y::Ptr{Cfloat}, + incy::Int64, stridey::Int64, + batch_size::Int64)::Cint +end + +function onemklDgemv_batch(device_queue, trans, m, n, alpha, a, lda, stridea, x, incx, + stridex, beta, y, incy, stridey, batch_size) + @ccall liboneapi_support.onemklDgemv_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::Cdouble, a::Ptr{Cdouble}, lda::Int64, + stridea::Int64, x::Ptr{Cdouble}, incx::Int64, + stridex::Int64, beta::Cdouble, + y::Ptr{Cdouble}, incy::Int64, stridey::Int64, + 
batch_size::Int64)::Cint +end + +function onemklCgemv_batch(device_queue, trans, m, n, alpha, a, lda, stridea, x, incx, + stridex, beta, y, incy, stridey, batch_size) + @ccall liboneapi_support.onemklCgemv_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::ComplexF32, a::Ptr{ComplexF32}, + lda::Int64, stridea::Int64, + x::Ptr{ComplexF32}, incx::Int64, + stridex::Int64, beta::ComplexF32, + y::Ptr{ComplexF32}, incy::Int64, + stridey::Int64, batch_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZgemv_batch(device_queue, trans, m, n, alpha, a, lda, stridea, x, incx, + stridex, beta, y, incy, stridey, batch_size) + @ccall liboneapi_support.onemklZgemv_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::ComplexF64, a::Ptr{ComplexF64}, + lda::Int64, stridea::Int64, + x::Ptr{ComplexF64}, incx::Int64, + stridex::Int64, beta::ComplexF64, + y::Ptr{ComplexF64}, incy::Int64, + stridey::Int64, batch_size::Int64)::Cint +end + +function onemklSdgmm_batch(device_queue, left_right, m, n, a, lda, stridea, x, incx, + stridex, c, ldc, stridec, batch_size) + @ccall liboneapi_support.onemklSdgmm_batch(device_queue::syclQueue_t, + left_right::onemklSide, m::Int64, n::Int64, + a::Ptr{Cfloat}, lda::Int64, stridea::Int64, + x::Ptr{Cfloat}, incx::Int64, stridex::Int64, + c::Ptr{Cfloat}, ldc::Int64, stridec::Int64, + batch_size::Int64)::Cint +end + +function onemklDdgmm_batch(device_queue, left_right, m, n, a, lda, stridea, x, incx, + stridex, c, ldc, stridec, batch_size) + @ccall liboneapi_support.onemklDdgmm_batch(device_queue::syclQueue_t, + left_right::onemklSide, m::Int64, n::Int64, + a::Ptr{Cdouble}, lda::Int64, stridea::Int64, + x::Ptr{Cdouble}, incx::Int64, stridex::Int64, + c::Ptr{Cdouble}, ldc::Int64, stridec::Int64, + batch_size::Int64)::Cint +end + +function onemklCdgmm_batch(device_queue, left_right, m, n, a, lda, stridea, x, incx, + stridex, c, ldc, stridec, batch_size) + @ccall
liboneapi_support.onemklCdgmm_batch(device_queue::syclQueue_t, + left_right::onemklSide, m::Int64, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, + stridea::Int64, x::Ptr{ComplexF32}, + incx::Int64, stridex::Int64, + c::Ptr{ComplexF32}, ldc::Int64, + stridec::Int64, batch_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZdgmm_batch(device_queue, left_right, m, n, a, lda, stridea, x, incx, + stridex, c, ldc, stridec, batch_size) + @ccall liboneapi_support.onemklZdgmm_batch(device_queue::syclQueue_t, + left_right::onemklSide, m::Int64, n::Int64, + a::Ptr{ComplexF64}, lda::Int64, + stridea::Int64, x::Ptr{ComplexF64}, + incx::Int64, stridex::Int64, + c::Ptr{ComplexF64}, ldc::Int64, + stridec::Int64, batch_size::Int64)::Cint +end + +function onemklSaxpy_batch(device_queue, n, alpha, x, incx, stridex, y, incy, stridey, + batch_size) + @ccall liboneapi_support.onemklSaxpy_batch(device_queue::syclQueue_t, n::Int64, + alpha::Cfloat, x::Ptr{Cfloat}, incx::Int64, + stridex::Int64, y::Ptr{Cfloat}, incy::Int64, + stridey::Int64, batch_size::Int64)::Cint +end + +function onemklDaxpy_batch(device_queue, n, alpha, x, incx, stridex, y, incy, stridey, + batch_size) + @ccall liboneapi_support.onemklDaxpy_batch(device_queue::syclQueue_t, n::Int64, + alpha::Cdouble, x::Ptr{Cdouble}, incx::Int64, + stridex::Int64, y::Ptr{Cdouble}, incy::Int64, + stridey::Int64, batch_size::Int64)::Cint +end + +function onemklCaxpy_batch(device_queue, n, alpha, x, incx, stridex, y, incy, stridey, + batch_size) + @ccall liboneapi_support.onemklCaxpy_batch(device_queue::syclQueue_t, n::Int64, + alpha::ComplexF32, x::Ptr{ComplexF32}, + incx::Int64, stridex::Int64, + y::Ptr{ComplexF32}, incy::Int64, + stridey::Int64, batch_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZaxpy_batch(device_queue, n, alpha, x, incx, stridex, y, incy, stridey, + batch_size) + @ccall liboneapi_support.onemklZaxpy_batch(device_queue::syclQueue_t, n::Int64, + alpha::ComplexF64, x::Ptr{ComplexF64}, + incx::Int64, stridex::Int64, +
y::Ptr{ComplexF64}, incy::Int64, + stridey::Int64, batch_size::Int64)::Cint +end + +function onemklScopy_batch(device_queue, n, x, incx, stridex, y, incy, stridey, batch_size) + @ccall liboneapi_support.onemklScopy_batch(device_queue::syclQueue_t, n::Int64, + x::Ptr{Cfloat}, incx::Int64, stridex::Int64, + y::Ptr{Cfloat}, incy::Int64, stridey::Int64, + batch_size::Int64)::Cint +end + +function onemklDcopy_batch(device_queue, n, x, incx, stridex, y, incy, stridey, batch_size) + @ccall liboneapi_support.onemklDcopy_batch(device_queue::syclQueue_t, n::Int64, + x::Ptr{Cdouble}, incx::Int64, stridex::Int64, + y::Ptr{Cdouble}, incy::Int64, stridey::Int64, + batch_size::Int64)::Cint +end + +function onemklCcopy_batch(device_queue, n, x, incx, stridex, y, incy, stridey, batch_size) + @ccall liboneapi_support.onemklCcopy_batch(device_queue::syclQueue_t, n::Int64, + x::Ptr{ComplexF32}, incx::Int64, + stridex::Int64, y::Ptr{ComplexF32}, + incy::Int64, stridey::Int64, + batch_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZcopy_batch(device_queue, n, x, incx, stridex, y, incy, stridey, batch_size) + @ccall liboneapi_support.onemklZcopy_batch(device_queue::syclQueue_t, n::Int64, + x::Ptr{ComplexF64}, incx::Int64, + stridex::Int64, y::Ptr{ComplexF64}, + incy::Int64, stridey::Int64, + batch_size::Int64)::Cint +end + +function onemklSgemmt(device_queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, + ldb, beta, c, ldc) + @ccall liboneapi_support.onemklSgemmt(device_queue::syclQueue_t, + upper_lower::onemklUplo, transa::onemklTranspose, + transb::onemklTranspose, n::Int64, k::Int64, + alpha::Cfloat, a::Ptr{Cfloat}, lda::Int64, + b::Ptr{Cfloat}, ldb::Int64, beta::Cfloat, + c::Ptr{Cfloat}, ldc::Int64)::Cint +end + +function onemklDgemmt(device_queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, + ldb, beta, c, ldc) + @ccall liboneapi_support.onemklDgemmt(device_queue::syclQueue_t, + upper_lower::onemklUplo, transa::onemklTranspose, + transb::onemklTranspose, n::Int64,
k::Int64, + alpha::Cdouble, a::Ptr{Cdouble}, lda::Int64, + b::Ptr{Cdouble}, ldb::Int64, beta::Cdouble, + c::Ptr{Cdouble}, ldc::Int64)::Cint +end + +function onemklCgemmt(device_queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, + ldb, beta, c, ldc) + @ccall liboneapi_support.onemklCgemmt(device_queue::syclQueue_t, + upper_lower::onemklUplo, transa::onemklTranspose, + transb::onemklTranspose, n::Int64, k::Int64, + alpha::ComplexF32, a::Ptr{ComplexF32}, lda::Int64, + b::Ptr{ComplexF32}, ldb::Int64, beta::ComplexF32, + c::Ptr{ComplexF32}, ldc::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZgemmt(device_queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, + ldb, beta, c, ldc) + @ccall liboneapi_support.onemklZgemmt(device_queue::syclQueue_t, + upper_lower::onemklUplo, transa::onemklTranspose, + transb::onemklTranspose, n::Int64, k::Int64, + alpha::ComplexF64, a::Ptr{ComplexF64}, lda::Int64, + b::Ptr{ComplexF64}, ldb::Int64, beta::ComplexF64, + c::Ptr{ComplexF64}, ldc::Int64)::Cint +end + +function onemklSimatcopy(device_queue, trans, m, n, alpha, ab, lda, ldb) + @ccall liboneapi_support.onemklSimatcopy(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::Cfloat, ab::Ptr{Cfloat}, lda::Int64, + ldb::Int64)::Cint +end + +function onemklDimatcopy(device_queue, trans, m, n, alpha, ab, lda, ldb) + @ccall liboneapi_support.onemklDimatcopy(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::Cdouble, ab::Ptr{Cdouble}, lda::Int64, + ldb::Int64)::Cint +end + +function onemklCimatcopy(device_queue, trans, m, n, alpha, ab, lda, ldb) + @ccall liboneapi_support.onemklCimatcopy(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::ComplexF32, ab::Ptr{ComplexF32}, + lda::Int64, ldb::Int64)::Cint +end + +function onemklZimatcopy(device_queue, trans, m, n, alpha, ab, lda, ldb) + @ccall liboneapi_support.onemklZimatcopy(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, +
alpha::ComplexF64, ab::Ptr{ComplexF64}, + lda::Int64, ldb::Int64)::Cint +end + +function onemklSomatcopy(device_queue, trans, m, n, alpha, a, lda, b, ldb) + @ccall liboneapi_support.onemklSomatcopy(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::Cfloat, a::Ptr{Cfloat}, lda::Int64, + b::Ptr{Cfloat}, ldb::Int64)::Cint +end + +function onemklDomatcopy(device_queue, trans, m, n, alpha, a, lda, b, ldb) + @ccall liboneapi_support.onemklDomatcopy(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::Cdouble, a::Ptr{Cdouble}, lda::Int64, + b::Ptr{Cdouble}, ldb::Int64)::Cint +end + +function onemklComatcopy(device_queue, trans, m, n, alpha, a, lda, b, ldb) + @ccall liboneapi_support.onemklComatcopy(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::ComplexF32, a::Ptr{ComplexF32}, + lda::Int64, b::Ptr{ComplexF32}, + ldb::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZomatcopy(device_queue, trans, m, n, alpha, a, lda, b, ldb) + @ccall liboneapi_support.onemklZomatcopy(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + alpha::ComplexF64, a::Ptr{ComplexF64}, + lda::Int64, b::Ptr{ComplexF64}, + ldb::Int64)::Cint +end + +function onemklSomatadd(device_queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, + ldc) + @ccall liboneapi_support.onemklSomatadd(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + alpha::Cfloat, a::Ptr{Cfloat}, lda::Int64, + beta::Cfloat, b::Ptr{Cfloat}, ldb::Int64, + c::Ptr{Cfloat}, ldc::Int64)::Cint +end + +function onemklDomatadd(device_queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, + ldc) + @ccall liboneapi_support.onemklDomatadd(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + alpha::Cdouble, a::Ptr{Cdouble}, lda::Int64, + beta::Cdouble, b::Ptr{Cdouble}, ldb::Int64, + c::Ptr{Cdouble}, ldc::Int64)::Cint +end + +function
onemklComatadd(device_queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, + ldc) + @ccall liboneapi_support.onemklComatadd(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + alpha::ComplexF32, a::Ptr{ComplexF32}, + lda::Int64, beta::ComplexF32, + b::Ptr{ComplexF32}, ldb::Int64, + c::Ptr{ComplexF32}, ldc::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZomatadd(device_queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, + ldc) + @ccall liboneapi_support.onemklZomatadd(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, n::Int64, + alpha::ComplexF64, a::Ptr{ComplexF64}, + lda::Int64, beta::ComplexF64, + b::Ptr{ComplexF64}, ldb::Int64, + c::Ptr{ComplexF64}, ldc::Int64)::Cint +end + +function onemklSimatcopy_batch(device_queue, trans, m, n, alpha, ab, lda, ldb, stride, + batch_size) + @ccall liboneapi_support.onemklSimatcopy_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, + n::Int64, alpha::Cfloat, ab::Ptr{Cfloat}, + lda::Int64, ldb::Int64, stride::Int64, + batch_size::Int64)::Cint +end + +function onemklDimatcopy_batch(device_queue, trans, m, n, alpha, ab, lda, ldb, stride, + batch_size) + @ccall liboneapi_support.onemklDimatcopy_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, + n::Int64, alpha::Cdouble, + ab::Ptr{Cdouble}, lda::Int64, ldb::Int64, + stride::Int64, batch_size::Int64)::Cint +end + +function onemklCimatcopy_batch(device_queue, trans, m, n, alpha, ab, lda, ldb, stride, + batch_size) + @ccall liboneapi_support.onemklCimatcopy_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, + n::Int64, alpha::ComplexF32, + ab::Ptr{ComplexF32}, lda::Int64, + ldb::Int64, stride::Int64, + batch_size::Int64)::Cint +end + +function onemklZimatcopy_batch(device_queue, trans, m, n, alpha, ab, lda, ldb, stride, + batch_size) + @ccall liboneapi_support.onemklZimatcopy_batch(device_queue::syclQueue_t, + trans::onemklTranspose,
m::Int64, + n::Int64, alpha::ComplexF64, + ab::Ptr{ComplexF64}, lda::Int64, + ldb::Int64, stride::Int64, + batch_size::Int64)::Cint +end + +function onemklSomatcopy_batch(device_queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, + stride_b, batch_size) + @ccall liboneapi_support.onemklSomatcopy_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, + n::Int64, alpha::Cfloat, a::Ptr{Cfloat}, + lda::Int64, stride_a::Int64, + b::Ptr{Cfloat}, ldb::Int64, + stride_b::Int64, batch_size::Int64)::Cint +end + +function onemklDomatcopy_batch(device_queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, + stride_b, batch_size) + @ccall liboneapi_support.onemklDomatcopy_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, + n::Int64, alpha::Cdouble, + a::Ptr{Cdouble}, lda::Int64, + stride_a::Int64, b::Ptr{Cdouble}, + ldb::Int64, stride_b::Int64, + batch_size::Int64)::Cint +end + +function onemklComatcopy_batch(device_queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, + stride_b, batch_size) + @ccall liboneapi_support.onemklComatcopy_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, + n::Int64, alpha::ComplexF32, + a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, b::Ptr{ComplexF32}, + ldb::Int64, stride_b::Int64, + batch_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZomatcopy_batch(device_queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, + stride_b, batch_size) + @ccall liboneapi_support.onemklZomatcopy_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, + n::Int64, alpha::ComplexF64, + a::Ptr{ComplexF64}, lda::Int64, + stride_a::Int64, b::Ptr{ComplexF64}, + ldb::Int64, stride_b::Int64, + batch_size::Int64)::Cint +end + +function onemklSomatadd_batch(device_queue, transa, transb, m, n, alpha, a, lda, stride_a, + beta, b, ldb, stride_b, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklSomatadd_batch(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, +
n::Int64, alpha::Cfloat, a::Ptr{Cfloat}, + lda::Int64, stride_a::Int64, beta::Cfloat, + b::Ptr{Cfloat}, ldb::Int64, + stride_b::Int64, c::Ptr{Cfloat}, + ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklDomatadd_batch(device_queue, transa, transb, m, n, alpha, a, lda, stride_a, + beta, b, ldb, stride_b, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklDomatadd_batch(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, + n::Int64, alpha::Cdouble, a::Ptr{Cdouble}, + lda::Int64, stride_a::Int64, + beta::Cdouble, b::Ptr{Cdouble}, + ldb::Int64, stride_b::Int64, + c::Ptr{Cdouble}, ldc::Int64, + stride_c::Int64, batch_size::Int64)::Cint +end + +function onemklComatadd_batch(device_queue, transa, transb, m, n, alpha, a, lda, stride_a, + beta, b, ldb, stride_b, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklComatadd_batch(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, + n::Int64, alpha::ComplexF32, + a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, beta::ComplexF32, + b::Ptr{ComplexF32}, ldb::Int64, + stride_b::Int64, c::Ptr{ComplexF32}, + ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZomatadd_batch(device_queue, transa, transb, m, n, alpha, a, lda, stride_a, + beta, b, ldb, stride_b, c, ldc, stride_c, batch_size) + @ccall liboneapi_support.onemklZomatadd_batch(device_queue::syclQueue_t, + transa::onemklTranspose, + transb::onemklTranspose, m::Int64, + n::Int64, alpha::ComplexF64, + a::Ptr{ComplexF64}, lda::Int64, + stride_a::Int64, beta::ComplexF64, + b::Ptr{ComplexF64}, ldb::Int64, + stride_b::Int64, c::Ptr{ComplexF64}, + ldc::Int64, stride_c::Int64, + batch_size::Int64)::Cint +end + +function onemklSpotrf(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSpotrf(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{Cfloat}, lda::Int64, +
scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklDpotrf(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDpotrf(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{Cdouble}, lda::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklCpotrf(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCpotrf(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZpotrf(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZpotrf(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF64}, lda::Int64, + scratchpad::Ptr{ComplexF64}, + scratchpad_size::Int64)::Cint +end + +function onemklSpotrf_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklSpotrf_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklDpotrf_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklDpotrf_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklCpotrf_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklCpotrf_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklZpotrf_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklZpotrf_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklSpotrs(device_queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklSpotrs(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, + b::Ptr{Cfloat},
ldb::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklDpotrs(device_queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklDpotrs(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, nrhs::Int64, a::Ptr{Cdouble}, + lda::Int64, b::Ptr{Cdouble}, ldb::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklCpotrs(device_queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklCpotrs(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, + lda::Int64, b::Ptr{ComplexF32}, ldb::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZpotrs(device_queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklZpotrs(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, nrhs::Int64, a::Ptr{ComplexF64}, + lda::Int64, b::Ptr{ComplexF64}, ldb::Int64, + scratchpad::Ptr{ComplexF64}, + scratchpad_size::Int64)::Cint +end + +function onemklSpotrs_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb) + @ccall liboneapi_support.onemklSpotrs_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + nrhs::Int64, lda::Int64, + ldb::Int64)::Int64 +end + +function onemklDpotrs_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb) + @ccall liboneapi_support.onemklDpotrs_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + nrhs::Int64, lda::Int64, + ldb::Int64)::Int64 +end + +function onemklCpotrs_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb) + @ccall liboneapi_support.onemklCpotrs_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + nrhs::Int64, lda::Int64, + ldb::Int64)::Int64 +end + +function onemklZpotrs_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb) + @ccall
liboneapi_support.onemklZpotrs_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + nrhs::Int64, lda::Int64, + ldb::Int64)::Int64 +end + +function onemklSpotri(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSpotri(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{Cfloat}, lda::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklDpotri(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDpotri(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{Cdouble}, lda::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklCpotri(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCpotri(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZpotri(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZpotri(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF64}, lda::Int64, + scratchpad::Ptr{ComplexF64}, + scratchpad_size::Int64)::Cint +end + +function onemklSpotri_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklSpotri_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklDpotri_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklDpotri_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklCpotri_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklCpotri_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklZpotri_scratchpad_size(device_queue, uplo, n, lda) + @ccall
liboneapi_support.onemklZpotri_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +#= The four gebrd scratchpad-size wrappers forward (device_queue, m, n, lda) to the liboneapi_support symbol of the same name and return its Int64 result. =# +function onemklSgebrd_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklSgebrd_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklDgebrd_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklDgebrd_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklCgebrd_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklCgebrd_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklZgebrd_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklZgebrd_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +#= The gebrd wrappers return the Cint produced by the C wrapper; in the complex variant d and e are real-valued (Cfloat) buffers while a, tauq, taup and scratchpad are ComplexF32. =# +function onemklCgebrd(device_queue, m, n, a, lda, d, e, tauq, taup, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklCgebrd(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, d::Ptr{Cfloat}, + e::Ptr{Cfloat}, tauq::Ptr{ComplexF32}, + taup::Ptr{ComplexF32}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklDgebrd(device_queue, m, n, a, lda, d, e, tauq, taup, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklDgebrd(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{Cdouble}, lda::Int64, d::Ptr{Cdouble}, + e::Ptr{Cdouble}, tauq::Ptr{Cdouble}, + taup::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgebrd(device_queue, m, n, a, lda, d, e, tauq, taup, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklSgebrd(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{Cfloat}, lda::Int64, d::Ptr{Cfloat}, + e::Ptr{Cfloat}, tauq::Ptr{Cfloat}, + taup::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, +
scratchpad_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64; d and e stay real (Cdouble). =# +function onemklZgebrd(device_queue, m, n, a, lda, d, e, tauq, taup, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklZgebrd(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{ComplexF64}, lda::Int64, d::Ptr{Cdouble}, + e::Ptr{Cdouble}, tauq::Ptr{ComplexF64}, + taup::Ptr{ComplexF64}, + scratchpad::Ptr{ComplexF64}, + scratchpad_size::Int64)::Cint +end + +function onemklSgeqrf_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklSgeqrf_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklDgeqrf_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklDgeqrf_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklCgeqrf_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklCgeqrf_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklZgeqrf_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklZgeqrf_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklCgeqrf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCgeqrf(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, + tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklDgeqrf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDgeqrf(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{Cdouble}, lda::Int64, tau::Ptr{Cdouble}, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgeqrf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSgeqrf(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{Cfloat}, lda::Int64,
tau::Ptr{Cfloat}, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZgeqrf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZgeqrf(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{ComplexF64}, lda::Int64, + tau::Ptr{ComplexF64}, scratchpad::Ptr{ComplexF64}, + scratchpad_size::Int64)::Cint +end + +function onemklCgesvd(device_queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, + scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCgesvd(device_queue::syclQueue_t, jobu::onemklJobsvd, + jobvt::onemklJobsvd, m::Int64, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, s::Ptr{Cfloat}, + u::Ptr{ComplexF32}, ldu::Int64, + vt::Ptr{ComplexF32}, ldvt::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64; s stays real (Cdouble). =# +function onemklZgesvd(device_queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, + scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZgesvd(device_queue::syclQueue_t, jobu::onemklJobsvd, + jobvt::onemklJobsvd, m::Int64, n::Int64, + a::Ptr{ComplexF64}, lda::Int64, s::Ptr{Cdouble}, + u::Ptr{ComplexF64}, ldu::Int64, + vt::Ptr{ComplexF64}, ldvt::Int64, + scratchpad::Ptr{ComplexF64}, + scratchpad_size::Int64)::Cint +end + +function onemklDgesvd(device_queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, + scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDgesvd(device_queue::syclQueue_t, jobu::onemklJobsvd, + jobvt::onemklJobsvd, m::Int64, n::Int64, + a::Ptr{Cdouble}, lda::Int64, s::Ptr{Cdouble}, + u::Ptr{Cdouble}, ldu::Int64, vt::Ptr{Cdouble}, + ldvt::Int64, scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgesvd(device_queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, + scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSgesvd(device_queue::syclQueue_t, jobu::onemklJobsvd, + jobvt::onemklJobsvd, m::Int64, n::Int64, + a::Ptr{Cfloat}, lda::Int64, s::Ptr{Cfloat}, + u::Ptr{Cfloat}, ldu::Int64,
vt::Ptr{Cfloat}, + ldvt::Int64, scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrf_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklSgetrf_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklDgetrf_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklDgetrf_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklCgetrf_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklCgetrf_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklZgetrf_scratchpad_size(device_queue, m, n, lda) + @ccall liboneapi_support.onemklZgetrf_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64)::Int64 +end + +function onemklCgetrf(device_queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCgetrf(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, ipiv::Ptr{Int64}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklDgetrf(device_queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDgetrf(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{Cdouble}, lda::Int64, ipiv::Ptr{Int64}, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrf(device_queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSgetrf(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{Cfloat}, lda::Int64, ipiv::Ptr{Int64}, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZgetrf(device_queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZgetrf(device_queue::syclQueue_t, m::Int64, n::Int64, + a::Ptr{ComplexF64}, lda::Int64, ipiv::Ptr{Int64}, +
scratchpad::Ptr{ComplexF64}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrf_batch_scratchpad_size(device_queue, m, n, lda, stride_a, stride_ipiv, + batch_size) + @ccall liboneapi_support.onemklSgetrf_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, stride_a::Int64, + stride_ipiv::Int64, + batch_size::Int64)::Int64 +end + +function onemklDgetrf_batch_scratchpad_size(device_queue, m, n, lda, stride_a, stride_ipiv, + batch_size) + @ccall liboneapi_support.onemklDgetrf_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, stride_a::Int64, + stride_ipiv::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgetrf_batch_scratchpad_size(device_queue, m, n, lda, stride_a, stride_ipiv, + batch_size) + @ccall liboneapi_support.onemklCgetrf_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, stride_a::Int64, + stride_ipiv::Int64, + batch_size::Int64)::Int64 +end + +function onemklZgetrf_batch_scratchpad_size(device_queue, m, n, lda, stride_a, stride_ipiv, + batch_size) + @ccall liboneapi_support.onemklZgetrf_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, stride_a::Int64, + stride_ipiv::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgetrf_batch(device_queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCgetrf_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, ipiv::Ptr{Int64}, + stride_ipiv::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklDgetrf_batch(device_queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDgetrf_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{Cdouble}, lda::Int64, + stride_a::Int64, ipiv::Ptr{Int64}, +
stride_ipiv::Int64, batch_size::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrf_batch(device_queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSgetrf_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{Cfloat}, lda::Int64, + stride_a::Int64, ipiv::Ptr{Int64}, + stride_ipiv::Int64, batch_size::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +#= Z variant: the C header declares double _Complex, so complex arguments are ComplexF64. =# +function onemklZgetrf_batch(device_queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZgetrf_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{ComplexF64}, lda::Int64, + stride_a::Int64, ipiv::Ptr{Int64}, + stride_ipiv::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF64}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrfnp_batch_scratchpad_size(device_queue, m, n, lda, stride_a, batch_size) + @ccall liboneapi_support.onemklSgetrfnp_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, + stride_a::Int64, + batch_size::Int64)::Int64 +end + +function onemklDgetrfnp_batch_scratchpad_size(device_queue, m, n, lda, stride_a, batch_size) + @ccall liboneapi_support.onemklDgetrfnp_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, + stride_a::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgetrfnp_batch_scratchpad_size(device_queue, m, n, lda, stride_a, batch_size) + @ccall liboneapi_support.onemklCgetrfnp_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, + stride_a::Int64, + batch_size::Int64)::Int64 +end + +function onemklZgetrfnp_batch_scratchpad_size(device_queue, m, n, lda, stride_a, batch_size) + @ccall liboneapi_support.onemklZgetrfnp_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, + stride_a::Int64, + batch_size::Int64)::Int64
+end + +function onemklCgetrfnp_batch(device_queue, m, n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklCgetrfnp_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklDgetrfnp_batch(device_queue, m, n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklDgetrfnp_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{Cdouble}, lda::Int64, + stride_a::Int64, batch_size::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrfnp_batch(device_queue, m, n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklSgetrfnp_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{Cfloat}, lda::Int64, + stride_a::Int64, batch_size::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklZgetrfnp_batch(device_queue, m, n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklZgetrfnp_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetri_scratchpad_size(device_queue, n, lda) + @ccall liboneapi_support.onemklSgetri_scratchpad_size(device_queue::syclQueue_t, + n::Int64, lda::Int64)::Int64 +end + +function onemklDgetri_scratchpad_size(device_queue, n, lda) + @ccall liboneapi_support.onemklDgetri_scratchpad_size(device_queue::syclQueue_t, + n::Int64, lda::Int64)::Int64 +end + +function onemklCgetri_scratchpad_size(device_queue, n, lda) + @ccall liboneapi_support.onemklCgetri_scratchpad_size(device_queue::syclQueue_t, + n::Int64, lda::Int64)::Int64 end -function onemklDgbmv(device_queue, trans, m, n, kl, ku, alpha, a, 
lda, x, incx, beta, y, - incy) - @ccall liboneapi_support.onemklDgbmv(device_queue::syclQueue_t, trans::onemklTranspose, - m::Int64, n::Int64, kl::Int64, ku::Int64, - alpha::Cdouble, a::ZePtr{Cdouble}, lda::Int64, - x::ZePtr{Cdouble}, incx::Int64, beta::Cdouble, - y::ZePtr{Cdouble}, incy::Int64)::Cvoid +function onemklZgetri_scratchpad_size(device_queue, n, lda) + @ccall liboneapi_support.onemklZgetri_scratchpad_size(device_queue::syclQueue_t, + n::Int64, lda::Int64)::Int64 end -function onemklCgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, - incy) - @ccall liboneapi_support.onemklCgbmv(device_queue::syclQueue_t, trans::onemklTranspose, - m::Int64, n::Int64, kl::Int64, ku::Int64, - alpha::ComplexF32, a::ZePtr{ComplexF32}, - lda::Int64, x::ZePtr{ComplexF32}, incx::Int64, - beta::ComplexF32, y::ZePtr{ComplexF32}, - incy::Int64)::Cvoid +function onemklCgetri(device_queue, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCgetri(device_queue::syclQueue_t, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, ipiv::Ptr{Int64}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklZgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, - incy) - @ccall liboneapi_support.onemklZgbmv(device_queue::syclQueue_t, trans::onemklTranspose, - m::Int64, n::Int64, kl::Int64, ku::Int64, - alpha::ComplexF64, a::ZePtr{ComplexF64}, - lda::Int64, x::ZePtr{ComplexF64}, incx::Int64, - beta::ComplexF64, y::ZePtr{ComplexF64}, - incy::Int64)::Cvoid +function onemklDgetri(device_queue, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDgetri(device_queue::syclQueue_t, n::Int64, + a::Ptr{Cdouble}, lda::Int64, ipiv::Ptr{Int64}, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint end -function onemklSgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) - @ccall liboneapi_support.onemklSgemv(device_queue::syclQueue_t, trans::onemklTranspose, - 
m::Int64, n::Int64, alpha::Cfloat, - a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, - incx::Int64, beta::Cfloat, y::ZePtr{Cfloat}, - incy::Int64)::Cvoid +function onemklSgetri(device_queue, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSgetri(device_queue::syclQueue_t, n::Int64, + a::Ptr{Cfloat}, lda::Int64, ipiv::Ptr{Int64}, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint end -function onemklDgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) - @ccall liboneapi_support.onemklDgemv(device_queue::syclQueue_t, trans::onemklTranspose, - m::Int64, n::Int64, alpha::Cdouble, - a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, - incx::Int64, beta::Cdouble, y::ZePtr{Cdouble}, - incy::Int64)::Cvoid +function onemklZgetri(device_queue, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZgetri(device_queue::syclQueue_t, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, ipiv::Ptr{Int64}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklCgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) - @ccall liboneapi_support.onemklCgemv(device_queue::syclQueue_t, trans::onemklTranspose, - m::Int64, n::Int64, alpha::ComplexF32, - a::ZePtr{ComplexF32}, lda::Int64, - x::ZePtr{ComplexF32}, incx::Int64, - beta::ComplexF32, y::ZePtr{ComplexF32}, - incy::Int64)::Cvoid +function onemklSgetrs_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb) + @ccall liboneapi_support.onemklSgetrs_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, lda::Int64, + ldb::Int64)::Int64 end -function onemklZgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) - @ccall liboneapi_support.onemklZgemv(device_queue::syclQueue_t, trans::onemklTranspose, - m::Int64, n::Int64, alpha::ComplexF64, - a::ZePtr{ComplexF64}, lda::Int64, - x::ZePtr{ComplexF64}, incx::Int64, - beta::ComplexF64, y::ZePtr{ComplexF64}, - 
incy::Int64)::Cvoid +function onemklDgetrs_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb) + @ccall liboneapi_support.onemklDgetrs_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, lda::Int64, + ldb::Int64)::Int64 end -function onemklSger(device_queue, m, n, alpha, x, incx, y, incy, a, lda) - @ccall liboneapi_support.onemklSger(device_queue::syclQueue_t, m::Int64, n::Int64, - alpha::Cfloat, x::ZePtr{Cfloat}, incx::Int64, - y::ZePtr{Cfloat}, incy::Int64, a::ZePtr{Cfloat}, - lda::Int64)::Cvoid +function onemklCgetrs_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb) + @ccall liboneapi_support.onemklCgetrs_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, lda::Int64, + ldb::Int64)::Int64 +end + +function onemklZgetrs_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb) + @ccall liboneapi_support.onemklZgetrs_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, lda::Int64, + ldb::Int64)::Int64 +end + +function onemklCgetrs(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklCgetrs(device_queue::syclQueue_t, trans::onemklTranspose, + n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, + lda::Int64, ipiv::Ptr{Int64}, b::Ptr{ComplexF32}, + ldb::Int64, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklDgetrs(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklDgetrs(device_queue::syclQueue_t, trans::onemklTranspose, + n::Int64, nrhs::Int64, a::Ptr{Cdouble}, + lda::Int64, ipiv::Ptr{Int64}, b::Ptr{Cdouble}, + ldb::Int64, scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrs(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklSgetrs(device_queue::syclQueue_t, trans::onemklTranspose, + 
n::Int64, nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, + ipiv::Ptr{Int64}, b::Ptr{Cfloat}, ldb::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklZgetrs(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklZgetrs(device_queue::syclQueue_t, trans::onemklTranspose, + n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, + lda::Int64, ipiv::Ptr{Int64}, b::Ptr{ComplexF32}, + ldb::Int64, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrs_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, + stride_ipiv, ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklSgetrs_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + n::Int64, nrhs::Int64, + lda::Int64, stride_a::Int64, + stride_ipiv::Int64, + ldb::Int64, stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklDgetrs_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, + stride_ipiv, ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklDgetrs_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + n::Int64, nrhs::Int64, + lda::Int64, stride_a::Int64, + stride_ipiv::Int64, + ldb::Int64, stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgetrs_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, + stride_ipiv, ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklCgetrs_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + n::Int64, nrhs::Int64, + lda::Int64, stride_a::Int64, + stride_ipiv::Int64, + ldb::Int64, stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklZgetrs_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, + stride_ipiv, ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklZgetrs_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + n::Int64, nrhs::Int64, + lda::Int64, 
stride_a::Int64, + stride_ipiv::Int64, + ldb::Int64, stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgetrs_batch(device_queue, trans, n, nrhs, a, lda, stride_a, ipiv, + stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklCgetrs_batch(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, ipiv::Ptr{Int64}, + stride_ipiv::Int64, b::Ptr{ComplexF32}, + ldb::Int64, stride_b::Int64, + batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklDgetrs_batch(device_queue, trans, n, nrhs, a, lda, stride_a, ipiv, + stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklDgetrs_batch(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, a::Ptr{Cdouble}, lda::Int64, + stride_a::Int64, ipiv::Ptr{Int64}, + stride_ipiv::Int64, b::Ptr{Cdouble}, + ldb::Int64, stride_b::Int64, + batch_size::Int64, scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrs_batch(device_queue, trans, n, nrhs, a, lda, stride_a, ipiv, + stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklSgetrs_batch(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, + stride_a::Int64, ipiv::Ptr{Int64}, + stride_ipiv::Int64, b::Ptr{Cfloat}, + ldb::Int64, stride_b::Int64, + batch_size::Int64, scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklZgetrs_batch(device_queue, trans, n, nrhs, a, lda, stride_a, ipiv, + stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklZgetrs_batch(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, ipiv::Ptr{Int64}, + 
stride_ipiv::Int64, b::Ptr{ComplexF32}, + ldb::Int64, stride_b::Int64, + batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrsnp_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, + ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklSgetrsnp_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + n::Int64, nrhs::Int64, + lda::Int64, + stride_a::Int64, + ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklDgetrsnp_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, + ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklDgetrsnp_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + n::Int64, nrhs::Int64, + lda::Int64, + stride_a::Int64, + ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgetrsnp_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, + ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklCgetrsnp_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + n::Int64, nrhs::Int64, + lda::Int64, + stride_a::Int64, + ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklZgetrsnp_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, + ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklZgetrsnp_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + n::Int64, nrhs::Int64, + lda::Int64, + stride_a::Int64, + ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgetrsnp_batch(device_queue, trans, n, nrhs, a, lda, stride_a, b, ldb, + stride_b, batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCgetrsnp_batch(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + b::Ptr{ComplexF32}, ldb::Int64, + stride_b::Int64, 
batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklDgetrsnp_batch(device_queue, trans, n, nrhs, a, lda, stride_a, b, ldb, + stride_b, batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDgetrsnp_batch(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, a::Ptr{Cdouble}, lda::Int64, + stride_a::Int64, b::Ptr{Cdouble}, + ldb::Int64, stride_b::Int64, + batch_size::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSgetrsnp_batch(device_queue, trans, n, nrhs, a, lda, stride_a, b, ldb, + stride_b, batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSgetrsnp_batch(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, + stride_a::Int64, b::Ptr{Cfloat}, + ldb::Int64, stride_b::Int64, + batch_size::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklZgetrsnp_batch(device_queue, trans, n, nrhs, a, lda, stride_a, b, ldb, + stride_b, batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZgetrsnp_batch(device_queue::syclQueue_t, + trans::onemklTranspose, n::Int64, + nrhs::Int64, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + b::Ptr{ComplexF32}, ldb::Int64, + stride_b::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklCheev_scratchpad_size(device_queue, jobz, uplo, n, lda) + @ccall liboneapi_support.onemklCheev_scratchpad_size(device_queue::syclQueue_t, + jobz::onemklCompz, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklZheev_scratchpad_size(device_queue, jobz, uplo, n, lda) + @ccall liboneapi_support.onemklZheev_scratchpad_size(device_queue::syclQueue_t, + jobz::onemklCompz, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 +end + +function onemklCheev(device_queue, jobz, uplo, n, a, lda, w, scratchpad, 
scratchpad_size) + @ccall liboneapi_support.onemklCheev(device_queue::syclQueue_t, jobz::onemklCompz, + uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, + lda::Int64, w::Ptr{Cfloat}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklZheev(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZheev(device_queue::syclQueue_t, jobz::onemklCompz, + uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, + lda::Int64, w::Ptr{Cdouble}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklCheevd_scratchpad_size(device_queue, jobz, uplo, n, lda) + @ccall liboneapi_support.onemklCheevd_scratchpad_size(device_queue::syclQueue_t, + jobz::onemklJob, uplo::onemklUplo, + n::Int64, lda::Int64)::Int64 +end + +function onemklZheevd_scratchpad_size(device_queue, jobz, uplo, n, lda) + @ccall liboneapi_support.onemklZheevd_scratchpad_size(device_queue::syclQueue_t, + jobz::onemklJob, uplo::onemklUplo, + n::Int64, lda::Int64)::Int64 +end + +function onemklCheevd(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCheevd(device_queue::syclQueue_t, jobz::onemklJob, + uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, + lda::Int64, w::Ptr{Cfloat}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklDger(device_queue, m, n, alpha, x, incx, y, incy, a, lda) - @ccall liboneapi_support.onemklDger(device_queue::syclQueue_t, m::Int64, n::Int64, - alpha::Cdouble, x::ZePtr{Cdouble}, incx::Int64, - y::ZePtr{Cdouble}, incy::Int64, a::ZePtr{Cdouble}, - lda::Int64)::Cvoid +function onemklZheevd(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZheevd(device_queue::syclQueue_t, jobz::onemklJob, + uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, + lda::Int64, w::Ptr{Cdouble}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function 
onemklCgerc(device_queue, m, n, alpha, x, incx, y, incy, a, lda) - @ccall liboneapi_support.onemklCgerc(device_queue::syclQueue_t, m::Int64, n::Int64, - alpha::ComplexF32, x::ZePtr{ComplexF32}, - incx::Int64, y::ZePtr{ComplexF32}, incy::Int64, - a::ZePtr{ComplexF32}, lda::Int64)::Cvoid +function onemklChegvd_scratchpad_size(device_queue, itype, jobz, uplo, n, lda, ldb) + @ccall liboneapi_support.onemklChegvd_scratchpad_size(device_queue::syclQueue_t, + itype::Int64, jobz::onemklJob, + uplo::onemklUplo, n::Int64, + lda::Int64, ldb::Int64)::Int64 end -function onemklZgerc(device_queue, m, n, alpha, x, incx, y, incy, a, lda) - @ccall liboneapi_support.onemklZgerc(device_queue::syclQueue_t, m::Int64, n::Int64, - alpha::ComplexF64, x::ZePtr{ComplexF64}, - incx::Int64, y::ZePtr{ComplexF64}, incy::Int64, - a::ZePtr{ComplexF64}, lda::Int64)::Cvoid +function onemklZhegvd_scratchpad_size(device_queue, itype, jobz, uplo, n, lda, ldb) + @ccall liboneapi_support.onemklZhegvd_scratchpad_size(device_queue::syclQueue_t, + itype::Int64, jobz::onemklJob, + uplo::onemklUplo, n::Int64, + lda::Int64, ldb::Int64)::Int64 end -function onemklSasum(device_queue, n, x, incx, result) - @ccall liboneapi_support.onemklSasum(device_queue::syclQueue_t, n::Int64, - x::ZePtr{Cfloat}, incx::Int64, - result::ZePtr{Cfloat})::Cvoid +function onemklChegvd(device_queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklChegvd(device_queue::syclQueue_t, itype::Int64, + jobz::onemklJob, uplo::onemklUplo, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, + b::Ptr{ComplexF32}, ldb::Int64, w::Ptr{Cfloat}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklDasum(device_queue, n, x, incx, result) - @ccall liboneapi_support.onemklDasum(device_queue::syclQueue_t, n::Int64, - x::ZePtr{Cdouble}, incx::Int64, - result::ZePtr{Cdouble})::Cvoid +function onemklZhegvd(device_queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, + 
scratchpad_size) + @ccall liboneapi_support.onemklZhegvd(device_queue::syclQueue_t, itype::Int64, + jobz::onemklJob, uplo::onemklUplo, n::Int64, + a::Ptr{ComplexF32}, lda::Int64, + b::Ptr{ComplexF32}, ldb::Int64, w::Ptr{Cdouble}, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklCasum(device_queue, n, x, incx, result) - @ccall liboneapi_support.onemklCasum(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF32}, incx::Int64, - result::ZePtr{Cfloat})::Cvoid +function onemklChetrd_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklChetrd_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 end -function onemklZasum(device_queue, n, x, incx, result) - @ccall liboneapi_support.onemklZasum(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF64}, incx::Int64, - result::ZePtr{Float64})::Cvoid +function onemklZhetrd_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklZhetrd_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 end -function onemklSaxpy(device_queue, n, alpha, x, incx, y, incy) - @ccall liboneapi_support.onemklSaxpy(device_queue::syclQueue_t, n::Int64, alpha::Cfloat, - x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, - incy::Int64)::Cvoid +function onemklChetrd(device_queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklChetrd(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + d::Ptr{Cfloat}, e::Ptr{Cfloat}, + tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklDaxpy(device_queue, n, alpha, x, incx, y, incy) - @ccall liboneapi_support.onemklDaxpy(device_queue::syclQueue_t, n::Int64, - alpha::Cdouble, x::ZePtr{Cdouble}, incx::Int64, - y::ZePtr{Cdouble}, incy::Int64)::Cvoid +function onemklZhetrd(device_queue, uplo, n, a, lda, d, e, tau, scratchpad, 
scratchpad_size) + @ccall liboneapi_support.onemklZhetrd(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + d::Ptr{Cdouble}, e::Ptr{Cdouble}, + tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklCaxpy(device_queue, n, alpha, x, incx, y, incy) - @ccall liboneapi_support.onemklCaxpy(device_queue::syclQueue_t, n::Int64, - alpha::ComplexF32, x::ZePtr{ComplexF32}, - incx::Int64, y::ZePtr{ComplexF32}, - incy::Int64)::Cvoid +function onemklChetrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklChetrf(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + ipiv::Ptr{Int64}, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklZaxpy(device_queue, n, alpha, x, incx, y, incy) - @ccall liboneapi_support.onemklZaxpy(device_queue::syclQueue_t, n::Int64, - alpha::ComplexF64, x::ZePtr{ComplexF64}, - incx::Int64, y::ZePtr{ComplexF64}, - incy::Int64)::Cvoid +function onemklZhetrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZhetrf(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + ipiv::Ptr{Int64}, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklHaxpy(device_queue, n, alpha, x, incx, y, incy) - @ccall liboneapi_support.onemklHaxpy(device_queue::syclQueue_t, n::Int64, - alpha::Float16, x::ZePtr{Float16}, incx::Int64, - y::ZePtr{Float16}, incy::Int64)::Cvoid +function onemklChetrf_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklChetrf_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 end -function onemklSaxpby(device_queue, n, alpha, x, incx, beta, y, incy) - @ccall liboneapi_support.onemklSaxpby(device_queue::syclQueue_t, n::Int64, - alpha::Cfloat, x::ZePtr{Cfloat}, incx::Int64, - 
beta::Cfloat, y::ZePtr{Cfloat}, - incy::Int64)::Cvoid +function onemklZhetrf_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklZhetrf_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 end -function onemklDaxpby(device_queue, n, alpha, x, incx, beta, y, incy) - @ccall liboneapi_support.onemklDaxpby(device_queue::syclQueue_t, n::Int64, - alpha::Cdouble, x::ZePtr{Cdouble}, incx::Int64, - beta::Cdouble, y::ZePtr{Cdouble}, - incy::Int64)::Cvoid +function onemklSorgbr(device_queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSorgbr(device_queue::syclQueue_t, vec::onemklGenerate, + m::Int64, n::Int64, k::Int64, a::Ptr{Cfloat}, + lda::Int64, tau::Ptr{Cfloat}, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint end -function onemklCaxpby(device_queue, n, alpha, x, incx, beta, y, incy) - @ccall liboneapi_support.onemklCaxpby(device_queue::syclQueue_t, n::Int64, - alpha::ComplexF32, x::ZePtr{ComplexF32}, - incx::Int64, beta::ComplexF32, - y::ZePtr{ComplexF32}, incy::Int64)::Cvoid +function onemklDorgbr(device_queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDorgbr(device_queue::syclQueue_t, vec::onemklGenerate, + m::Int64, n::Int64, k::Int64, a::Ptr{Cdouble}, + lda::Int64, tau::Ptr{Cdouble}, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint end -function onemklZaxpby(device_queue, n, alpha, x, incx, beta, y, incy) - @ccall liboneapi_support.onemklZaxpby(device_queue::syclQueue_t, n::Int64, - alpha::ComplexF64, x::ZePtr{ComplexF64}, - incx::Int64, beta::ComplexF64, - y::ZePtr{ComplexF64}, incy::Int64)::Cvoid +function onemklSorgbr_scratchpad_size(device_queue, vect, m, n, k, lda) + @ccall liboneapi_support.onemklSorgbr_scratchpad_size(device_queue::syclQueue_t, + vect::onemklGenerate, m::Int64, + n::Int64, k::Int64, + lda::Int64)::Int64 end -function onemklSrot(device_queue, n, x, incx, y, incy, c, 
s) - @ccall liboneapi_support.onemklSrot(device_queue::syclQueue_t, n::Int64, - x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, - incy::Int64, c::Cfloat, s::Cfloat)::Cvoid +function onemklDorgbr_scratchpad_size(device_queue, vect, m, n, k, lda) + @ccall liboneapi_support.onemklDorgbr_scratchpad_size(device_queue::syclQueue_t, + vect::onemklGenerate, m::Int64, + n::Int64, k::Int64, + lda::Int64)::Int64 end -function onemklDrot(device_queue, n, x, incx, y, incy, c, s) - @ccall liboneapi_support.onemklDrot(device_queue::syclQueue_t, n::Int64, - x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, - incy::Int64, c::Cdouble, s::Cdouble)::Cvoid +function onemklSorgqr_scratchpad_size(device_queue, m, n, k, lda) + @ccall liboneapi_support.onemklSorgqr_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, k::Int64, + lda::Int64)::Int64 end -function onemklCrot(device_queue, n, x, incx, y, incy, c, s) - @ccall liboneapi_support.onemklCrot(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF32}, incx::Int64, - y::ZePtr{ComplexF32}, incy::Int64, c::Cfloat, - s::ComplexF32)::Cvoid +function onemklDorgqr_scratchpad_size(device_queue, m, n, k, lda) + @ccall liboneapi_support.onemklDorgqr_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, k::Int64, + lda::Int64)::Int64 end -function onemklZrot(device_queue, n, x, incx, y, incy, c, s) - @ccall liboneapi_support.onemklZrot(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF64}, incx::Int64, - y::ZePtr{ComplexF64}, incy::Int64, c::Cdouble, - s::ComplexF64)::Cvoid +function onemklDorgqr(device_queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDorgqr(device_queue::syclQueue_t, m::Int64, n::Int64, + k::Int64, a::Ptr{Cdouble}, lda::Int64, + tau::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint end -function onemklCsrot(device_queue, n, x, incx, y, incy, c, s) - @ccall liboneapi_support.onemklCsrot(device_queue::syclQueue_t, n::Int64, - 
x::ZePtr{ComplexF32}, incx::Int64, - y::ZePtr{ComplexF32}, incy::Int64, c::Cfloat, - s::Cfloat)::Cvoid +function onemklSorgqr(device_queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSorgqr(device_queue::syclQueue_t, m::Int64, n::Int64, + k::Int64, a::Ptr{Cfloat}, lda::Int64, + tau::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint end -function onemklZdrot(device_queue, n, x, incx, y, incy, c, s) - @ccall liboneapi_support.onemklZdrot(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF64}, incx::Int64, - y::ZePtr{ComplexF64}, incy::Int64, c::Cdouble, - s::Cdouble)::Cvoid +function onemklSormqr_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc) + @ccall liboneapi_support.onemklSormqr_scratchpad_size(device_queue::syclQueue_t, + side::onemklSide, + trans::onemklTranspose, m::Int64, + n::Int64, k::Int64, lda::Int64, + ldc::Int64)::Int64 end -function onemklDscal(device_queue, n, alpha, x, incx) - @ccall liboneapi_support.onemklDscal(device_queue::syclQueue_t, n::Int64, - alpha::Cdouble, x::ZePtr{Cdouble}, - incx::Int64)::Cvoid +function onemklDormqr_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc) + @ccall liboneapi_support.onemklDormqr_scratchpad_size(device_queue::syclQueue_t, + side::onemklSide, + trans::onemklTranspose, m::Int64, + n::Int64, k::Int64, lda::Int64, + ldc::Int64)::Int64 end -function onemklSscal(device_queue, n, alpha, x, incx) - @ccall liboneapi_support.onemklSscal(device_queue::syclQueue_t, n::Int64, alpha::Cfloat, - x::ZePtr{Cfloat}, incx::Int64)::Cvoid +function onemklDormqr(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklDormqr(device_queue::syclQueue_t, side::onemklSide, + trans::onemklTranspose, m::Int64, n::Int64, + k::Int64, a::Ptr{Cdouble}, lda::Int64, + tau::Ptr{Cdouble}, c::Ptr{Cdouble}, ldc::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint end -function 
# Bindings for the ormqr / steqr / syev entry points of `liboneapi_support`.
# Each function forwards its arguments unchanged through `@ccall`, converting them to the
# C types annotated in the call. The letter after `onemkl` selects the element type:
# `S` = Cfloat, `D` = Cdouble, `C` = ComplexF32, `Z` = ComplexF64.
# NOTE(review): the `Cint` returned by the compute routines is presumably a status code and
# the `Int64` returned by the `_scratchpad_size` routines presumably a buffer length --
# neither meaning is visible from this file; confirm against the C wrapper.

# Binding for the C symbol `onemklSormqr`.
function onemklSormqr(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklSormqr(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, a::Ptr{Cfloat}, lda::Int64, tau::Ptr{Cfloat},
        c::Ptr{Cfloat}, ldc::Int64, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsteqr_scratchpad_size`.
function onemklSsteqr_scratchpad_size(device_queue, compz, n, ldz)
    @ccall liboneapi_support.onemklSsteqr_scratchpad_size(
        device_queue::syclQueue_t, compz::onemklCompz, n::Int64, ldz::Int64)::Int64
end

# Binding for the C symbol `onemklDsteqr_scratchpad_size`.
function onemklDsteqr_scratchpad_size(device_queue, compz, n, ldz)
    @ccall liboneapi_support.onemklDsteqr_scratchpad_size(
        device_queue::syclQueue_t, compz::onemklCompz, n::Int64, ldz::Int64)::Int64
end

# Binding for the C symbol `onemklCsteqr_scratchpad_size`.
function onemklCsteqr_scratchpad_size(device_queue, compz, n, ldz)
    @ccall liboneapi_support.onemklCsteqr_scratchpad_size(
        device_queue::syclQueue_t, compz::onemklCompz, n::Int64, ldz::Int64)::Int64
end

# Binding for the C symbol `onemklZsteqr_scratchpad_size`.
function onemklZsteqr_scratchpad_size(device_queue, compz, n, ldz)
    @ccall liboneapi_support.onemklZsteqr_scratchpad_size(
        device_queue::syclQueue_t, compz::onemklCompz, n::Int64, ldz::Int64)::Int64
end

# Binding for the C symbol `onemklCsteqr`; `d`/`e` are real (Cfloat), `z` is complex.
function onemklCsteqr(device_queue, compz, n, d, e, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCsteqr(
        device_queue::syclQueue_t, compz::onemklCompz, n::Int64, d::Ptr{Cfloat},
        e::Ptr{Cfloat}, z::Ptr{ComplexF32}, ldz::Int64, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklDsteqr`.
function onemklDsteqr(device_queue, compz, n, d, e, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsteqr(
        device_queue::syclQueue_t, compz::onemklCompz, n::Int64, d::Ptr{Cdouble},
        e::Ptr{Cdouble}, z::Ptr{Cdouble}, ldz::Int64, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsteqr`.
function onemklSsteqr(device_queue, compz, n, d, e, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsteqr(
        device_queue::syclQueue_t, compz::onemklCompz, n::Int64, d::Ptr{Cfloat},
        e::Ptr{Cfloat}, z::Ptr{Cfloat}, ldz::Int64, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklZsteqr`.
# `z` and `scratchpad` are typed `ComplexF64` to match the double-precision `d`/`e`
# (`Cdouble`) arguments; the generated code had `ComplexF32` here.
function onemklZsteqr(device_queue, compz, n, d, e, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZsteqr(
        device_queue::syclQueue_t, compz::onemklCompz, n::Int64, d::Ptr{Cdouble},
        e::Ptr{Cdouble}, z::Ptr{ComplexF64}, ldz::Int64, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsyev_scratchpad_size`.
function onemklSsyev_scratchpad_size(device_queue, jobz, uplo, n, lda)
    @ccall liboneapi_support.onemklSsyev_scratchpad_size(
        device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64,
        lda::Int64)::Int64
end

# Binding for the C symbol `onemklDsyev_scratchpad_size`.
function onemklDsyev_scratchpad_size(device_queue, jobz, uplo, n, lda)
    @ccall liboneapi_support.onemklDsyev_scratchpad_size(
        device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64,
        lda::Int64)::Int64
end

# syev / syevd bindings for `liboneapi_support`, written with the functional `ccall` form:
# (symbol, library), return type, tuple of C argument types, then the arguments in order.
# NOTE(review): the meaning of the returned `Cint` / `Int64` is not visible from this file.

# Calls the C symbol `onemklDsyev` (double-precision buffers).
function onemklDsyev(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    return ccall((:onemklDsyev, liboneapi_support), Cint,
                 (syclQueue_t, onemklCompz, onemklUplo, Int64, Ptr{Cdouble}, Int64,
                  Ptr{Cdouble}, Ptr{Cdouble}, Int64),
                 device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
end

# Calls the C symbol `onemklSsyev` (single-precision buffers).
function onemklSsyev(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    return ccall((:onemklSsyev, liboneapi_support), Cint,
                 (syclQueue_t, onemklCompz, onemklUplo, Int64, Ptr{Cfloat}, Int64,
                  Ptr{Cfloat}, Ptr{Cfloat}, Int64),
                 device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
end

# Calls the C symbol `onemklSsyevd_scratchpad_size`.
function onemklSsyevd_scratchpad_size(device_queue, jobz, uplo, n, lda)
    return ccall((:onemklSsyevd_scratchpad_size, liboneapi_support), Int64,
                 (syclQueue_t, onemklJob, onemklUplo, Int64, Int64),
                 device_queue, jobz, uplo, n, lda)
end

# Calls the C symbol `onemklDsyevd_scratchpad_size`.
function onemklDsyevd_scratchpad_size(device_queue, jobz, uplo, n, lda)
    return ccall((:onemklDsyevd_scratchpad_size, liboneapi_support), Int64,
                 (syclQueue_t, onemklJob, onemklUplo, Int64, Int64),
                 device_queue, jobz, uplo, n, lda)
end

# Bindings for the syevd / syevx / sygvd / sygvx / sytrd / trtrs entry points of
# `liboneapi_support`. Each function forwards its arguments unchanged through `@ccall`,
# converting them to the C types annotated in the call. The letter after `onemkl` selects
# the element type: `S` = Cfloat, `D` = Cdouble, `C` = ComplexF32, `Z` = ComplexF64.
# NOTE(review): the `Cint` returned by the compute routines is presumably a status code and
# the `Int64` returned by the `_scratchpad_size` routines presumably a buffer length --
# neither meaning is visible from this file; confirm against the C wrapper.

# Binding for the C symbol `onemklDsyevd`.
function onemklDsyevd(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsyevd(
        device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64,
        a::Ptr{Cdouble}, lda::Int64, w::Ptr{Cdouble}, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsyevd`.
function onemklSsyevd(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsyevd(
        device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64,
        a::Ptr{Cfloat}, lda::Int64, w::Ptr{Cfloat}, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsyevx_scratchpad_size`.
function onemklSsyevx_scratchpad_size(device_queue, jobz, range, uplo, n, lda, vl, vu, il,
                                      iu, abstol, ldz)
    @ccall liboneapi_support.onemklSsyevx_scratchpad_size(
        device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev,
        uplo::onemklUplo, n::Int64, lda::Int64, vl::Cfloat, vu::Cfloat, il::Int64,
        iu::Int64, abstol::Cfloat, ldz::Int64)::Int64
end

# Binding for the C symbol `onemklDsyevx_scratchpad_size`.
function onemklDsyevx_scratchpad_size(device_queue, jobz, range, uplo, n, lda, vl, vu, il,
                                      iu, abstol, ldz)
    @ccall liboneapi_support.onemklDsyevx_scratchpad_size(
        device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev,
        uplo::onemklUplo, n::Int64, lda::Int64, vl::Cdouble, vu::Cdouble, il::Int64,
        iu::Int64, abstol::Cdouble, ldz::Int64)::Int64
end

# Binding for the C symbol `onemklDsyevx`; `m` is passed as a pointer to `Int64`.
function onemklDsyevx(device_queue, jobz, range, uplo, n, a, lda, vl, vu, il, iu, abstol, m,
                      w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsyevx(
        device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev,
        uplo::onemklUplo, n::Int64, a::Ptr{Cdouble}, lda::Int64, vl::Cdouble,
        vu::Cdouble, il::Int64, iu::Int64, abstol::Cdouble, m::Ptr{Int64},
        w::Ptr{Cdouble}, z::Ptr{Cdouble}, ldz::Int64, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsyevx`; `m` is passed as a pointer to `Int64`.
function onemklSsyevx(device_queue, jobz, range, uplo, n, a, lda, vl, vu, il, iu, abstol, m,
                      w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsyevx(
        device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev,
        uplo::onemklUplo, n::Int64, a::Ptr{Cfloat}, lda::Int64, vl::Cfloat,
        vu::Cfloat, il::Int64, iu::Int64, abstol::Cfloat, m::Ptr{Int64},
        w::Ptr{Cfloat}, z::Ptr{Cfloat}, ldz::Int64, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsygvd_scratchpad_size`.
function onemklSsygvd_scratchpad_size(device_queue, itype, jobz, uplo, n, lda, ldb)
    @ccall liboneapi_support.onemklSsygvd_scratchpad_size(
        device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo,
        n::Int64, lda::Int64, ldb::Int64)::Int64
end

# Binding for the C symbol `onemklDsygvd_scratchpad_size`.
function onemklDsygvd_scratchpad_size(device_queue, itype, jobz, uplo, n, lda, ldb)
    @ccall liboneapi_support.onemklDsygvd_scratchpad_size(
        device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo,
        n::Int64, lda::Int64, ldb::Int64)::Int64
end

# Binding for the C symbol `onemklDsygvd`.
function onemklDsygvd(device_queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklDsygvd(
        device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo,
        n::Int64, a::Ptr{Cdouble}, lda::Int64, b::Ptr{Cdouble}, ldb::Int64,
        w::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsygvd`.
function onemklSsygvd(device_queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklSsygvd(
        device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo,
        n::Int64, a::Ptr{Cfloat}, lda::Int64, b::Ptr{Cfloat}, ldb::Int64,
        w::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsygvx_scratchpad_size`.
function onemklSsygvx_scratchpad_size(device_queue, itype, jobz, range, uplo, n, lda, ldb,
                                      vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklSsygvx_scratchpad_size(
        device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz,
        range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64,
        vl::Cfloat, vu::Cfloat, il::Int64, iu::Int64, abstol::Cfloat,
        ldz::Int64)::Int64
end

# Binding for the C symbol `onemklDsygvx_scratchpad_size`.
function onemklDsygvx_scratchpad_size(device_queue, itype, jobz, range, uplo, n, lda, ldb,
                                      vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklDsygvx_scratchpad_size(
        device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz,
        range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64,
        vl::Cdouble, vu::Cdouble, il::Int64, iu::Int64, abstol::Cdouble,
        ldz::Int64)::Int64
end

# Binding for the C symbol `onemklDsygvx`; `m` is passed as a pointer to `Int64`.
function onemklDsygvx(device_queue, itype, jobz, range, uplo, n, a, lda, b, ldb, vl, vu, il,
                      iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsygvx(
        device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz,
        range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble}, lda::Int64,
        b::Ptr{Cdouble}, ldb::Int64, vl::Cdouble, vu::Cdouble, il::Int64, iu::Int64,
        abstol::Cdouble, m::Ptr{Int64}, w::Ptr{Cdouble}, z::Ptr{Cdouble}, ldz::Int64,
        scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsygvx`; `m` is passed as a pointer to `Int64`.
function onemklSsygvx(device_queue, itype, jobz, range, uplo, n, a, lda, b, ldb, vl, vu, il,
                      iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsygvx(
        device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz,
        range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat}, lda::Int64,
        b::Ptr{Cfloat}, ldb::Int64, vl::Cfloat, vu::Cfloat, il::Int64, iu::Int64,
        abstol::Cfloat, m::Ptr{Int64}, w::Ptr{Cfloat}, z::Ptr{Cfloat}, ldz::Int64,
        scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsytrd_scratchpad_size`.
function onemklSsytrd_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklSsytrd_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklDsytrd_scratchpad_size`.
function onemklDsytrd_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklDsytrd_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklDsytrd`.
function onemklDsytrd(device_queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsytrd(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble},
        lda::Int64, d::Ptr{Cdouble}, e::Ptr{Cdouble}, tau::Ptr{Cdouble},
        scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsytrd`.
function onemklSsytrd(device_queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsytrd(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat},
        lda::Int64, d::Ptr{Cfloat}, e::Ptr{Cfloat}, tau::Ptr{Cfloat},
        scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklStrtrs_scratchpad_size`.
function onemklStrtrs_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklStrtrs_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose,
        diag::onemklDiag, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

# Binding for the C symbol `onemklDtrtrs_scratchpad_size`.
function onemklDtrtrs_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklDtrtrs_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose,
        diag::onemklDiag, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

# Binding for the C symbol `onemklCtrtrs_scratchpad_size`.
function onemklCtrtrs_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklCtrtrs_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose,
        diag::onemklDiag, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

# Binding for the C symbol `onemklZtrtrs_scratchpad_size`.
function onemklZtrtrs_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklZtrtrs_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose,
        diag::onemklDiag, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

# Binding for the C symbol `onemklCtrtrs`.
function onemklCtrtrs(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklCtrtrs(
        device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose,
        diag::onemklDiag, n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64,
        b::Ptr{ComplexF32}, ldb::Int64, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklDtrtrs`.
function onemklDtrtrs(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklDtrtrs(
        device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose,
        diag::onemklDiag, n::Int64, nrhs::Int64, a::Ptr{Cdouble}, lda::Int64,
        b::Ptr{Cdouble}, ldb::Int64, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklStrtrs`.
function onemklStrtrs(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklStrtrs(
        device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose,
        diag::onemklDiag, n::Int64, nrhs::Int64, a::Ptr{Cfloat}, lda::Int64,
        b::Ptr{Cfloat}, ldb::Int64, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklZtrtrs`.
# Buffers are typed `ComplexF64` (double-precision complex); the generated code had
# `ComplexF32`, which is the element type of the `C` variant.
function onemklZtrtrs(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklZtrtrs(
        device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose,
        diag::onemklDiag, n::Int64, nrhs::Int64, a::Ptr{ComplexF64}, lda::Int64,
        b::Ptr{ComplexF64}, ldb::Int64, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Bindings for the ungbr / ungqr entry points of `liboneapi_support`.
# Each function forwards its arguments unchanged through `@ccall`, converting them to the
# C types annotated in the call. `C` variants use `ComplexF32` buffers, `Z` variants use
# `ComplexF64` buffers.
# NOTE(review): the `Cint` returned by the compute routines is presumably a status code and
# the `Int64` returned by the `_scratchpad_size` routines presumably a buffer length --
# neither meaning is visible from this file; confirm against the C wrapper.

# Binding for the C symbol `onemklCungbr`.
function onemklCungbr(device_queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCungbr(
        device_queue::syclQueue_t, vec::onemklGenerate, m::Int64, n::Int64, k::Int64,
        a::Ptr{ComplexF32}, lda::Int64, tau::Ptr{ComplexF32},
        scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklZungbr`.
# Buffers are typed `ComplexF64`; the generated code had `ComplexF32` here.
function onemklZungbr(device_queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZungbr(
        device_queue::syclQueue_t, vec::onemklGenerate, m::Int64, n::Int64, k::Int64,
        a::Ptr{ComplexF64}, lda::Int64, tau::Ptr{ComplexF64},
        scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklCungbr_scratchpad_size`.
function onemklCungbr_scratchpad_size(device_queue, vect, m, n, k, lda)
    @ccall liboneapi_support.onemklCungbr_scratchpad_size(
        device_queue::syclQueue_t, vect::onemklGenerate, m::Int64, n::Int64, k::Int64,
        lda::Int64)::Int64
end

# Binding for the C symbol `onemklZungbr_scratchpad_size`.
function onemklZungbr_scratchpad_size(device_queue, vect, m, n, k, lda)
    @ccall liboneapi_support.onemklZungbr_scratchpad_size(
        device_queue::syclQueue_t, vect::onemklGenerate, m::Int64, n::Int64, k::Int64,
        lda::Int64)::Int64
end

# Binding for the C symbol `onemklCungqr_scratchpad_size`.
function onemklCungqr_scratchpad_size(device_queue, m, n, k, lda)
    @ccall liboneapi_support.onemklCungqr_scratchpad_size(
        device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklZungqr_scratchpad_size`.
function onemklZungqr_scratchpad_size(device_queue, m, n, k, lda)
    @ccall liboneapi_support.onemklZungqr_scratchpad_size(
        device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklCungqr`.
function onemklCungqr(device_queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCungqr(
        device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::Ptr{ComplexF32},
        lda::Int64, tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklZungqr`.
# Buffers are typed `ComplexF64`; the generated code had `ComplexF32` here.
function onemklZungqr(device_queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZungqr(
        device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::Ptr{ComplexF64},
        lda::Int64, tau::Ptr{ComplexF64}, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# unmqr bindings for `liboneapi_support`, written with the functional `ccall` form:
# (symbol, library), return type, tuple of C argument types, then the arguments in order.
# NOTE(review): the meaning of the returned `Cint` / `Int64` is not visible from this file.

# Calls the C symbol `onemklCunmqr_scratchpad_size`.
function onemklCunmqr_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    return ccall((:onemklCunmqr_scratchpad_size, liboneapi_support), Int64,
                 (syclQueue_t, onemklSide, onemklTranspose, Int64, Int64, Int64, Int64,
                  Int64),
                 device_queue, side, trans, m, n, k, lda, ldc)
end

# Calls the C symbol `onemklZunmqr_scratchpad_size`.
function onemklZunmqr_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    return ccall((:onemklZunmqr_scratchpad_size, liboneapi_support), Int64,
                 (syclQueue_t, onemklSide, onemklTranspose, Int64, Int64, Int64, Int64,
                  Int64),
                 device_queue, side, trans, m, n, k, lda, ldc)
end

# Calls the C symbol `onemklCunmqr` (single-precision complex buffers).
function onemklCunmqr(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
                      scratchpad_size)
    return ccall((:onemklCunmqr, liboneapi_support), Cint,
                 (syclQueue_t, onemklSide, onemklTranspose, Int64, Int64, Int64,
                  Ptr{ComplexF32}, Int64, Ptr{ComplexF32}, Ptr{ComplexF32}, Int64,
                  Ptr{ComplexF32}, Int64),
                 device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
                 scratchpad_size)
end

# Bindings for the unmqr (Z) / gerqf / ormrq / unmrq / sytrf (S) entry points of
# `liboneapi_support`. Each function forwards its arguments unchanged through `@ccall`,
# converting them to the C types annotated in the call. The letter after `onemkl` selects
# the element type: `S` = Cfloat, `D` = Cdouble, `C` = ComplexF32, `Z` = ComplexF64.
# NOTE(review): the `Cint` returned by the compute routines is presumably a status code and
# the `Int64` returned by the `_scratchpad_size` routines presumably a buffer length --
# neither meaning is visible from this file; confirm against the C wrapper.

# Binding for the C symbol `onemklZunmqr`.
# Buffers are typed `ComplexF64`; the generated code had `ComplexF32` here.
function onemklZunmqr(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklZunmqr(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, a::Ptr{ComplexF64}, lda::Int64, tau::Ptr{ComplexF64},
        c::Ptr{ComplexF64}, ldc::Int64, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSgerqf`.
function onemklSgerqf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgerqf(
        device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64,
        tau::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklDgerqf`.
function onemklDgerqf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgerqf(
        device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64,
        tau::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklCgerqf`.
function onemklCgerqf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgerqf(
        device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64,
        tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklZgerqf`.
# Buffers are typed `ComplexF64`; the generated code had `ComplexF32` here.
function onemklZgerqf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgerqf(
        device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{ComplexF64}, lda::Int64,
        tau::Ptr{ComplexF64}, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSgerqf_scratchpad_size`.
function onemklSgerqf_scratchpad_size(device_queue, m, n, lda)
    @ccall liboneapi_support.onemklSgerqf_scratchpad_size(
        device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklDgerqf_scratchpad_size`.
function onemklDgerqf_scratchpad_size(device_queue, m, n, lda)
    @ccall liboneapi_support.onemklDgerqf_scratchpad_size(
        device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklCgerqf_scratchpad_size`.
function onemklCgerqf_scratchpad_size(device_queue, m, n, lda)
    @ccall liboneapi_support.onemklCgerqf_scratchpad_size(
        device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklZgerqf_scratchpad_size`.
function onemklZgerqf_scratchpad_size(device_queue, m, n, lda)
    @ccall liboneapi_support.onemklZgerqf_scratchpad_size(
        device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklSormrq`.
function onemklSormrq(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklSormrq(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, a::Ptr{Cfloat}, lda::Int64, tau::Ptr{Cfloat},
        c::Ptr{Cfloat}, ldc::Int64, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklDormrq`.
function onemklDormrq(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklDormrq(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, a::Ptr{Cdouble}, lda::Int64, tau::Ptr{Cdouble},
        c::Ptr{Cdouble}, ldc::Int64, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSormrq_scratchpad_size`.
function onemklSormrq_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklSormrq_scratchpad_size(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

# Binding for the C symbol `onemklDormrq_scratchpad_size`.
function onemklDormrq_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklDormrq_scratchpad_size(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

# Binding for the C symbol `onemklCunmrq`.
function onemklCunmrq(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklCunmrq(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, a::Ptr{ComplexF32}, lda::Int64, tau::Ptr{ComplexF32},
        c::Ptr{ComplexF32}, ldc::Int64, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklZunmrq`.
# Buffers are typed `ComplexF64`; the generated code had `ComplexF32` here.
function onemklZunmrq(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklZunmrq(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, a::Ptr{ComplexF64}, lda::Int64, tau::Ptr{ComplexF64},
        c::Ptr{ComplexF64}, ldc::Int64, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklCunmrq_scratchpad_size`.
function onemklCunmrq_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklCunmrq_scratchpad_size(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

# Binding for the C symbol `onemklZunmrq_scratchpad_size`.
function onemklZunmrq_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklZunmrq_scratchpad_size(
        device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64,
        n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

# Binding for the C symbol `onemklSsytrf`; `ipiv` is passed as a pointer to `Int64`.
function onemklSsytrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsytrf(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat},
        lda::Int64, ipiv::Ptr{Int64}, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# Bindings for the sytrf / orgtr entry points of `liboneapi_support`.
# Each function forwards its arguments unchanged through `@ccall`, converting them to the
# C types annotated in the call. The letter after `onemkl` selects the element type:
# `S` = Cfloat, `D` = Cdouble, `C` = ComplexF32, `Z` = ComplexF64.
# NOTE(review): the `Cint` returned by the compute routines is presumably a status code and
# the `Int64` returned by the `_scratchpad_size` routines presumably a buffer length --
# neither meaning is visible from this file; confirm against the C wrapper.

# Binding for the C symbol `onemklDsytrf`; `ipiv` is passed as a pointer to `Int64`.
function onemklDsytrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsytrf(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble},
        lda::Int64, ipiv::Ptr{Int64}, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklCsytrf`; `ipiv` is passed as a pointer to `Int64`.
function onemklCsytrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCsytrf(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32},
        lda::Int64, ipiv::Ptr{Int64}, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklZsytrf`; `ipiv` is passed as a pointer to `Int64`.
# Buffers are typed `ComplexF64`; the generated code had `ComplexF32` here.
function onemklZsytrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZsytrf(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF64},
        lda::Int64, ipiv::Ptr{Int64}, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklSsytrf_scratchpad_size`.
function onemklSsytrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklSsytrf_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklDsytrf_scratchpad_size`.
function onemklDsytrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklDsytrf_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklCsytrf_scratchpad_size`.
function onemklCsytrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklCsytrf_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklZsytrf_scratchpad_size`.
function onemklZsytrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklZsytrf_scratchpad_size(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# Binding for the C symbol `onemklSorgtr`.
function onemklSorgtr(device_queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSorgtr(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat},
        lda::Int64, tau::Ptr{Cfloat}, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# Binding for the C symbol `onemklDorgtr`.
function onemklDorgtr(device_queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDorgtr(
        device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble},
        lda::Int64, tau::Ptr{Cdouble}, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

onemklDamin(device_queue, n, x, incx, result) - @ccall liboneapi_support.onemklDamin(device_queue::syclQueue_t, n::Int64, - x::ZePtr{Cdouble}, incx::Int64, - result::ZePtr{Int64})::Cvoid +function onemklSorgtr_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklSorgtr_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 end -function onemklSamin(device_queue, n, x, incx, result) - @ccall liboneapi_support.onemklSamin(device_queue::syclQueue_t, n::Int64, - x::ZePtr{Cfloat}, incx::Int64, - result::ZePtr{Int64})::Cvoid +function onemklDorgtr_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklDorgtr_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 end -function onemklZamin(device_queue, n, x, incx, result) - @ccall liboneapi_support.onemklZamin(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF64}, incx::Int64, - result::ZePtr{Int64})::Cvoid +function onemklCungtr(device_queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCungtr(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklCamin(device_queue, n, x, incx, result) - @ccall liboneapi_support.onemklCamin(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF32}, incx::Int64, - result::ZePtr{Int64})::Cvoid +function onemklZungtr(device_queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZungtr(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint end -function onemklSswap(device_queue, n, x, incx, y, incy) - @ccall liboneapi_support.onemklSswap(device_queue::syclQueue_t, n::Int64, - x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, - 
incy::Int64)::Cvoid +function onemklCungtr_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklCungtr_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 end -function onemklDswap(device_queue, n, x, incx, y, incy) - @ccall liboneapi_support.onemklDswap(device_queue::syclQueue_t, n::Int64, - x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, - incy::Int64)::Cvoid +function onemklZungtr_scratchpad_size(device_queue, uplo, n, lda) + @ccall liboneapi_support.onemklZungtr_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64)::Int64 end -function onemklCswap(device_queue, n, x, incx, y, incy) - @ccall liboneapi_support.onemklCswap(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF32}, incx::Int64, - y::ZePtr{ComplexF32}, incy::Int64)::Cvoid +function onemklSormtr(device_queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, + scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSormtr(device_queue::syclQueue_t, side::onemklSide, + uplo::onemklUplo, trans::onemklTranspose, + m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64, + tau::Ptr{Cfloat}, c::Ptr{Cfloat}, ldc::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint end -function onemklZswap(device_queue, n, x, incx, y, incy) - @ccall liboneapi_support.onemklZswap(device_queue::syclQueue_t, n::Int64, - x::ZePtr{ComplexF64}, incx::Int64, - y::ZePtr{ComplexF64}, incy::Int64)::Cvoid +function onemklDormtr(device_queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, + scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDormtr(device_queue::syclQueue_t, side::onemklSide, + uplo::onemklUplo, trans::onemklTranspose, + m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64, + tau::Ptr{Cdouble}, c::Ptr{Cdouble}, ldc::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklSormtr_scratchpad_size(device_queue, side, uplo, trans, m, n, lda, ldc) + @ccall 
liboneapi_support.onemklSormtr_scratchpad_size(device_queue::syclQueue_t, + side::onemklSide, + uplo::onemklUplo, + trans::onemklTranspose, m::Int64, + n::Int64, lda::Int64, + ldc::Int64)::Int64 +end + +function onemklDormtr_scratchpad_size(device_queue, side, uplo, trans, m, n, lda, ldc) + @ccall liboneapi_support.onemklDormtr_scratchpad_size(device_queue::syclQueue_t, + side::onemklSide, + uplo::onemklUplo, + trans::onemklTranspose, m::Int64, + n::Int64, lda::Int64, + ldc::Int64)::Int64 +end + +function onemklCunmtr(device_queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, + scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCunmtr(device_queue::syclQueue_t, side::onemklSide, + uplo::onemklUplo, trans::onemklTranspose, + m::Int64, n::Int64, a::Ptr{ComplexF32}, + lda::Int64, tau::Ptr{ComplexF32}, + c::Ptr{ComplexF32}, ldc::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklZunmtr(device_queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, + scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZunmtr(device_queue::syclQueue_t, side::onemklSide, + uplo::onemklUplo, trans::onemklTranspose, + m::Int64, n::Int64, a::Ptr{ComplexF32}, + lda::Int64, tau::Ptr{ComplexF32}, + c::Ptr{ComplexF32}, ldc::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklCunmtr_scratchpad_size(device_queue, side, uplo, trans, m, n, lda, ldc) + @ccall liboneapi_support.onemklCunmtr_scratchpad_size(device_queue::syclQueue_t, + side::onemklSide, + uplo::onemklUplo, + trans::onemklTranspose, m::Int64, + n::Int64, lda::Int64, + ldc::Int64)::Int64 +end + +function onemklZunmtr_scratchpad_size(device_queue, side, uplo, trans, m, n, lda, ldc) + @ccall liboneapi_support.onemklZunmtr_scratchpad_size(device_queue::syclQueue_t, + side::onemklSide, + uplo::onemklUplo, + trans::onemklTranspose, m::Int64, + n::Int64, lda::Int64, + ldc::Int64)::Int64 +end + +function onemklSpotrf_batch(device_queue, uplo, 
n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklSpotrf_batch(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{Cfloat}, lda::Int64, + stride_a::Int64, batch_size::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklDpotrf_batch(device_queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklDpotrf_batch(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{Cdouble}, lda::Int64, + stride_a::Int64, batch_size::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklCpotrf_batch(device_queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklCpotrf_batch(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklZpotrf_batch(device_queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size) + @ccall liboneapi_support.onemklZpotrf_batch(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklSpotrs_batch(device_queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSpotrs_batch(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, nrhs::Int64, a::Ptr{Cfloat}, + lda::Int64, stride_a::Int64, b::Ptr{Cfloat}, + ldb::Int64, stride_b::Int64, + batch_size::Int64, scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklDpotrs_batch(device_queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size) + @ccall 
liboneapi_support.onemklDpotrs_batch(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, nrhs::Int64, a::Ptr{Cdouble}, + lda::Int64, stride_a::Int64, + b::Ptr{Cdouble}, ldb::Int64, + stride_b::Int64, batch_size::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklCpotrs_batch(device_queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCpotrs_batch(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + b::Ptr{ComplexF32}, ldb::Int64, + stride_b::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklZpotrs_batch(device_queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZpotrs_batch(device_queue::syclQueue_t, uplo::onemklUplo, + n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + b::Ptr{ComplexF32}, ldb::Int64, + stride_b::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklSgeqrf_batch(device_queue, m, n, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSgeqrf_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{Cfloat}, lda::Int64, + stride_a::Int64, tau::Ptr{Cfloat}, + stride_tau::Int64, batch_size::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklDgeqrf_batch(device_queue, m, n, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDgeqrf_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{Cdouble}, lda::Int64, + stride_a::Int64, tau::Ptr{Cdouble}, + stride_tau::Int64, batch_size::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function 
onemklCgeqrf_batch(device_queue, m, n, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCgeqrf_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, tau::Ptr{ComplexF32}, + stride_tau::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklZgeqrf_batch(device_queue, m, n, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZgeqrf_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, a::Ptr{ComplexF32}, lda::Int64, + stride_a::Int64, tau::Ptr{ComplexF32}, + stride_tau::Int64, batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklSorgqr_batch(device_queue, m, n, k, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSorgqr_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, k::Int64, a::Ptr{Cfloat}, + lda::Int64, stride_a::Int64, + tau::Ptr{Cfloat}, stride_tau::Int64, + batch_size::Int64, scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklDorgqr_batch(device_queue, m, n, k, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDorgqr_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, k::Int64, a::Ptr{Cdouble}, + lda::Int64, stride_a::Int64, + tau::Ptr{Cdouble}, stride_tau::Int64, + batch_size::Int64, scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklCungqr_batch(device_queue, m, n, k, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCungqr_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, k::Int64, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + tau::Ptr{ComplexF32}, stride_tau::Int64, + batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + 
scratchpad_size::Int64)::Cint +end + +function onemklZungqr_batch(device_queue, m, n, k, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklZungqr_batch(device_queue::syclQueue_t, m::Int64, + n::Int64, k::Int64, a::Ptr{ComplexF32}, + lda::Int64, stride_a::Int64, + tau::Ptr{ComplexF32}, stride_tau::Int64, + batch_size::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklSgels_batch(device_queue, trans, m, n, nrhs, a, lda, stridea, b, ldb, + strideb, batchsize, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklSgels_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, + stridea::Int64, b::Ptr{Cfloat}, ldb::Int64, + strideb::Int64, batchsize::Int64, + scratchpad::Ptr{Cfloat}, + scratchpad_size::Int64)::Cint +end + +function onemklDgels_batch(device_queue, trans, m, n, nrhs, a, lda, stridea, b, ldb, + strideb, batchsize, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklDgels_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + nrhs::Int64, a::Ptr{Cdouble}, lda::Int64, + stridea::Int64, b::Ptr{Cdouble}, ldb::Int64, + strideb::Int64, batchsize::Int64, + scratchpad::Ptr{Cdouble}, + scratchpad_size::Int64)::Cint +end + +function onemklCgels_batch(device_queue, trans, m, n, nrhs, a, lda, stridea, b, ldb, + strideb, batchsize, scratchpad, scratchpad_size) + @ccall liboneapi_support.onemklCgels_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64, + stridea::Int64, b::Ptr{ComplexF32}, + ldb::Int64, strideb::Int64, batchsize::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklZgels_batch(device_queue, trans, m, n, nrhs, a, lda, stridea, b, ldb, + strideb, batchsize, scratchpad, scratchpad_size) + @ccall 
liboneapi_support.onemklZgels_batch(device_queue::syclQueue_t, + trans::onemklTranspose, m::Int64, n::Int64, + nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64, + stridea::Int64, b::Ptr{ComplexF32}, + ldb::Int64, strideb::Int64, batchsize::Int64, + scratchpad::Ptr{ComplexF32}, + scratchpad_size::Int64)::Cint +end + +function onemklSpotrf_batch_scratchpad_size(device_queue, uplo, n, lda, stride_a, + batch_size) + @ccall liboneapi_support.onemklSpotrf_batch_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64, stride_a::Int64, + batch_size::Int64)::Int64 +end + +function onemklDpotrf_batch_scratchpad_size(device_queue, uplo, n, lda, stride_a, + batch_size) + @ccall liboneapi_support.onemklDpotrf_batch_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64, stride_a::Int64, + batch_size::Int64)::Int64 +end + +function onemklCpotrf_batch_scratchpad_size(device_queue, uplo, n, lda, stride_a, + batch_size) + @ccall liboneapi_support.onemklCpotrf_batch_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64, stride_a::Int64, + batch_size::Int64)::Int64 +end + +function onemklZpotrf_batch_scratchpad_size(device_queue, uplo, n, lda, stride_a, + batch_size) + @ccall liboneapi_support.onemklZpotrf_batch_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + lda::Int64, stride_a::Int64, + batch_size::Int64)::Int64 +end + +function onemklSpotrs_batch_scratchpad_size(device_queue, uplo, n, nrhs, lda, stride_a, ldb, + stride_b, batch_size) + @ccall liboneapi_support.onemklSpotrs_batch_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + nrhs::Int64, lda::Int64, + stride_a::Int64, ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklDpotrs_batch_scratchpad_size(device_queue, uplo, n, nrhs, lda, stride_a, ldb, + stride_b, batch_size) + @ccall liboneapi_support.onemklDpotrs_batch_scratchpad_size(device_queue::syclQueue_t, + 
uplo::onemklUplo, n::Int64, + nrhs::Int64, lda::Int64, + stride_a::Int64, ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklCpotrs_batch_scratchpad_size(device_queue, uplo, n, nrhs, lda, stride_a, ldb, + stride_b, batch_size) + @ccall liboneapi_support.onemklCpotrs_batch_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + nrhs::Int64, lda::Int64, + stride_a::Int64, ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklZpotrs_batch_scratchpad_size(device_queue, uplo, n, nrhs, lda, stride_a, ldb, + stride_b, batch_size) + @ccall liboneapi_support.onemklZpotrs_batch_scratchpad_size(device_queue::syclQueue_t, + uplo::onemklUplo, n::Int64, + nrhs::Int64, lda::Int64, + stride_a::Int64, ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklSgeqrf_batch_scratchpad_size(device_queue, m, n, lda, stride_a, stride_tau, + batch_size) + @ccall liboneapi_support.onemklSgeqrf_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, stride_a::Int64, + stride_tau::Int64, + batch_size::Int64)::Int64 +end + +function onemklDgeqrf_batch_scratchpad_size(device_queue, m, n, lda, stride_a, stride_tau, + batch_size) + @ccall liboneapi_support.onemklDgeqrf_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, stride_a::Int64, + stride_tau::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgeqrf_batch_scratchpad_size(device_queue, m, n, lda, stride_a, stride_tau, + batch_size) + @ccall liboneapi_support.onemklCgeqrf_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, stride_a::Int64, + stride_tau::Int64, + batch_size::Int64)::Int64 +end + +function onemklZgeqrf_batch_scratchpad_size(device_queue, m, n, lda, stride_a, stride_tau, + batch_size) + @ccall liboneapi_support.onemklZgeqrf_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + lda::Int64, stride_a::Int64, + 
stride_tau::Int64, + batch_size::Int64)::Int64 +end + +function onemklSorgqr_batch_scratchpad_size(device_queue, m, n, k, lda, stride_a, + stride_tau, batch_size) + @ccall liboneapi_support.onemklSorgqr_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + k::Int64, lda::Int64, + stride_a::Int64, + stride_tau::Int64, + batch_size::Int64)::Int64 +end + +function onemklDorgqr_batch_scratchpad_size(device_queue, m, n, k, lda, stride_a, + stride_tau, batch_size) + @ccall liboneapi_support.onemklDorgqr_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + k::Int64, lda::Int64, + stride_a::Int64, + stride_tau::Int64, + batch_size::Int64)::Int64 +end + +function onemklCungqr_batch_scratchpad_size(device_queue, m, n, k, lda, stride_a, + stride_tau, batch_size) + @ccall liboneapi_support.onemklCungqr_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + k::Int64, lda::Int64, + stride_a::Int64, + stride_tau::Int64, + batch_size::Int64)::Int64 +end + +function onemklZungqr_batch_scratchpad_size(device_queue, m, n, k, lda, stride_a, + stride_tau, batch_size) + @ccall liboneapi_support.onemklZungqr_batch_scratchpad_size(device_queue::syclQueue_t, + m::Int64, n::Int64, + k::Int64, lda::Int64, + stride_a::Int64, + stride_tau::Int64, + batch_size::Int64)::Int64 +end + +function onemklSgels_batch_scratchpad_size(device_queue, trans, m, n, nrhs, lda, stride_a, + ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklSgels_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + m::Int64, n::Int64, + nrhs::Int64, lda::Int64, + stride_a::Int64, ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklDgels_batch_scratchpad_size(device_queue, trans, m, n, nrhs, lda, stride_a, + ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklDgels_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + m::Int64, n::Int64, + nrhs::Int64, lda::Int64, + 
stride_a::Int64, ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklCgels_batch_scratchpad_size(device_queue, trans, m, n, nrhs, lda, stride_a, + ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklCgels_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + m::Int64, n::Int64, + nrhs::Int64, lda::Int64, + stride_a::Int64, ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 +end + +function onemklZgels_batch_scratchpad_size(device_queue, trans, m, n, nrhs, lda, stride_a, + ldb, stride_b, batch_size) + @ccall liboneapi_support.onemklZgels_batch_scratchpad_size(device_queue::syclQueue_t, + trans::onemklTranspose, + m::Int64, n::Int64, + nrhs::Int64, lda::Int64, + stride_a::Int64, ldb::Int64, + stride_b::Int64, + batch_size::Int64)::Int64 end function onemklDestroy() diff --git a/res/support.toml b/res/support.toml index 4c0e7207..fca4b896 100644 --- a/res/support.toml +++ b/res/support.toml @@ -150,11 +150,11 @@ use_ccall_macro = true 3 = "ZePtr{T}" 5 = "ZePtr{T}" -[api.onemklXdrot.argtypes] +[api.onemklXDrot.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" -[api.onemklXsrot.argtypes] +[api.onemklXSrot.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" @@ -162,10 +162,10 @@ use_ccall_macro = true 3 = "T" 4 = "ZePtr{T}" -[api.onemklXdscal.argtypes] +[api.onemklXDscal.argtypes] 4 = "ZePtr{T}" -[api.onemklXsscal.argtypes] +[api.onemklXSscal.argtypes] 4 = "ZePtr{T}" [api.onemklXger.argtypes] @@ -252,11 +252,11 @@ use_ccall_macro = true 3 = "ZePtr{T}" 5 = "ZePtr{T}" -[api.onemklXamax.argtypes] +[api.onemklXiamax.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{Int64}" -[api.onemklXamin.argtypes] +[api.onemklXiamin.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{Int64}"