Skip to content

Commit

Permalink
cholesky added to GPU (MadNLP#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
sshin23 authored and frapac committed Sep 20, 2021
1 parent 4d1df14 commit 6ed4f0f
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 12 deletions.
50 changes: 44 additions & 6 deletions lib/MadNLPGPU/src/lapackgpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import ..MadNLPGPU:
SymbolicException,FactorizationException,SolveException,InertiaException,
introduce, factorize!, solve!, improve!, is_inertia, inertia, MadNLPLapackCPU, tril_to_full!,
CUBLAS, CUSOLVER, CuVector, CuMatrix, toolkit_version, R_64F
import .CUSOLVER: cusolverDnDsytrf_bufferSize, cusolverDnDsytrf,
import .CUSOLVER:
cusolverDnDsytrf_bufferSize, cusolverDnDsytrf,
cusolverDnDpotrf_bufferSize, cusolverDnDpotrf, cusolverDnDpotrs,
cusolverDnDgetrf_bufferSize, cusolverDnDgetrf, cusolverDnDgetrs,
cusolverDnDgeqrf_bufferSize, cusolverDnDgeqrf, cusolverDnDgeqrf_bufferSize,
cusolverDnDormqr_bufferSize, cusolverDnDormqr,
Expand All @@ -16,7 +18,7 @@ import .CUBLAS: cublasDtrsm_v2, handle, CUBLAS_DIAG_NON_UNIT,

const INPUT_MATRIX_TYPE = :dense

@enum(Algorithms::Int, BUNCHKAUFMAN = 1, LU = 2, QR = 3)
@enum(Algorithms::Int, BUNCHKAUFMAN = 1, LU = 2, QR = 3, CHOLESKY = 4)
# Options for the Lapack-GPU linear solver.
@kwdef mutable struct Options <: AbstractOptions
# Factorization selected by `factorize!`/`solve!`; one of the `Algorithms`
# enum values. Defaults to BUNCHKAUFMAN.
lapackgpu_algorithm::Algorithms = BUNCHKAUFMAN
end
Expand Down Expand Up @@ -60,6 +62,8 @@ function factorize!(M::Solver)
factorize_lu!(M)
elseif M.opt.lapackgpu_algorithm == QR
factorize_qr!(M)
elseif M.opt.lapackgpu_algorithm == CHOLESKY
factorize_cholesky!(M)
else
error(LOGGER,"Invalid lapackgpu_algorithm")
end
Expand All @@ -71,6 +75,8 @@ function solve!(M::Solver,x)
solve_lu!(M,x)
elseif M.opt.lapackgpu_algorithm == QR
solve_qr!(M,x)
elseif M.opt.lapackgpu_algorithm == CHOLESKY
solve_cholesky!(M,x)
else
error(LOGGER,"Invalid lapackgpu_algorithm")
end
Expand All @@ -80,8 +86,7 @@ improve!(M::Solver) = false
introduce(M::Solver) = "Lapack-GPU ($(M.opt.lapackgpu_algorithm))"

if toolkit_version() >= v"11.3.1"

is_inertia(M::Solver) = false # TODO: implement inertia(M::Solver) for BUNCHKAUFMAN
# In this toolkit-version branch inertia is reported only when the CHOLESKY
# algorithm is selected.
is_inertia(M::Solver) = M.opt.lapackgpu_algorithm == CHOLESKY # TODO: implement inertia(M::Solver) for BUNCHKAUFMAN

function factorize_bunchkaufman!(M::Solver)
haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
Expand Down Expand Up @@ -124,8 +129,8 @@ if toolkit_version() >= v"11.3.1"
return x
end
else
is_inertia(M::Solver) = M.opt.lapackgpu_algorithm == BUNCHKAUFMAN
inertia(M::Solver) = inertia(M.etc[:fact_cpu],M.etc[:ipiv_cpu],M.etc[:info_cpu][])
# Inertia is available for both BUNCHKAUFMAN and CHOLESKY in this branch:
# `inertia(M)` has a dedicated case for each of them. (The condition used to
# test CHOLESKY twice, which silently disabled inertia for BUNCHKAUFMAN.)
is_inertia(M::Solver) =
    M.opt.lapackgpu_algorithm == BUNCHKAUFMAN || M.opt.lapackgpu_algorithm == CHOLESKY

function factorize_bunchkaufman!(M::Solver)
haskey(M.etc,:ipiv) || (M.etc[:ipiv] = CuVector{Int32}(undef,size(M.dense,1)))
Expand Down Expand Up @@ -209,4 +214,37 @@ function solve_qr!(M::Solver,x)
return x
end

# Copy `M.dense` into `M.fact` and run cuSOLVER's potrf on it in place, using
# the lower-triangle fill mode. The workspace size is queried first and
# `M.work` is grown if it is shorter than the requested size. `M.info` is
# passed to potrf as its status argument. Returns `M`.
function factorize_cholesky!(M::Solver)
    copyto!(M.fact, M.dense)

    nrows = Int32(size(M.fact, 1))
    ldfact = Int32(size(M.fact, 2))

    # Workspace query: the required length is stored in `M.lwork`.
    cusolverDnDpotrf_bufferSize(
        dense_handle(), CUBLAS_FILL_MODE_LOWER, nrows, M.fact, ldfact, M.lwork)
    if length(M.work) < M.lwork[]
        resize!(M.work, Int(M.lwork[]))
    end

    cusolverDnDpotrf(
        dense_handle(), CUBLAS_FILL_MODE_LOWER, nrows, M.fact, ldfact,
        M.work, M.lwork[], M.info)
    return M
end

# Solve with the factor stored in `M.fact` via cuSOLVER's potrs for a single
# right-hand side. `x` is staged through the buffer `M.rhs`: copied in before
# the call and copied back out afterwards. Returns `x`.
function solve_cholesky!(M::Solver, x)
    copyto!(M.rhs, x)

    order = Int32(size(M.fact, 1))
    ldfact = Int32(size(M.fact, 2))
    ldrhs = Int32(length(M.rhs))
    nrhs = Int32(1)

    cusolverDnDpotrs(
        dense_handle(), CUBLAS_FILL_MODE_LOWER, order, nrhs, M.fact, ldfact,
        M.rhs, ldrhs, M.info)

    copyto!(x, M.rhs)
    return x
end

# Return the inertia triple for the last factorization.
#
# - BUNCHKAUFMAN: delegates to the three-argument `inertia` using the
#   `:fact_cpu`, `:ipiv_cpu` and `:info_cpu` entries stored in `M.etc`.
# - CHOLESKY: if `M.info` sums to zero, reports `(n, 0, 0)`; otherwise
#   reports `(0, n, 0)`, where `n = size(M.fact, 1)`.
# - Any other algorithm is reported as an error through `LOGGER`.
function inertia(M::Solver)
    if M.opt.lapackgpu_algorithm == BUNCHKAUFMAN
        return inertia(M.etc[:fact_cpu], M.etc[:ipiv_cpu], M.etc[:info_cpu][])
    elseif M.opt.lapackgpu_algorithm == CHOLESKY
        return sum(M.info) == 0 ? (size(M.fact, 1), 0, 0) : (0, size(M.fact, 1), 0)
    else
        # The option of this module is `lapackgpu_algorithm`; the message
        # previously named the CPU solver's option by mistake.
        error(LOGGER,"Invalid lapackgpu_algorithm")
    end
end

end # module
9 changes: 9 additions & 0 deletions lib/MadNLPGPU/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ testset = [
[],
@isdefined(MadNLPLapackGPU)
],
[
"LapackGPU-CHOLESKY",
()->MadNLP.Optimizer(
linear_solver=MadNLPLapackGPU,
lapackgpu_algorithm=MadNLPLapackGPU.CHOLESKY,
print_level=MadNLP.ERROR),
["infeasible", "lootsma", "eigmina"],
@isdefined(MadNLPLapackGPU)
],
]

@testset "MadNLPGPU test" begin
Expand Down
46 changes: 40 additions & 6 deletions src/LinearSolvers/lapack.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import ..MadNLP:

const INPUT_MATRIX_TYPE = :dense

@enum(Algorithms::Int, BUNCHKAUFMAN = 1, LU = 2, QR =3)
@enum(Algorithms::Int, BUNCHKAUFMAN = 1, LU = 2, QR =3, CHOLESKY=4)
# Options for the CPU LAPACK linear solver.
@kwdef mutable struct Options <: AbstractOptions
# Factorization selected by `factorize!`/`solve!`; one of the `Algorithms`
# enum values. Defaults to BUNCHKAUFMAN.
lapackcpu_algorithm::Algorithms = BUNCHKAUFMAN
end
Expand Down Expand Up @@ -60,6 +60,16 @@ trsm(side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb)=ccall(
Cvoid,
(Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{Cchar},Ref{BlasInt},Ref{BlasInt},Ref{Cdouble},Ptr{Cdouble},Ref{BlasInt},Ptr{Cdouble},Ref{BlasInt}),
side,uplo,transa,diag,m,n,alpha,a,lda,b,ldb)
# Thin wrapper around LAPACK's `dpotrf_` (double-precision Cholesky
# factorization). Arguments are forwarded unchanged in LAPACK order:
# `uplo`, `n`, `a`, `lda`, `info`.
function potrf(uplo, n, a, lda, info)
    return ccall(
        (@blasfunc(dpotrf_), libblas),
        Cvoid,
        (Ref{Cchar}, Ref{BlasInt}, Ptr{Cdouble}, Ref{BlasInt}, Ptr{BlasInt}),
        uplo, n, a, lda, info)
end
# Thin wrapper around LAPACK's `dpotrs_` (solve using a Cholesky factor
# produced by `dpotrf_`). Arguments are forwarded unchanged in LAPACK order:
# `uplo`, `n`, `nrhs`, `a`, `lda`, `b`, `ldb`, `info`.
function potrs(uplo, n, nrhs, a, lda, b, ldb, info)
    return ccall(
        (@blasfunc(dpotrs_), libblas),
        Cvoid,
        (Ref{Cchar}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Cdouble}, Ref{BlasInt},
         Ptr{Cdouble}, Ref{BlasInt}, Ptr{BlasInt}),
        uplo, n, nrhs, a, lda, b, ldb, info)
end

function Solver(dense::Matrix{Float64};
option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(),
Expand All @@ -83,6 +93,8 @@ function factorize!(M::Solver)
factorize_lu!(M)
elseif M.opt.lapackcpu_algorithm == QR
factorize_qr!(M)
elseif M.opt.lapackcpu_algorithm == CHOLESKY
factorize_cholesky!(M)
else
error(LOGGER,"Invalid lapackcpu_algorithm")
end
Expand All @@ -94,6 +106,8 @@ function solve!(M::Solver,x)
solve_lu!(M,x)
elseif M.opt.lapackcpu_algorithm == QR
solve_qr!(M,x)
elseif M.opt.lapackcpu_algorithm == CHOLESKY
solve_cholesky!(M,x)
else
error(LOGGER,"Invalid lapackcpu_algorithm")
end
Expand All @@ -103,7 +117,6 @@ function factorize_bunchkaufman!(M::Solver)
size(M.fact,1) == 0 && return M
haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.dense,1)))
M.lwork = -1
# pointer(M.fact)==pointer(M.dense) || M.fact.=M.dense
M.fact .= M.dense
sytrf('L',size(M.fact,1),M.fact,size(M.fact,2),M.etc[:ipiv],M.work,M.lwork,M.info)
M.lwork = BlasInt(real(M.work[1]))
Expand All @@ -121,7 +134,6 @@ function factorize_lu!(M::Solver)
size(M.fact,1) == 0 && return M
haskey(M.etc,:ipiv) || (M.etc[:ipiv] = Vector{BlasInt}(undef,size(M.dense,1)))
tril_to_full!(M.dense)
# pointer(M.fact)==pointer(M.dense) || M.fact.=M.dense
M.fact .= M.dense
getrf(size(M.fact,1),size(M.fact,2),M.fact,size(M.fact,2),M.etc[:ipiv],M.info)
return M
Expand All @@ -138,7 +150,6 @@ function factorize_qr!(M::Solver)
haskey(M.etc,:tau) || (M.etc[:tau] = Vector{Float64}(undef,size(M.dense,1)))
tril_to_full!(M.dense)
M.lwork = -1
# pointer(M.fact)==pointer(M.dense) || M.fact.=M.dense
M.fact .= M.dense
geqrf(size(M.fact,1),size(M.fact,2),M.fact,size(M.fact,2),M.etc[:tau],M.work,M.lwork,M.info)
M.lwork = BlasInt(real(M.work[1]))
Expand All @@ -158,8 +169,31 @@ function solve_qr!(M::Solver,x)
return x
end

is_inertia(M::Solver) = M.opt.lapackcpu_algorithm == BUNCHKAUFMAN
inertia(M::Solver) = inertia(M.fact,M.etc[:ipiv],M.info[])
# Copy `M.dense` into `M.fact` and factorize it in place with `potrf` using
# the lower triangle ('L'). `M.info` is passed as the status argument. An
# empty matrix is left untouched. Returns `M`.
function factorize_cholesky!(M::Solver)
    if size(M.fact, 1) == 0
        return M
    end
    M.lwork = -1
    M.fact .= M.dense
    order = size(M.fact, 1)
    ldfact = size(M.fact, 2)
    potrf('L', order, M.fact, ldfact, M.info)
    return M
end
# Solve in place for the single right-hand side `x` with `potrs`, using the
# lower-triangular factor stored in `M.fact`. `M.info` is passed as the status
# argument.
#
# Always returns `x`. For an empty system there is nothing to solve and `x` is
# returned unchanged; the guard previously returned `M` instead, so the return
# value depended on the matrix size.
function solve_cholesky!(M::Solver,x)
    size(M.fact,1) == 0 && return x
    potrs('L',size(M.fact,1),1,M.fact,size(M.fact,2),x,length(x),M.info)
    return x
end

# Inertia can be reported when the selected algorithm is BUNCHKAUFMAN or
# CHOLESKY.
function is_inertia(M::Solver)
    return M.opt.lapackcpu_algorithm in (BUNCHKAUFMAN, CHOLESKY)
end
# Return the inertia triple for the last factorization.
#
# - BUNCHKAUFMAN: delegates to the three-argument `inertia` with the factor,
#   the pivots stored in `M.etc[:ipiv]`, and the info value.
# - CHOLESKY: reports `(n, 0, 0)` when `M.info[]` is zero and `(0, n, 0)`
#   otherwise, where `n = size(M.fact, 1)`.
# - Any other algorithm is reported as an error through `LOGGER`.
function inertia(M::Solver)
    algorithm = M.opt.lapackcpu_algorithm
    if algorithm == BUNCHKAUFMAN
        return inertia(M.fact, M.etc[:ipiv], M.info[])
    elseif algorithm == CHOLESKY
        # later we need to change inertia() to is_inertia_correct() and is_full_rank()
        order = size(M.fact, 1)
        if M.info[] == 0
            return (order, 0, 0)
        else
            return (0, order, 0)
        end
    else
        error(LOGGER,"Invalid lapackcpu_algorithm")
    end
end

function inertia(fact,ipiv,info)
numneg = num_neg_ev(size(fact,1),fact,ipiv)
numzero = info > 0 ? 1 : 0
Expand Down
8 changes: 8 additions & 0 deletions test/madnlp_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ testset = [
print_level=MadNLP.ERROR),
[]
],
[
"LapackCPU-CHOLESKY",
()->MadNLP.Optimizer(
linear_solver=MadNLPLapackCPU,
lapackcpu_algorithm=MadNLPLapackCPU.CHOLESKY,
print_level=MadNLP.ERROR),
["infeasible", "lootsma", "eigmina"]
],
[
"Option: RELAX_BOUND",
()->MadNLP.Optimizer(
Expand Down

0 comments on commit 6ed4f0f

Please sign in to comment.