
Commit

Remove allocations in callbacks (#230)
* update interface between sparse KKT systems and callbacks

* add PrimalVector type to store primal information

- split slack from variable explicitly
- remove all allocations in callbacks

* fix tests

* fix tests on GPU

* address comments

* typo fix

* typo fix

Co-authored-by: Sungho Shin <sshin@anl.gov>
frapac and sshin23 committed Oct 24, 2022
1 parent 6c22169 commit 15c227e
Showing 12 changed files with 511 additions and 262 deletions.
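
The centerpiece of this PR is the new PrimalVector type: it stores the variable and slack parts of the primal iterate in one contiguous buffer and exposes them through non-copying accessors, which is what lets the callbacks below run without allocating. A minimal sketch of what such a container could look like, for orientation only (the struct layout and constructor here are assumptions; full, variable, and slack are the accessor names actually used in the diff):

    # Illustrative sketch of a PrimalVector-style container; the real
    # definition in MadNLP may differ in detail.
    struct PrimalVector{T, VT <: AbstractVector{T}}
        values::VT   # [variable; slack], stored contiguously
        nx::Int      # number of original problem variables
        ns::Int      # number of slack variables
    end

    function PrimalVector{T, VT}(nx::Int, ns::Int) where {T, VT <: AbstractVector{T}}
        return PrimalVector{T, VT}(fill!(VT(undef, nx + ns), zero(T)), nx, ns)
    end

    # Non-copying accessors: views into the shared storage.
    full(x::PrimalVector)     = x.values
    variable(x::PrimalVector) = view(x.values, 1:x.nx)
    slack(x::PrimalVector)    = view(x.values, x.nx+1:x.nx+x.ns)

With this layout, an assignment such as variable(x) .= get_x0(nlp) writes directly into the shared buffer, and a callback can pass variable(x) to an NLPModels evaluator without materializing a copy.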
7 changes: 6 additions & 1 deletion lib/MadNLPGPU/src/kernels.jl
@@ -112,9 +112,14 @@ end
 function MadNLP.set_aug_diagonal!(kkt::MadNLP.AbstractDenseKKTSystem{T, VT, MT}, solver::MadNLP.MadNLPSolver) where {T, VT<:CuVector{T}, MT<:CuMatrix{T}}
     haskey(kkt.etc, :pr_diag_host) || (kkt.etc[:pr_diag_host] = Vector{T}(undef, length(kkt.pr_diag)))
     pr_diag_h = kkt.etc[:pr_diag_host]::Vector{T}
+    x = MadNLP.full(solver.x)
+    zl = MadNLP.full(solver.zl)
+    zu = MadNLP.full(solver.zu)
+    xl = MadNLP.full(solver.xl)
+    xu = MadNLP.full(solver.xu)
     # Broadcasting directly does not work, as MadNLP arrays are allocated
     # on the CPU whereas pr_diag is allocated on the GPU
-    pr_diag_h .= solver.zl./(solver.x.-solver.xl) .+ solver.zu./(solver.xu.-solver.x)
+    pr_diag_h .= zl./(x.-xl) .+ zu./(xu.-x)
     copyto!(kkt.pr_diag, pr_diag_h)
     fill!(kkt.du_diag, 0.0)
 end
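
The hunk above works around a host/device mismatch: the iterate vectors live in CPU memory while kkt.pr_diag lives on the GPU, so the update is computed in a reusable host buffer and shipped over in a single copyto!. The MadNLP.full calls unwrap the new PrimalVector fields into their underlying host arrays so the fused broadcast stays entirely on the CPU. A standalone sketch of the same staging pattern, assuming CUDA.jl and illustrative names:

    using CUDA

    # Compute a diagonal update on host data, then push the result to a
    # preallocated device buffer with one host-to-device transfer.
    function stage_diag!(pr_diag_d::CuVector{T}, pr_diag_h::Vector{T},
                         x::Vector{T}, xl::Vector{T}, xu::Vector{T},
                         zl::Vector{T}, zu::Vector{T}) where T
        pr_diag_h .= zl ./ (x .- xl) .+ zu ./ (xu .- x)  # fused host broadcast
        copyto!(pr_diag_d, pr_diag_h)                    # single transfer
        return pr_diag_d
    end

Caching pr_diag_host in kkt.etc means the host buffer is created once and reused on every iteration rather than reallocated per call.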
10 changes: 5 additions & 5 deletions lib/MadNLPGPU/test/densekkt_gpu.jl
@@ -22,18 +22,18 @@ function _compare_gpu_with_cpu(KKTSystem, n, m, ind_fixed)

     # Solve on CPU
     h_solver = MadNLP.MadNLPSolver(nlp; madnlp_options...)
-    MadNLP.solve!(h_solver)
+    results_cpu = MadNLP.solve!(h_solver)

     # Solve on GPU
     d_solver = MadNLPGPU.CuMadNLPSolver(nlp; madnlp_options...)
-    MadNLP.solve!(d_solver)
+    results_gpu = MadNLP.solve!(d_solver)

     @test isa(d_solver.kkt, KKTSystem{T, CuVector{T}, CuMatrix{T}})
     # Check that both results match exactly
     @test h_solver.cnt.k == d_solver.cnt.k
-    @test h_solver.obj_val ≈ d_solver.obj_val atol=atol
-    @test h_solver.x ≈ d_solver.x atol=atol
-    @test h_solver.y ≈ d_solver.y atol=atol
+    @test results_cpu.objective ≈ results_gpu.objective
+    @test results_cpu.solution ≈ results_gpu.solution atol=atol
+    @test results_cpu.multipliers ≈ results_gpu.multipliers atol=atol
     end
 end

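The test now compares the statistics object returned by solve! instead of reaching into solver internals. A hedged usage sketch (model construction elided; nlp stands for any NLPModels-compatible problem, and the field names come straight from the updated test):

    solver = MadNLP.MadNLPSolver(nlp; madnlp_options...)
    results = MadNLP.solve!(solver)

    results.objective     # final objective value
    results.solution      # primal solution vector
    results.multipliers   # constraint multipliers

Returning a results object keeps the comparison independent of how the solver lays out its iterate internally, which matters now that solver.x is a PrimalVector rather than a plain array.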
87 changes: 47 additions & 40 deletions src/IPM/IPM.jl
@@ -18,15 +18,15 @@ mutable struct MadNLPSolver{T, KKTSystem <: AbstractKKTSystem{T}, Model <: Abstr
     nlb::Int
     nub::Int

-    x::Vector{T} # primal (after reformulation)
+    x::PrimalVector{T, Vector{T}} # primal (after reformulation)
     y::Vector{T} # dual
-    zl::Vector{T} # dual (after reformulation)
-    zu::Vector{T} # dual (after reformulation)
-    xl::Vector{T} # primal lower bound (after reformulation)
-    xu::Vector{T} # primal upper bound (after reformulation)
+    zl::PrimalVector{T, Vector{T}} # dual (after reformulation)
+    zu::PrimalVector{T, Vector{T}} # dual (after reformulation)
+    xl::PrimalVector{T, Vector{T}} # primal lower bound (after reformulation)
+    xu::PrimalVector{T, Vector{T}} # primal upper bound (after reformulation)

     obj_val::T
-    f::Vector{T}
+    f::PrimalVector{T, Vector{T}}
     c::Vector{T}

     jacl::Vector{T}
@@ -40,11 +40,10 @@ mutable struct MadNLPSolver{T, KKTSystem <: AbstractKKTSystem{T}, Model <: Abstr
     _w3::KKTVec
     _w4::KKTVec

-    x_trial::Vector{T}
+    x_trial::PrimalVector{T, Vector{T}}
     c_trial::Vector{T}
     obj_val_trial::T

-    x_slk::Vector{T}
     c_slk::SubVector{T}
     rhs::Vector{T}
@@ -140,62 +139,70 @@ function MadNLPSolver{T,KKTSystem}(
     @trace(logger,"Initializing variables.")
     ind_cons = get_index_constraints(nlp; fixed_variable_treatment=opt.fixed_variable_treatment)
     ns = length(ind_cons.ind_ineq)
-    n = get_nvar(nlp)+ns
+    nx = get_nvar(nlp)
+    n = nx+ns
     m = get_ncon(nlp)

     # Initialize KKT
     kkt = KKTSystem(nlp, ind_cons)

-    xl = [get_lvar(nlp);view(get_lcon(nlp),ind_cons.ind_ineq)]
-    xu = [get_uvar(nlp);view(get_ucon(nlp),ind_cons.ind_ineq)]
-    x = [get_x0(nlp);zeros(T,ns)]
-    y = copy(get_y0(nlp))
-    zl= zeros(T,get_nvar(nlp)+ns)
-    zu= zeros(T,get_nvar(nlp)+ns)
+    # Primal variable
+    x = PrimalVector{T, Vector{T}}(nx, ns)
+    variable(x) .= get_x0(nlp)
+    # Bounds
+    xl = PrimalVector{T, Vector{T}}(nx, ns)
+    variable(xl) .= get_lvar(nlp)
+    slack(xl) .= view(get_lcon(nlp), ind_cons.ind_ineq)
+    xu = PrimalVector{T, Vector{T}}(nx, ns)
+    variable(xu) .= get_uvar(nlp)
+    slack(xu) .= view(get_ucon(nlp), ind_cons.ind_ineq)
+    zl = PrimalVector{T, Vector{T}}(nx, ns)
+    zu = PrimalVector{T, Vector{T}}(nx, ns)
+    # Gradient
+    f = PrimalVector{T, Vector{T}}(nx, ns)

-    f = zeros(T,n) # not sure why, but seems necessary to initialize to 0 when used with Plasmo interface
-    c = zeros(T,m)
+    y = copy(get_y0(nlp))
+    c = zeros(T, m)

     n_jac = nnz_jacobian(kkt)

     nlb = length(ind_cons.ind_lb)
     nub = length(ind_cons.ind_ub)

-    x_trial=Vector{T}(undef,n)
-    c_trial=Vector{T}(undef,m)
+    x_trial = PrimalVector{T, Vector{T}}(nx, ns)
+    c_trial = Vector{T}(undef, m)

-    x_slk= _madnlp_unsafe_wrap(x,ns, get_nvar(nlp)+1)
-    c_slk= view(c,ind_cons.ind_ineq)
+    c_slk = view(c,ind_cons.ind_ineq)
     rhs = (get_lcon(nlp).==get_ucon(nlp)).*get_lcon(nlp)

-    x_lr = view(x, ind_cons.ind_lb)
-    x_ur = view(x, ind_cons.ind_ub)
-    xl_r = view(xl, ind_cons.ind_lb)
-    xu_r = view(xu, ind_cons.ind_ub)
-    zl_r = view(zl, ind_cons.ind_lb)
-    zu_r = view(zu, ind_cons.ind_ub)
-    x_trial_lr = view(x_trial, ind_cons.ind_lb)
-    x_trial_ur = view(x_trial, ind_cons.ind_ub)
+    x_lr = view(full(x), ind_cons.ind_lb)
+    x_ur = view(full(x), ind_cons.ind_ub)
+    xl_r = view(full(xl), ind_cons.ind_lb)
+    xu_r = view(full(xu), ind_cons.ind_ub)
+    zl_r = view(full(zl), ind_cons.ind_lb)
+    zu_r = view(full(zu), ind_cons.ind_ub)
+    x_trial_lr = view(full(x_trial), ind_cons.ind_lb)
+    x_trial_ur = view(full(x_trial), ind_cons.ind_ub)

     if is_reduced(kkt)
-        _w1 = ReducedKKTVector{T,typeof(x)}(n, m)
-        _w2 = ReducedKKTVector{T,typeof(x)}(n, m)
-        _w3 = ReducedKKTVector{T,typeof(x)}(n, m)
-        _w4 = ReducedKKTVector{T,typeof(x)}(n, m)
+        _w1 = ReducedKKTVector{T,typeof(c)}(n, m)
+        _w2 = ReducedKKTVector{T,typeof(c)}(n, m)
+        _w3 = ReducedKKTVector{T,typeof(c)}(n, m)
+        _w4 = ReducedKKTVector{T,typeof(c)}(n, m)
     else
-        _w1 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
-        _w2 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
-        _w3 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
-        _w4 = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
+        _w1 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
+        _w2 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
+        _w3 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
+        _w4 = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
     end

     jacl = zeros(T,n) # spblas may throw an error if not initialized to zero

-    d = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
+    d = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)
     dx_lr = view(d.xp, ind_cons.ind_lb) # TODO
     dx_ur = view(d.xp, ind_cons.ind_ub) # TODO

-    p = UnreducedKKTVector{T,typeof(x)}(n, m, nlb, nub)
+    p = UnreducedKKTVector{T,typeof(c)}(n, m, nlb, nub)

     obj_scale = T[1.0]
     con_scale = ones(T,m)
@@ -223,7 +230,7 @@ function MadNLPSolver{T,KKTSystem}(
         jacl,
         d, p,
         _w1, _w2, _w3, _w4,
-        x_trial,c_trial,0.,x_slk,c_slk,rhs,
+        x_trial,c_trial,0.,c_slk,rhs,
         ind_cons.ind_ineq,ind_cons.ind_fixed,ind_cons.ind_llb,ind_cons.ind_uub,
         x_lr,x_ur,xl_r,xu_r,zl_r,zu_r,dx_lr,dx_ur,x_trial_lr,x_trial_ur,
         linear_solver,iterator,
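
Note how every bound-restricted subvector is now a view into full(...) of a PrimalVector, so updating the iterate updates x_lr, x_ur, and friends with no copying. A small self-contained illustration of that aliasing (the index set is made up):

    x_full = zeros(6)               # stand-in for full(x): 4 variables + 2 slacks
    ind_lb = [1, 3, 5]              # indices with finite lower bounds (illustrative)
    x_lr   = view(x_full, ind_lb)   # aliases x_full; no copy is made

    x_full .= 1:6
    @assert x_lr == [1.0, 3.0, 5.0] # the view reflects the update automatically

The workspace vectors _w1 through _w4, d, and p switch their type parameter from typeof(x) to typeof(c) for the same reason: x is no longer a plain Vector{T}, while c still is.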
103 changes: 56 additions & 47 deletions src/IPM/callbacks.jl
@@ -1,128 +1,137 @@
-function eval_f_wrapper(solver::MadNLPSolver, x::Vector{T}) where T
+function eval_f_wrapper(solver::MadNLPSolver, x::PrimalVector{T}) where T
     nlp = solver.nlp
     cnt = solver.cnt
     @trace(solver.logger,"Evaluating objective.")
-    x_nlpmodel = _madnlp_unsafe_wrap(x, get_nvar(nlp))
-    cnt.eval_function_time += @elapsed obj_val = (get_minimize(nlp) ? 1. : -1.) * obj(nlp,x_nlpmodel)
-    cnt.obj_cnt+=1
-    cnt.obj_cnt==1 && (is_valid(obj_val) || throw(InvalidNumberException(:obj)))
-    return obj_val*solver.obj_scale[]
+    cnt.eval_function_time += @elapsed begin
+        sense = (get_minimize(nlp) ? one(T) : -one(T))
+        obj_val = sense * obj(nlp, variable(x))
+    end
+    cnt.obj_cnt += 1
+    if cnt.obj_cnt == 1 && !is_valid(obj_val)
+        throw(InvalidNumberException(:obj))
+    end
+    return obj_val * solver.obj_scale[]
 end

-function eval_grad_f_wrapper!(solver::MadNLPSolver, f::Vector{T},x::Vector{T}) where T
+function eval_grad_f_wrapper!(solver::MadNLPSolver, f::PrimalVector{T}, x::PrimalVector{T}) where T
     nlp = solver.nlp
     cnt = solver.cnt
     @trace(solver.logger,"Evaluating objective gradient.")
     obj_scaling = solver.obj_scale[] * (get_minimize(nlp) ? one(T) : -one(T))
-    x_nlpmodel = _madnlp_unsafe_wrap(x, get_nvar(nlp))
-    f_nlpmodel = _madnlp_unsafe_wrap(f, get_nvar(nlp))
     cnt.eval_function_time += @elapsed grad!(
         nlp,
-        x_nlpmodel,
-        f_nlpmodel
+        variable(x),
+        variable(f),
     )
-    _scal!(obj_scaling, f)
+    _scal!(obj_scaling, full(f))
     cnt.obj_grad_cnt+=1
-    cnt.obj_grad_cnt==1 && (is_valid(f) || throw(InvalidNumberException(:grad)))
+    if cnt.obj_grad_cnt == 1 && !is_valid(full(f))
+        throw(InvalidNumberException(:grad))
+    end
     return f
 end

-function eval_cons_wrapper!(solver::MadNLPSolver, c::Vector{T},x::Vector{T}) where T
+function eval_cons_wrapper!(solver::MadNLPSolver, c::Vector{T}, x::PrimalVector{T}) where T
     nlp = solver.nlp
     cnt = solver.cnt
     @trace(solver.logger, "Evaluating constraints.")
-    x_nlpmodel = _madnlp_unsafe_wrap(x, get_nvar(nlp))
-    c_nlpmodel = _madnlp_unsafe_wrap(c, get_ncon(nlp))
     cnt.eval_function_time += @elapsed cons!(
         nlp,
-        x_nlpmodel,
-        c_nlpmodel
+        variable(x),
+        c,
     )
-    view(c,solver.ind_ineq).-=view(x,get_nvar(nlp)+1:solver.n)
+    view(c,solver.ind_ineq) .-= slack(x)
     c .-= solver.rhs
     c .*= solver.con_scale
     cnt.con_cnt+=1
-    cnt.con_cnt==2 && (is_valid(c) || throw(InvalidNumberException(:cons)))
+    if cnt.con_cnt == 1 && !is_valid(c)
+        throw(InvalidNumberException(:cons))
+    end
     return c
 end
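
The slack handling above is the standard reformulation: an inequality lb <= g_i(x) <= ub becomes the equality g_i(x) - s_i = 0 with bound lb <= s_i <= ub, and slack(x) now exposes the s block directly instead of the old index arithmetic view(x, get_nvar(nlp)+1:solver.n). A toy sketch of the residual computation (all values illustrative):

    g        = [1.0, 2.0, 3.0]   # raw constraint values g(x)
    ind_ineq = [2, 3]            # rows that are inequalities
    s        = [1.5, 2.5]        # slack block, slack(x) in the real code

    view(g, ind_ineq) .-= s      # in-place; no temporary array allocated
    # g is now [1.0, 0.5, 0.5]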

-function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::Vector{T}) where T
+function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::PrimalVector{T}) where T
     nlp = solver.nlp
     cnt = solver.cnt
     ns = length(solver.ind_ineq)
     @trace(solver.logger, "Evaluating constraint Jacobian.")
     jac = get_jacobian(kkt)
-    x_nlpmodel = _madnlp_unsafe_wrap(x, get_nvar(nlp))
-    jac_nlpmodel = _madnlp_unsafe_wrap(jac, get_nnzj(nlp.meta))
     cnt.eval_function_time += @elapsed jac_coord!(
         nlp,
-        x_nlpmodel,
-        jac_nlpmodel
+        variable(x),
+        jac,
     )
     compress_jacobian!(kkt)
-    cnt.con_jac_cnt+=1
-    cnt.con_jac_cnt==1 && (is_valid(jac) || throw(InvalidNumberException(:jac)))
+    cnt.con_jac_cnt += 1
+    if cnt.con_jac_cnt == 1 && !is_valid(jac)
+        throw(InvalidNumberException(:jac))
+    end
     @trace(solver.logger,"Constraint jacobian evaluation started.")
     return jac
 end

-function eval_lag_hess_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::Vector{T},l::Vector{T};is_resto=false) where T
+function eval_lag_hess_wrapper!(solver::MadNLPSolver, kkt::AbstractKKTSystem, x::PrimalVector{T},l::Vector{T};is_resto=false) where T
     nlp = solver.nlp
     cnt = solver.cnt
     @trace(solver.logger,"Evaluating Lagrangian Hessian.")
-    dual(solver._w1) .= l.*solver.con_scale
+    dual(solver._w1) .= l .* solver.con_scale
     hess = get_hessian(kkt)
-    x_nlpmodel = _madnlp_unsafe_wrap(x, get_nvar(nlp))
-    hess_nlpmodel = _madnlp_unsafe_wrap(hess, get_nnzh(nlp.meta))
+    scale = (get_minimize(nlp) ? one(T) : -one(T))
+    scale *= (is_resto ? zero(T) : solver.obj_scale[])
     cnt.eval_function_time += @elapsed hess_coord!(
         nlp,
-        x_nlpmodel,
+        variable(x),
         dual(solver._w1),
-        hess_nlpmodel;
-        obj_weight = (get_minimize(nlp) ? 1. : -1.) * (is_resto ? 0.0 : solver.obj_scale[])
+        hess;
+        obj_weight = scale,
     )
     compress_hessian!(kkt)
-    cnt.lag_hess_cnt+=1
-    cnt.lag_hess_cnt==1 && (is_valid(hess) || throw(InvalidNumberException(:hess)))
+    cnt.lag_hess_cnt += 1
+    if cnt.lag_hess_cnt == 1 && !is_valid(hess)
+        throw(InvalidNumberException(:hess))
+    end
     return hess
 end

-function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractDenseKKTSystem, x::Vector{T}) where T
+function eval_jac_wrapper!(solver::MadNLPSolver, kkt::AbstractDenseKKTSystem, x::PrimalVector{T}) where T
     nlp = solver.nlp
     cnt = solver.cnt
     ns = length(solver.ind_ineq)
     @trace(solver.logger, "Evaluating constraint Jacobian.")
     jac = get_jacobian(kkt)
-    x_nlpmodel = _madnlp_unsafe_wrap(x, get_nvar(nlp))
     cnt.eval_function_time += @elapsed jac_dense!(
         nlp,
-        x_nlpmodel,
-        jac
+        variable(x),
+        jac,
     )
     compress_jacobian!(kkt)
     cnt.con_jac_cnt+=1
-    cnt.con_jac_cnt==1 && (is_valid(jac) || throw(InvalidNumberException(:jac)))
+    if cnt.con_jac_cnt == 1 && !is_valid(jac)
+        throw(InvalidNumberException(:jac))
+    end
     @trace(solver.logger,"Constraint jacobian evaluation started.")
     return jac
 end

-function eval_lag_hess_wrapper!(solver::MadNLPSolver, kkt::AbstractDenseKKTSystem, x::Vector{T},l::Vector{T};is_resto=false) where T
+function eval_lag_hess_wrapper!(solver::MadNLPSolver, kkt::AbstractDenseKKTSystem, x::PrimalVector{T},l::Vector{T};is_resto=false) where T
     nlp = solver.nlp
     cnt = solver.cnt
     @trace(solver.logger,"Evaluating Lagrangian Hessian.")
-    dual(solver._w1) .= l.*solver.con_scale
+    dual(solver._w1) .= l .* solver.con_scale
     hess = get_hessian(kkt)
-    x_nlpmodel = _madnlp_unsafe_wrap(x, get_nvar(nlp))
+    scale = is_resto ? zero(T) : get_minimize(nlp) ? solver.obj_scale[] : -solver.obj_scale[]
     cnt.eval_function_time += @elapsed hess_dense!(
         nlp,
-        x_nlpmodel,
+        variable(x),
         dual(solver._w1),
         hess;
-        obj_weight = (get_minimize(nlp) ? 1. : -1.) * (is_resto ? 0.0 : solver.obj_scale[])
+        obj_weight = scale,
     )
     compress_hessian!(kkt)
     cnt.lag_hess_cnt+=1
-    cnt.lag_hess_cnt==1 && (is_valid(hess) || throw(InvalidNumberException(:hess)))
+    if cnt.lag_hess_cnt == 1 && !is_valid(hess)
+        throw(InvalidNumberException(:hess))
+    end
     return hess
 end
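
Across all of these wrappers the pattern is the same: pass variable(x), a view into the composite iterate, straight to the NLPModels evaluator, instead of constructing a fresh array wrapper with _madnlp_unsafe_wrap on every call. A minimal illustration of why a view sidesteps per-callback allocations (toy objective; names are illustrative):

    my_obj(xv::AbstractVector) = sum(abs2, xv)   # stand-in for obj(nlp, ...)

    x_full = rand(6)                  # [variable; slack]
    xv     = view(x_full, 1:4)        # what variable(x) returns in the real code

    val_new = my_obj(xv)              # evaluates through the view, no copy
    val_old = my_obj(x_full[1:4])     # slicing allocates a fresh 4-element array
    @assert val_new == val_old

The rewrite of the one-line cond && (ok || throw(...)) guards into explicit if blocks does not change behavior, apart from the constraint check now firing on the first evaluation (con_cnt == 1) instead of the second; it simply makes the first-evaluation validity checks easier to read.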
