Skip to content

Commit

Permalink
Add support for Float32 (#187)
Browse files Browse the repository at this point in the history
* float32 on cpu works

* tests passing

* general precision

* gpu test passing

* everything in HSL works

* first draft done

* export linear solvers

* krylov subtyping

* hsl build improvement, hopefully final

* HSL build improvement - no fakemetis

* addressed francois's comments

* remove unnecessary

* typo fix
  • Loading branch information
sshin23 committed Jul 5, 2022
1 parent 03d728e commit 897acf1
Show file tree
Hide file tree
Showing 41 changed files with 1,271 additions and 1,123 deletions.
10 changes: 7 additions & 3 deletions lib/MadNLPGPU/src/MadNLPGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,23 @@ module MadNLPGPU

import LinearAlgebra
# CUDA
import CUDA: CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
import CUDA: CUDA, CUBLAS, CUSOLVER, CuVector, CuMatrix, CuArray, R_64F, has_cuda, @allowscalar, runtime_version
import .CUSOLVER:
libcusolver, cusolverStatus_t, CuPtr, cudaDataType, cublasFillMode_t, cusolverDnHandle_t, dense_handle
import .CUBLAS: handle, CUBLAS_DIAG_NON_UNIT,
CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER, CUBLAS_SIDE_LEFT, CUBLAS_OP_N, CUBLAS_OP_T

# Kernels
import KernelAbstractions: @kernel, @index, wait, Event
import CUDAKernels: CUDADevice

import MadNLP

import MadNLP:
@kwdef, Logger, @debug, @warn, @error,
AbstractOptions, AbstractLinearSolver, AbstractNLPModel, set_options!,
SymbolicException,FactorizationException,SolveException,InertiaException,
introduce, factorize!, solve!, improve!, is_inertia, inertia, tril_to_full!,
LapackOptions, input_type
LapackOptions, input_type, is_supported



Expand Down
20 changes: 10 additions & 10 deletions lib/MadNLPGPU/src/interface.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@

function CuInteriorPointSolver(nlp::AbstractNLPModel;
function CuInteriorPointSolver(nlp::AbstractNLPModel{T};
option_dict::Dict{Symbol,Any}=Dict{Symbol,Any}(), kwargs...
)
) where T
opt = MadNLP.Options(linear_solver=LapackGPUSolver)
MadNLP.set_options!(opt,option_dict,kwargs)
MadNLP.check_option_sanity(opt)

KKTSystem = if (opt.kkt_system == MadNLP.SPARSE_KKT_SYSTEM) || (opt.kkt_system == MadNLP.SPARSE_UNREDUCED_KKT_SYSTEM)
error("Sparse KKT system are currently not supported on CUDA GPU.\n" *
"Please use `DENSE_KKT_SYSTEM` or `DENSE_CONDENSED_KKT_SYSTEM` instead.")
"Please use `DENSE_KKT_SYSTEM` or `DENSE_CONDENSED_KKT_SYSTEM` instead.")
elseif opt.kkt_system == MadNLP.DENSE_KKT_SYSTEM
MT = CuMatrix{Float64}
VT = CuVector{Float64}
MadNLP.DenseKKTSystem{Float64, VT, MT}
MT = CuMatrix{T}
VT = CuVector{T}
MadNLP.DenseKKTSystem{T, VT, MT}
elseif opt.kkt_system == MadNLP.DENSE_CONDENSED_KKT_SYSTEM
MT = CuMatrix{Float64}
VT = CuVector{Float64}
MadNLP.DenseCondensedKKTSystem{Float64, VT, MT}
MT = CuMatrix{T}
VT = CuVector{T}
MadNLP.DenseCondensedKKTSystem{T, VT, MT}
end
return MadNLP.InteriorPointSolver{KKTSystem}(nlp, opt; option_linear_solver=option_dict)
return MadNLP.InteriorPointSolver{T,KKTSystem}(nlp, opt; option_linear_solver=option_dict)
end
332 changes: 155 additions & 177 deletions lib/MadNLPGPU/src/lapackgpu.jl

Large diffs are not rendered by default.

52 changes: 26 additions & 26 deletions lib/MadNLPGPU/test/densekkt_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,32 @@ function _compare_gpu_with_cpu(KKTSystem, n, m, ind_fixed)
elseif (KKTSystem == MadNLP.DenseCondensedKKTSystem)
MadNLP.DENSE_CONDENSED_KKT_SYSTEM
end
# Define options
madnlp_options = Dict{Symbol, Any}(
:kkt_system=>opt_kkt,
:linear_solver=>LapackGPUSolver,
:print_level=>MadNLP.ERROR,
)

nlp = MadNLPTests.DenseDummyQP(; n=n, m=m, fixed_variables=ind_fixed)

# Solve on CPU
h_ips = MadNLP.InteriorPointSolver(nlp; option_dict=copy(madnlp_options))
MadNLP.optimize!(h_ips)

# Solve on GPU
d_ips = MadNLPGPU.CuInteriorPointSolver(nlp; option_dict=copy(madnlp_options))
MadNLP.optimize!(d_ips)

T = Float64
VT = CuVector{T}
MT = CuMatrix{T}
@test isa(d_ips.kkt, KKTSystem{T, VT, MT})
# # Check that both results match exactly
@test h_ips.cnt.k == d_ips.cnt.k
@test h_ips.obj_val ≈ d_ips.obj_val atol=1e-10
@test h_ips.x ≈ d_ips.x atol=1e-10
@test h_ips.l ≈ d_ips.l atol=1e-10

for (T,tol,atol) in [(Float32,1e-3,1e-1), (Float64,1e-8,1e-6)]
madnlp_options = Dict{Symbol, Any}(
:kkt_system=>opt_kkt,
:linear_solver=>LapackGPUSolver,
:print_level=>MadNLP.ERROR,
:tol=>tol
)

nlp = MadNLPTests.DenseDummyQP{T}(; n=n, m=m, fixed_variables=ind_fixed)
# Solve on CPU
h_ips = MadNLP.InteriorPointSolver(nlp; option_dict=copy(madnlp_options))
MadNLP.optimize!(h_ips)

# Solve on GPU
d_ips = MadNLPGPU.CuInteriorPointSolver(nlp; option_dict=copy(madnlp_options))
MadNLP.optimize!(d_ips)

@test isa(d_ips.kkt, KKTSystem{T, CuVector{T}, CuMatrix{T}})
# # Check that both results match exactly
@test h_ips.cnt.k == d_ips.cnt.k
@test h_ips.obj_val ≈ d_ips.obj_val atol=atol
@test h_ips.x ≈ d_ips.x atol=atol
@test h_ips.l ≈ d_ips.l atol=atol
end
end

@testset "MadNLPGPU ($(kkt_system))" for kkt_system in [
Expand Down
9 changes: 6 additions & 3 deletions lib/MadNLPGPU/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,16 @@ testset = [
],
]

# Test LapackGPU wrapper
@testset "LapackGPU test" begin
@testset "MadNLPGPU test" begin

MadNLPTests.test_linear_solver(LapackGPUSolver,Float32)
MadNLPTests.test_linear_solver(LapackGPUSolver,Float64)

# Test LapackGPU wrapper
for (name,optimizer_constructor,exclude) in testset
test_madnlp(name,optimizer_constructor,exclude)
end
end

# Test DenseKKTSystem on GPU
include("densekkt_gpu.jl")

15 changes: 12 additions & 3 deletions lib/MadNLPHSL/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,20 @@ pkg> add MadNLPHSL

To build MadNLP with HSL linear solvers (Ma27, Ma57, Ma77, Ma86, Ma97), the source code needs to be obtained by the user from <http://www.hsl.rl.ac.uk/ipopt/> under Coin-HSL Full (Stable). The source code is distributed as a tarball file `coinhsl-*.tar.gz`. The absolute path to the extracted source code or the compiled library should be provided by the user. If the user already has a compiled HSL solver library, one can simply provide a path to that shared library. In this case, the source code is not compiled and the provided shared library is used directly.
```julia
# either one of the following should be given
# at least one of the following should be given
julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/coinhsl"
julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/coinhsl-archive-2021.05.05"
julia> ENV["MADNLP_HSL_SOURCE_PATH"] = "/opt/ma57-3.11.0/"
julia> ENV["MADNLP_MA27_SOURCE_PATH"] = "/opt/coinhsl-archive-2021.05.05"
julia> ENV["MADNLP_MA57_SOURCE_PATH"] = "/opt/ma57-3.11.0/"
julia> ENV["MADNLP_MA77_SOURCE_PATH"] = "/opt/hsl_ma77-6.3.0"
julia> ENV["MADNLP_MA86_SOURCE_PATH"] = "/opt/hsl_ma86-1.7.2"
julia> ENV["MADNLP_MA97_SOURCE_PATH"] = "/opt/hsl_ma97-2.7.1"

julia> ENV["MADNLP_HSL_LIBRARY_PATH"] = "/usr/lib/libcoinhsl.so"
julia> ENV["MADNLP_MA27_LIBRARY_PATH"] = "/usr/lib/libma27.so"
julia> ENV["MADNLP_MA57_LIBRARY_PATH"] = "/usr/lib/libma57.so"
julia> ENV["MADNLP_MA77_LIBRARY_PATH"] = "/usr/lib/libma77.so"
julia> ENV["MADNLP_MA86_LIBRARY_PATH"] = "/usr/lib/libma86.so"
julia> ENV["MADNLP_MA97_LIBRARY_PATH"] = "/usr/lib/libma97.so"
# optionally, one can specify
julia> ENV["MADNLP_HSL_BLAS"] = "mkl" # default is "openblas"
```
Expand Down
136 changes: 90 additions & 46 deletions lib/MadNLPHSL/deps/build.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ const rpath = `-Wl,-rpath,`
const whole_archive= Sys.isapple() ? `-Wl,-all_load` : `-Wl,--whole-archive`
const no_whole_archive = Sys.isapple() ? `-Wl,-noall_load` : `-Wl,--no-whole-archive`
const libdir = mkpath(joinpath(@__DIR__, "lib"))
const hsl_library_path = haskey(ENV,"MADNLP_HSL_LIBRARY_PATH") ? ENV["MADNLP_HSL_LIBRARY_PATH"] : ""
const hsl_source_path = haskey(ENV,"MADNLP_HSL_SOURCE_PATH") ? ENV["MADNLP_HSL_SOURCE_PATH"] : ""
const FC = haskey(ENV,"MADNLP_FC") ? ENV["MADNLP_FC"] : `gfortran`
const libmetis_dir = joinpath(artifact"METIS", "lib")
const with_metis = `-L$libmetis_dir $rpath$libmetis_dir -lmetis`
Expand All @@ -26,74 +24,120 @@ else
const libblas_dir = joinpath(artifact"OpenBLAS32","lib")
const with_blas = `-L$libblas_dir $rpath$libblas_dir -lopenblas`
end

const targets =[
[
"deps.f", "deps90.f90",

const supported_library = [
(:libhsl, "MADNLP_HSL_LIBRARY_PATH", "MADNLP_HSL_SOURCE_PATH")
(:libma27, "MADNLP_MA27_LIBRARY_PATH", "MADNLP_MA27_SOURCE_PATH")
(:libma57, "MADNLP_MA57_LIBRARY_PATH", "MADNLP_MA57_SOURCE_PATH")
(:libma77, "MADNLP_MA77_LIBRARY_PATH", "MADNLP_MA77_SOURCE_PATH")
(:libma86, "MADNLP_MA86_LIBRARY_PATH", "MADNLP_MA86_SOURCE_PATH")
(:libma97, "MADNLP_MA97_LIBRARY_PATH", "MADNLP_MA97_SOURCE_PATH")
]

const targets_dict = Dict(
:libhsl=> [
"deps.f",
"deps90.f90",
"ma27d.f",
"ma57d.f",
"hsl_ma77d.f90",
"hsl_ma86d.f90",
"hsl_ma97d.f90",
"hsl_mc68i_ciface.f90",
"hsl_ma77d_ciface.f90",
"hsl_ma86d_ciface.f90",
"hsl_ma97d_ciface.f90",
],
[
"ma27d.f", "ma27s.f",
"ma57d.f", "ma57s.f",
"hsl_ma77d.f90", "hsl_ma77s.f90",
"hsl_ma86d.f90", "hsl_ma86s.f90",
"hsl_ma97d.f90", "hsl_ma97s.f90",
:libma27 => [
"deps.f",
"ma27d.f",
"ma27s.f",
],
[
"hsl_mc68i_ciface.f90",
:libma57 => [
"sdeps.f", "ddeps.f",
"ma57d.f", "ma57s.f",
],
:libma77 => [
"common.f", "common90.f90",
"ddeps90.f90", "sdeps90.f90",
"hsl_ma77d.f90", "hsl_ma77s.f90",
"hsl_ma77d_ciface.f90", "hsl_ma77s_ciface.f90",
],
:libma86 => [
"common.f", "common90.f90",
"sdeps90.f90",
"hsl_ma86d.f90", "hsl_ma86s.f90",
"hsl_ma86d_ciface.f90", "hsl_ma86s_ciface.f90",
"hsl_ma97d_ciface.f90", "hsl_ma97s_ciface.f90",
"hsl_mc68i_ciface.f90",
],
:libma97 => [
"common.f", "common90.f90",
"sdeps90.f90", "ddeps90.f90",
"hsl_ma97d.f90", "hsl_ma97s.f90",
"hsl_ma97d_ciface.f90", "hsl_ma97s_ciface.f90",
]
]
)

rm(libdir;recursive=true,force=true)
mkpath(libdir)
isvalid(cmd::Cmd)=(try run(cmd) catch e return false end; return true)


# HSL
if hsl_source_path != ""
if isvalid(`$FC --version`)
OC = OutputCollector[]
cd(hsl_source_path)
attempted = Tuple{Symbol,Product}[]

names_succeeded = []
for i=1:3
names = []
for (root, dirs, files) in walkdir(hsl_source_path)
for file in files;
if file in targets[i];
filter!(x->x != file,files)
name = splitext(relpath(joinpath(root,file),hsl_source_path))
push!(names, name)
@info "$(name[1])$(name[2]) source code detected."
end
for (lib, envlib, envsrc) in supported_library
if haskey(ENV,envlib)
push!(attempted, (lib,FileProduct(ENV[envlib], lib)))
elseif haskey(ENV,envsrc) && isvalid(`$FC --version`)
@info "Compiling $lib"
source_path = ENV[envsrc]
targets = targets_dict[lib]

cd(source_path)

list = []
for (root, dir, files) in walkdir(source_path)
for file in files
if file in targets
@info "$file source code detected."
push!(list, (root, dir, file))
end
end
succeeded = wait.(
[OutputCollector(`$FC -fopenmp -fPIC -c -O3 -o $name.o $name$ext`,verbose=verbose)
for (name,ext) in names])
append!(names_succeeded, names[succeeded])
end

cmd = `$FC -o$(libdir)/libhsl.$so -shared -fPIC -O3 -fopenmp`
append!(cmd.exec, ["$name.o" for (name,ext) in names_succeeded])
succeeded = []
for target in targets
for (root, dir, file) in list
if file == target
name, ext = splitext(relpath(joinpath(root,file),source_path))
isvalid(`$FC -fopenmp -fPIC -c -O3 -o $name.o $name$ext`)
push!(succeeded, (name, ext))
end
end
end


cmd = `$FC -o$(libdir)/$lib.$so -shared -fPIC -O3 -fopenmp`
append!(cmd.exec, ["$name.o" for (name,ext) in succeeded])
append!(cmd.exec, with_metis.exec)
append!(cmd.exec, with_blas.exec)

run(cmd)
cd("$(@__DIR__)")
product = FileProduct(prefix,joinpath(libdir,"libhsl.$so"), :libhsl)
push!(attempted, (lib,FileProduct(prefix,joinpath(libdir,"$lib.$so"), lib)))
end
else
product = FileProduct(hsl_library_path, :libhsl)
end

# write deps.jl
if satisfied(product)
@info "Building HSL succeeded."
write_deps_file(joinpath(@__DIR__, "deps.jl"),Product[product], verbose=verbose)
else
@error "Building HSL failed."
write_deps_file(joinpath(@__DIR__, "deps.jl"),Product[], verbose=verbose)
succeeded = Product[]
for (lib, product) in attempted
if satisfied(product)
@info "Building $lib succeeded."
push!(succeeded, product)
else
@error "Building $lib failed."
end
end

write_deps_file(joinpath(@__DIR__, "deps.jl"), succeeded, verbose=verbose)
41 changes: 36 additions & 5 deletions lib/MadNLPHSL/src/MadNLPHSL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,56 @@ import MadNLP: @kwdef, Logger, @debug, @warn, @error,
AbstractOptions, AbstractLinearSolver, set_options!, SparseMatrixCSC, SubVector,
SymbolicException,FactorizationException,SolveException,InertiaException,
introduce, factorize!, solve!, improve!, is_inertia, inertia, findIJ, nnz,
get_tril_to_full, transfer!, input_type, _madnlp_unsafe_wrap
get_tril_to_full, transfer!, input_type, _madnlp_unsafe_wrap,
is_supported

include(joinpath("..","deps","deps.jl"))

include("common.jl")
include("mc68.jl")

if @isdefined(libhsl)
include("common.jl")
include("mc68.jl")
@isdefined(libma27) || const libma27 = libhsl
@isdefined(libma57) || const libma57 = libhsl
@isdefined(libma77) || const libma77 = libhsl
@isdefined(libma86) || const libma86 = libhsl
@isdefined(libma97) || const libma97 = libhsl
end

if @isdefined(libma27)
include("ma27.jl")
export Ma27Solver
end

if @isdefined(libma57)
include("ma57.jl")
export Ma57Solver
end

if @isdefined(libma77)
include("ma77.jl")
export Ma77Solver
end

if @isdefined(libma86)
include("ma86.jl")
export Ma86Solver
end

if @isdefined(libma97)
include("ma97.jl")
export Ma27Solver, Ma57Solver, Ma77Solver, Ma86Solver, Ma97Solver
export Ma97Solver
end

function __init__()
check_deps()
try
@isdefined(libhsl) && dlopen(libhsl,RTLD_DEEPBIND)
@isdefined(libhsl) && dlopen(libhsl,RTLD_DEEPBIND)
@isdefined(libma27) && dlopen(libma27,RTLD_DEEPBIND)
@isdefined(libma77) && dlopen(libma57,RTLD_DEEPBIND)
@isdefined(libma77) && dlopen(libma77,RTLD_DEEPBIND)
@isdefined(libma86) && dlopen(libma77,RTLD_DEEPBIND)
@isdefined(libma97) && dlopen(libma97,RTLD_DEEPBIND)
catch e
println("HSL shared library cannot be loaded")
end
Expand Down
Loading

0 comments on commit 897acf1

Please sign in to comment.