[RECOVERED] branch with special handling - DO NOT MERGE #210

Open · wants to merge 25 commits into base: master
Commits (25)
6bf107c  fixing merge mess (pokutta, Jul 6, 2021)
4bd9246  more reverting (pokutta, Jul 6, 2021)
fa32a32  Merge branch 'master' into verbose_iterations_embedding (pokutta, Jul 6, 2021)
9ebef34  and fixing (pokutta, Jul 6, 2021)
24b5b06  added warmstarting to lcg (pokutta, Jul 8, 2021)
a187a51  Merge branch 'master' into verbose_iterations_embedding (pokutta, Jul 14, 2021)
ee766d4  merge master (matbesancon, Jul 15, 2021)
a9bec3f  Merge branch 'verbose_iterations_embedding' of github.com:ZIB-IOL/Fra… (matbesancon, Jul 15, 2021)
f820471  Merge branch 'master' into verbose_iterations_embedding (matbesancon, Jul 15, 2021)
ae7d452  Merge branch 'master' into verbose_iterations_embedding (matbesancon, Jul 15, 2021)
7f12e08  Merge branch 'master' into verbose_iterations_embedding (matbesancon, Jul 16, 2021)
b68e828  adding some tracing example for gamma (pokutta, Jul 16, 2021)
659abc2  minor (pokutta, Jul 16, 2021)
5be1fcd  Merge branch 'master' into verbose_iterations_embedding (pokutta, Jul 16, 2021)
8278c1a  more tests (pokutta, Jul 16, 2021)
1d33e31  temp (pokutta, Jul 16, 2021)
24f856a  Merge branch 'master' into verbose_iterations_embedding (matbesancon, Jul 19, 2021)
51b6d07  Merge branch 'master' into verbose_iterations_embedding (matbesancon, Jul 19, 2021)
a40bb63  Merge branch 'fixing-BCG-bug' into verbose_iterations_embedding (pokutta, Jul 20, 2021)
ebd459d  minor verbose (pokutta, Jul 21, 2021)
d3ed5f9  added ada (pokutta, Jul 21, 2021)
c979e84  removed ada - bad performance (pokutta, Jul 21, 2021)
2636093  fix in tracing_gamma (pokutta, Jul 22, 2021)
e0a6a0f  Merge branch 'master' into verbose_iterations_embedding (pokutta, Jul 22, 2021)
6fad965  fix to ci (pokutta, Jul 22, 2021)
Files changed
1 change: 1 addition & 0 deletions .gitignore
@@ -21,3 +21,4 @@ examples/precompile_fw_clean.jl
examples/precompile_fw.jl
cc/libloporacle.so
cc/*
examples/temp.jl
128 changes: 128 additions & 0 deletions examples/tracing_gamma.jl
@@ -0,0 +1,128 @@
include("activate.jl")

using LinearAlgebra
# FrankWolfe and Plots are used below; activate.jl may already load them, but an explicit
# `using` keeps the script self-contained.
using FrankWolfe
using Plots
# using ProgressMeter

n = Int(1e2)
k = Int(1e4)
eps = 1e-8

xpi = rand(n);
total = sum(xpi);
const xp = xpi ./ total;

# better for memory consumption as we do coordinate-wise ops

function cf(x, xp)
return LinearAlgebra.norm(x .- xp)^2
end

function cgrad!(storage, x, xp)
return @. storage = 2 * (x - xp)
end

lmo = FrankWolfe.ProbabilitySimplexOracle(1.0);
# lmo = FrankWolfe.UnitSimplexOracle(1.0);
# lmo = FrankWolfe.KSparseLMO(40, 1.0);
# lmo = FrankWolfe.LpNormLMO{2}(1.0)

x00 = FrankWolfe.compute_extreme_point(lmo, zeros(n));

# quick benchmark of the objective, gradient, and LMO oracles
FrankWolfe.benchmark_oracles(
x -> cf(x, xp),
(str, x) -> cgrad!(str, x, xp),
() -> randn(n),
lmo;
k=100,
)


# The callback receives the algorithm's per-iteration state; we record the first five
# entries of the state tuple (iteration, primal, dual, dual-gap and time values in the
# usual trajectory layout) together with the step size `gamma`.
function build_callback(storage)
return function callback(data)
return push!(storage, (Tuple(data)[1:5]..., data.gamma))
end
end


####### 2/(2+t) rule

x0 = copy(x00)

trajectory_ag = Vector{Tuple{Int64,Float64,Float64,Float64,Float64,Float64}}()
callback = build_callback(trajectory_ag)

@time x, v, primal, dual_gap = FrankWolfe.frank_wolfe(
x -> cf(x, xp),
(str, x) -> cgrad!(str, x, xp),
lmo,
x0,
max_iteration=k,
trajectory=true,
epsilon=eps,
line_search=FrankWolfe.Agnostic(),
print_iter=k / 10,
callback=callback,
emphasis=FrankWolfe.memory,
verbose=true,
);


####### adaptive

x0 = copy(x00)

trajectory_ada = Vector{Tuple{Int64,Float64,Float64,Float64,Float64,Float64}}()
callback = build_callback(trajectory_ada)

@time x, v, primal, dual_gap = FrankWolfe.frank_wolfe(
x -> cf(x, xp),
(str, x) -> cgrad!(str, x, xp),
lmo,
x0,
max_iteration=k,
trajectory=true,
epsilon=eps,
line_search=FrankWolfe.Adaptive(),
print_iter=k / 10,
callback=callback,
emphasis=FrankWolfe.memory,
verbose=true,
);


####### short step rule (fixed L)

x0 = copy(x00)

trajectory_ls = Vector{Tuple{Int64,Float64,Float64,Float64,Float64,Float64}}()
callback = build_callback(trajectory_ls)

@time x, v, primal, dual_gap = FrankWolfe.frank_wolfe(
x -> cf(x, xp),
(str, x) -> cgrad!(str, x, xp),
lmo,
x0,
max_iteration=k,
trajectory=true,
epsilon=eps,
line_search=FrankWolfe.Shortstep(),
print_iter=k / 10,
L=2,
callback=callback,
emphasis=FrankWolfe.memory,
verbose=true,
);

x_ag = [trajectory_ag[i][1]+1 for i in eachindex(trajectory_ag)]
gamma_ag = [trajectory_ag[i][6] for i in eachindex(trajectory_ag)]
x_ada = [trajectory_ada[i][1]+1 for i in eachindex(trajectory_ada)]
gamma_ada = [trajectory_ada[i][6] for i in eachindex(trajectory_ada)]
x_ls = [trajectory_ls[i][1]+1 for i in eachindex(trajectory_ls)]
gamma_ls = [trajectory_ls[i][6] for i in eachindex(trajectory_ls)]

# compare the step-size sequences of the three rules on a log-log plot
Plots.plot(x_ag, gamma_ag, label="gamma_ag", yaxis=:log, xaxis=:log)
Plots.plot!(x_ada, gamma_ada, label="gamma_ada", yaxis=:log, xaxis=:log)
Plots.plot!(x_ls, gamma_ls, label="gamma_ls", yaxis=:log, xaxis=:log)
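
For reference, two of the traced step-size rules have simple closed forms against which the recorded gamma values can be checked (the adaptive rule instead estimates the smoothness constant on the fly). A minimal sketch, assuming the standard Frank-Wolfe definitions of the open-loop and short-step rules; the helper names are illustrative only:

using LinearAlgebra: dot, norm

# open-loop ("agnostic") rule, traced above as gamma_ag
gamma_agnostic(t) = 2 / (2 + t)

# short step, traced above as gamma_ls, given gradient g at x, FW vertex v, and smoothness constant L
gamma_shortstep(g, x, v, L) = clamp(dot(g, x - v) / (L * norm(x - v)^2), 0, 1)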
62 changes: 62 additions & 0 deletions examples/warmstart_lmo.jl
@@ -0,0 +1,62 @@
include("activate.jl")

using LinearAlgebra
# FrankWolfe is used below; activate.jl may already load it, but an explicit `using`
# keeps the script self-contained.
using FrankWolfe

n = Int(1e4)
k = 10000

xpi = rand(n);
total = sum(xpi);
const xp = xpi # ./ total;

f(x) = norm(x - xp)^2
function grad!(storage, x)
@. storage = 2 * (x - xp)
end

# better for memory consumption as we do coordinate-wise ops

function cf(x, xp)
return LinearAlgebra.norm(x .- xp)^2
end

function cgrad!(storage, x, xp)
return @. storage = 2 * (x - xp)
end

# lmo = FrankWolfe.ProbabilitySimplexOracle(1);

lmo = FrankWolfe.KSparseLMO(100, 1.0)
x00 = FrankWolfe.compute_extreme_point(lmo, zeros(n));

# the first run builds up the LMO's vertex cache, which is then reused below

x0 = deepcopy(x00)

@time x, v, primal, dual_gap, trajectory, lmo = FrankWolfe.lazified_conditional_gradient(
x -> cf(x, xp),
(str, x) -> cgrad!(str, x, xp),
lmo,
x0,
max_iteration=k,
L=100,
line_search=FrankWolfe.Adaptive(),
print_iter=k / 10,
emphasis=FrankWolfe.memory,
verbose=true,
);

# second run: reuse the cache LMO returned by the first call via the new warmstart_lmo keyword
@time x, v, primal, dual_gap, trajectory, lmo = FrankWolfe.lazified_conditional_gradient(
x -> cf(x, xp),
(str, x) -> cgrad!(str, x, xp),
lmo,
x0,
max_iteration=k,
L=100,
line_search=FrankWolfe.Adaptive(),
print_iter=k / 10,
emphasis=FrankWolfe.memory,
verbose=true,
warmstart_lmo=lmo
);
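
Since lazified_conditional_gradient now also returns its internal cache LMO (see the src/fw_algorithms.jl diff below), the effect of the warm start can be inspected directly; `length` on the cache LMO is the same accessor used for the algorithm's cache-size report column:

@show length(lmo)  # number of cached vertices carried into the warm-started run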

39 changes: 30 additions & 9 deletions src/fw_algorithms.jl
@@ -55,7 +55,7 @@ function frank_wolfe(
println("FATAL: gamma0 not set. We are not going to move a single bit.")
end

if (!isnothing(momentum) && line_search isa Union{Shortstep,Adaptive,RationalShortstep})
if (!isnothing(momentum) && line_search isa Union{Shortstep,Adaptive,RationalShortstep} && verbose)
println(
"WARNING: Momentum-averaged gradients should usually be used with agnostic stepsize rules.",
)
@@ -96,6 +96,11 @@
else
similar(x)
end

# # H and H2 container
# H = similar(gradient) * 0.0
# H2 = similar(gradient) * 0.0

while t <= max_iteration && dual_gap >= max(epsilon, eps())

#####################
@@ -131,6 +136,14 @@
end
first_iter = false

# # H2 = H2+est_grad_f_x**2
# # H = eps+np.sqrt(H2)
# if ada && momentum === nothing
# H2 += gradient.^2
# H = (ada_eps .+ (H2.^(0.5))).^(-1.0)
# gradient = gradient .* H
# end

v = compute_extreme_point(lmo, gradient)
# go easy on the memory - only compute if really needed
if (
@@ -248,6 +261,7 @@ function lazified_conditional_gradient(
print_iter=1000,
trajectory=false,
verbose=false,
verbose_it=false,
linesearch_tol=1e-7,
step_lim=20,
emphasis::Emphasis=memory,
@@ -256,15 +270,20 @@
callback=nothing,
timeout=Inf,
print_callback=print_callback,
warmstart_lmo=nothing,
)

# format string for output of the algorithm
format_string = "%6s %13s %14e %14e %14e %14e %14e %14i\n"

if isfinite(cache_size)
lmo = MultiCacheLMO{cache_size,typeof(lmo_base),VType}(lmo_base)
if warmstart_lmo === nothing
if isfinite(cache_size)
lmo = MultiCacheLMO{cache_size,typeof(lmo_base),VType}(lmo_base)
else
lmo = VectorCacheLMO{typeof(lmo_base),VType}(lmo_base)
end
else
lmo = VectorCacheLMO{typeof(lmo_base),VType}(lmo_base)
lmo = warmstart_lmo
end

t = 0
@@ -347,7 +366,7 @@
threshold = fast_dot(x, gradient) - phi / K

# go easy on the memory - only compute if really needed
if ((mod(t, print_iter) == 0 && verbose ) || callback !== nothing)
if ((mod(t, print_iter) == 0 && (verbose || verbose_it) ) || callback !== nothing)
primal = f(x)
end

@@ -394,7 +413,7 @@

@emphasis(emphasis, x = x - gamma * d)

if verbose && (mod(t, print_iter) == 0 || tt == dualstep)
if (verbose || verbose_it) && (mod(t, print_iter) == 0 || tt == dualstep)
if t == 0
tt = initial
end
@@ -422,7 +441,7 @@
primal = f(x)
dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient)

if verbose
if verbose || verbose_it
tt = last
tot_time = (time_ns() - time_start) / 1.0e9
rep = (
@@ -436,10 +455,12 @@
length(lmo),
)
print_callback(rep, format_string)
print_callback(nothing, format_string, print_footer=true)
if verbose
print_callback(nothing, format_string, print_footer=true)
end
flush(stdout)
end
return x, v, primal, dual_gap, traj_data
return x, v, primal, dual_gap, traj_data, lmo
end

"""
Expand Down
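Taken together, the changes to lazified_conditional_gradient above add a warmstart_lmo keyword (a pre-populated cache LMO that is used in place of a freshly constructed MultiCacheLMO or VectorCacheLMO), a verbose_it flag that enables the per-iteration and final log lines without the full verbose footer, and a sixth return value carrying the cache LMO itself. Below is a minimal usage sketch of the warm-start pattern, condensed from examples/warmstart_lmo.jl above; the objective, the KSparseLMO, and the keyword values are illustrative choices taken from that example rather than a canonical recipe:

using LinearAlgebra
using FrankWolfe

n = 1000
xp = rand(n)

f(x) = norm(x - xp)^2
function grad!(storage, x)
    @. storage = 2 * (x - xp)
end

lmo = FrankWolfe.KSparseLMO(100, 1.0)
x0 = FrankWolfe.compute_extreme_point(lmo, zeros(n))

# first run: builds up the vertex cache and (with this PR) returns it as the sixth output
x, v, primal, dual_gap, traj, cache_lmo = FrankWolfe.lazified_conditional_gradient(
    f,
    grad!,
    lmo,
    deepcopy(x0),
    max_iteration=1000,
    line_search=FrankWolfe.Adaptive(),
    emphasis=FrankWolfe.memory,
    verbose=true,
)

# second run: hand the populated cache back in instead of building a new one
x, v, primal, dual_gap, traj, cache_lmo = FrankWolfe.lazified_conditional_gradient(
    f,
    grad!,
    lmo,
    deepcopy(x0),
    max_iteration=1000,
    line_search=FrankWolfe.Adaptive(),
    emphasis=FrankWolfe.memory,
    verbose=true,
    warmstart_lmo=cache_lmo,
)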