diff --git a/src/analysis/sampling/affine_hit_and_run.jl b/src/analysis/sampling/affine_hit_and_run.jl
new file mode 100644
index 000000000..899364eab
--- /dev/null
+++ b/src/analysis/sampling/affine_hit_and_run.jl
@@ -0,0 +1,138 @@
+"""
+    function affine_hit_and_run(
+        warmup_points::Matrix{Float64},
+        lbs::Vector{Float64},
+        ubs::Vector{Float64};
+        sample_iters = 100 .* (1:5),
+        workers = [myid()],
+        chains = length(workers),
+    )
+
+Run a hit-and-run-style sampling that starts from `warmup_points` and uses
+their affine combinations to generate run directions for sampling the space
+delimited by `lbs` and `ubs`. The points that represent fluxes in
+`warmup_points` should be organized in columns, i.e. `warmup_points[:,1]` is
+the first warmup flux.
+
+In total, `chains` hit-and-run runs are computed, each on a batch of
+`size(warmup_points, 2)` points. The runs are scheduled on `workers`; for good
+load balancing, `chains` should ideally be much greater than `length(workers)`.
+
+Each run continues for `maximum(sample_iters)` iterations; the numbers in
+`sample_iters` represent the iterations at which the whole "current" batch of
+points is collected for output. For example, `sample_iters=[1,4,5]` causes the
+process to run for 5 iterations, returning the sample batches produced by the
+1st, 4th and last (5th) iterations.
+
+Returns a matrix of sampled fluxes (in columns), with all collected samples
+horizontally concatenated. The total number of samples (columns) will be
+`size(warmup_points,2) * chains * length(sample_iters)`.
+
+# Example
+```
+using COBREXA
+using Tulip
+
+model = load_model(StandardModel, model_path)
+
+warmup, lbs, ubs = warmup_from_variability(model, Tulip.Optimizer, 100)
+samples = affine_hit_and_run(warmup, lbs, ubs, sample_iters = 1:3)
+```
+"""
+function affine_hit_and_run(
+    warmup_points::Matrix{Float64},
+    lbs::Vector{Float64},
+    ubs::Vector{Float64};
+    sample_iters = 100 .* (1:5),
+    workers = [myid()],
+    chains = length(workers),
+)
+
+    # distribute starting data to workers
+    save_at.(workers, :cobrexa_hit_and_run_data, Ref((warmup_points, lbs, ubs)))
+
+    # sample all chains
+    samples = hcat(
+        dpmap(
+            chain -> :($COBREXA._affine_hit_and_run_chain(
+                cobrexa_hit_and_run_data...,
+                $sample_iters,
+                $chain,
+            )),
+            CachingPool(workers),
+            1:chains,
+        )...,
+    )
+
+    # remove warmup points from workers
+    map(fetch, remove_from.(workers, :cobrexa_hit_and_run_data))
+
+    return samples
+end
+
+"""
+    _affine_hit_and_run_chain(warmup, lbs, ubs, iters, chain)
+
+Internal helper function for computing a single affine hit-and-run chain. The
+number of the chain is passed for possible future initialization of stable
+RNGs.
+"""
+function _affine_hit_and_run_chain(warmup, lbs, ubs, iters, chain)
+
+    points = copy(warmup)
+    d, n_points = size(points)
+    result = Matrix{Float64}(undef, d, 0)
+
+    iter = 0
+
+    for iter_target in iters
+
+        while iter < iter_target
+            iter += 1
+
+            new_points = copy(points)
+
+            for i = 1:n_points
+
+                # direction: from the current point towards a random affine
+                # combination of the whole batch
+                mix = rand(n_points) .+ _constants.tolerance
+                dir = points * (mix ./ sum(mix)) - points[:, i]
+
+                # iteratively collect the maximum and minimum possible multiple
+                # of `dir` added to the current point
+                λmax = Inf
+                λmin = -Inf
+                for j = 1:d
+                    dl = lbs[j] - points[j, i]
+                    du = ubs[j] - points[j, i]
+                    idir = 1 / dir[j]
+                    if dir[j] < -_constants.tolerance
+                        lower = du * idir
+                        upper = dl * idir
+                    elseif dir[j] > _constants.tolerance
+                        lower = dl * idir
+                        upper = du * idir
+                    else
+                        lower = -Inf
+                        upper = Inf
+                    end
+                    λmin = max(λmin, lower)
+                    λmax = min(λmax, upper)
+                end
+
+                λ = λmin + rand() * (λmax - λmin)
+                !isfinite(λ) && continue # avoid divergence
+                new_points[:, i] = points[:, i] .+ λ .* dir
+
+                # TODO normally, here we would check if sum(S*new_point) is still
+                # lower than the tolerance, but we shall trust the computer
+                # instead.
+            end
+
+            points = new_points
+        end
+
+        result = hcat(result, points)
+    end
+
+    result
+end
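For orientation, here is a minimal self-contained sketch of the geometric step the chain helper performs: intersect the ray from the current point along `dir` with the bounding box to get the feasible segment `[λmin, λmax]`, then jump to a uniformly random point on that segment. All names below are illustrative and not part of the COBREXA API; a toy 2-D box stands in for the flux bounds.

```
# Toy sketch of one hit-and-run step inside a box given by lbs/ubs.
# Illustrative only; `step_in_box` is not a COBREXA function.
function step_in_box(x, dir, lbs, ubs; tol = 1e-9)
    λmin, λmax = -Inf, Inf
    for j in eachindex(x)
        abs(dir[j]) <= tol && continue # negligible movement along this axis
        lo = (lbs[j] - x[j]) / dir[j]
        hi = (ubs[j] - x[j]) / dir[j]
        lo > hi && ((lo, hi) = (hi, lo)) # a negative direction flips the interval
        λmin = max(λmin, lo)
        λmax = min(λmax, hi)
    end
    λ = λmin + rand() * (λmax - λmin) # uniform point on the feasible segment
    isfinite(λ) ? x .+ λ .* dir : x   # keep the old point if the segment is unbounded
end

step_in_box([0.5, 0.5], [1.0, -0.3], [0.0, 0.0], [1.0, 1.0]) # stays in the unit box
```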
+""" +function _affine_hit_and_run_chain(warmup, lbs, ubs, iters, chain) + + points = copy(warmup) + d, n_points = size(points) + result = Matrix{Float64}(undef, size(points, 1), 0) + + iter = 0 + + for iter_target in iters + + while iter < iter_target + iter += 1 + + new_points = copy(points) + + for i = 1:n_points + + mix = rand(n_points) .+ _constants.tolerance + dir = points * (mix ./ sum(mix)) - points[:, i] + + # iteratively collect the maximum and minimum possible multiple + # of `dir` added to the current point + λmax = Inf + λmin = -Inf + for j = 1:d + dl = lbs[j] - points[j, i] + du = ubs[j] - points[j, i] + idir = 1 / dir[j] + if dir[j] < -_constants.tolerance + lower = du * idir + upper = dl * idir + elseif dir[j] > _constants.tolerance + lower = dl * idir + upper = du * idir + else + lower = -Inf + upper = Inf + end + λmin = max(λmin, lower) + λmax = min(λmax, upper) + end + + λ = λmin + rand() * (λmax - λmin) + !isfinite(λ) && continue # avoid divergence + new_points[:, i] = points[:, i] .+ λ .* dir + + # TODO normally, here we would check if sum(S*new_point) is still + # lower than the tolerance, but we shall trust the computer + # instead. + end + + points = new_points + end + + result = hcat(result, points) + end + + result +end diff --git a/src/analysis/sampling/hit_and_run.jl b/src/analysis/sampling/hit_and_run.jl deleted file mode 100644 index fa2b82c91..000000000 --- a/src/analysis/sampling/hit_and_run.jl +++ /dev/null @@ -1,120 +0,0 @@ -""" - hit_and_run( - N::Int, - opt_model; - keepevery = 100, - samplesize = 1000, - random_objective = false, - ) - -Perform a basic hit and run sampling for `N` iterations on a constrained JuMP -model in `opt_model`. See "Robert L. Smith Efficient Monte Carlo Procedures for -Generating Points Uniformly Distributed over Bounded Regions. Operations -Research 32 (6) 1296-1308 https://doi.org/10.1287/opre.32.6.1296" for more -details. - -The process generates `samplesize` samples, and logs the sample state each -`keepevery` iterations. - -Warm up points are generated by minimizing and maximizing reactions as in -[`flux_variability_analysis`](@ref), unless the `random_objective` is `true`, -in which case a randomly weighted objective is used for warmup. - -Note that `N` needs to be greater than sample size, and should be greater than -the dimensionality of the sampled space (i.e., at least same as the number of -reactions). 
diff --git a/src/analysis/sampling/warmup.jl b/src/analysis/sampling/warmup.jl
deleted file mode 100644
index f2ed645db..000000000
--- a/src/analysis/sampling/warmup.jl
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-    _get_warmup_points(cbm; random_objective=false, numstop=Inf)
-
-Generate warmup points for all reactions in the model that are not fixed.
-Assumes that the input JuMP model in `cbm` is already constrained.
-
-The warmup points are sampled randomly from all possibilities until `numstop`
-is reached; by default all points are generated.
-
-By default, the warmup points are generated as in FVA by minimizing and
-maximizing all reactions; `random_objective` switches this to completely random
-objectives.
-"""
-function _get_warmup_points(cbm; random_objective = false, numstop = Inf)
-    v = cbm[:x]
-    ubs = cbm[:ubs]
-    lbs = cbm[:lbs]
-    # determine which rxns should be max/min-ized (non fixed fluxes)
-    fixed_rxns = Int[]
-    for i in eachindex(v)
-        ub_val = normalized_rhs(ubs[i])
-        lb_val = normalized_rhs(lbs[i])
-        if ub_val >= 0 && ub_val == -lb_val
-            push!(fixed_rxns, i)
-        elseif ub_val < 0 && -ub_val == lb_val
-            push!(fixed_rxns, i)
-        end
-    end
-
-    # determine number of warmup points (if numstops is less than the number of non-fixed reactions.)
-    var_rxn_inds = shuffle!(filter(x -> !(x in fixed_rxns), 1:length(v))) # shuffle the optimization points
-    NN = numstop > length(var_rxn_inds) ? length(var_rxn_inds) : numstop
-    wpoints = zeros(length(v), 2 * NN)
-
-    # generate warmup points.
-    for (i, ii) in zip(1:length(var_rxn_inds), 1:2:(2*length(var_rxn_inds)))
-        i > NN && break
-
-        if random_objective
-            @objective(cbm, Max, sum(rand() * v[iii] for iii in var_rxn_inds))
-        else
-            @objective(cbm, Max, v[var_rxn_inds[i]])
-        end
-
-        optimize!(cbm)
-        for j = 1:size(wpoints, 1)
-            wpoints[j, ii] = value(v[j])
-        end
-
-        if random_objective
-            @objective(cbm, Min, sum(rand() * v[iii] for iii in var_rxn_inds))
-        else
-            @objective(cbm, Min, v[var_rxn_inds[i]])
-        end
-
-        optimize!(cbm)
-        for j = 1:size(wpoints, 1)
-            wpoints[j, ii+1] = value(v[j])
-        end
-    end
-
-    return wpoints
-end
diff --git a/src/analysis/sampling/warmup_variability.jl b/src/analysis/sampling/warmup_variability.jl
new file mode 100644
index 000000000..f14d90423
--- /dev/null
+++ b/src/analysis/sampling/warmup_variability.jl
@@ -0,0 +1,92 @@
+"""
+    warmup_from_variability(
+        model::MetabolicModel,
+        optimizer,
+        n_points::Int;
+        kwargs...
+    )
+
+Generate FVA-like warmup points for samplers by minimizing and maximizing a
+random subset of reactions. Cannot generate more than twice as many points as
+there are reactions in the model.
+"""
+function warmup_from_variability(model::MetabolicModel, optimizer, n_points::Int; kwargs...)
+    nr = n_reactions(model)
+
+    n_points > 2 * nr && throw(
+        DomainError(
+            n_points,
+            "Variability method cannot generate more than $(2*nr) points from this model",
+        ),
+    )
+
+    sample = shuffle(vcat(1:nr, -(1:nr)))[begin:n_points]
+    warmup_from_variability(
+        model,
+        optimizer,
+        -filter(x -> x < 0, sample),
+        filter(x -> x > 0, sample);
+        kwargs...,
+    )
+end
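A note on the dispatcher above: reactions to be minimized are encoded as negative indices and reactions to be maximized as positive ones, and the shuffled pool is then split back into the two positional vectors. A tiny standalone illustration of just that encoding (plain Julia, no COBREXA needed; the variable names are hypothetical):

```
# Sketch of the sign encoding used above: -i means "minimize reaction i",
# +i means "maximize reaction i". Names here are for illustration only.
using Random

nr = 4       # pretend the model has 4 reactions
n_points = 5 # ask for 5 of the 2*nr = 8 possible warmup points

sample = shuffle(vcat(1:nr, -(1:nr)))[begin:n_points]

min_reactions = -filter(x -> x < 0, sample) # back to positive reaction indices
max_reactions = filter(x -> x > 0, sample)

@show sample min_reactions max_reactions
```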
+""" +function warmup_from_variability( + model::MetabolicModel, + optimizer, + min_reactions::AbstractVector{Int} = 1:n_reactions(model), + max_reactions::AbstractVector{Int} = 1:n_reactions(model); + modifications = [], + workers::Vector{Int} = [myid()], +)::Tuple{Matrix{Float64},Vector{Float64},Vector{Float64}} + + # create optimization problem at workers, apply modifications + save_model = :( + begin + model = $model + optmodel = $COBREXA.make_optimization_model(model, $optimizer) + for mod in $modifications + mod(model, optmodel) + end + optmodel + end + ) + + map(fetch, save_at.(workers, :cobrexa_sampling_warmup_optmodel, Ref(save_model))) + + fluxes = hcat( + dpmap( + rid -> :($COBREXA._FVA_optimize_reaction( + cobrexa_sampling_warmup_optmodel, + $rid, + optmodel -> $COBREXA.JuMP.value.(optmodel[:x]), + )), + CachingPool(workers), + vcat(-min_reactions, max_reactions), + )..., + ) + + # snatch the bounds from whatever worker is around + lbs, ubs = get_val_from( + workers[1], + :($COBREXA.get_bound_vectors(cobrexa_sampling_warmup_optmodel)), + ) + + # free the data on workers + map(fetch, remove_from.(workers, :cobrexa_sampling_warmup_optmodel)) + + return fluxes, lbs, ubs +end diff --git a/src/base/solver.jl b/src/base/solver.jl index 94e231382..83943b844 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -54,5 +54,19 @@ Return `true` if `optmodel` solved successfully (solution is optimal or locally optimal). Return `false` if any other termination status is reached. Termination status is defined in the documentation of `JuMP`. """ -is_solved(optmodel) = - COBREXA.JuMP.termination_status(optmodel) in [MOI.OPTIMAL, MOI.LOCALLY_SOLVED] +function is_solved(optmodel) + COBREXA.JuMP.termination_status(optmodel) in [MOI.OPTIMAL, MOI.LOCALLY_SOLVED] ? true : + false +end + +""" + get_bound_vectors(opt_model) + +Returns vectors of the lower and upper bounds of `opt_model` constraints, where +`opt_model` is a JuMP model constructed by e.g. +[`make_optimization_model`](@ref) or [`flux_balance_analysis`](@ref). +""" +get_bound_vectors(opt_model) = ( + [-normalized_rhs(lb) for lb in opt_model[:lbs]], + [normalized_rhs(ub) for ub in opt_model[:ubs]], +) diff --git a/src/base/utils/StandardModel.jl b/src/base/utils/StandardModel.jl index 1c26e758d..990f7b702 100644 --- a/src/base/utils/StandardModel.jl +++ b/src/base/utils/StandardModel.jl @@ -149,24 +149,3 @@ function set_bound( set_normalized_rhs(opt_model[:lbs][vind], -lb) set_normalized_rhs(opt_model[:ubs][vind], ub) end - -""" - get_bound_vectors(opt_model) - -Returns vectors of the lower and upper bounds of `opt_model` constraints, where -`opt_model` is a JuMP model constructed by e.g. -[`make_optimization_model`](@ref) or [`flux_balance_analysis`](@ref). 
- - -""" -function get_bound_vectors(opt_model) - lbconref = opt_model[:lbs] - ubconref = opt_model[:ubs] - lbs = zeros(length(lbconref)) - for i in eachindex(lbs) - lbs[i] = -normalized_rhs(lbconref[i]) - end - ubs = [normalized_rhs(ubconref[i]) for i in eachindex(ubconref)] - - return lbs, ubs -end diff --git a/test/analysis/sampling/affine_hit_and_run.jl b/test/analysis/sampling/affine_hit_and_run.jl new file mode 100644 index 000000000..0eaa1f0b8 --- /dev/null +++ b/test/analysis/sampling/affine_hit_and_run.jl @@ -0,0 +1,27 @@ +@testset "Sampling Tests" begin + model_path = download_data_file( + "http://bigg.ucsd.edu/static/models/e_coli_core.json", + joinpath("data", "e_coli_core.json"), + "7bedec10576cfe935b19218dc881f3fb14f890a1871448fc19a9b4ee15b448d8", + ) + + model = load_model(StandardModel, model_path) + + warmup, lbs, ubs = warmup_from_variability(model, Tulip.Optimizer; workers = W) + + samples = affine_hit_and_run( + warmup, + lbs, + ubs; + sample_iters = 10 * (1:3), + workers = W, + chains = length(W), + ) + + @test size(samples, 1) == size(warmup, 1) + @test size(samples, 2) == size(warmup, 2) * 3 * length(W) + + @test all(samples .>= lbs) + @test all(samples .<= ubs) + @test all(stoichiometry(model) * samples .< TEST_TOLERANCE) +end diff --git a/test/analysis/sampling/hit_and_run.jl b/test/analysis/sampling/hit_and_run.jl deleted file mode 100644 index 38213736f..000000000 --- a/test/analysis/sampling/hit_and_run.jl +++ /dev/null @@ -1,28 +0,0 @@ -@testset "Sampling Tests" begin - # # these tests are not very good - sampling needs work - model_path = download_data_file( - "http://bigg.ucsd.edu/static/models/e_coli_core.json", - joinpath("data", "e_coli_core.json"), - "7bedec10576cfe935b19218dc881f3fb14f890a1871448fc19a9b4ee15b448d8", - ) - - model = load_model(StandardModel, model_path) - opt_model = flux_balance_analysis( - model, - Tulip.Optimizer; - modifications = [ - change_objective("BIOMASS_Ecoli_core_w_GAM"), - change_constraint("EX_glc__D_e", -12, -12), - change_optimizer_attribute("IPM_IterationsLimit", 500), - ], - ) - biomass_index = first(indexin(["BIOMASS_Ecoli_core_w_GAM"], reactions(model))) - λ = JuMP.value(opt_model[:x][biomass_index]) - change_constraint("BIOMASS_Ecoli_core_w_GAM", 0.99 * λ, λ)(model, opt_model) - - samples = hit_and_run(100_000, opt_model, keepevery = 10, samplesize = 5000) - - # # The sampling converges very randomly and extremely approximately, so only - # # test a rough result - @test isapprox(mean(samples[64, :]), 8.9, atol = 0.1) -end diff --git a/test/analysis/sampling/warmup_variability.jl b/test/analysis/sampling/warmup_variability.jl new file mode 100644 index 000000000..f77a7258a --- /dev/null +++ b/test/analysis/sampling/warmup_variability.jl @@ -0,0 +1,23 @@ +@testset "Warm up point generation" begin + model_path = download_data_file( + "http://bigg.ucsd.edu/static/models/e_coli_core.json", + joinpath("data", "e_coli_core.json"), + "7bedec10576cfe935b19218dc881f3fb14f890a1871448fc19a9b4ee15b448d8", + ) + model = load_model(StandardModel, model_path) + + pts, lbs, ubs = warmup_from_variability( + model, + Tulip.Optimizer, + 100; + modifications = [change_constraint("EX_glc__D_e", -2, 2)], + workers = W, + ) + + idx = first(indexin(["EX_glc__D_e"], reactions(model))) + @test size(pts) == (95, 100) + @test all(pts[idx, :] .>= -2) + @test all(pts[idx, :] .<= 2) + @test lbs[idx] == -2 + @test ubs[idx] == 2 +end