diff --git a/src/base/solver.jl b/src/base/solver.jl index 058ab5b40..02735a2bc 100644 --- a/src/base/solver.jl +++ b/src/base/solver.jl @@ -19,16 +19,18 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_S xl, xu = bounds(model) optimization_model = Model(optimizer) - @variable(optimization_model, x[i = 1:n]) + @variable(optimization_model, x[1:n]) @objective(optimization_model, sense, objective(model)' * x) @constraint(optimization_model, mb, stoichiometry(model) * x .== balance(model)) # mass balance @constraint(optimization_model, lbs, xl .<= x) # lower bounds @constraint(optimization_model, ubs, x .<= xu) # upper bounds C = coupling(model) # empty if no coupling - cl, cu = coupling_bounds(model) - isempty(C) || @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds - isempty(C) || @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds + isempty(C) || begin + cl, cu = coupling_bounds(model) + @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds + @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds + end return optimization_model end diff --git a/src/reconstruction/gapfill_minimum_reactions.jl b/src/reconstruction/gapfill_minimum_reactions.jl index 59208daf0..c4b975a02 100644 --- a/src/reconstruction/gapfill_minimum_reactions.jl +++ b/src/reconstruction/gapfill_minimum_reactions.jl @@ -1,155 +1,189 @@ """ - gapfill_minimum_reactions( + function gapfill_minimum_reactions( model::MetabolicModel, universal_reactions::Vector{Reaction}, - objective_lower_bound::Float64, optimizer; - modifications=[], + objective_bounds = (_constants.tolerance, _constants.default_reaction_bound), + maximum_new_reactions = 5, weights = fill(1.0, length(universal_reactions)), - objective_upper_bound = COBREXA._constants.default_reaction_bound, - ignore_reactions = [], - max_gaps_fillable = 1000_000, + modifications = [], ) - -Return the indices of reactions in `universal_reactions` that should be added to -`model` so that the model can carry flux through its objective function, which -is bounded by `objective_lower_bound`. Optionally, specify `weights` that can be -used to bias the reactions found through solving the underlying mixed integer -program (MILP). Also, some reactions in `universal_reactions` can be ignored by -specifying their ids in `ignore_reactions`, this is useful to, e.g., restrict -which exchanges can be added. Finally, the limit the search space, it is -possible to specify the maximum number of gaps that can be filled through -`max_gaps_fillable`. - -This gap filling algorithm is based on the one introduced in *Reed, Jennifer L., -et al. "Systems approach to refining genome annotation." Proceedings of the -National Academy of Sciences (2006)*. Briefly, the algorithm find the smallest -number of reactions to add by solving the MILP: -``` -min ∑ wᵢ * yᵢ -s.t. S * x = 0 - xₗ ≤ x ≤ xᵤ ∀ model reactions - y * xₗ ≤ x ≤ y * xᵤ ∀ universal reactions - lb ≤ objective(x) ≤ ub - y ∈ {0, 1} -``` -where `w` is the set of optional `weights`, `x` the fluxes, and `y` the indicator -variables. + +Find a minimal set of reactions from `universal_reactions` that should be added +to `model` so that the model has a feasible solution with bounds on its +objective function given in `objective_bounds`. Weights of the added reactions +may be specified in `weights` to prefer adding reactions with lower weights. + +Internally, this builds and solves a mixed integer program, following +the method of Reed et al. (Reed, Jennifer L., et al. "Systems approach to +refining genome annotation." *Proceedings of the National Academy of Sciences* +(2006)). + +The function returns a solved JuMP optimization model, with the boolean +reaction inclusion indicators in variable vector `y`. Use +[`mask`](@ref) or [`gapfilled_rids`](@ref) to collect the reaction +information in Julia datatypes. + +To reduce the uncertainty in the MILP solver (and likely reduce the +complexity), you may put a limit on the size of the added reaction set in +`maximum_new_reactions`. """ function gapfill_minimum_reactions( model::MetabolicModel, universal_reactions::Vector{Reaction}, - objective_lower_bound::Float64, optimizer; - modifications = [], + objective_bounds = (_constants.tolerance, _constants.default_reaction_bound), + maximum_new_reactions = length(universal_reactions), weights = fill(1.0, length(universal_reactions)), - objective_upper_bound = COBREXA._constants.default_reaction_bound, - ignore_reactions = [], - max_gaps_fillable = _constants.max_gaps_fillable, + modifications = [], ) - # constraints from model to be gap filled - S_model = stoichiometry(model) - metabolite_id_order = metabolites(model) + precache!(model) # constraints from universal reactions that can fill gaps - n_universal_reactions = length(universal_reactions) - S_universal, lbs_universal, ubs_universal = COBREXA._universal_stoichiometry( - universal_reactions, - metabolite_id_order; - ignore_reactions, - ) - - # adjust the model stoichiometric matrix to account for additional metabolites if necessary - S = [ - S_model - spzeros(size(S_universal, 1) - size(S_model, 1), size(S_model, 2)) - ] - # adjust the balance to account for additional metabolites - bal = [ - balance(model) - spzeros(size(S_universal, 1) - size(S_model, 1)) - ] - - #= - First build standard flux balance type optimization problem, then add - specific details of the gap filling algorithm, e.g. indicator constraints, etc. - =# - opt_model = make_optimization_model(model, optimizer; sense = COBREXA.MIN_SENSE) - delete(opt_model, opt_model[:mb]) # need to remove mass balances - unregister(opt_model, :mb) # will re-use symbol + univs = _universal_stoichiometry(universal_reactions, metabolites(model)) + + # add space for additional metabolites and glue with the universal reaction + # stoichiometry + extended_stoichiometry = [[ + stoichiometry(model) + spzeros(length(univs.new_mids), n_reactions(model)) + ] univs.stoichiometry] + + # make the model anew (we can't really use make_optimization_model because + # we need the balances and several other things completely removed. Could + # be solved either by parametrizing make_optimization_model or by making a + # tiny temporary wrapper for this. + # keep this in sync with src/base/solver.jl, except for adding balances. + opt_model = Model(optimizer) + @variable(opt_model, x[1:n_reactions(model)]) + xl, xu = bounds(model) + @constraint(opt_model, lbs, xl .<= x) + @constraint(opt_model, ubs, x .<= xu) + + C = coupling(model) + isempty(C) || begin + cl, cu = coupling_bounds(model) + @constraint(opt_model, c_lbs, cl .<= C * x) + @constraint(opt_model, c_ubs, C * x .<= cu) + end - @variable(opt_model, z[1:n_universal_reactions]) # fluxes from universal reactions - @variable(opt_model, y[1:n_universal_reactions], Bin) # indicators + # add the variables for new stuff + @variable(opt_model, ux[1:length(universal_reactions)]) # fluxes from universal reactions + @variable(opt_model, y[1:length(universal_reactions)], Bin) # indicators - # objective bounds + # combined metabolite balances @constraint( opt_model, - obj_bounds, - objective_lower_bound <= objective(model)' * opt_model[:x] <= objective_upper_bound + extended_stoichiometry * [x; ux] .== + [balance(model); zeros(length(univs.new_mids))] ) + # objective bounds + @constraint(opt_model, objective_bounds[1] <= objective(model)' * x) + @constraint(opt_model, objective_bounds[2] >= objective(model)' * x) + # flux bounds of universal reactions with indicators - @constraint(opt_model, lbs_universal, lbs_universal .* y .<= z) - @constraint(opt_model, ubs_universal, z .<= ubs_universal .* y) + @constraint(opt_model, ulb, univs.lbs .* y .<= ux) + @constraint(opt_model, uub, univs.ubs .* y .>= ux) - # combined mass balances - @constraint(opt_model, mb, S * opt_model[:x] + S_universal * z .== bal) # mass balance of all reactions + # minimize the total number of indicated reactions + @objective(opt_model, Min, weights' * y) - # constrain the maximum number of gaps that can be filled - @constrain(opt_model, max_gaps, sum(y) <= max_gaps_fillable) - - # make new objective - @objective(opt_model, Min, sum(weights .* y)) + # limit the number of indicated reactions + # (prevents the solver from exploring too far) + @constraint(opt_model, sum(y) <= maximum_new_reactions) + # apply all modifications for mod in modifications mod(opt_model, model) end optimize!(opt_model) - findall(value.(y) .> 0) + return opt_model end +""" + gapfilled_mask(opt_model::BitVector) + +Get a `BitVector` of added reactions from the model solved by +[`gapfill_minimum_reactions`](@ref). The bit indexes correspond to the indexes +of `universal_reactions` given to the gapfilling function. In case the model is +not solved, this returns `nothing`. + +# Example + + gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_mask +""" +gapfilled_mask(opt_model)::BitVector = + is_solved(opt_model) ? value.(opt_model[:y]) .> 0 : nothing + +""" + gapfilled_rids(opt_model, universal_reactions::Vector{Reaction})::Vector{String} + +Utility to extract a short vector of IDs of the reactions added by the +gapfilling algorithm. Use with `opt_model` returned from +[`gapfill_minimum_reactions`](@ref). +""" +gapfilled_rids(opt_model, universal_reactions::Vector{Reaction}) = + let v = gapfilled_mask(opt_model) + isnothing(v) ? nothing : [rxn.id for rxn in universal_reactions[v]] + end + +""" + gapfilled_rids(universal_reactions::Vector{Reaction}) + +Overload of [`gapfilled_rids`](@ref) that can be piped easily. + +# Example + + gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_rids(myReactions) +""" +gapfilled_rids(universal_reactions::Vector{Reaction}) = + opt_model -> gapfilled_rids(opt_model, universal_reactions) + """ _universal_stoichiometry( universal_reactions::Vector{Reaction}, - metabolite_id_order, + mids, ) A helper function that constructs the stoichiometric matrix of a set of `universal_reactions`. The order of the metabolites is determined with -`metabolite_id_order`, so that this stoichiometric matrix can be combined with +`mids`, so that this stoichiometric matrix can be combined with another one. """ -function _universal_stoichiometry( - universal_reactions::Vector{Reaction}, - metabolite_id_order; - ignore_reactions = [], -) - rows = Int[] - cols = Int[] - vals = Float64[] - lbs = zeros(length(universal_reactions)) - ubs = zeros(length(universal_reactions)) - met_id_order_lu = Dict(zip(metabolite_id_order, 1:length(metabolite_id_order))) - n_midxs = length(met_id_order_lu) # account for metabolites already in model - - n_cols = 0 # counter for filtered reactions - for (col, rxn) in - enumerate(filter(x -> !in(x.id, ignore_reactions), universal_reactions)) - n_cols += 1 - for (mid, stoich) in rxn.metabolites - if !haskey(met_id_order_lu, mid) - n_midxs += 1 - met_id_order_lu[mid] = n_midxs - end - push!(rows, met_id_order_lu[mid]) - push!(cols, col) - push!(vals, stoich) - end - lbs[col] = rxn.lb - ubs[col] = rxn.ub - end +function _universal_stoichiometry(urxns::Vector{Reaction}, mids::Vector{String}) + + # traversal over all elements in stoichiometry of universal_reactions + stoiMap(f) = [ + f(ridx, mid, stoi) for (ridx, rxn) in enumerate(urxns) for + (mid, stoi) in rxn.metabolites + ] - return sparse(rows, cols, vals, n_midxs, n_cols), lbs, ubs + # make an index and find new metabolites + met_id_lookup = Dict(mids .=> eachindex(mids)) + + new_mids = + collect(Set(filter(x -> !haskey(met_id_lookup, x), stoiMap((_, mid, _) -> mid)))) + all_mids = vcat(mids, new_mids) + + # remake the index with all metabolites + met_id_lookup = Dict(all_mids .=> eachindex(all_mids)) + + # build the result + return ( + stoichiometry = float.( + sparse( + stoiMap((_, mid, _) -> met_id_lookup[mid]), + stoiMap((ridx, _, _) -> ridx), + stoiMap((_, _, stoi) -> stoi), + length(all_mids), + length(urxns), + ), + ), + lbs = [rxn.lb for rxn in urxns], + ubs = [rxn.ub for rxn in urxns], + new_mids = new_mids, + ) end diff --git a/test/reconstruction/gapfill_minimum_reactions.jl b/test/reconstruction/gapfill_minimum_reactions.jl index 6a8118e98..884b31063 100644 --- a/test/reconstruction/gapfill_minimum_reactions.jl +++ b/test/reconstruction/gapfill_minimum_reactions.jl @@ -2,15 +2,9 @@ #= Implement the small model that should be gapfilled. =# - model = StandardModel("partialmodel") - m1 = Metabolite("m1") - m2 = Metabolite("m2") - m3 = Metabolite("m3") - m4 = Metabolite("m4") - m5 = Metabolite("m5") - m6 = Metabolite("m6") - m7 = Metabolite("m7") - m8 = Metabolite("m8") + model = StandardModel("partial model") + + (m1, m2, m3, m4, m5, m6, m7, m8) = Metabolite.("m$i" for i = 1:8) @add_reactions! model begin "r1", nothing → m1, 0, 1 @@ -38,18 +32,16 @@ rB = Reaction("rB", Dict("m2" => -1, "m9" => 1), :forward) rC = Reaction("rC", Dict("m9" => -1, "m10" => 1), :bidirectional) rD = Reaction("rC", Dict("m10" => -1), :reverse) - rE = Reaction("rE", Dict("m2" => -1, "m7" => 2, "m6" => 2), :forward) universal_reactions = [r5, r7, r10, rA, rB, rC, rD] - optimizer = GLPK.Optimizer - rxns = gapfill_minimum_reactions( - model, - universal_reactions, - 0.1, - optimizer; - ignore_reactions = ["rE"], - ) - @test 2 in rxns - @test 3 in rxns - @test length(rxns) == 2 + + rxns = + gapfill_minimum_reactions( + model, + universal_reactions, + GLPK.Optimizer; + objective_bounds = (0.1, 1000.0), + ) |> gapfilled_rids(universal_reactions) + + @test issetequal(["r7", "r10"], rxns) end