Merge pull request #618 from LCSB-BioCore/mk-gapfill-cleanup
clean up the gapfilling code
exaexa committed May 16, 2022
2 parents f8cd84a + 605d8d1 commit 3a2be37
Showing 3 changed files with 164 additions and 136 deletions.
10 changes: 6 additions & 4 deletions src/base/solver.jl
Expand Up @@ -19,16 +19,18 @@ function make_optimization_model(model::MetabolicModel, optimizer; sense = MAX_S
xl, xu = bounds(model)

optimization_model = Model(optimizer)
@variable(optimization_model, x[i = 1:n])
@variable(optimization_model, x[1:n])
@objective(optimization_model, sense, objective(model)' * x)
@constraint(optimization_model, mb, stoichiometry(model) * x .== balance(model)) # mass balance
@constraint(optimization_model, lbs, xl .<= x) # lower bounds
@constraint(optimization_model, ubs, x .<= xu) # upper bounds

C = coupling(model) # empty if no coupling
cl, cu = coupling_bounds(model)
isempty(C) || @constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds
isempty(C) || @constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds
isempty(C) || begin
cl, cu = coupling_bounds(model)
@constraint(optimization_model, c_lbs, cl .<= C * x) # coupling lower bounds
@constraint(optimization_model, c_ubs, C * x .<= cu) # coupling upper bounds

return optimization_model
256 changes: 145 additions & 111 deletions src/reconstruction/gapfill_minimum_reactions.jl
@@ -1,155 +1,189 @@
function gapfill_minimum_reactions(
objective_bounds = (_constants.tolerance, _constants.default_reaction_bound),
maximum_new_reactions = 5,
weights = fill(1.0, length(universal_reactions)),
objective_upper_bound = COBREXA._constants.default_reaction_bound,
ignore_reactions = [],
max_gaps_fillable = 1000_000,
modifications = [],
Return the indices of reactions in `universal_reactions` that should be added to
`model` so that the model can carry flux through its objective function, which
is bounded by `objective_lower_bound`. Optionally, specify `weights` that can be
used to bias the reactions found through solving the underlying mixed integer
program (MILP). Also, some reactions in `universal_reactions` can be ignored by
specifying their ids in `ignore_reactions`; this is useful to, e.g., restrict
which exchanges can be added. Finally, to limit the search space, it is
possible to specify the maximum number of gaps that can be filled through
This gap filling algorithm is based on the one introduced in *Reed, Jennifer L.,
et al. "Systems approach to refining genome annotation." Proceedings of the
National Academy of Sciences (2006)*. Briefly, the algorithm finds the smallest
number of reactions to add by solving the MILP:
min ∑ wᵢ * yᵢ
s.t. S * x = 0
xₗ ≤ x ≤ xᵤ ∀ model reactions
y * xₗ ≤ x ≤ y * xᵤ ∀ universal reactions
lb ≤ objective(x) ≤ ub
y ∈ {0, 1}
where `w` is the set of optional `weights`, `x` the fluxes, and `y` the indicator
Find a minimal set of reactions from `universal_reactions` that should be added
to `model` so that the model has a feasible solution with bounds on its
objective function given in `objective_bounds`. Weights of the added reactions
may be specified in `weights` to prefer adding reactions with lower weights.
Internally, this builds and solves a mixed integer program, following
the method of Reed et al. (Reed, Jennifer L., et al. "Systems approach to
refining genome annotation." *Proceedings of the National Academy of Sciences*
The function returns a solved JuMP optimization model, with the boolean
reaction inclusion indicators in variable vector `y`. Use
[`mask`](@ref) or [`gapfilled_rids`](@ref) to collect the reaction
information in Julia datatypes.
To reduce the uncertainty in the MILP solver (and likely reduce the
complexity), you may put a limit on the size of the added reaction set in
function gapfill_minimum_reactions(
modifications = [],
objective_bounds = (_constants.tolerance, _constants.default_reaction_bound),
maximum_new_reactions = length(universal_reactions),
weights = fill(1.0, length(universal_reactions)),
objective_upper_bound = COBREXA._constants.default_reaction_bound,
ignore_reactions = [],
max_gaps_fillable = _constants.max_gaps_fillable,
modifications = [],
# constraints from model to be gap filled
S_model = stoichiometry(model)
metabolite_id_order = metabolites(model)

# constraints from universal reactions that can fill gaps
n_universal_reactions = length(universal_reactions)
S_universal, lbs_universal, ubs_universal = COBREXA._universal_stoichiometry(

# adjust the model stoichiometric matrix to account for additional metabolites if necessary
S = [
spzeros(size(S_universal, 1) - size(S_model, 1), size(S_model, 2))
# adjust the balance to account for additional metabolites
bal = [
spzeros(size(S_universal, 1) - size(S_model, 1))

First build standard flux balance type optimization problem, then add
specific details of the gap filling algorithm, e.g. indicator constraints, etc.
opt_model = make_optimization_model(model, optimizer; sense = COBREXA.MIN_SENSE)
delete(opt_model, opt_model[:mb]) # need to remove mass balances
unregister(opt_model, :mb) # will re-use symbol
univs = _universal_stoichiometry(universal_reactions, metabolites(model))

# add space for additional metabolites and glue with the universal reaction
# stoichiometry
extended_stoichiometry = [[
spzeros(length(univs.new_mids), n_reactions(model))
] univs.stoichiometry]

# make the model anew (we can't really use make_optimization_model because
# we need the balances and several other things completely removed. Could
# be solved either by parametrizing make_optimization_model or by making a
# tiny temporary wrapper for this.)
# keep this in sync with src/base/solver.jl, except for adding balances.
opt_model = Model(optimizer)
@variable(opt_model, x[1:n_reactions(model)])
xl, xu = bounds(model)
@constraint(opt_model, lbs, xl .<= x)
@constraint(opt_model, ubs, x .<= xu)

C = coupling(model)
isempty(C) || begin
cl, cu = coupling_bounds(model)
@constraint(opt_model, c_lbs, cl .<= C * x)
@constraint(opt_model, c_ubs, C * x .<= cu)

@variable(opt_model, z[1:n_universal_reactions]) # fluxes from universal reactions
@variable(opt_model, y[1:n_universal_reactions], Bin) # indicators
# add the variables for new stuff
@variable(opt_model, ux[1:length(universal_reactions)]) # fluxes from universal reactions
@variable(opt_model, y[1:length(universal_reactions)], Bin) # indicators

# objective bounds
# combined metabolite balances
objective_lower_bound <= objective(model)' * opt_model[:x] <= objective_upper_bound
extended_stoichiometry * [x; ux] .==
[balance(model); zeros(length(univs.new_mids))]

# objective bounds
@constraint(opt_model, objective_bounds[1] <= objective(model)' * x)
@constraint(opt_model, objective_bounds[2] >= objective(model)' * x)

# flux bounds of universal reactions with indicators
@constraint(opt_model, lbs_universal, lbs_universal .* y .<= z)
@constraint(opt_model, ubs_universal, z .<= ubs_universal .* y)
@constraint(opt_model, ulb, univs.lbs .* y .<= ux)
@constraint(opt_model, uub, .* y .>= ux)

# combined mass balances
@constraint(opt_model, mb, S * opt_model[:x] + S_universal * z .== bal) # mass balance of all reactions
# minimize the total number of indicated reactions
@objective(opt_model, Min, weights' * y)

# constrain the maximum number of gaps that can be filled
@constrain(opt_model, max_gaps, sum(y) <= max_gaps_fillable)

# make new objective
@objective(opt_model, Min, sum(weights .* y))
# limit the number of indicated reactions
# (prevents the solver from exploring too far)
@constraint(opt_model, sum(y) <= maximum_new_reactions)

# apply all modifications
for mod in modifications
mod(opt_model, model)


findall(value.(y) .> 0)
return opt_model

Get a `BitVector` of added reactions from the model solved by
[`gapfill_minimum_reactions`](@ref). The bit indexes correspond to the indexes
of `universal_reactions` given to the gapfilling function. In case the model is
not solved, this returns `nothing`.
# Example
gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_mask
# Returns the mask of positive entries of the `y` variable values when
# `is_solved(opt_model)` holds, and `nothing` otherwise.
# The return annotation must admit `nothing`: a declared return type makes Julia
# convert the returned value, so a plain `::BitVector` would throw on the
# unsolved branch instead of returning `nothing`.
gapfilled_mask(opt_model)::Union{BitVector,Nothing} =
    is_solved(opt_model) ? value.(opt_model[:y]) .> 0 : nothing

gapfilled_rids(opt_model, universal_reactions::Vector{Reaction})::Vector{String}
Utility to extract a short vector of IDs of the reactions added by the
gapfilling algorithm. Use with `opt_model` returned from
gapfilled_rids(opt_model, universal_reactions::Vector{Reaction}) =
let v = gapfilled_mask(opt_model)
isnothing(v) ? nothing : [ for rxn in universal_reactions[v]]

Overload of [`gapfilled_rids`](@ref) that can be piped easily.
# Example
gapfill_minimum_reactions(myModel, myReactions, Tulip.Optimizer) |> gapfilled_rids(myReactions)
# Curried form of `gapfilled_rids`: fixes `universal_reactions` and returns a
# one-argument function of the optimization model, suitable for use with `|>`.
function gapfilled_rids(universal_reactions::Vector{Reaction})
    return solved_model -> gapfilled_rids(solved_model, universal_reactions)
end

A helper function that constructs the stoichiometric matrix of a set of
`universal_reactions`. The order of the metabolites is determined with
`metabolite_id_order`, so that this stoichiometric matrix can be combined with
`mids`, so that this stoichiometric matrix can be combined with
another one.
function _universal_stoichiometry(
ignore_reactions = [],
rows = Int[]
cols = Int[]
vals = Float64[]
lbs = zeros(length(universal_reactions))
ubs = zeros(length(universal_reactions))
met_id_order_lu = Dict(zip(metabolite_id_order, 1:length(metabolite_id_order)))
n_midxs = length(met_id_order_lu) # account for metabolites already in model

n_cols = 0 # counter for filtered reactions
for (col, rxn) in
enumerate(filter(x -> !in(, ignore_reactions), universal_reactions))
n_cols += 1
for (mid, stoich) in rxn.metabolites
if !haskey(met_id_order_lu, mid)
n_midxs += 1
met_id_order_lu[mid] = n_midxs
push!(rows, met_id_order_lu[mid])
push!(cols, col)
push!(vals, stoich)
lbs[col] =
ubs[col] = rxn.ub
function _universal_stoichiometry(urxns::Vector{Reaction}, mids::Vector{String})

# traversal over all elements in stoichiometry of universal_reactions
stoiMap(f) = [
f(ridx, mid, stoi) for (ridx, rxn) in enumerate(urxns) for
(mid, stoi) in rxn.metabolites

return sparse(rows, cols, vals, n_midxs, n_cols), lbs, ubs
# make an index and find new metabolites
met_id_lookup = Dict(mids .=> eachindex(mids))

new_mids =
collect(Set(filter(x -> !haskey(met_id_lookup, x), stoiMap((_, mid, _) -> mid))))
all_mids = vcat(mids, new_mids)

# remake the index with all metabolites
met_id_lookup = Dict(all_mids .=> eachindex(all_mids))

# build the result
return (
stoichiometry = float.(
stoiMap((_, mid, _) -> met_id_lookup[mid]),
stoiMap((ridx, _, _) -> ridx),
stoiMap((_, _, stoi) -> stoi),
lbs = [ for rxn in urxns],
ubs = [rxn.ub for rxn in urxns],
new_mids = new_mids,
34 changes: 13 additions & 21 deletions test/reconstruction/gapfill_minimum_reactions.jl
Expand Up @@ -2,15 +2,9 @@
Implement the small model that should be gapfilled.
model = StandardModel("partialmodel")
m1 = Metabolite("m1")
m2 = Metabolite("m2")
m3 = Metabolite("m3")
m4 = Metabolite("m4")
m5 = Metabolite("m5")
m6 = Metabolite("m6")
m7 = Metabolite("m7")
m8 = Metabolite("m8")
model = StandardModel("partial model")

(m1, m2, m3, m4, m5, m6, m7, m8) = Metabolite.("m$i" for i = 1:8)

@add_reactions! model begin
"r1", nothing m1, 0, 1
Expand Down Expand Up @@ -38,18 +32,16 @@
rB = Reaction("rB", Dict("m2" => -1, "m9" => 1), :forward)
rC = Reaction("rC", Dict("m9" => -1, "m10" => 1), :bidirectional)
rD = Reaction("rC", Dict("m10" => -1), :reverse)
rE = Reaction("rE", Dict("m2" => -1, "m7" => 2, "m6" => 2), :forward)

universal_reactions = [r5, r7, r10, rA, rB, rC, rD]
optimizer = GLPK.Optimizer
rxns = gapfill_minimum_reactions(
ignore_reactions = ["rE"],
@test 2 in rxns
@test 3 in rxns
@test length(rxns) == 2

rxns =
objective_bounds = (0.1, 1000.0),
) |> gapfilled_rids(universal_reactions)

@test issetequal(["r7", "r10"], rxns)

