added tests + reviews

LCSB-BioCore · Nov 18, 2021 · b413c50 · b413c50
1 parent 8cb03e9
commit b413c50
Show file tree

Hide file tree

Showing 3 changed files with 270 additions and 134 deletions.
diff --git a/src/analysis/max_min_driving_force.jl b/src/analysis/max_min_driving_force.jl
@@ -1,14 +1,19 @@
 """
     max_min_driving_force(
         model::MetabolicModel,
-        gibbs_free_energies::Dict{String,Float64},
+        metabolite_gibbs_free_energies::Dict{String,Float64},
         optimizer;
-        ignore_metabolites::Vector{String} = ["h", "h2o"],
+        flux_solution::Dict{String,Float64} = Dict{String,Float64}(),
+        reaction_gibbs_free_energy_adjustments::Dict{String,Float64} = Dict{String,Float64}(),
+        metabolite_gibbs_free_energy_errors::Dict{String,Float64} = Dict{String,Float64}(),
+        proton_ids::Vector{String} = ["h_c", "h_e"],
+        water_ids::Vector{String} = ["h2o_c", "h2o_e"],
         constant_concentrations::Dict{String,Float64} = Dict{String,Float64}(),
         concentration_ratios::Dict{Tuple{String,String},Float64} = Dict{
             Tuple{String,String},
             Float64,
         }(),
+        ignore_reactions::Vector{String} = Vector{String}(),
         concentration_lb = 1e-6,
         concentration_ub = 10e-3,
         T = _constants.T,
@@ -20,27 +25,44 @@ Perform a max-min driving force analysis on the `model`, as defined by Noor, et
 "Pathway thermodynamics highlights kinetic obstacles in central metabolism.", PLoS
 computational biology, 2014.
 
-The analysis uses the supplied `optimizer` and Gibbs free energies of the
-reactions (in `gibbs_free_energies`) to find the max-min driving force, Gibbs
-free energy of the reactions and the concentrations of metabolites that
-optimize the following problem:
+The analysis uses the supplied `optimizer` and Gibbs free energies of formation of the
+metabolites (in `metabolite_gibbs_free_energies`). Optionally, `flux_solution` can be used
+to set the directions of the reactions in the `model`; this flux solution should be free of
+internal cycles. This matters when the forward direction of a reaction has a positive ΔG, so
+that the reaction can only run in reverse: MMDF assumes that all fluxes run forward, so the
+sign needs to be corrected for. Additionally, `reaction_gibbs_free_energy_adjustments` can
+be used to adjust the ΔG of reactions to account for effects such as transport. Also,
+`metabolite_gibbs_free_energy_errors` can be supplied if the Gibbs energy of formation
+measurements have errors associated with them. In this case they are used to bound the
+allowed Gibbs free energy of formation used for each metabolite.
+
+Specifically, the max-min driving force algorithm returns the Gibbs
+free energy of the reactions, the concentrations of metabolites, the Gibbs free energies of
+formation (not necessarily equal to `metabolite_gibbs_free_energies` if errors are supplied), and
+the actual maximum minimum driving force. The optimization problem solved is:
 ```
 max min -ΔrG
-s.t. ΔᵣG = ΔᵣG⁰ + R T S' ln(C)
-     ΔᵣG ≤ 0 (∀r)
+s.t. ΔrG = ΔrG⁰ + R T S' ln(C)
+     ΔrG⁰ = S' ΔfG
+     ΔrG ≤ 0
+     ΔfG⁰ - errors ≤ ΔfG ≤ ΔfG⁰ + errors
      ln(Cₗ) ≤ ln(C) ≤ ln(Cᵤ)
 ```
-where `ΔᵣG` are the Gibbs energies dissipated by the reactions, `ΔᵣG⁰` are the
-Gibbs free energies of the reactions, R is the gas constant, T is the
-temperature, S is the stoichiometry of the model, and C is the vector of
-metabolite concentrations (and their respective lower and upper bounds).
+where `ΔrG` are the Gibbs energies dissipated by the reactions, `ΔfG⁰` are the Gibbs free
+energies of formation of metabolites, `errors` are supplied errors of the formation
+energies, R is the gas constant, T is the temperature, S is the stoichiometry of the model,
+and C is the vector of metabolite concentrations (and their respective lower and upper
+bounds).
 
 In case no feasible solution exists, `nothing` is returned.
 
-Metabolites specified in `ignore_metabolites` are internally ignored -- that
-allows to specify e.g. removal of protons and water, thus allowing the
-thermodynamic calculations to assume constant pH and aqueous conditions. Note, if using
-biochemical thermodynamic data then you _must_ include the ids of protons and water here.
+Reactions specified in `ignore_reactions` are internally ignored when calculating the
+max-min driving force. This should include reactions with zero (or small) fluxes (to prevent
+sign issues), and non-physiological reactions.
+
+Since biochemical thermodynamics are assumed, the `proton_ids` and `water_ids` need to be
+specified so that they can be ignored in the calculations. Effectively this assumes an
+aqueous environment at constant pH is used.
 
 `constant_concentrations` is used to fix the concentrations of certain metabolites (such as
 CO₂). `concentration_ratios` is used to specify additional constraints on metabolite pair
@@ -72,9 +94,7 @@ function max_min_driving_force(
     concentration_ub = 10e-3,
     T = _constants.T,
     R = _constants.R,
-    metabolite_gibbs_free_energy_error_fallback_fraction = 0.01,
     modifications = [],
-    negligible_flux = 1e-6,
 )
 
     opt_model = Model(optimizer)
@@ -103,19 +123,17 @@ function max_min_driving_force(
     end
 
     # set bounds relative to the supplied errors; with no (or zero) error, fix to the supplied value
-    dgf_errors = [
-        get(
-            metabolite_gibbs_free_energy_errors,
-            mid,
-            metabolite_gibbs_free_energy_error_fallback_fraction *
-            metabolite_gibbs_free_energies[mid],
-        ) for mid in metabolites(model)
-    ]
-    for midx in eachindex(metabolites(model))
+    for (midx, mid) in enumerate(metabolites(model))
         midx in water_idxs && continue
         midx in proton_idxs && continue
-        JuMP.set_lower_bound(dgfs[midx], dgf0s[midx] - dgf_errors[midx])
-        JuMP.set_upper_bound(dgfs[midx], dgf0s[midx] + dgf_errors[midx])
+        dgf_error = get(metabolite_gibbs_free_energy_errors, mid, 0.0)
+        if dgf_error == 0.0
+            JuMP.fix(dgfs[midx], dgf0s[midx])
+            JuMP.fix(dgfs[midx], dgf0s[midx])
+        else
+            JuMP.set_lower_bound(dgfs[midx], dgf0s[midx] - dgf_error)
+            JuMP.set_upper_bound(dgfs[midx], dgf0s[midx] + dgf_error)
+        end
     end
 
     dgr_adjs =
@@ -126,15 +144,13 @@ function max_min_driving_force(
     @constraint(opt_model, dgrs .== dgr_adjs .+ S' * (dgfs .+ (R * T) .* logcs))
 
     # thermodynamics should correspond to the fluxes
-    fluxes = [get(flux_solution, rid, 1.0) for rid in reactions(model)]
+    flux_signs = [sign(get(flux_solution, rid, 1.0)) for rid in reactions(model)]
 
     # only consider physiological reactions
-    active_ridxs = [
-        idx for (idx, rid) in enumerate(reactions(model)) if
-        rid ∉ ignore_reactions && negligible_flux <= abs(fluxes[idx])
-    ]
+    active_ridxs =
+        [idx for (idx, rid) in enumerate(reactions(model)) if !(rid in ignore_reactions)]
 
-    @constraint(opt_model, dgrs[active_ridxs] .* sign.(fluxes[active_ridxs]) .<= 0)
+    @constraint(opt_model, dgrs[active_ridxs] .* flux_signs[active_ridxs] .<= 0)
 
     # add the absolute bounds
     for (midx, mid) in enumerate(metabolites(model)) # idx in opt_model (missing ignore_metabolites)
@@ -160,25 +176,29 @@ function max_min_driving_force(
         @constraint(opt_model, logcs[idxs[1]] == log(val) + logcs[idxs[2]])
     end
 
+    @constraint(opt_model, minDF .<= -dgrs[active_ridxs] .* flux_signs[active_ridxs])
+
+    @objective(opt_model, Max, minDF)
+
     # apply the modifications, if any
     for mod in modifications
         mod(model, opt_model)
     end
 
-    @constraint(opt_model, minDF .<= -dgrs[active_ridxs] .* sign.(fluxes[active_ridxs]))
-
-    @objective(opt_model, Max, minDF)
-
     optimize!(opt_model)
 
     is_solved(opt_model) || return nothing
 
     return (
         mmdf = objective_value(opt_model),
-        dgrs = Dict(rid => value(dgrs[i]) for (i, rid) in enumerate(reactions(model))),
-        cs = Dict(
+        dg_reactions = Dict(
+            rid => value(dgrs[i]) for (i, rid) in enumerate(reactions(model))
+        ),
+        concentrations = Dict(
             mid => exp(value(logcs[i])) for (i, mid) in enumerate(metabolites(model))
         ),
-        dgfs = Dict(mid => value(dgfs[i]) for (i, mid) in enumerate(metabolites(model))),
+        dg_formations = Dict(
+            mid => value(dgfs[i]) for (i, mid) in enumerate(metabolites(model))
+        ),
     )
 end
diff --git a/test/analysis/max_min_driving_force.jl b/test/analysis/max_min_driving_force.jl
@@ -1,96 +1,36 @@
 @testset "Max-min driving force analysis" begin
 
-    # This is a relatively standard model of glycolysis.
-    # If editing, keep this the same as the corresponding notebook.
-    mets = [
-        "13dpg",
-        "2pg",
-        "3pg",
-        "adp",
-        "atp",
-        "dhap",
-        "f6p",
-        "fdp",
-        "g3p",
-        "g6p",
-        "glc__D",
-        "h",
-        "h2o",
-        "lac__D",
-        "nad",
-        "nadh",
-        "pep",
-        "pi",
-        "pyr",
-    ]
+    model = load_model(model_paths["e_coli_core.json"])
 
-    rxns = Dict(
-        "ENO" => Dict("2pg" => -1.0, "h2o" => 1.0, "pep" => 1),
-        "FBA" => Dict("fdp" => -1.0, "dhap" => 1.0, "g3p" => 1.0),
-        "GAPD" => Dict(
-            "g3p" => -1.0,
-            "nad" => -1.0,
-            "pi" => -1.0,
-            "h" => 1.0,
-            "nadh" => 1.0,
-            "13dpg" => 1.0,
-        ),
-        "HEX" => Dict(
-            "atp" => -1.0,
-            "glc__D" => -1.0,
-            "g6p" => 1.0,
-            "adp" => 1.0,
-            "h" => 1.0,
-        ),
-        "LDH" => Dict(
-            "pyr" => -1.0,
-            "nadh" => -1.0,
-            "h" => -1.0,
-            "nad" => 1.0,
-            "lac__D" => 1.0,
-        ),
-        "PFK" =>
-            Dict("f6p" => -1.0, "atp" => -1.0, "adp" => 1.0, "h" => 1.0, "fdp" => 1.0),
-        "PGI" => Dict("g6p" => -1.0, "f6p" => 1.0),
-        "PGK" => Dict("13dpg" => -1.0, "adp" => -1.0, "atp" => 1.0, "3pg" => 1.0),
-        "PGM" => Dict("3pg" => -1.0, "2pg" => 1),
-        "PYK" =>
-            Dict("pep" => -1.0, "adp" => -1.0, "h" => -1.0, "atp" => 1.0, "pyr" => 1.0),
-        "TPI" => Dict("dhap" => -1.0, "g3p" => 1.0),
+    sol = flux_balance_analysis_dict(
+        model,
+        GLPK.Optimizer;
+        modifications = [add_loopless_constraints()],
     )
 
-    model = StandardModel("Glycolysis")
+    metabolite_gibbs_free_energy_errors,
+    metabolite_gibbs_free_energies,
+    reaction_gibbs_free_energy_adjustments = get_thermo_data()
 
-    add_metabolites!(model, Metabolite.(mets))
-    add_reactions!(
-        model,
-        collect(Reaction(rid; metabolites = mets) for (rid, mets) in rxns),
-    )
+    model = load_model(StandardModel, "e_coli_core.json")
 
-    gibbs_energies = Dict(
-        "ENO" => -3.81089,
-        "FBA" => 22.3932,
-        "GAPD" => 4.60271,
-        "HEX" => -17.90,
-        "LDH" => -23.6803,
-        "PFK" => -14.5988,
-        "PGI" => 2.6617,
-        "PGK" => -19.32,
-        "PGM" => -4.52041,
-        "PYK" => -27.5833,
-        "TPI" => 5.57535,
-    )
+    flux_solution = flux_balance_analysis_dict(model, GLPK.Optimizer; modifications=[add_loopless_constraints(), change_optimizer_attribute("IPM_IterationsLimit", 1000)])
+
+    ignore_reactions = [rid for rid in reactions(model) if looks_like_biomass_reaction(rid) ||
+        looks_like_exchange_reaction(rid) || rid == "ATPM" || abs(flux_solution[rid]) < 1e-6]
 
     res = max_min_driving_force(
         model,
-        gibbs_energies,
-        Tulip.Optimizer;
-        ignore_metabolites = ["h", "h2o"],
-        modifications = [change_optimizer_attribute("IPM_IterationsLimit", 500)],
-        concentration_ratios = Dict(("atp", "adp") => 10.0, ("nadh", "nad") => 0.1),
-        constant_concentrations = Dict("pi" => 10e-3),
+        metabolite_gibbs_free_energies,
+        optimizer;
+        flux_solution = flux_solution,
+        reaction_gibbs_free_energy_adjustments = reaction_gibbs_free_energy_adjustments,
+        metabolite_gibbs_free_energy_errors = metabolite_gibbs_free_energy_errors,
+        proton_ids = ["h_c", "h_e"],
+        water_ids = ["h2o_c", "h2o_e"],
         concentration_lb = 1e-6,
         concentration_ub = 10e-3,
+        ignore_reactions = ignore_reactions,
     )
 
     expected_energies = Dict(
@@ -125,15 +65,9 @@
         "glc__D" => 4.9684448349558665e-5,
         "lac__D" => 0.0012764690773165621,
     )
-    @test isapprox(res.mmdf, 2.122526369934736, atol = TEST_TOLERANCE)
-    @test issetequal(keys(res.dgs), keys(expected_energies))
-    @test issetequal(keys(res.cs), keys(expected_concentrations))
-    @test all(
-        isapprox(res.dgs[i], expected_energies[i], atol = TEST_TOLERANCE) for
-        i in keys(expected_energies)
-    )
-    @test all(
-        isapprox(res.cs[i], expected_concentrations[i], atol = TEST_TOLERANCE) for
-        i in keys(expected_concentrations)
-    )
+    @test isapprox(res.mmdf, 5.613826837623506, atol = TEST_TOLERANCE)
+
+    @test isapprox(res.concentrations["cit_c"], 0.01)
+    @test isapprox(res.dg_reactions["PYK"], -8.28824174597107)
+    @test isapprox(res.dg_reactions["g3p_c"], -1073.2224793532705)
 end