COBREXA · stelmo · Apr 16, 2024 · Apr 16, 2024 · Apr 16, 2024 · Apr 16, 2024
diff --git a/docs/src/examples/01-loading-and-saving.jl b/docs/src/examples/01-loading-and-saving.jl
@@ -39,15 +39,15 @@ download_model(
     "b4db506aeed0e434c1f5f1fdd35feda0dfe5d82badcfda0e9d1342335ab31116",
 )
 
-# (You do not need to fill in the hash values immediately; simply run the
-# function once and then copy the reported hash value into your script.)
+#md # !!! tip "Tip: How do I get the model hash?"
+#md #     You do not need to fill in the hash values immediately; simply run the function once, and then copy the reported hash value into your script.
 
 # ## Loading models
 
-# To load the flux-balance constrained models, COBREXA uses the
-# [AbstractFBCModels](https://github.com/COBREXA/AbstractFBCModels.jl)
-# framework to import various kinds of models including SBML, JSON and the
-# legacy Matlab-formatted "COBRA toolbox" models.
+# To load genome-scale metabolic models, COBREXA uses the
+# [AbstractFBCModels](https://github.com/COBREXA/AbstractFBCModels.jl) framework
+# to import various kinds of models including SBML, JSON and the legacy
+# Matlab-formatted "COBRA toolbox" models.
 
 # All models can be loaded automatically using [`load_model`](@ref); but you
 # must import the model-type specific packages to load the functionality. (This
@@ -89,10 +89,15 @@ model_converted_to_json = load_model("e_coli_core.xml", JSONFBCModels.JSONFBCMod
 model_in_julia_structures =
     load_model(JSONFBCModels.JSONFBCModel, "e_coli_core.json", A.CanonicalModel.Model)
 
+#md # !!! tip "Tip: Where did v1's StandardModel go?"
+#md #     `CanonicalModel` is a renamed version of `StandardModel`. If you did not use COBREXA v1, ignore this.
+
 # The above command specifies all model types explicitly, leaving least room
 # for guessing-based errors. Note that it is also possible to convert all model
 # types to each other simply by using Julia's `convert`.
 
+model_in_json_structure = convert(JSONFBCModels.JSONFBCModel, model_in_julia_structures)
+
 # ## Saving models
 
 # You can write your models to storage by using [`save_model`](@ref):

diff --git a/docs/src/examples/02-flux-balance-analysis.jl b/docs/src/examples/02-flux-balance-analysis.jl
@@ -61,10 +61,9 @@ solution.objective
 
 solution.fluxes.PFK
 
+#md # !!! tip "Tip: Tab through the model structure"
+#md #     You can use tab completion to quickly see what is in a constraint tree (CT). Unfortunately, due to type system limitations, this only works for the topmost level of the tree.
+
 # ...or make a "table" of all fluxes through all reactions:
 
 collect(solution.fluxes)
-
-# ## Advanced: Finding flux balance via the low-level interface
-
-# TODO ConstraintTrees (maybe put this into a separate example?)
diff --git a/docs/src/examples/02a-optimizer-parameters.jl b/docs/src/examples/02a-optimizer-parameters.jl
@@ -20,14 +20,17 @@
 # additional optimizer settings via the `settings` parameter of
 # [`flux_balance_analysis`](@ref). These include e.g.
 #
-# - [`set_optimizer_attribute`](@ref) (typically allowing you to tune e.g.
-#   iteration limits, tolerances, or floating-point precision)
-# - [`set_objective_sense`](@ref) (allowing you to change and reverse the
-#   optimization direction, if required)
-# - [`silence`](@ref) to disable the debug output of the optimizer
-# - and even [`set_optimizer`](@ref), which changes the optimizer
+# - [`set_optimizer_attribute`](@ref) allowing you to tune e.g.
+#   iteration limits, tolerances, or floating-point precision (see the JuMP
+#   documentation for more solver-specific settings)
+# - [`set_objective_sense`](@ref) allowing you to change and reverse the
+#   optimization direction, if required
+# - [`silence`](@ref) allowing you to disable the debug output of the optimizer
+# - [`set_optimizer`](@ref) allowing you to change the optimizer
 #   implementation used (this is not quite useful in this case, but becomes
 #   beneficial with more complex, multi-stage optimization problems)
+# - [`set_time_limit_sec`](@ref) allowing you to set a time limit before the solver
+#   must terminate (useful for MILP solvers)
 #
 # To demonstrate this, we'll use the usual toy model:
 
@@ -44,6 +47,7 @@ model = load_model("e_coli_core.json")
 
 # Running a FBA with a silent optimizer that has slightly increased iteration
 # limit for IPM algorithm may now look as follows:
+
 solution = flux_balance_analysis(
     model,
     optimizer = Tulip.Optimizer,
@@ -60,7 +64,7 @@ solution = flux_balance_analysis(
 solution = flux_balance_analysis(
     model,
     optimizer = Tulip.Optimizer,
-    settings = [set_optimizer_attribute("IPM_IterationsLimit", 2)],
+    settings = [set_optimizer_attribute("IPM_IterationsLimit", 2), set_time_limit_sec(1)],
 )
 
 println(solution)

diff --git a/docs/src/examples/02b-model-modifications.jl b/docs/src/examples/02b-model-modifications.jl
@@ -17,26 +17,26 @@
 # # Making adjustments to the model
 #
 # Typically, we do not need to solve the models as they come from the authors
-# (someone else already did that!), but we want to perform various
-# perturbations in the model structure and conditions, and explore how the
-# model behaves in the changed conditions.
+# (someone else already did that!), but we want to perform various perturbations
+# in the model structure and conditions, and explore how the model behaves in
+# the changed conditions.
 #
 # With COBREXA, there are 2 different approaches that one can take:
-# 1. We can change the model structure and use the changed metabolic model.
-#    This is better for doing simple and small but systematic modifications,
+# 1. We can change the model structure, and use the changed metabolic model.
+#    This is better for doing simple and small, but systematic modifications,
 #    such as removing metabolites, adding reactions, etc.
 # 2. We can intercept the pipeline that converts the metabolic model to
-#    constraints and then to the optimizer representation, and make small
-#    modifications along that way. This is better for various technical model
-#    adjustments, such as using combined objectives or adding reaction-coupling
-#    constraints.
+#    constraints and/or to the optimizer representation, and make modifications
+#    along that way. This is better suited to making global model adjustments,
+#    such as using combined objectives, or adding reaction-coupling constraints,
+#    or combining multiple models into a bigger one.
 #
-# Here we demonstrate the first, "modelling" approach. The main advantage of
-# that approach is that the modified model is still a FBC model, and you can
-# export, save and share it via the AbstractFBCModels interace. The main
-# disadvantage is that the "common" FBC model interface does not easily express
-# various complicated constructions (communities, reaction coupling, enzyme
-# constraints, etc.) -- see the [example about modifying the
+# Here we demonstrate the first, "modeling" approach. The main advantage of this
+# approach is that the modified model is still a FBC model, and you can export,
+# save and share it via the AbstractFBCModels interface. The main disadvantage is
+# that the "common" FBC model interface does not easily express various
+# complicated constructions (communities, reaction coupling, enzyme constraints,
+# etc.) -- see the [example about modifying the
 # constraints](02c-constraint-modifications.md) for a closer look on how to
 # modify even such complex constructions.
 #
@@ -72,6 +72,9 @@ model.reactions["PFK"]
 
 model.reactions["CS"].stoichiometry
 
+#md # !!! tip "Tip: Create your own Model type!"
+#md #     For some applications, `CanonicalModel` might be too restrictive. Take a look at the docs of [AbstractFBCModels](https://github.com/COBREXA/AbstractFBCModels.jl) to see how simple it is to create your own model type. Further, if you adhere to the interface, _all_ the analysis in COBREXA will just work on it!
+
 # ## Running FBA on modified models
 #
 # Since the canonical model is completely mutable, you can change it in any way
@@ -102,9 +105,9 @@ low_glucose_solution.objective
 # ## Preventing reference-based sharing problems with `deepcopy`
 #
 # People often want to try different perturbations with a single base model. It
-# would therefore look feasible to save retain the "unmodified" model in a
-# single variable, and make copies of that with the modifications applied.
-# Let's observe what happens:
+# would therefore look feasible to save the "unmodified" model in a single
+# variable, and make copies of that with the modifications applied. Let's
+# observe what happens:
 
 base_model = convert(CM.Model, load_model("e_coli_core.json")) # load the base
 
@@ -140,8 +143,8 @@ modified_model.reactions["EX_glc__D_e"].lower_bound = -123.0
 @test modified_model.reactions["EX_glc__D_e"].lower_bound != #src
       base_model.reactions["EX_glc__D_e"].lower_bound #src
 
-#md # !!! danger "Avoid overwriting base models when using in-place modifications"
-#md #     Whenever you are changing a copy of the model, make sure that you are not changing it by a reference. Always use some copy mechanism such as `copy` or `deepcopy` to prevent the default reference-based sharing.
+#md # !!! danger "Danger: Avoid overwriting base models when using in-place modifications"
+#md #     Whenever you are changing a copy of the model, make sure that you are not changing it by a reference. Always use some copy mechanism such as `copy` or `deepcopy` to prevent the default reference-based sharing.
 
 # ## Observing the differences
 #
@@ -163,5 +166,5 @@ flux_changes =
 # ...and again see what changed most:
 sort(collect(flux_changes), by = last)
 
-#md # !!! tip "For realistic comparisons always use a uniquely defined flux solution"
-#md #     Since the usual flux balance allows a lot of freedom in the "solved" flux and the only value that is "reproducible" by the analysis is the objective, one should never compare the flux distributions directly. Typically, that may result in false-positive (and sometimes false-negative) differences. Use e.g. [parsimonious FBA](03-parsimonious-flux-balance.md) to obtain uniquely determined and safely comparable flux solutions.
+#md # !!! tip "Tip: For realistic comparisons always use a uniquely defined flux solution"
+#md #     Since the usual flux balance allows a lot of freedom in the "solved" flux and the only value that is "reproducible" by the analysis is the objective, one should never compare the flux distributions directly. Typically, that may result in false-positive (and sometimes false-negative) differences. Use e.g. [parsimonious FBA](03-parsimonious-flux-balance.md) to obtain uniquely determined and safely comparable flux solutions.
diff --git a/docs/src/examples/02c-constraint-modifications.jl b/docs/src/examples/02c-constraint-modifications.jl
@@ -16,20 +16,12 @@
 
 # # Making adjustments to the constraint system
 #
-# In the [previous example about model
-# adjustments](02b-model-modifications.md), we noted that some constraint
-# systems may be to complex to be changed within the limits of the usual FBC
-# model view, and we may require a sharper tool to do the changes we need. This
-# example shows how to do that by modifying the constraint systems that are
-# generated within COBREXA to represent the metabolic model contents.
-#
-# ## Background: Model-to-optimizer pipeline
-#
-# ## Background: Constraint trees
-#
-# ## Changing the model-to-optimizer pipeline
-#
-# TODO clean up the stuff below:
+# In the [previous example about model adjustments](02b-model-modifications.md),
+# we noted that some constraint systems may be too complex to be changed within
+# the limits of the usual FBC model view, and we may require a sharper tool to
+# do the changes we need. This example shows how to do that by modifying the
+# constraint systems that are generated within COBREXA to represent the
+# metabolic model contents.
 
 using COBREXA
 
@@ -42,21 +34,53 @@ download_model(
 import JSONFBCModels
 import GLPK
 
-model = load_model("e_coli_core.json")
+model = load_model("e_coli_core.json") # flux balance type model
 
-# ## Customizing the model
+# ## Background: Constraint trees
 
-# We can also modify the model. The most explicit way to do this is
-# to make a new constraint tree representation of the model.
+# COBREXA uses [ConstraintTrees](https://github.com/COBREXA/ConstraintTrees.jl)
+# to represent model structures internally. This framework is incredibly powerful,
+# as it neatly groups relevant variables and constraints together.
 
 import ConstraintTrees as C
 
-ctmodel = flux_balance_constraints(model)
+# In general, constraint-based models use fluxes as variables, and all the
+# constraints are in terms of them (or derived quantities). For "normal" models,
+# you can directly convert their flux balance format (from json, sbml, mat
+# files) into a ConstraintTree. These structures make it particularly easy to
+# formulate new constraints.
+
+ctmodel = flux_balance_constraints(model) # load the ConstraintTree of model
+
+# Notice, variables and constraints are grouped here.
+
+ctmodel.fluxes # all variables
+
+#
+
+ctmodel.flux_stoichiometry # mass balance constraints
+
+#
+
+ctmodel.objective # objective (usually specified in the model as a biomass function), notice it does not have a bound
+
+# ## Customizing the model
+
+# ConstraintTrees make it simple to modify the model. The most explicit way to
+# do this is to make a new constraint tree representation of the model. But
+# first, let's make a new constraint to represent fermentation fluxes.
+
+fermentation = ctmodel.fluxes.EX_ac_e.value + ctmodel.fluxes.EX_etoh_e.value # acetate and ethanol fluxes are grouped
 
-fermentation = ctmodel.fluxes.EX_ac_e.value + ctmodel.fluxes.EX_etoh_e.value
+fermentation_constraint = C.Constraint(fermentation, (10.0, 1000.0)) # create a new constraint, bounding the flux
 
-forced_mixed_fermentation =
-    ctmodel * :fermentation^C.Constraint(fermentation, (10.0, 1000.0)) # new modified model is created
+fermentation_constrainttree = :fermentation^fermentation_constraint # create a new ConstraintTree, naming this constraint
+
+forced_mixed_fermentation = ctmodel * fermentation_constrainttree # new modified model is created
+
+# ConstraintTrees can be directly solved. The variables and constraints are
+# automatically parsed into a JuMP model, which is subsequently solved. Note,
+# you need to specify the objective.
 
 vt = optimized_values(
     forced_mixed_fermentation,
@@ -71,8 +95,6 @@ vt = optimized_values(
 
 ctmodel.fluxes.ATPM.bound = C.Between(1000.0, 10000.0)
 
-#TODO explicitly show here how false sharing looks like
-
 vt = optimized_values(
     ctmodel,
     objective = ctmodel.objective.value,
@@ -81,13 +103,65 @@ vt = optimized_values(
 
 @test isnothing(vt) #src
 
-# Models can also be piped into the analysis functions
+# In general, every attribute of a ConstraintTree can be modified. Using some
+# building block functions, complicated models can be formulated. Here we will
+# create a model with only positive fluxes, by splitting all the reactions into
+# two components (forward and reverse components). This is frequently a first
+# step in building more complicated models.
 
-ctmodel.fluxes.ATPM.bound = C.Between(8.39, 10000.0) # revert
-vt = optimized_values(
-    ctmodel,
-    objective = ctmodel.objective.value,
+positive_model = deepcopy(ctmodel)
+
+positive_model += sign_split_variables( # notice the +
+    positive_model.fluxes,
+    positive = :fluxes_forward,
+    negative = :fluxes_reverse,
+)
+
+#md # !!! warning "Warning: Take care between + and * operators for ConstraintTrees"
+#md #     For ConstraintTrees, `+` adds new variables to a model, and `*` adds constraints to a model with the assumption that the variables they reference are already in the model.
+
+# After creating the new variables, we need to link them to the original
+# variables, using constraints.
+
+positive_model *=
+    :pos_neg_flux_link^sign_split_constraints(; # notice the *
+        positive = positive_model.fluxes_forward,
+        negative = positive_model.fluxes_reverse,
+        signed = positive_model.fluxes,
+    )
+
+# Next, we can specify a new objective, minimizing the sum of all positive fluxes.
+
+positive_model *=
+    :l1_objective^C.Constraint(
+        sum(C.value(v) for v in values(positive_model.fluxes_forward)) +
+        sum(C.value(v) for v in values(positive_model.fluxes_reverse)),
+        nothing, # no bound
+    )
+
+# Notice how easy it was to sum up all the fluxes in the forward and reverse
+# directions. Also, we did not lose any information, as the new variables and
+# objective are just layered on top of the original model.
+
+# Next, we specify a specific growth rate, as a new constraint (making its
+# removal simple later).
+
+positive_model *=
+    :growth_rate_setpoint^C.Constraint(
+        C.value(positive_model.fluxes.BIOMASS_Ecoli_core_w_GAM),
+        C.EqualTo(0.6), # 1/h
+    )
+
+l1_sol = optimized_values(
+    positive_model,
+    objective = positive_model.l1_objective.value,
     optimizer = GLPK.Optimizer,
+    sense = COBREXA.Minimal,
 )
 
-@test isapprox(vt.objective, 0.8739, atol = TEST_TOLERANCE) #src
+# Removing constraints is simple.
+
+delete!(positive_model, :l1_objective)
+
+#md # !!! warning "Warning: Take care to keep your model consistent"
+#md #     While ConstraintTrees gives you the power to very simply create complex models, it does not guard you against making the internal structure inconsistent (e.g. changing the bounds of the positive variables to allow negative numbers, messing with the link constraints, etc.).
diff --git a/docs/src/examples/02d-flux-variability-analysis.jl b/docs/src/examples/02d-flux-variability-analysis.jl
@@ -16,7 +16,9 @@
 
 # # Flux variability analysis (FVA)
 
-# TODO commentary
+# FVA performs FBA to find an optimal objective, and then constrains this
+# objective to lie within certain bounds. Thereafter, each other reaction is
+# minimized and maximized to identify its maximum variability.
 
 using COBREXA
 
@@ -30,13 +32,19 @@ import JSONFBCModels, GLPK
 
 model = load_model("e_coli_core.json")
 
+# Most of the basic analysis functions have standardized frontend functions,
+# making it easy to call them.
+
 solution = flux_variability_analysis(model, optimizer = GLPK.Optimizer)
 
 @test isapprox(solution.ACALD[1], -2.542370370370188, atol = TEST_TOLERANCE) #src
 @test isapprox(solution.ACALD[2], 0.0, atol = TEST_TOLERANCE) #src
 
 # ## Specifying bounds
 
+# Options for FVA include changing the variability tolerances on the objective,
+# or making use of parallel processing, to speed up computations.
+
 very_close = flux_variability_analysis(
     model,
     optimizer = GLPK.Optimizer,
@@ -48,3 +56,11 @@ one_percent_close = flux_variability_analysis(
     optimizer = GLPK.Optimizer,
     objective_bound = relative_tolerance_bound(0.99),
 )
+
+using Distributed
+addprocs(2) # add workers to distribute optimization problem across more CPUs
+@everywhere using COBREXA, GLPK # load packages on workers
+
+solution = flux_variability_analysis(model, optimizer = GLPK.Optimizer; workers = workers()) # distribute work to more cores
+
+rmprocs(workers()...) # cleanup