Merge pull request #284 from LCSB-BioCore/mk-fix-har
Clean up hit-and-run sampling
exaexa committed May 27, 2021
2 parents 88babb4 + f179860 commit 4198efd
Showing 12 changed files with 284 additions and 353 deletions.
1 change: 0 additions & 1 deletion Project.toml
@@ -11,7 +11,6 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
-MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
1 change: 0 additions & 1 deletion src/COBREXA.jl
@@ -13,7 +13,6 @@ using Random
 using Serialization
 using SparseArrays
 using Statistics
-using MCMCChains
 
 import Base: findfirst, getindex, show
 import Pkg
138 changes: 138 additions & 0 deletions src/analysis/sampling/affine_hit_and_run.jl
@@ -0,0 +1,138 @@
"""
function affine_hit_and_run(
warmup_points::Matrix{Float64},
lbs::Vector{Float64},
ubs::Vector{Float64};
sample_iters = 100 .* (1:5),
workers = [myid()],
chains = length(workers),
)

Run a hit-and-run style sampling that starts from `warmup_points` and uses
their affine combinations to generate the run directions, sampling the space
delimited by `lbs` and `ubs`. The points that represent fluxes in
`warmup_points` should be organized in columns, i.e. `warmup_points[:,1]` is
the first warmup flux.

In total, `chains` hit-and-run runs are executed, each on a batch of
`size(warmup_points, 2)` points. The runs are scheduled on `workers`; for good
load balancing, `chains` should ideally be much greater than `length(workers)`.

Each run continues for `maximum(sample_iters)` iterations; the numbers in
`sample_iters` give the iterations at which the whole "current" batch of
points is collected for output. For example, `sample_iters=[1,4,5]` causes the
process to run for 5 iterations, returning the sample batches produced by the
1st, 4th, and last (5th) iteration.

Returns a matrix of sampled fluxes (in columns), with all collected samples
horizontally concatenated. The total number of samples (columns) will be
`size(warmup_points,2) * chains * length(sample_iters)`.

# Example
```
using COBREXA
using Tulip
model = load_model(StandardModel, model_path)
warmup, lbs, ubs = warmup_from_variability(model, Tulip.Optimizer, 100)
samples = affine_hit_and_run(warmup, lbs, ubs, sample_iters = 1:3)
```
"""
function affine_hit_and_run(
warmup_points::Matrix{Float64},
lbs::Vector{Float64},
ubs::Vector{Float64};
sample_iters = 100 .* (1:5),
workers = [myid()],
chains = length(workers),
)

# distribute starting data to workers
save_at.(workers, :cobrexa_hit_and_run_data, Ref((warmup_points, lbs, ubs)))

# sample all chains
samples = hcat(
dpmap(
chain -> :($COBREXA._affine_hit_and_run_chain(
cobrexa_hit_and_run_data...,
$sample_iters,
$chain,
)),
CachingPool(workers),
1:chains,
)...,
)

# remove warmup points from workers
map(fetch, remove_from.(workers, :cobrexa_hit_and_run_data))

return samples
end

"""
_affine_hit_and_run_chain(warmup, lbs, ubs, iters, chain)

Internal helper function for computing a single affine hit-and-run chain. The
number of the chain is passed for possible future initialization of stable
RNGs.
"""
function _affine_hit_and_run_chain(warmup, lbs, ubs, iters, chain)

points = copy(warmup)
d, n_points = size(points)
result = Matrix{Float64}(undef, size(points, 1), 0)

iter = 0

for iter_target in iters

while iter < iter_target
iter += 1

new_points = copy(points)

for i = 1:n_points

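# choose a direction from the current point towards a random affine
# (convex) combination of the whole batch; the added tolerance keeps all
# combination weights strictly positive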
mix = rand(n_points) .+ _constants.tolerance
dir = points * (mix ./ sum(mix)) - points[:, i]

# iteratively collect the maximum and minimum possible multiple
# of `dir` added to the current point
λmax = Inf
λmin = -Inf
for j = 1:d
dl = lbs[j] - points[j, i]
du = ubs[j] - points[j, i]
idir = 1 / dir[j]
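# a negative direction component flips which bound limits λ from
# below and which from above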
if dir[j] < -_constants.tolerance
lower = du * idir
upper = dl * idir
elseif dir[j] > _constants.tolerance
lower = dl * idir
upper = du * idir
else
lower = -Inf
upper = Inf
end
λmin = max(λmin, lower)
λmax = min(λmax, upper)
end

λ = λmin + rand() * (λmax - λmin)
!isfinite(λ) && continue # avoid divergence
new_points[:, i] = points[:, i] .+ λ .* dir

# TODO normally, here we would check if sum(S*new_point) is still
# lower than the tolerance, but we shall trust the computer
# instead.
end

points = new_points
end

result = hcat(result, points)
end

result
end
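
A rough usage sketch of the new interface follows; the worker count, model file name, and iteration schedule are illustrative assumptions, not part of this diff:

```julia
# minimal sketch, assuming a local Distributed setup and an available model file
using Distributed
addprocs(4)                      # hypothetical: 4 local worker processes
@everywhere using COBREXA, Tulip

model = load_model(StandardModel, "e_coli_core.json")  # hypothetical model path

# warmup points and flux bounds, as in the docstring example above
warmup, lbs, ubs = warmup_from_variability(model, Tulip.Optimizer, 100)

# run 8 chains spread over all workers, collecting the current batch
# after 200, 400, ..., 1000 iterations
samples = affine_hit_and_run(
    warmup,
    lbs,
    ubs,
    sample_iters = 200 .* (1:5),
    workers = workers(),
    chains = 8,
)

# number of columns = size(warmup, 2) * chains * length(sample_iters)
size(samples)
```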
182 changes: 0 additions & 182 deletions src/analysis/sampling/hit_and_run.jl

This file was deleted.

55 changes: 0 additions & 55 deletions src/analysis/sampling/warmup.jl

This file was deleted.
