Merge pull request #583 from SciML/docs
Create comprehensive documentation for differentiation and sensitivity
ChrisRackauckas authored May 29, 2022
2 parents 7496485 + c49f86d commit c0970c7
Showing 48 changed files with 7,547 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/Documentation.yml
@@ -0,0 +1,24 @@
name: Documentation

on:
  push:
    branches:
      - master
    tags: '*'
  pull_request:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: julia-actions/setup-julia@v1
        with:
          version: '1'
      - name: Install dependencies
        run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
      - name: Build and deploy
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
        run: julia --project=docs/ docs/make.jl
1 change: 1 addition & 0 deletions Project.toml
@@ -24,6 +24,7 @@ GlobalSensitivity = "af5da776-676b-467e-8baf-acd8249e4f0f"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
5 changes: 5 additions & 0 deletions docs/Project.toml
@@ -0,0 +1,5 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"

[compat]
Documenter = "0.27"
83 changes: 83 additions & 0 deletions docs/make.jl
@@ -0,0 +1,83 @@
using Documenter, DiffEqSensitivity

makedocs(
    sitename = "DiffEqSensitivity.jl",
    authors = "Chris Rackauckas et al.",
    clean = true,
    doctest = false,
    modules = [DiffEqSensitivity],

    format = Documenter.HTML(#analytics = "",
                             assets = ["assets/favicon.ico"],
                             canonical = "https://sensitivity.sciml.ai/stable/"),
    pages = [
        "DiffEqSensitivity.jl: Automatic Differentiation and Adjoints for (Differential) Equation Solvers" => "index.md",
        "Differentiating Ordinary Differential Equations (ODE) Tutorials" => Any[
            "ad_examples/differentiating_ode.md",
            "ad_examples/chaotic_ode.md",
        ],
        "Fitting Ordinary Differential Equation (ODE) Tutorials" => Any[
            "ode_fitting/optimization_ode.md",
            "ode_fitting/stiff_ode_fit.md",
            "ode_fitting/exogenous_input.md",
            "ode_fitting/data_parallel.md",
            "ode_fitting/prediction_error_method.md",
            "ode_fitting/second_order_adjoints.md",
            "ode_fitting/second_order_neural.md",
        ],
        "Training Techniques and Tips" => Any[
            "training_tips/local_minima.md",
            "training_tips/divergence.md",
            "training_tips/multiple_nn.md",
        ],
        "Neural Ordinary Differential Equation (Neural ODE) Tutorials" => Any[
            "neural_ode/neural_ode_galacticoptim.md",
            "neural_ode/neural_ode_flux.md",
            "neural_ode/mnist_neural_ode.md",
            "neural_ode/mnist_conv_neural_ode.md",
            "neural_ode/GPUs.md",
            "neural_ode/neural_gde.md",
            "neural_ode/minibatch.md",
        ],
        "Stochastic Differential Equation (SDE) Tutorials" => Any[
            "sde_fitting/optimization_sde.md",
            "sde_fitting/neural_sde.md",
        ],
        "Delay Differential Equation (DDE) Tutorials" => Any[
            "dde_fitting/delay_diffeq.md",
        ],
        "Differential-Algebraic Equation (DAE) Tutorials" => Any[
            "dae_fitting/physical_constraints.md",
        ],
        "Partial Differential Equation (PDE) Tutorials" => Any[
            "pde_fitting/pde_constrained.md",
        ],
        "Hybrid and Jump Equation Tutorials" => Any[
            "hybrid_jump_fitting/hybrid_diffeq.md",
            "hybrid_jump_fitting/bouncing_ball.md",
        ],
        "Bayesian Estimation Tutorials" => Any[
            "bayesian/turing_bayesian.md",
            "bayesian/BayesianNODE_NUTS.md",
            "bayesian/BayesianNODE_SGLD.md",
        ],
        "Optimal and Model Predictive Control Tutorials" => Any[
            "optimal_control/optimal_control.md",
            "optimal_control/feedback_control.md",
            "optimal_control/SDE_control.md",
        ],
        "Manual and APIs" => Any[
            "manual/differential_equation_sensitivities.md",
            "manual/nonlinear_solve_sensitivities.md",
            "manual/direct_forward_sensitivity.md",
            "manual/direct_adjoint_sensitivities.md",
        ],
        "Benchmarks" => "Benchmark.md",
        "Sensitivity Math Details" => "sensitivity_math.md",
    ]
)

deploydocs(
    repo = "github.com/SciML/DiffEqSensitivity.jl.git";
    push_preview = true
)
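The two steps the CI workflow runs can be reproduced locally from the repository root to preview the built site; a minimal sketch, assuming a local checkout of the package:

```julia
# Mirror the CI steps: develop the local package into the docs environment,
# instantiate it, then run the build script (Documenter writes to docs/build/).
using Pkg
Pkg.activate("docs")
Pkg.develop(PackageSpec(path = pwd()))
Pkg.instantiate()
include(joinpath("docs", "make.jl"))
```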
148 changes: 148 additions & 0 deletions docs/src/Benchmark.md
@@ -0,0 +1,148 @@
# Benchmarks

## Vs Torchdiffeq on 1 million and fewer ODEs

A raw ODE solver benchmark showcases [>30x performance advantage for DifferentialEquations.jl](https://gist.github.com/ChrisRackauckas/cc6ac746e2dfd285c28e0584a2bfd320)
for ODEs ranging in size from 3 to nearly 1 million equations.

## Vs Torchdiffeq on neural ODE training

A training benchmark using the spiral ODE from the original neural ODE paper
[demonstrates a 100x performance advantage for DiffEqFlux in training neural ODEs](https://gist.github.com/ChrisRackauckas/4a4d526c15cc4170ce37da837bfc32c4).

## Vs torchsde on small SDEs

Using the code from torchsde's README, we demonstrated a [>70,000x performance
advantage over torchsde](https://gist.github.com/ChrisRackauckas/6a03e7b151c86b32d74b41af54d495c6).
Further benchmarking is planned, but it has so far proven computationally
infeasible to run torchsde on larger problems.

## A bunch of adjoint choices on neural ODEs

Quick summary:

- `BacksolveAdjoint` can be the fastest (but use with caution!): about 25% faster than the alternatives
- `ZygoteVJP` is faster than the other VJP choices when using `FastDense`, due to its overloads

```julia
using DiffEqFlux, OrdinaryDiffEq, Flux, Optim, Plots, DiffEqSensitivity,
      Zygote, BenchmarkTools, Random

u0 = Float32[2.0; 0.0]
datasize = 30
tspan = (0.0f0, 1.5f0)
tsteps = range(tspan[1], tspan[2], length = datasize)

function trueODEfunc(du, u, p, t)
    true_A = [-0.1 2.0; -2.0 -0.1]
    du .= ((u.^3)'true_A)'
end

prob_trueode = ODEProblem(trueODEfunc, u0, tspan)
ode_data = Array(solve(prob_trueode, Tsit5(), saveat = tsteps))

dudt2 = FastChain((x, p) -> x.^3,
                  FastDense(2, 50, tanh),
                  FastDense(50, 2))
Random.seed!(100)
p = initial_params(dudt2)

prob_neuralode = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps)

function loss_neuralode(p)
    pred = Array(prob_neuralode(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode, p)
# 2.709 ms (56506 allocations: 6.62 MiB)

prob_neuralode_interpolating = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps,
                                         sensealg = InterpolatingAdjoint(autojacvec = ReverseDiffVJP(true)))

function loss_neuralode_interpolating(p)
    pred = Array(prob_neuralode_interpolating(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode_interpolating, p)
# 5.501 ms (103835 allocations: 2.57 MiB)

prob_neuralode_interpolating_zygote = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps,
                                                sensealg = InterpolatingAdjoint(autojacvec = ZygoteVJP()))

function loss_neuralode_interpolating_zygote(p)
    pred = Array(prob_neuralode_interpolating_zygote(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode_interpolating_zygote, p)
# 2.899 ms (56150 allocations: 6.61 MiB)

prob_neuralode_backsolve = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps,
                                     sensealg = BacksolveAdjoint(autojacvec = ReverseDiffVJP(true)))

function loss_neuralode_backsolve(p)
    pred = Array(prob_neuralode_backsolve(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode_backsolve, p)
# 4.871 ms (85855 allocations: 2.20 MiB)

prob_neuralode_quad = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps,
                                sensealg = QuadratureAdjoint(autojacvec = ReverseDiffVJP(true)))

function loss_neuralode_quad(p)
    pred = Array(prob_neuralode_quad(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode_quad, p)
# 11.748 ms (79549 allocations: 3.87 MiB)

prob_neuralode_backsolve_tracker = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps,
                                             sensealg = BacksolveAdjoint(autojacvec = TrackerVJP()))

function loss_neuralode_backsolve_tracker(p)
    pred = Array(prob_neuralode_backsolve_tracker(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode_backsolve_tracker, p)
# 27.604 ms (186143 allocations: 12.22 MiB)

prob_neuralode_backsolve_zygote = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps,
                                            sensealg = BacksolveAdjoint(autojacvec = ZygoteVJP()))

function loss_neuralode_backsolve_zygote(p)
    pred = Array(prob_neuralode_backsolve_zygote(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode_backsolve_zygote, p)
# 2.091 ms (49883 allocations: 6.28 MiB)

prob_neuralode_backsolve_false = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps,
                                           sensealg = BacksolveAdjoint(autojacvec = ReverseDiffVJP(false)))

function loss_neuralode_backsolve_false(p)
    pred = Array(prob_neuralode_backsolve_false(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode_backsolve_false, p)
# 4.822 ms (9956 allocations: 1.03 MiB)

prob_neuralode_tracker = NeuralODE(dudt2, tspan, Tsit5(), saveat = tsteps,
                                   sensealg = TrackerAdjoint())

function loss_neuralode_tracker(p)
    pred = Array(prob_neuralode_tracker(u0, p))
    loss = sum(abs2, ode_data .- pred)
    return loss
end

@btime Zygote.gradient(loss_neuralode_tracker, p)
# 12.614 ms (76346 allocations: 3.12 MiB)
```
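The `sensealg` choices above are not specific to `NeuralODE`: for a plain `ODEProblem`, the same adjoints can be selected through `solve`'s `sensealg` keyword when differentiating with Zygote. A minimal sketch under the same dynamics as the benchmark (the names `f!`, `loss`, and `dp` are illustrative):

```julia
using OrdinaryDiffEq, DiffEqSensitivity, Zygote

u0 = Float32[2.0; 0.0]
p = Float32[-0.1, 2.0, -2.0, -0.1]

# Same cubic dynamics as trueODEfunc, with the matrix entries as parameters
function f!(du, u, p, t)
    A = [p[1] p[2]; p[3] p[4]]
    du .= ((u .^ 3)'A)'
end

prob = ODEProblem(f!, u0, (0.0f0, 1.5f0), p)

# Scalar loss over the saved trajectory; the adjoint choice is passed to solve.
# ReverseDiffVJP is used here because f! mutates du, which Zygote-based VJPs do not support.
loss(p) = sum(abs2, Array(solve(prob, Tsit5(), p = p, saveat = 0.1f0,
                                sensealg = InterpolatingAdjoint(autojacvec = ReverseDiffVJP(true)))))

dp, = Zygote.gradient(loss, p)
```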
