Merge pull request #26 from SciNim/differentiation
HugoGranstrom authored May 9, 2022
2 parents 0170715 + d9827d5 commit 614cb30
Showing 7 changed files with 1,010 additions and 61 deletions.
30 changes: 30 additions & 0 deletions changelog.md
@@ -1,3 +1,33 @@
# v0.8.0 - 09.05.2022
## Optimization has joined the chat
Multi-variate optimization and differentiation have been introduced.

- `numericalnim/differentiate` offers `tensorGradient(f, x)`, which calculates the gradient of `f` w.r.t. `x` using finite differences, along with `tensorJacobian` (returns the transpose of the gradient), `tensorHessian` and `mixedDerivative`. It also provides `checkGradient(f, analyticGrad, x, tol)` to verify that an analytic gradient is correct by comparing it to the finite difference approximation.
- `numericalnim/optimize` now has several multi-variate optimization methods:
  - `steepestDescent`
  - `newton`
  - `bfgs`
  - `lbfgs`
  - They all have function signatures of the form (see the usage sketch at the end of this entry):
    ```nim
    proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = bfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U]
    ```
    where `f` is the function to be minimized, `x0` is the starting guess, `options` contains settings such as the tolerance (each method has its own options type, which can be created by for example `lbfgsOptions` or `newtonOptions`), and `analyticGradient` can be supplied to avoid finite difference approximations of the derivatives.
  - Four line search methods are supported and are selected in the `options`: `Armijo, Wolfe, WolfeStrong, NoLineSearch`.
- `levmarq`: non-linear least-squares optimizer
  ```nim
  proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U]()): Tensor[U]
  ```
  - `f` is the function you want to fit; it takes the parameters `params` and the value `x` at which to evaluate the function.
  - `params0` is the initial guess for the parameters.
  - `xData` is a 1D Tensor with the x points and `yData` is a 1D Tensor with the y points.
  - `options` can be created using `levmarqOptions`.
  - Returns the final parameters.
Note: There are basic tests to ensure these methods converge for simple problems, but they have not been tested on more complex problems and should be considered experimental until more testing has been done. Please try them out, but don't rely on them for anything important for now. The API is also not set in stone yet, so expect that it may change in future versions.
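
A minimal end-to-end sketch of the new APIs, assuming the signatures quoted above (numericalnim v0.8.0 with Arraymancer); the helper procs `f`, `fGrad` and `model`, the data, and the expected values in the comments are illustrative only:
```nim
import arraymancer
import numericalnim

# Simple convex test function: f(x) = sum_i (x_i - 1)^2, minimum at x = [1, 1, 1].
proc f(x: Tensor[float]): float =
  for xi in x:
    result += (xi - 1.0) * (xi - 1.0)

# Analytic gradient: grad(f)_i = 2*(x_i - 1).
proc fGrad(x: Tensor[float]): Tensor[float] =
  result = newTensor[float](x.shape[0])
  for i in 0 ..< x.shape[0]:
    result[i] = 2.0 * (x[i] - 1.0)

let x0 = zeros[float](3)

# Differentiation: finite difference gradient and a check against the analytic one.
echo tensorGradient(f, x0)             # expected to be close to [-2, -2, -2]
echo checkGradient(f, fGrad, x0, 1e-4) # expected to return true

# Multi-variate optimization: minimize f starting from x0.
echo bfgs(f, x0, analyticGradient = fGrad) # expected to approach [1, 1, 1]

# Non-linear least squares: fit y = a*x + b to data generated from y = 2*x + 1.
proc model(params: Tensor[float], x: float): float =
  params[0] * x + params[1]

let xData = @[0.0, 1.0, 2.0, 3.0].toTensor
let yData = @[1.0, 3.0, 5.0, 7.0].toTensor
echo levmarq(model, @[0.0, 0.0].toTensor, xData, yData) # expected to approach [2, 1]
```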
# v0.7.1 - 25.01.2022
Add a `nimCI` task for the Nim CI to run now that the tests have external dependencies.
2 changes: 1 addition & 1 deletion numericalnim.nimble
@@ -1,5 +1,5 @@
# Package Information
version = "0.7.1"
version = "0.8.0"
author = "Hugo Granström"
description = "A collection of numerical methods written in Nim. Current features: integration, ode, optimization."
license = "MIT"
2 changes: 2 additions & 0 deletions src/numericalnim.nim
@@ -8,5 +8,7 @@ import numericalnim/optimize
export optimize
import numericalnim/interpolate
export interpolate
import numericalnim/differentiate
export differentiate
import ./numericalnim/common/commonTypes
export commonTypes
199 changes: 199 additions & 0 deletions src/numericalnim/differentiate.nim
@@ -0,0 +1,199 @@
import std/strformat
import arraymancer

proc diff1dForward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses forward difference which has accuracy O(h)
  result = (f(x0 + h) - f(x0)) / h

proc diff1dBackward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses backward difference which has accuracy O(h)
  result = (f(x0) - f(x0 - h)) / h

proc diff1dCentral*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the derivative of f(x) at x0 using a step size h.
  ## Uses central difference which has accuracy O(h^2)
  result = (f(x0 + h) - f(x0 - h)) / (2*h)

proc secondDiff1dForward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  result = (f(x0 + 2*h) - 2*f(x0 + h) + f(x0)) / (h*h)

proc secondDiff1dBackward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  result = (f(x0) - 2*f(x0 - h) + f(x0 - 2*h)) / (h*h)

proc secondDiff1dCentral*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T =
  ## Numerically calculate the second derivative of f(x) at x0 using a step size h.
  ## Uses central difference which has accuracy O(h^2)
  result = (f(x0 + h) - 2*f(x0) + f(x0 - h)) / (h*h)

proc tensorGradient*[U; T: not Tensor](
  f: proc(x: Tensor[U]): T,
  x0: Tensor[U],
  h: U = U(1e-6),
  fastMode: bool = false
): Tensor[T] =
  ## Calculates the gradient of f(x) w.r.t. vector x at x0 using step size h.
  ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative.
  ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate.
  assert x0.rank == 1 # must be a 1d vector
  let f0 = f(x0) # only used when fastMode is true (forward difference)
  let xLen = x0.shape[0]
  result = newTensor[T](xLen)
  var x = x0.clone()
  for i in 0 ..< xLen:
    x[i] += h
    let fPlusH = f(x)
    if fastMode:
      x[i] -= h # restore to original
      result[i] = (fPlusH - f0) / h
    else:
      x[i] -= 2*h
      let fMinusH = f(x)
      x[i] += h # restore to original (± float error)
      result[i] = (fPlusH - fMinusH) / (2 * h)

proc tensorGradient*[U, T](
  f: proc(x: Tensor[U]): Tensor[T],
  x0: Tensor[U],
  h: U = U(1e-6),
  fastMode: bool = false
): Tensor[T] =
  ## Calculates the gradient of f(x) w.r.t. vector x at x0 using step size h.
  ## Every column is the gradient of one component of f.
  ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative.
  ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate.
  assert x0.rank == 1 # must be a 1d vector
  let f0 = f(x0) # needed for the output shape; also reused when fastMode is true (forward difference)
  assert f0.rank == 1
  let rows = x0.shape[0]
  let cols = f0.shape[0]
  result = newTensor[T](rows, cols)
  var x = x0.clone()
  for i in 0 ..< rows:
    x[i] += h
    let fPlusH = f(x)
    if fastMode:
      x[i] -= h # restore to original
      result[i, _] = ((fPlusH - f0) / h).reshape(1, cols)
    else:
      x[i] -= 2*h
      let fMinusH = f(x)
      x[i] += h # restore to original (± float error)
      result[i, _] = ((fPlusH - fMinusH) / (2 * h)).reshape(1, cols)

proc tensorJacobian*[U, T](
  f: proc(x: Tensor[U]): Tensor[T],
  x0: Tensor[U],
  h: U = U(1e-6),
  fastMode: bool = false
): Tensor[T] =
  ## Calculates the jacobian of f(x) w.r.t. vector x at x0 using step size h.
  ## Every row is the gradient of one component of f.
  ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative.
  ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate.
  transpose(tensorGradient(f, x0, h, fastMode))

proc mixedDerivative*[U, T](f: proc(x: Tensor[U]): T, x0: var Tensor[U], indices: (int, int), h: U = U(1e-6)): T =
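  ## Numerically calculate the mixed derivative d²f/(dx_i dx_j) of f(x) at x0 using step size h,
  ## where (i, j) is given by `indices`. Uses the central difference stencil
  ## (f(+h,+h) - f(+h,-h) - f(-h,+h) + f(-h,-h)) / (4*h^2).
  ## `x0` is modified in place during the computation but restored before returning.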
  result = 0
  let i = indices[0]
  let j = indices[1]
  # f(x+h, y+h)
  x0[i] += h
  x0[j] += h
  result += f(x0)

  # f(x+h, y-h)
  x0[j] -= 2*h
  result -= f(x0)

  # f(x-h, y-h)
  x0[i] -= 2*h
  result += f(x0)

  # f(x-h, y+h)
  x0[j] += 2*h
  result -= f(x0)

  # restore x0
  x0[i] += h
  x0[j] -= h

  result *= 1 / (4 * h*h)


proc tensorHessian*[U; T: not Tensor](
  f: proc(x: Tensor[U]): T,
  x0: Tensor[U],
  h: U = U(1e-6)
): Tensor[T] =
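  ## Numerically calculate the Hessian matrix of f(x) w.r.t. vector x at x0 using step size h.
  ## Element [i, j] is the mixed second derivative d²f/(dx_i dx_j), computed with `mixedDerivative`;
  ## the result is symmetric by construction.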
  assert x0.rank == 1 # must be a 1d vector
  let f0 = f(x0)
  let xLen = x0.shape[0]
  var x = x0.clone()
  result = zeros[T](xLen, xLen)
  for i in 0 ..< xLen:
    for j in i ..< xLen:
      let mixed = mixedDerivative(f, x, (i, j), h)
      result[i, j] = mixed
      result[j, i] = mixed

proc checkGradient*[U; T: not Tensor](f: proc(x: Tensor[U]): T, fGrad: proc(x: Tensor[U]): Tensor[T], x0: Tensor[U], tol: T): bool =
  ## Checks if the provided gradient function `fGrad` gives the same values as the numeric gradient.
  let numGrad = tensorGradient(f, x0)
  let grad = fGrad(x0)
  result = true
  for i, x in abs(numGrad - grad):
    if x > tol:
      echo fmt"Gradient at index {i[0]} has error: {x} (tol = {tol})"
      result = false

proc checkGradient*[U; T](f: proc(x: Tensor[U]): Tensor[T], fGrad: proc(x: Tensor[U]): Tensor[T], x0: Tensor[U], tol: T): bool =
  ## Checks if the provided gradient function `fGrad` gives the same values as the numeric gradient.
  let numGrad = tensorGradient(f, x0)
  let grad = fGrad(x0)
  result = true
  for i, x in abs(numGrad - grad):
    if x > tol:
      echo fmt"Gradient at index {i[0]} has error: {x} (tol = {tol})"
      result = false



when isMainModule:
  import std/math
  import benchy
  proc f1(x: Tensor[float]): Tensor[float] =
    x.sum(0)
  let x0 = ones[float](10)
  echo tensorGradient(f1, x0, 1e-6)
  echo tensorGradient(f1, x0, 1e-6, true)
  echo tensorJacobian(f1, x0, 1e-6)

  proc f2(x: Tensor[float]): float =
    sum(x)
  echo tensorGradient(f2, x0, 1e-6)
  echo tensorGradient(f2, x0, 1e-6, true)

  let N = 1000
  timeIt "slow mode":
    for i in 0 .. N:
      keep tensorGradient(f1, x0, 1e-6, false)
  timeIt "fast mode":
    for i in 0 .. N:
      keep tensorGradient(f1, x0, 1e-6, true)
  timeIt "slow mode float":
    for i in 0 .. N:
      keep tensorGradient(f2, x0, 1e-6, false)
  timeIt "fast mode float":
    for i in 0 .. N:
      keep tensorGradient(f2, x0, 1e-6, true)
  timeIt "jacobian slow":
    for i in 0 .. N:
      keep tensorJacobian(f1, x0, 1e-6, false)



