-
-
Notifications
You must be signed in to change notification settings - Fork 32
/
arnoldi.jl
329 lines (282 loc) · 10.5 KB
/
arnoldi.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# Arnoldi/Lanczos iteration algorithms (with custom IOP)
#######################################
# Output type/cache
"""
KrylovSubspace{T}(n,[maxiter=30]) -> Ks
Constructs an uninitialized Krylov subspace, which can be filled by `arnoldi!`.
The dimension of the subspace, `Ks.m`, can be dynamically altered but should
be smaller than `maxiter`, the maximum allowed arnoldi iterations.
getV(Ks) -> V
getH(Ks) -> H
Access methods for the (extended) orthonormal basis `V` and the (extended)
Gram-Schmidt coefficients `H`. Both methods return a view into the storage
arrays and has the correct dimensions as indicated by `Ks.m`.
resize!(Ks, maxiter) -> Ks
Resize `Ks` to a different `maxiter`, destroying its contents.
This is an expensive operation and should be used scarcely.
"""
mutable struct KrylovSubspace{T, U, B, VType <: AbstractMatrix{T}, HType <: AbstractMatrix{U}}
m::Int # subspace dimension
maxiter::Int # maximum allowed subspace size
augmented::Int# length of the augmented part
beta::B # norm(b,2)
V::VType # orthonormal bases
H::HType # Gram-Schmidt coefficients (real for Hermitian matrices)
end
function KrylovSubspace{T,U}(n::Integer, maxiter::Integer=30, augmented::Integer=false) where {T,U}
V = Matrix{T}(undef, n + augmented, maxiter + 1)
H = fill(zero(U), maxiter + 1, maxiter + !iszero(augmented))
return KrylovSubspace{T, U, real(T), Matrix{T}, Matrix{U}}(maxiter, maxiter, augmented, zero(real(T)), V, H)
end
KrylovSubspace{T}(args...) where {T} = KrylovSubspace{T,T}(args...)
getV(Ks::KrylovSubspace) = @view(Ks.V[:, 1:Ks.m + 1])
getH(Ks::KrylovSubspace) = @view(Ks.H[1:Ks.m + 1, 1:Ks.m+!iszero(Ks.augmented)])
function Base.resize!(Ks::KrylovSubspace{T,U}, maxiter::Integer) where {T,U}
isaugmented = !iszero(Ks.augmented)
V = similar(Ks.V, T, (size(Ks.V, 1), maxiter + 1))
H = similar(Ks.H, U, (maxiter + 1, maxiter + isaugmented))
fill!(H, zero(U))
if isaugmented
copyto!(@view(V[axes(Ks.V)...]), Ks.V)
copyto!(@view(H[axes(Ks.H)...]), Ks.H)
end
Ks.V = V; Ks.H = H
Ks.m = Ks.maxiter = maxiter
return Ks
end
function Base.show(io::IO, ::MIME"text/plain", Ks::KrylovSubspace)
io′ = IOContext(io, :limit => true, :displaysize => 3 .* displaysize(io) .÷ 7)
println(io, "$(Ks.m)-dimensional Krylov subspace with fields")
println(io, "beta: $(Ks.beta)")
println(io, "V:")
show(io′, "text/plain", getV(Ks))
println(io)
println(io, "H:")
show(io′, "text/plain", getH(Ks))
end
#######################################
# Arnoldi/Lanczos with custom IOP
## High-level interface
"""
arnoldi(A,b[;m,tol,opnorm,iop]) -> Ks
Performs `m` anoldi iterations to obtain the Krylov subspace K_m(A,b).
The n x (m + 1) basis vectors `getV(Ks)` and the (m + 1) x m upper Hessenberg
matrix `getH(Ks)` are related by the recurrence formula
```
v_1=b,\\quad Av_j = \\sum_{i=1}^{j+1}h_{ij}v_i\\quad(j = 1,2,\\ldots,m)
```
`iop` determines the length of the incomplete orthogonalization procedure [^1].
The default value of 0 indicates full Arnoldi. For symmetric/Hermitian `A`,
`iop` will be ignored and the Lanczos algorithm will be used instead.
Refer to `KrylovSubspace` for more information regarding the output.
Happy-breakdown occurs whenver `norm(v_j) < tol * opnorm`, in this case
the dimension of `Ks` is smaller than `m`.
[^1]: Koskela, A. (2015). Approximating the matrix exponential of an
advection-diffusion operator using the incomplete orthogonalization method. In
Numerical Mathematics and Advanced Applications-ENUMATH 2013 (pp. 345-353).
Springer, Cham.
"""
function arnoldi(A, b; m=min(30, size(A, 1)), ishermitian=LinearAlgebra.ishermitian(A), kwargs...)
TA, Tb = eltype(A), eltype(b)
T = promote_type(TA, Tb)
n = length(b)
U = ishermitian ? real(T) : T
V = similar(A, T, (n, m + 1))
H = similar(A, U, (m+1, m))
fill!(H, zero(U))
Ks = KrylovSubspace{T, U, real(T), typeof(V), typeof(H)}(m, m, false, zero(real(T)), V, H)
arnoldi!(Ks, A, b; m=m, ishermitian=ishermitian, kwargs...)
end
## Low-level interface
@inline function applyA!(y, A, x, V, j, n, p)
# We cannot add `@inbounds` to `mul!`, because it is provided by the user.
mul!(y, A, x)
return
end
# augmented
# split augmented V? so this runs on GPU
@inline function applyA!(y, A::Tuple, x, V, j, n, p)
A, B = A
@inbounds begin
# V[1:n, j + 1] = A * @view(V[1:n, j]) + B * @view(V[n+1:n+p, j])
mul!(@view(V[1:n, j + 1]), A, @view(V[1:n, j]))
BLAS.gemm!('N', 'N', 1.0, B, @view(V[n+1:n+p, j]), 1.0, @view(V[1:n, j + 1]))
copyto!(@view(V[n+1:n+p-1, j + 1]), @view(V[n+2:n+p, j]))
V[end, j + 1] = 0
end
return
end
function checkdims(A, b, V)
isaugmented = b isa Tuple # isaugmented
if isaugmented
b′, b_aug = b
n, p = length(b′), length(b_aug)
_A = first(A)
else
n, p = size(V, 1), 0
_A, b′, b_aug = A, b, nothing
end
length(b′) == size(_A,1) == size(_A,2) == size(V, 1)-p || throw(DimensionMismatch("length(b′) [$(length(b′))] == size(_A,1) [$(size(_A,1))] == size(_A,2) [$(size(_A,2))] == size(V, 1)-p [$(size(V, 1)-p)] doesn't hold"))
return b′, b_aug, n, p
end
##############################
# Utilities
##############################
"""
firststep!(Ks, V, H, b) -> nothing
Compute the first step of Arnoldi or Lanczos iteration.
"""
function firststep!(Ks::KrylovSubspace, V, H, b)
@inbounds begin
fill!(H, zero(eltype(H)))
Ks.beta = norm(b)
if !iszero(Ks.beta)
@. V[:, 1] = b / Ks.beta
end
end
end
"""
firststep!(Ks, V, H, b, b_aug, t, mu, l) -> nothing
Compute the first step of Arnoldi or Lanczos iteration of augmented system.
"""
function firststep!(Ks::KrylovSubspace, V, H, b, b_aug, t, mu, l)
@inbounds begin
n, p = length(b), length(b_aug)
map!(b_aug, 1:p) do k
k == p && return mu
i = p - k
return t^i/factorial(i) * mu
end
# Initialize the matrices V and H
fill!(H, 0)
# Normalize initial vector (this norm is nonzero)
bl = @view b[:, l]
Ks.beta = beta = sqrt(bl'bl + b_aug'b_aug)
if !iszero(Ks.beta)
# The first Krylov basis vector
@. V[1:n, 1] = bl / beta
@. V[n+1:n+p, 1] = b_aug / beta
end
end
end
##############################
# Arnoldi
##############################
"""
arnoldi_step!(j, iop, n, A, V, H)
Take the `j`:th step of the Lanczos iteration.
"""
function arnoldi_step!(j::Integer, iop::Integer, A::AT,
V::AbstractMatrix{T}, H::AbstractMatrix{U},
n::Int=-1, p::Int=-1) where {AT,T,U}
x, y = @view(V[:, j]), @view(V[:, j+1])
applyA!(y, A, x, V, j, n, p)
# NOTE: H should always be Array
# on CUDA, we prefer to perform dot
# using CUBLAS and store the result in Array
# since the size of H is rather small
@inbounds for i = max(1, j - iop + 1):j
α = H[i, j] = coeff(U, dot(@view(V[:, i]), y))
axpy!(-α, @view(V[:, i]), y)
end
β = H[j+1, j] = norm(y)
@. y /= β
return β
end
"""
arnoldi!(Ks,A,b[;tol,m,opnorm,iop,init]) -> Ks
Non-allocating version of `arnoldi`.
"""
function arnoldi!(Ks::KrylovSubspace{T1, U}, A::AT, b;
tol::Real=1e-7, m::Int=min(Ks.maxiter, size(A, 1)),
ishermitian::Bool=LinearAlgebra.ishermitian(A isa Tuple ? first(A) : A),
opnorm=nothing, iop::Int=0,
init::Int=0, t::Number=NaN, mu::Number=NaN, l::Int=-1) where {T1 <: Number, U <: Number, AT}
ishermitian && return lanczos!(Ks, A, b; tol=tol, m=m, init=init, t=t, mu=mu, l=l)
m > Ks.maxiter ? resize!(Ks, m) : Ks.m = m # might change if happy-breakdown occurs
@inbounds V, H = getV(Ks), getH(Ks)
b′, b_aug, n, p = checkdims(A, b, V)
if iszero(init)
isaugmented = AT <: Tuple
isaugmented ? firststep!(Ks::KrylovSubspace, V, H, b′, b_aug, t, mu, l) : firststep!(Ks::KrylovSubspace, V, H, b)
init = 1
end
iszero(Ks.beta) && return Ks
iszero(iop) && (iop = m)
for j = init:m
beta = arnoldi_step!(j, iop, A, V, H, n, p)
if beta < tol # happy-breakdown
Ks.m = j
break
end
end
return Ks
end
##############################
# Lanczos
##############################
"""
lanczos_step!(j, m, n, A, V, H)
Take the `j`:th step of the Lanczos iteration.
"""
function lanczos_step!(j::Integer, A,
V::AbstractMatrix{T},
u::AbstractVector{U},
v::AbstractVector{B},
n::Int=-1, p::Int=-1) where {B,T,U}
x, y = @view(V[:, j]),@view(V[:, j+1])
applyA!(y, A, x, V, j, n, p)
α = u[j] = coeff(U, dot(x, y))
axpy!(-α, x, y)
j > 1 && axpy!(-v[j-1], @view(V[:, j-1]), y)
β = v[j] = norm(y)
@. y /= β
return β
end
"""
coeff(::Type,α)
Helper functions that returns the real part if that is what is
required (for Hermitian matrices), otherwise returns the value
as-is.
"""
coeff(::Type{T},α::T) where {T} = α
coeff(::Type{U},α::T) where {U<:Real,T<:Complex} = real(α)
"""
realview(::Type, V) -> real view of `V`
"""
realview(::Type{R}, V::AbstractVector{C}) where {R,C<:Complex} =
@view(reinterpret(R, V)[1:2:end])
realview(::Type{R}, V::AbstractVector{R}) where {R} = V
"""
lanczos!(Ks,A,b[;tol,m,opnorm]) -> Ks
A variation of `arnoldi!` that uses the Lanczos algorithm for
Hermitian matrices.
"""
function lanczos!(Ks::KrylovSubspace{T1, U, B}, A::AT, b;
tol=1e-7, m=min(Ks.maxiter, size(A, 1)),
opnorm=nothing,
init::Int=0, t::Number=NaN, mu::Number=NaN, l::Int=-1) where {T1 <: Number, U <: Number, B, AT}
m > Ks.maxiter ? resize!(Ks, m) : Ks.m = m # might change if happy-breakdown occurs
@inbounds V, H = getV(Ks), getH(Ks)
b′, b_aug, n, p = checkdims(A, b, V)
if iszero(init)
isaugmented = AT <: Tuple
isaugmented ? firststep!(Ks::KrylovSubspace, V, H, b′, b_aug, t, mu, l) : firststep!(Ks::KrylovSubspace, V, H, b)
init = 1
end
iszero(Ks.beta) && return Ks
@inbounds begin
u = @diagview(H)
# `v` is always real, even though `u` may (in general) be complex.
v = realview(B, @diagview(H,-1))
end
for j = 1:m
if tol > lanczos_step!(j, A, V, u, v, n, p)
# happy-breakdown
Ks.m = j
break
end
end
@inbounds copyto!(@diagview(H,1), v[1:end-1])
return Ks
end