From 28d77aff5972e8940cd0acd1659b78d6bafb5c3b Mon Sep 17 00:00:00 2001 From: Elias Jarlebring Date: Fri, 30 Apr 2021 10:11:50 +0200 Subject: [PATCH 1/5] Reduce nof matmat's in exp(StridedMatrix) --- stdlib/LinearAlgebra/src/dense.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 27fa515e70a02..9da7612844d89 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -634,17 +634,21 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat C = T[120.,60.,12.,1.] end A2 = A * A - P = copy(Inn) - U = C[2] * P - V = C[1] * P - for k in 1:(div(size(C, 1), 2) - 1) + # Compute U and V: Even/odd terms in Padé numerator & denom + # Expansion of k=1 in for loop + P = A2 + U = C[2]*I + C[4]*P + V = C[1]*I + C[3]*P + for k in 2:(div(size(C, 1), 2) - 1) k2 = 2 * k P *= A2 U += C[k2 + 2] * P V += C[k2 + 1] * P end + U = A * U X = V + U + # Padé approximant: (V-U)\(V+U) LAPACK.gesv!(V-U, X) else s = log2(nA/5.4) # power of 2 later reversed by squaring From 44b9eaac5a2197d0241df006b043f720fce0ef1f Mon Sep 17 00:00:00 2001 From: Elias Jarlebring Date: Fri, 30 Apr 2021 10:35:49 +0200 Subject: [PATCH 2/5] exp(::StridedMatrix): use Uniformscaling --- stdlib/LinearAlgebra/src/dense.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 9da7612844d89..6fcd392a02d6f 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -617,7 +617,6 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat end ilo, ihi, scale = LAPACK.gebal!('B', A) # modifies A nA = opnorm(A, 1) - Inn = Matrix{T}(I, n, n) ## For sufficiently small nA, use lower order Padé-Approximations if (nA <= 2.1) if nA > 0.95 @@ -664,10 +663,14 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat A2 = A * A A4 = A2 * A2 A6 = A2 * A4 + Ut = CC[4]*A2 + Ut[diagind(Ut)] .+= CC[2] U = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+ - CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2 .+ CC[2].*Inn) + CC[8].*A6 .+ CC[6].*A4 .+ Ut) + Vt = CC[3]*A2 + Vt[diagind(Vt)] .+= CC[1] V = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+ - CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2 .+ CC[1].*Inn + CC[7].*A6 .+ CC[5].*A4 .+ Vt X = V + U LAPACK.gesv!(V-U, X) From ae811e82946958bb0b503de4817caf027b5138b4 Mon Sep 17 00:00:00 2001 From: Elias Jarlebring Date: Tue, 4 May 2021 11:07:44 +0200 Subject: [PATCH 3/5] exp!() In-place mul! for case nA<2.1 --- stdlib/LinearAlgebra/src/dense.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 6fcd392a02d6f..801fdbf70f1f3 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -641,8 +641,8 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat for k in 2:(div(size(C, 1), 2) - 1) k2 = 2 * k P *= A2 - U += C[k2 + 2] * P - V += C[k2 + 1] * P + mul!(U, C[k2 + 2], P, true, true) # U += C[k2+2]*P + mul!(V, C[k2 + 1], P, true, true) # V += C[k2+1]*P end U = A * U From e10d87ff0f65052ee8227a6ea8093f3cefb615d8 Mon Sep 17 00:00:00 2001 From: Elias Jarlebring Date: Tue, 4 May 2021 13:01:05 +0200 Subject: [PATCH 4/5] exp!() mul! for allocation economical also for nA > 2.1 --- stdlib/LinearAlgebra/src/dense.jl | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 801fdbf70f1f3..6628097e403fb 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -665,12 +665,25 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat A6 = A2 * A4 Ut = CC[4]*A2 Ut[diagind(Ut)] .+= CC[2] - U = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+ - CC[8].*A6 .+ CC[6].*A4 .+ Ut) - Vt = CC[3]*A2 + # Allocation economical version of: + #U = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+ + # CC[8].*A6 .+ CC[6].*A4 .+ Ut) + U = mul!(CC[8].*A6 .+ CC[6].*A4 .+ Ut, + A6, + CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2, + true, true) + U *= A + + # Allocation economical version of: Vt = CC[3]*A2 (recycle Ut) + Vt = mul!(Ut, CC[3], A2, true, false) Vt[diagind(Vt)] .+= CC[1] - V = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+ - CC[7].*A6 .+ CC[5].*A4 .+ Vt + # Allocation economical version of: + #V = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+ + # CC[7].*A6 .+ CC[5].*A4 .+ Vt + V = mul!(CC[7].*A6 .+ CC[5].*A4 .+ Vt, + A6, + CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2, + true, true) X = V + U LAPACK.gesv!(V-U, X) From 0c3c52064484161555f282bf860eaaf29e3cbcd1 Mon Sep 17 00:00:00 2001 From: Elias Jarlebring Date: Tue, 4 May 2021 14:41:55 +0200 Subject: [PATCH 5/5] exp!() Preserve original order in U update for nA>2.1 Co-authored-by: Daniel Karrasch --- stdlib/LinearAlgebra/src/dense.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 6628097e403fb..94926805bb387 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -672,7 +672,7 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat A6, CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2, true, true) - U *= A + U = A*U # Allocation economical version of: Vt = CC[3]*A2 (recycle Ut) Vt = mul!(Ut, CC[3], A2, true, false)