Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

spmatmul sparse matrix multiplication - performance improvements #30372

Merged
merged 16 commits into from
Dec 17, 2018
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 53 additions & 33 deletions stdlib/SparseArrays/src/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,61 +147,81 @@ end
# Products of two lazy transpose/adjoint wrappers around sparse matrices:
# materialize both wrappers with `copy` and dispatch to the CSC×CSC `spmatmul`.
*(A::Adjoint{<:Any,<:SparseMatrixCSC{Tv,Ti}}, B::Adjoint{<:Any,<:SparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} = spmatmul(copy(A), copy(B))
*(A::Transpose{<:Any,<:SparseMatrixCSC{Tv,Ti}}, B::Transpose{<:Any,<:SparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} = spmatmul(copy(A), copy(B))

# Gustavson's matrix multiplication algorithm revisited.
# The result rowval vector is already sorted by construction, so no post-sort
# of the product is needed. The auxiliary Vector{Bool} `xb` marks which rows of
# the current result column hold a valid partial sum.
# Besides SparseMatrixCSC, a SparseVector is also accepted as B.
function spmatmul(A::SparseMatrixCSC{Tv,Ti},
                  B::Union{SparseMatrixCSC{Tv,Ti},SparseVector{Tv,Ti}}) where {Tv,Ti}

    mA, nA = size(A)
    nB = size(B, 2)   # a SparseVector B behaves like an nA×1 matrix here
    nA == size(B, 1) || throw(DimensionMismatch())

    rowvalA = rowvals(A); nzvalA = nonzeros(A)
    rowvalB = rowvals(B); nzvalB = nonzeros(B)
    # Probabilistic estimate of nnz(C), padded by 10%; storage grows on demand below.
    nnzC = estimate_mulsize(mA, nnz(A), nA, nnz(B), nB) * 11 ÷ 10
    # `B isa SparseMatrixCSC` is decided by B's type, so these branches are
    # resolved at compile time and carry no runtime cost.
    if B isa SparseMatrixCSC; colptrC = Vector{Ti}(undef, nB+1) end
    rowvalC = Vector{Ti}(undef, nnzC)
    nzvalC = Vector{Tv}(undef, nnzC)

    @inbounds begin
        ip = 1                        # next free slot in rowvalC/nzvalC
        x = Vector{Tv}(undef, mA)     # dense accumulator for one result column
        xb = Vector{Bool}(undef, mA)  # xb[k] == true  ⇔  x[k] holds a valid entry
        for i in 1:nB
            fill!(xb, false)
            # Ensure capacity for a worst-case dense column before writing into it.
            if ip + mA - 1 > nnzC
                nnzC += max(mA, nnzC>>2)
                resize!(rowvalC, nnzC)
                resize!(nzvalC, nnzC)
            end
            if B isa SparseMatrixCSC; colptrC[i] = ip end
            # Accumulate column i of C = A*B: for every stored B[j,i], add
            # nzB * (column j of A) into the dense accumulator x.
            for jp in nzrange(B, i)
                nzB = nzvalB[jp]
                j = rowvalB[jp]
                for kp in nzrange(A, j)
                    nzC = nzvalA[kp] * nzB
                    k = rowvalA[kp]
                    if xb[k]
                        x[k] += nzC
                    else
                        x[k] = nzC
                        xb[k] = true
                    end
                end
            end
            # Scatter the accumulator into column i of C. Scanning k in
            # ascending order keeps the row indices sorted by construction.
            for k in 1:mA
                if xb[k]
                    nzvalC[ip] = x[k]
                    rowvalC[ip] = k
                    ip += 1
                end
            end
        end
        if B isa SparseMatrixCSC; colptrC[nB+1] = ip end
    end
    ip -= 1
    resize!(rowvalC, ip)   # trim the over-allocated storage to the actual nnz
    resize!(nzvalC, ip)

    # This modification of Gustavson's algorithm yields sorted row indices,
    # so the previously required sort pass is avoided, saving time and space.
    if B isa SparseMatrixCSC
        SparseMatrixCSC(mA, nB, colptrC, rowvalC, nzvalC)
    else
        SparseVector(mA, rowvalC, nzvalC)
    end
end

# Estimate the number of nonzeros of the product of an m×n matrix with nnzA
# stored entries and an n×k matrix with nnzB stored entries.
# For randomly distributed nonzeros of the factors this is a rather good
# estimation; for heavily structured matrices it tends to over-estimate.
function estimate_mulsize(m::Integer, nnzA::Integer, n::Integer, nnzB::Integer, k::Integer)
    # Probability that a given (i,j) inner-product term has both factors stored.
    p = (nnzA / (m * n)) * (nnzB / (n * k))
    # Empty dimensions give 0/0 == NaN; an empty product has no nonzeros.
    isnan(p) && return 0
    # Expected stored entries: (1 - (1 - p)^n) * m * k, evaluated via
    # log1p/expm1 to stay accurate for very small p.
    return ceil(Int, -expm1(n * log1p(-p)) * m * k)
end

# Frobenius dot/inner product: trace(A'B)
Expand Down
3 changes: 1 addition & 2 deletions stdlib/SparseArrays/test/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -318,8 +318,7 @@ end
a = sprand(10, 5, 0.7)
b = sprand(5, 15, 0.3)
@test maximum(abs.(a*b - Array(a)*Array(b))) < 100*eps()
# `spmatmul` no longer accepts a `sortindices` keyword: the row indices of the
# product are sorted by construction, so no sorting strategy needs selecting.
@test maximum(abs.(SparseArrays.spmatmul(a,b) - Array(a)*Array(b))) < 100*eps()
f = Diagonal(rand(5))
@test Array(a*f) == Array(a)*f
@test Array(f*b) == f*Array(b)
Expand Down