Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make length(A.nzval)==nnz(A) #30662 #30676

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ Standard library changes
* Sparse vector outer products are more performant and maintain sparsity in products of the
form `kron(u, v')`, `u * v'`, and `u .* v'` where `u` and `v` are sparse vectors or column
views. ([#24980])
* New `sizehint!(::SparseMatrixCSC, ::Integer)` method ([#30676]).

#### Dates

Expand Down
11 changes: 11 additions & 0 deletions base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,17 @@ function sizehint!(a::Vector, sz::Integer)
a
end

"""
capacity(s)

Returns the allocated buffer capacity of s.
"""
function capacity end

function capacity(a::Vector)
return Int(ccall(:jl_array_capacity, Csize_t, (Any,), a))
end

"""
pop!(collection) -> item

Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ export
any!,
any,
firstindex,
capacity,
collect,
count,
delete!,
Expand Down
5 changes: 5 additions & 0 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,11 @@ JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz)
}
}

// Return the allocated element capacity of `a` (the `maxsize` field),
// i.e. how many elements fit before the buffer must be regrown.
JL_DLLEXPORT size_t jl_array_capacity(jl_array_t *a)
{
    size_t cap = a->maxsize;
    return cap;
}

JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary)
{
size_t elsz = ary->elsize;
Expand Down
68 changes: 34 additions & 34 deletions stdlib/SparseArrays/src/higherorderfns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import Base: map, map!, broadcast, copy, copyto!
using Base: front, tail, to_shape
using ..SparseArrays: SparseVector, SparseMatrixCSC, AbstractSparseVector,
AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange,
SparseVectorUnion, AdjOrTransSparseVectorUnion, nonzeroinds, nonzeros
SparseVectorUnion, AdjOrTransSparseVectorUnion, nonzeroinds, nonzeros,
spzeros
using Base.Broadcast: BroadcastStyle, Broadcasted, flatten
using LinearAlgebra

Expand Down Expand Up @@ -126,17 +127,26 @@ const SpBroadcasted2{Style<:SPVM,Axes,F,Args<:Tuple{SparseVecOrMat,SparseVecOrMa
@inline storedvals(A::SparseVecOrMat) = A.nzval  # stored-values buffer, parallel to the stored-indices buffer
@inline setcolptr!(A::SparseVector, j, val) = val  # vectors have no column pointers: no-op that just returns `val`
@inline setcolptr!(A::SparseMatrixCSC, j, val) = A.colptr[j] = val  # record where column j's stored entries begin

# Shrink `A`'s parallel index/value buffers to exactly `nstored` stored entries,
# returning `nstored` so callers can chain it as the new storage size.
function trimstorage!(A::SparseVecOrMat, nstored)
    for buffer in (storedinds(A), storedvals(A))
        resize!(buffer, nstored)
    end
    return nstored
end
function expandstorage!(A::SparseVecOrMat, maxstored)
length(storedinds(A)) < maxstored && resize!(storedinds(A), maxstored)
length(storedvals(A)) < maxstored && resize!(storedvals(A), maxstored)

# Grow `A`'s index/value buffers so each can hold at least `maxstored` stored
# entries; by default grow to the index buffer's already-allocated `capacity`
# (which costs no new allocation). Each buffer is checked independently so the
# index/value buffers end up equally long even if one of them was undersized
# on its own — the previous form resized the values buffer only when the index
# buffer was short, which could leave `storedvals(A)` too small. Returns
# `maxstored`.
function expandstorage!(A::SparseVecOrMat, maxstored = capacity(storedinds(A)))
    length(storedinds(A)) < maxstored && resize!(storedinds(A), maxstored)
    length(storedvals(A)) < maxstored && resize!(storedvals(A), maxstored)
    return maxstored
end

# Internal invariant checks tying a sparse container's buffer lengths together
# (the point of this PR: `length(nzval) == nnz`); returns the argument so the
# check can be tacked directly onto a `return`.
function _checkbuffers(S::SparseMatrixCSC)
    # A CSC matrix needs n+1 column pointers, and the final pointer must agree
    # with the (equal) lengths of the row-index and value buffers.
    @assert length(S.colptr) == S.n + 1 && S.colptr[end] - 1 == length(S.rowval) == length(S.nzval)
    return S
end
function _checkbuffers(S::SparseVector)
    # A sparse vector's index and value buffers must stay in lockstep.
    @assert length(S.nzval) == length(S.nzind)
    return S
end

# Allocated capacity, in stored entries, of `A`'s index buffer; used as the
# default growth target for `expandstorage!`.
_capacity(A::SparseVecOrMat) = capacity(storedinds(A))


# (2) map[!] entry points
map(f::Tf, A::SparseVector) where {Tf} = _noshapecheck_map(f, A)
Expand Down Expand Up @@ -180,7 +190,7 @@ copy(bc::SpBroadcasted1) = _noshapecheck_map(bc.f, bc.args[1])
storedvals(C)[1] = fofnoargs
broadcast!(f, view(storedvals(C), 2:length(storedvals(C))))
end
return C
return _checkbuffers(C)
end

function _diffshape_broadcast(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
Expand Down Expand Up @@ -223,22 +233,12 @@ _maxnnzfrom(shape::NTuple{2}, A::SparseMatrixCSC) = nnz(A) * div(shape[1], A.m)
@inline _unchecked_maxnnzbcres(shape, As...) = _unchecked_maxnnzbcres(shape, As)
@inline _checked_maxnnzbcres(shape::NTuple{1}, As...) = shape[1] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
@inline _checked_maxnnzbcres(shape::NTuple{2}, As...) = shape[1] != 0 && shape[2] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
@inline function _allocres(shape::NTuple{1}, indextype, entrytype, maxnnz)
storedinds = Vector{indextype}(undef, maxnnz)
storedvals = Vector{entrytype}(undef, maxnnz)
return SparseVector(shape..., storedinds, storedvals)
end
@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz)
pointers = Vector{indextype}(undef, shape[2] + 1)
storedinds = Vector{indextype}(undef, maxnnz)
storedvals = Vector{entrytype}(undef, maxnnz)
return SparseMatrixCSC(shape..., pointers, storedinds, storedvals)
end
# Allocate an all-zero sparse result of the requested shape and element/index
# types, then reserve room for up to `maxnnz` stored entries via `sizehint!`
# (the new SparseMatrixCSC method added by this PR) so later pushes into the
# buffers need not reallocate. Replaces the old undef-buffer constructors.
@inline _allocres(shape::NTuple, indextype, entrytype, maxnnz) = sizehint!(spzeros(entrytype, indextype, shape...), maxnnz)

# (4) _map_zeropres!/_map_notzeropres! specialized for a single sparse vector/matrix
"Stores only the nonzero entries of `map(f, Array(A))` in `C`."
function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = expandstorage!(C)
Ck = 1
@inbounds for j in columns(C)
setcolptr!(C, j, Ck)
Expand All @@ -254,7 +254,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
"""
Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and
Expand All @@ -273,7 +273,7 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMa
end
# NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over /
# nonsequential access of storedvals(C) does not appear to improve performance.
return C
return _checkbuffers(C)
end
# helper functions for these methods and some of those below
@inline _densecoloffsets(A::SparseVector) = 0
Expand All @@ -296,7 +296,7 @@ end

# (5) _map_zeropres!/_map_notzeropres! specialized for a pair of sparse vectors/matrices
function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = expandstorage!(C)
rowsentinelA = convert(indtype(A), numrows(C) + 1)
rowsentinelB = convert(indtype(B), numrows(C) + 1)
Ck = 1
Expand Down Expand Up @@ -335,7 +335,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVe
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
# Build dense matrix structure in C, expanding storage if necessary
Expand Down Expand Up @@ -367,13 +367,13 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMa
Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
end
end
return C
return _checkbuffers(C)
end


# (6) _map_zeropres!/_map_notzeropres! for more than two sparse matrices / vectors
function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = expandstorage!(C)
rowsentinel = numrows(C) + 1
Ck = 1
stopks = _colstartind_all(1, As)
Expand All @@ -397,7 +397,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N})
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
# Build dense matrix structure in C, expanding storage if necessary
Expand All @@ -420,7 +420,7 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{Spars
activerow = min(rows...)
end
end
return C
return _checkbuffers(C)
end

# helper methods for map/map! methods just above
Expand Down Expand Up @@ -461,7 +461,7 @@ end
# (7) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a single (input) sparse vector/matrix
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
isempty(C) && return _finishempty!(C)
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = expandstorage!(C)
# C and A cannot have the same shape, as we directed that case to map in broadcast's
# entry point; here we need efficiently handle only heterogeneous C-A combinations where
# one or both of C and A has at least one singleton dimension.
Expand Down Expand Up @@ -508,7 +508,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
# For information on this code, see comments in similar code in _broadcast_zeropres! above
Expand Down Expand Up @@ -539,14 +539,14 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseV
end
end
end
return C
return _checkbuffers(C)
end


# (8) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse vectors/matrices
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
isempty(C) && return _finishempty!(C)
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = expandstorage!(C)
rowsentinelA = convert(indtype(A), numrows(C) + 1)
rowsentinelB = convert(indtype(B), numrows(C) + 1)
# C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's
Expand Down Expand Up @@ -710,7 +710,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
# For information on this code, see comments in similar code in _broadcast_zeropres! above
Expand Down Expand Up @@ -809,7 +809,7 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseV
end
end
end
return C
return _checkbuffers(C)
end
# Put a sparse container that holds zero stored entries into a consistent state.
function _finishempty!(C::SparseVector)
    # Vectors carry no structural metadata beyond their buffers: nothing to do.
    return C
end
function _finishempty!(C::SparseMatrixCSC)
    # Every column is empty, so every column must start (and end) at pointer 1.
    fill!(C.colptr, 1)
    return C
end
Expand Down Expand Up @@ -860,7 +860,7 @@ end
# (9) _broadcast_zeropres!/_broadcast_notzeropres! for more than two (input) sparse vectors/matrices
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
isempty(C) && return _finishempty!(C)
spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
spaceC::Int = expandstorage!(C)
expandsverts = _expandsvert_all(C, As)
expandshorzs = _expandshorz_all(C, As)
rowsentinel = numrows(C) + 1
Expand Down Expand Up @@ -908,7 +908,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMa
end
@inbounds setcolptr!(C, numcols(C) + 1, Ck)
trimstorage!(C, Ck - 1)
return C
return _checkbuffers(C)
end
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
isempty(C) && return _finishempty!(C)
Expand Down Expand Up @@ -949,7 +949,7 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg
end
end
end
return C
return _checkbuffers(C)
end

# helper method for broadcast/broadcast! methods just above
Expand Down
20 changes: 9 additions & 11 deletions stdlib/SparseArrays/src/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -809,16 +809,15 @@ function triu(S::SparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
end
rowval = Vector{Ti}(undef, nnz)
nzval = Vector{Tv}(undef, nnz)
A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
for col = max(k+1,1) : n
c1 = S.colptr[col]
for c2 = A.colptr[col] : A.colptr[col+1]-1
A.rowval[c2] = S.rowval[c1]
A.nzval[c2] = S.nzval[c1]
for c2 = colptr[col] : colptr[col+1]-1
rowval[c2] = S.rowval[c1]
nzval[c2] = S.nzval[c1]
c1 += 1
end
end
A
SparseMatrixCSC(m, n, colptr, rowval, nzval)
end

function tril(S::SparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
Expand All @@ -839,17 +838,16 @@ function tril(S::SparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
end
rowval = Vector{Ti}(undef, nnz)
nzval = Vector{Tv}(undef, nnz)
A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
for col = 1 : min(n, m+k)
c1 = S.colptr[col+1]-1
l2 = A.colptr[col+1]-1
for c2 = 0 : l2 - A.colptr[col]
A.rowval[l2 - c2] = S.rowval[c1]
A.nzval[l2 - c2] = S.nzval[c1]
l2 = colptr[col+1]-1
for c2 = 0 : l2 - colptr[col]
rowval[l2 - c2] = S.rowval[c1]
nzval[l2 - c2] = S.nzval[c1]
c1 -= 1
end
end
A
SparseMatrixCSC(m, n, colptr, rowval, nzval)
end

## diff
Expand Down
Loading