Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Argument checks for SparseMatrixCSC constructors #31724

Merged
merged 11 commits into from
Jun 26, 2019
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ Standard library changes

#### SparseArrays

* `SparseMatrixCSC(m,n,colptr,rowval,nzval)` perform consistency checks for arguments:
`colptr` must be properly populated and lengths of `colptr`, `rowval`, and `nzval`
must be compatible with `m`, `n`, and `eltype(colptr)`.
* `sparse(I, J, V, m, n)` verifies lengths of `I`, `J`, `V` are equal and compatible with
`eltype(I)` and `m`, `n`.

#### Dates

Expand Down
2 changes: 1 addition & 1 deletion stdlib/SparseArrays/src/higherorderfns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ _maxnnzfrom(shape::NTuple{2}, A::SparseMatrixCSC) = nnz(A) * div(shape[1], A.m)
return SparseVector(shape..., storedinds, storedvals)
end
@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz)
pointers = Vector{indextype}(undef, shape[2] + 1)
pointers = ones(indextype, shape[2] + 1)
storedinds = Vector{indextype}(undef, maxnnz)
storedvals = Vector{entrytype}(undef, maxnnz)
return SparseMatrixCSC(shape..., pointers, storedinds, storedvals)
Expand Down
131 changes: 96 additions & 35 deletions stdlib/SparseArrays/src/sparsematrix.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Compressed sparse columns data structure
# Assumes that no zeros are stored in the data structure
# No assumptions about stored zeros in the data structure
# Assumes that row values in rowval for each column are sorted
# issorted(rowval[colptr[i]:(colptr[i+1]-1)]) == true
# Assumes that 1 <= colptr[i] <= colptr[i+1] for i in 1..n
# Assumes that nnz <= length(rowval) < typemax(Ti)
# Assumes that 0 <= length(nzval) < typemax(Ti)

"""
SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti}
Expand All @@ -20,8 +23,8 @@ struct SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti}
rowval::Vector{Ti} # Row indices of stored values
nzval::Vector{Tv} # Stored values, typically nonzeros

function SparseMatrixCSC{Tv,Ti}(m::Integer, n::Integer, colptr::Vector{Ti}, rowval::Vector{Ti},
nzval::Vector{Tv}) where {Tv,Ti<:Integer}
function SparseMatrixCSC{Tv,Ti}(m::Integer, n::Integer, colptr::Vector{Ti},
rowval::Vector{Ti}, nzval::Vector{Tv}) where {Tv,Ti<:Integer}
@noinline throwsz(str, lbl, k) =
throw(ArgumentError("number of $str ($lbl) must be ≥ 0, got $k"))
m < 0 && throwsz("rows", 'm', m)
Expand All @@ -32,9 +35,42 @@ end
function SparseMatrixCSC(m::Integer, n::Integer, colptr::Vector, rowval::Vector, nzval::Vector)
Tv = eltype(nzval)
Ti = promote_type(eltype(colptr), eltype(rowval))
sparse_check_Ti(m, n, Ti)
sparse_check(n, colptr, rowval, nzval)
# silently shorten rowval and nzval to usable index positions.
maxlen = abs(widemul(m, n))
isbitstype(Ti) && (maxlen = min(maxlen, typemax(Ti) - 1))
length(rowval) > maxlen && resize!(rowval, maxlen)
length(nzval) > maxlen && resize!(nzval, maxlen)
SparseMatrixCSC{Tv,Ti}(m, n, colptr, rowval, nzval)
end

function sparse_check_Ti(m::Integer, n::Integer, Ti::Type)
@noinline throwTi(str, lbl, k) =
throw(ArgumentError("$str ($lbl = $k) does not fit in Ti = $(Ti)"))
0 ≤ m && (!isbitstype(Ti) || m ≤ typemax(Ti)) || throwTi("number of rows", "m", m)
0 ≤ n && (!isbitstype(Ti) || n ≤ typemax(Ti)) || throwTi("number of columns", "n", n)
end

function sparse_check(n::Integer, colptr::Vector{Ti}, rowval, nzval) where Ti
sparse_check_length("colptr", colptr, n+1, String) # don't check upper bound
ckp = Ti(1)
ckp == colptr[1] || throw(ArgumentError("$ckp == colptr[1] != 1"))
@inbounds for k = 2:n+1
ck = colptr[k]
ckp <= ck || throw(ArgumentError("$ckp == colptr[$(k-1)] > colptr[$k] == $ck"))
ckp = ck
end
sparse_check_length("rowval", rowval, ckp-1, Ti)
sparse_check_length("nzval", nzval, 0, Ti) # we allow empty nzval !!!
end
function sparse_check_length(rowstr, rowval, minlen, Ti)
len = length(rowval)
len >= minlen || throw(ArgumentError("$len == length($rowstr) < $minlen"))
!isbitstype(Ti) || len < typemax(Ti) ||
throw(ArgumentError("$len == length($rowstr) >= $(typemax(Ti))"))
end

size(S::SparseMatrixCSC) = (S.m, S.n)

# Define an alias for views of a SparseMatrixCSC which include all rows and a unit range of the columns.
Expand Down Expand Up @@ -497,7 +533,10 @@ function sparse(I::AbstractVector{Ti}, J::AbstractVector{Ti}, V::AbstractVector{
"length(I) (=$(length(I))) == length(J) (= $(length(J))) == length(V) (= ",
"$(length(V)))")))
end

Tj = Ti
while isbitstype(Tj) && coolen >= typemax(Tj)
Tj = widen(Tj)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This made Tj type unstable and caused the regression in #32985. Only matters for smallish arrays but would be good to fix nonetheless. Can't we just always use Int for this or something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally this would just be an error. This segfaulted in 1.2. Let's just make it an error instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

end
if m == 0 || n == 0 || coolen == 0
if coolen != 0
if n == 0
Expand All @@ -509,13 +548,13 @@ function sparse(I::AbstractVector{Ti}, J::AbstractVector{Ti}, V::AbstractVector{
SparseMatrixCSC(m, n, fill(one(Ti), n+1), Vector{Ti}(), Vector{Tv}())
else
# Allocate storage for CSR form
csrrowptr = Vector{Ti}(undef, m+1)
csrrowptr = Vector{Tj}(undef, m+1)
csrcolval = Vector{Ti}(undef, coolen)
csrnzval = Vector{Tv}(undef, coolen)

# Allocate storage for the CSC form's column pointers and a necessary workspace
csccolptr = Vector{Ti}(undef, n+1)
klasttouch = Vector{Ti}(undef, n)
klasttouch = Vector{Tj}(undef, n)

# Allocate empty arrays for the CSC form's row and nonzero value arrays
# The parent method called below automagically resizes these arrays
Expand Down Expand Up @@ -580,25 +619,28 @@ transposition," ACM TOMS 4(3), 250-269 (1978) inspired this method's use of a pa
counting sorts.
"""
function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
csccolptr::Vector{Ti}, cscrowval::Vector{Ti}, cscnzval::Vector{Tv}) where {Tv,Ti<:Integer}
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
csccolptr::Vector{Ti}, cscrowval::Vector{Ti}, cscnzval::Vector{Tv}) where {Tv,Ti<:Integer,Tj<:Integer}

require_one_based_indexing(I, J, V)
sparse_check_Ti(m, n, Ti)
sparse_check_length("I", I, 0, Tj)
# Compute the CSR form's row counts and store them shifted forward by one in csrrowptr
fill!(csrrowptr, Ti(0))
fill!(csrrowptr, Tj(0))
coolen = length(I)
min(length(J), length(V)) >= coolen || throw(ArgumentError("J and V need length >= length(I) = $coolen"))
@inbounds for k in 1:coolen
Ik = I[k]
if 1 > Ik || m < Ik
throw(ArgumentError("row indices I[k] must satisfy 1 <= I[k] <= m"))
end
csrrowptr[Ik+1] += Ti(1)
csrrowptr[Ik+1] += Tj(1)
end

# Compute the CSR form's rowptrs and store them shifted forward by one in csrrowptr
countsum = Ti(1)
csrrowptr[1] = Ti(1)
countsum = Tj(1)
csrrowptr[1] = Tj(1)
@inbounds for i in 2:(m+1)
overwritten = csrrowptr[i]
csrrowptr[i] = countsum
Expand All @@ -613,7 +655,8 @@ function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
throw(ArgumentError("column indices J[k] must satisfy 1 <= J[k] <= n"))
end
csrk = csrrowptr[Ik+1]
csrrowptr[Ik+1] = csrk + Ti(1)
@assert csrk >= Tj(1) "index into csrcolval exceeds typemax(Ti)"
csrrowptr[Ik+1] = csrk + Tj(1)
csrcolval[csrk] = Jk
csrnzval[csrk] = V[k]
end
Expand All @@ -626,21 +669,21 @@ function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
# Minimizing extraneous communication and nonlocality of reference, primarily by using
# only a single auxiliary array in this step, is the key to this method's performance.
fill!(csccolptr, Ti(0))
fill!(klasttouch, Ti(0))
writek = Ti(1)
fill!(klasttouch, Tj(0))
writek = Tj(1)
newcsrrowptri = Ti(1)
origcsrrowptri = Ti(1)
origcsrrowptri = Tj(1)
origcsrrowptrip1 = csrrowptr[2]
@inbounds for i in 1:m
for readk in origcsrrowptri:(origcsrrowptrip1-Ti(1))
for readk in origcsrrowptri:(origcsrrowptrip1-Tj(1))
j = csrcolval[readk]
if klasttouch[j] < newcsrrowptri
klasttouch[j] = writek
if writek != readk
csrcolval[writek] = j
csrnzval[writek] = csrnzval[readk]
end
writek += Ti(1)
writek += Tj(1)
csccolptr[j+1] += Ti(1)
else
klt = klasttouch[j]
Expand All @@ -654,23 +697,24 @@ function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
end

# Compute the CSC form's colptrs and store them shifted forward by one in csccolptr
countsum = Ti(1)
countsum = Tj(1)
csccolptr[1] = Ti(1)
@inbounds for j in 2:(n+1)
overwritten = csccolptr[j]
csccolptr[j] = countsum
countsum += overwritten
countsum <= typemax(Ti) || throw(ArgumentError("more than typemax(Ti)-1 == $(typemax(Ti)-1) entries"))
end

# Now knowing the CSC form's entry count, resize cscrowval and cscnzval if necessary
cscnnz = countsum - Ti(1)
cscnnz = countsum - Tj(1)
length(cscrowval) < cscnnz && resize!(cscrowval, cscnnz)
length(cscnzval) < cscnnz && resize!(cscnzval, cscnnz)

# Finally counting-sort the row and nonzero values from the CSR form into cscrowval and
# cscnzval. Tracking write positions in csccolptr corrects the column pointers.
@inbounds for i in 1:m
for csrk in csrrowptr[i]:(csrrowptr[i+1]-Ti(1))
for csrk in csrrowptr[i]:(csrrowptr[i+1]-Tj(1))
j = csrcolval[csrk]
x = csrnzval[csrk]
csck = csccolptr[j+1]
Expand All @@ -683,16 +727,16 @@ function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
SparseMatrixCSC(m, n, csccolptr, cscrowval, cscnzval)
end
function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
csccolptr::Vector{Ti}) where {Tv,Ti<:Integer}
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
csccolptr::Vector{Ti}) where {Tv,Ti<:Integer,Tj<:Integer}
sparse!(I, J, V, m, n, combine, klasttouch,
csrrowptr, csrcolval, csrnzval,
csccolptr, Vector{Ti}(), Vector{Tv}())
end
function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv}) where {Tv,Ti<:Integer}
V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv}) where {Tv,Ti<:Integer,Tj<:Integer}
sparse!(I, J, V, m, n, combine, klasttouch,
csrrowptr, csrcolval, csrnzval,
Vector{Ti}(undef, n+1), Vector{Ti}(), Vector{Tv}())
Expand Down Expand Up @@ -826,7 +870,7 @@ adjoint!(X::SparseMatrixCSC{Tv,Ti}, A::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = f

function ftranspose(A::SparseMatrixCSC{Tv,Ti}, f::Function) where {Tv,Ti}
X = SparseMatrixCSC(A.n, A.m,
Vector{Ti}(undef, A.m+1),
ones(Ti, A.m+1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
halfperm!(X, A, 1:A.n, f)
Expand Down Expand Up @@ -1045,7 +1089,7 @@ function permute!(X::SparseMatrixCSC{Tv,Ti}, A::SparseMatrixCSC{Tv,Ti},
_checkargs_sourcecompatdest_permute!(A, X)
_checkargs_sourcecompatperms_permute!(A, p, q)
C = SparseMatrixCSC(A.n, A.m,
Vector{Ti}(undef, A.m + 1),
ones(Ti, A.m + 1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
_checkargs_permutationsvalid_permute!(p, C.colptr, q, X.colptr)
Expand All @@ -1064,7 +1108,7 @@ function permute!(A::SparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
q::AbstractVector{<:Integer}) where {Tv,Ti}
_checkargs_sourcecompatperms_permute!(A, p, q)
C = SparseMatrixCSC(A.n, A.m,
Vector{Ti}(undef, A.m + 1),
ones(Ti, A.m + 1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
workcolptr = Vector{Ti}(undef, A.n + 1)
Expand Down Expand Up @@ -1135,11 +1179,11 @@ function permute(A::SparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
q::AbstractVector{<:Integer}) where {Tv,Ti}
_checkargs_sourcecompatperms_permute!(A, p, q)
X = SparseMatrixCSC(A.m, A.n,
Vector{Ti}(undef, A.n + 1),
ones(Ti, A.n + 1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
C = SparseMatrixCSC(A.n, A.m,
Vector{Ti}(undef, A.m + 1),
ones(Ti, A.m + 1),
Vector{Ti}(undef, nnz(A)),
Vector{Tv}(undef, nnz(A)))
_checkargs_permutationsvalid_permute!(p, C.colptr, q, X.colptr)
Expand Down Expand Up @@ -2331,15 +2375,32 @@ function _setindex_scalar!(A::SparseMatrixCSC{Tv,Ti}, _v, _i::Integer, _j::Integ
# Column j does not contain entry A[i,j]. If v is nonzero, insert entry A[i,j] = v
# and return. If to the contrary v is zero, then simply return.
if !iszero(v)
insert!(A.rowval, searchk, i)
insert!(A.nzval, searchk, v)
nz = A.colptr[A.n+1]
# throw exception before state is partially modified
!isbitstype(Ti) || nz < typemax(Ti) ||
throw(ArgumentError("nnz(A) going to exceed typemax(Ti) = $(typemax(Ti))"))

# if nnz(A) < length(rowval/nzval): no need to grow rowval and preserve values
_insert!(A.rowval, searchk, i, nz)
_insert!(A.nzval, searchk, v, nz)
@simd for m in (j + 1):(A.n + 1)
@inbounds A.colptr[m] += 1
@inbounds A.colptr[m] += Ti(1)
end
end
return A
end

# insert item at position pos, shifting only from pos+1 to nz
function _insert!(v::Vector, pos::Integer, item, nz::Integer)
if nz > length(v)
insert!(v, pos, item)
else # nz < length(v)
Base.unsafe_copyto!(v, pos+1, v, pos, nz - pos)
v[pos] = item
v
end
end

function Base.fill!(V::SubArray{Tv, <:Any, <:SparseMatrixCSC, Tuple{Vararg{Union{Integer, AbstractVector{<:Integer}},2}}}, x) where Tv
A = V.parent
I, J = V.indices
Expand Down
56 changes: 56 additions & 0 deletions stdlib/SparseArrays/test/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2583,6 +2583,7 @@ end
A = SparseMatrixCSC(Complex{BigInt}[1+im 2+2im]')'[1:1, 2:2]
# ...ensure it does! If necessary, the test needs to be updated to use
# another mechanism to create a suitable A.
resize!(A.nzval, 2)
@assert length(A.nzval) > nnz(A)
@test -A == fill(-2-2im, 1, 1)
@test conj(A) == fill(2-2im, 1, 1)
Expand All @@ -2603,4 +2604,59 @@ end
@test sum(x1, dims=2) == sum(x2, dims=2)
end

@testset "Ti cannot store all potential values #31024" begin
# m * n >= typemax(Ti) but nnz < typemax(Ti)
A = SparseMatrixCSC(12, 12, fill(Int8(1),13), Int8[], Int[])
@test size(A) == (12,12) && nnz(A) == 0
I1 = [Int8(i) for i in 1:20 for _ in 1:20]
J1 = [Int8(i) for _ in 1:20 for i in 1:20]
# m * n >= typemax(Ti) and nnz >= typemax(Ti)
@test_throws ArgumentError sparse(I1, J1, ones(length(I1)))
I1 = Int8.(rand(1:10, 500))
J1 = Int8.(rand(1:10, 500))
V1 = ones(500)
# m * n < typemax(Ti) and length(I) >= typemax(Ti) - combining values
@test sparse(I1, J1, V1, 10, 10) !== nothing
# m * n >= typemax(Ti) and length(I) >= typemax(Ti)
@test sparse(I1, J1, V1, 12, 13) !== nothing
I1 = Int8.(rand(1:10, 126))
J1 = Int8.(rand(1:10, 126))
V1 = ones(126)
# m * n >= typemax(Ti) and length(I) < typemax(Ti)
@test sparse(I1, J1, V1, 100, 100) !== nothing
end

@testset "Typecheck too strict #31435" begin
A = SparseMatrixCSC{Int,Int8}(70, 2, fill(Int8(1), 3), Int8[], Int[])
A[5:67,1:2] .= ones(Int, 63, 2)
@test nnz(A) == 126
# nnz >= typemax
@test_throws ArgumentError A[2,1] = 42
# colptr short
@test_throws ArgumentError SparseMatrixCSC(1, 1, Int[], Int[], Float64[])
# colptr[1] must be 1
@test_throws ArgumentError SparseMatrixCSC(10, 3, [0,1,1,1], Int[], Float64[])
# colptr not ascending
@test_throws ArgumentError SparseMatrixCSC(10, 3, [1,2,1,2], Int[], Float64[])
# rowwal (and nzval) short
@test_throws ArgumentError SparseMatrixCSC(10, 3, [1,2,2,4], [1,2], Float64[])
# nzval short
@test SparseMatrixCSC(10, 3, [1,2,2,4], [1,2,3], Float64[]) !== nothing
# length(rowval) >= typemax
@test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], fill(Int8(1),127), Int[1,2,3])
@test SparseMatrixCSC{Int,Int8}(5, 1, Int8[1,2], fill(Int8(1),127), Int[1,2,3]) != 0
# length(nzval) >= typemax
@test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], Int8[1], fill(7, 127))
@test SparseMatrixCSC{Int,Int8}(5, 1, Int8[1,2], Int8[1], fill(7, 127)) != 0

# length(I) >= typemax
@test_throws ArgumentError sparse(UInt8.(1:255), fill(UInt8(1), 255), fill(1, 255))
# m > typemax
@test_throws ArgumentError sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 256, 1)
# n > typemax
@test_throws ArgumentError sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 255, 256)
# n, m maximal
@test sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 255, 255) !== nothing
end

end # module
6 changes: 2 additions & 4 deletions stdlib/SuiteSparse/test/cholmod.jl
Original file line number Diff line number Diff line change
Expand Up @@ -764,10 +764,8 @@ end
end

@testset "Check inputs to Sparse. Related to #20024" for A_ in (
SparseMatrixCSC(2, 2, [1, 2], CHOLMOD.SuiteSparse_long[], Float64[]),
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1], Float64[]),
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[], Float64[1.0]),
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1], Float64[1.0]))
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1,2], Float64[]),
SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1,2], Float64[1.0]))
@test_throws ArgumentError CHOLMOD.Sparse(size(A_)..., A_.colptr .- 1, A_.rowval .- 1, A_.nzval)
@test_throws ArgumentError CHOLMOD.Sparse(A_)
end
Expand Down