Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster and simpler sprand for SparseMatrixCSC #30494

Merged
merged 5 commits into from
Jan 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 21 additions & 43 deletions stdlib/SparseArrays/src/sparsematrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1382,41 +1382,24 @@ function _sparse_findprevnz(m::SparseMatrixCSC, i::Integer)
return LinearIndices(m)[m.rowval[prevhi-1], prevcol]
end

function sprand_IJ(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat)
((m < 0) || (n < 0)) && throw(ArgumentError("invalid Array dimensions"))
0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]"))
N = n*m

I, J = Vector{Int}(), Vector{Int}() # indices of nonzero elements
sizehint!(I, round(Int,N*density))
sizehint!(J, round(Int,N*density))

# density of nonzero columns:
L = log1p(-density)
coldensity = -expm1(m*L) # = 1 - (1-density)^m
colsparsity = exp(m*L) # = 1 - coldensity
iL = 1/L

rows = Vector{Int}()
for j in randsubseq(r, 1:n, coldensity)
# To get the right statistics, we *must* have a nonempty column j
# even if p*m << 1. To do this, we use an approach similar to
# the one in randsubseq to compute the expected first nonzero row k,
# except given that at least one is nonzero (via Bayes' rule);
# carefully rearranged to avoid excessive roundoff errors.
k = ceil(log(colsparsity + rand(r)*coldensity) * iL)
ik = k < 1 ? 1 : k > m ? m : Int(k) # roundoff-error/underflow paranoia
randsubseq!(r, rows, 1:m-ik, density)
push!(rows, m-ik+1)
append!(I, rows)
nrows = length(rows)
Jlen = length(J)
resize!(J, Jlen+nrows)
@inbounds for i = Jlen+1:length(J)
J[i] = j
# Build a random m-by-n SparseMatrixCSC directly in CSC form: the stored
# positions are drawn with `randsubseq` over the linear indices 1:m*n, and
# those linear indices are then turned into column pointers and per-column
# row indices in place. `rfn` is called once as `rfn(nnz)` and its result is
# passed to the constructor as the stored values.
# Throws ArgumentError for a negative dimension or a density outside [0,1].
function _sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn)
m, n = Int(m), Int(n)
(m < 0 || n < 0) && throw(ArgumentError("invalid Array dimensions"))
0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]"))
# j: position of the next unprocessed entry of rowval.
# colm: linear-index offset of the current column, i.e. m*(col-1).
j, colm = 1, 0
# NOTE(review): the loop below presumes randsubseq returns the selected
# linear indices in increasing order -- confirm against the Random docs.
rowval = randsubseq(r, 1:(m*n), density)
nnz = length(rowval)
colptr = Vector{Int}(undef, n + 1)
@inbounds for col = 1:n+1
# Entries of column `col` start at position j; for col == n+1 this writes
# the final colptr entry.
colptr[col] = j
# Subtract the column offset in place: a result <= m is a row index inside
# column `col`, so the entry is consumed and j advances.
while j <= nnz && (rowval[j] -= colm) <= m
j += 1
end
# If the while loop stopped on an entry (not on j > nnz), that entry was
# already decremented but did not fit this column: restore its linear index
# so the next iteration can subtract the next column's offset.
j <= nnz && (rowval[j] += colm)
colm += m
end
# NOTE(review): `I` and `J` are not assigned anywhere in this function; this
# line looks like a leftover of sprand_IJ's return value in the diff view --
# confirm it is not part of the merged code.
I, J
return SparseMatrixCSC(m, n, colptr, rowval, rfn(nnz))
end

"""
Expand All @@ -1432,9 +1415,8 @@ argument specifies a random number generator, see [Random Numbers](@ref).
# Examples
```jldoctest; setup = :(using Random; Random.seed!(1234))
julia> sprand(Bool, 2, 2, 0.5)
2×2 SparseMatrixCSC{Bool,Int64} with 2 stored entries:
[1, 1] = true
[2, 1] = true
2×2 SparseMatrixCSC{Bool,Int64} with 1 stored entry:
[2, 2] = true

julia> sprand(Float64, 3, 0.75)
3-element SparseVector{Float64,Int64} with 1 stored entry:
Expand All @@ -1447,9 +1429,7 @@ function sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat,
N = m*n
N == 0 && return spzeros(T,m,n)
N == 1 && return rand(r) <= density ? sparse([1], [1], rfn(r,1)) : spzeros(T,1,1)

I,J = sprand_IJ(r, m, n, density)
sparse_IJ_sorted!(I, J, rfn(r,length(I)), m, n, +) # it will never need to combine
_sprand(r,m,n,density,i->rfn(r,i))
end

function sprand(m::Integer, n::Integer, density::AbstractFloat,
Expand All @@ -1458,9 +1438,7 @@ function sprand(m::Integer, n::Integer, density::AbstractFloat,
N = m*n
N == 0 && return spzeros(T,m,n)
N == 1 && return rand() <= density ? sparse([1], [1], rfn(1)) : spzeros(T,1,1)

I,J = sprand_IJ(GLOBAL_RNG, m, n, density)
sparse_IJ_sorted!(I, J, rfn(length(I)), m, n, +) # it will never need to combine
_sprand(GLOBAL_RNG,m,n,density,rfn)
end

# Return a Vector{Bool} of length `n` with every element `true`.
# The RNG argument `r` is accepted but not used.
function truebools(r::AbstractRNG, n::Integer)
    return fill(true, n)
end
Expand All @@ -1487,8 +1465,8 @@ argument specifies a random number generator, see [Random Numbers](@ref).
```jldoctest; setup = :(using Random; Random.seed!(0))
julia> sprandn(2, 2, 0.75)
2×2 SparseMatrixCSC{Float64,Int64} with 2 stored entries:
[1, 1] = 0.586617
[1, 2] = 0.297336
[1, 2] = 0.586617
[2, 2] = 0.297336
```
"""
function sprandn(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat)
    # Forward to sprand, passing `randn` as the value generator and Float64
    # as the element type.
    return sprand(r, m, n, density, randn, Float64)
end
Expand Down
17 changes: 16 additions & 1 deletion stdlib/SparseArrays/test/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,7 @@ end
local A = guardseed(1234321) do
triu(sprand(10, 10, 0.2))
end
@test SparseArrays.droptol!(A, 0.01).colptr == [1,1,1,2,2,3,4,6,6,7,9]
@test SparseArrays.droptol!(A, 0.01).colptr == [1, 2, 2, 3, 4, 5, 5, 6, 8, 10, 13]
@test isequal(SparseArrays.droptol!(sparse([1], [1], [1]), 1), SparseMatrixCSC(1, 1, Int[1, 1], Int[], Int[]))
end

Expand Down Expand Up @@ -2396,6 +2396,21 @@ end
@test m2.module == SparseArrays
end

@testset "sprand" begin
# Checks that sprand(m,n,p) consumes the RNG exactly like a randsubseq draw
# over the linear indices 1:m*n followed by a single rand call for the values,
# so both constructions yield the same matrix when started from the same seed.
p=0.3; m=1000; n=2000;
for s in 1:10
# build a (dense) random matrix with randsubseq + rand
Random.seed!(s);
v = randsubseq(1:m*n,p);
x = zeros(m,n);
x[v] .= rand(length(v));
# redo the same with sprand
Random.seed!(s);
a = sprand(m,n,p);
# exact equality is intended: both sides use the same random stream
@test x == a
end
end

@testset "sprandn with type $T" for T in (Float64, Float32, Float16, ComplexF64, ComplexF32, ComplexF16)
# sprandn called with an explicit element type must return a sparse matrix
# whose element type is that T, for each real and complex float type listed.
@test sprandn(T, 5, 5, 0.5) isa AbstractSparseMatrix{T}
end
Expand Down