Skip to content

Commit

Permalink
issue DiskArrays.jl JuliaIO#131
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander-Barth committed Oct 24, 2023
1 parent e407f6a commit 9bb0ffd
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 4 deletions.
87 changes: 87 additions & 0 deletions src/batchgetindex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,91 @@ function batchgetindex(a, i::AbstractVector{Int})
ci = CartesianIndices(size(a))
return batchgetindex(a, ci[i])
end

# indexing with vector of integers from NCDatasets 0.12.17 (MIT)

# computes the shape of the array of size `sz` after applying the indexes
# size(a[indexes...]) == _shape_after_slice(size(a),indexes...)

# the difficulty here is to make the size inferrable by the compiler
@inline _shape_after_slice(sz,indexes...) = __sh(sz,(),1,indexes...)
@inline __sh(sz,sh,n,i::Integer,indexes...) = __sh(sz,sh, n+1,indexes...)
@inline __sh(sz,sh,n,i::Colon, indexes...) = __sh(sz,(sh...,sz[n]), n+1,indexes...)
@inline __sh(sz,sh,n,i, indexes...) = __sh(sz,(sh...,length(i)),n+1,indexes...)
@inline __sh(sz,sh,n) = sh


# convert e.g. vector indices to a list of ranges
# [1,2,3,6,7] into [1:3, 6:7]
# 1:10 into [1:10]
to_range_list(index::Integer,len) = index
to_range_list(index::Colon,len) = [1:len]
to_range_list(index::AbstractRange,len) = [index]

function to_range_list(index::Vector{T},len) where T <: Integer
grow(istart) = istart[begin]:(istart[end]+step(istart))

baseindex = 1
indices_ranges = UnitRange{T}[]

while baseindex <= length(index)
range = index[baseindex]:index[baseindex]
range_test = grow(range)
index_view = @view index[baseindex:end]

while checkbounds(Bool,index_view,length(range_test)) &&
(range_test[end] == index_view[length(range_test)])

range = range_test
range_test = grow(range_test)
end

push!(indices_ranges,range)
baseindex += length(range)
end

# make sure we did not lose any indices
@assert reduce(vcat,indices_ranges,init=T[]) == index
return indices_ranges
end

_range_indices_dest(ri_dest) = ri_dest
_range_indices_dest(ri_dest,i::Integer,rest...) = _range_indices_dest(ri_dest,rest...)
function _range_indices_dest(ri_dest,v,rest...)
baseindex = 0
ind = similar(v,0)
for r in v
rr = 1:length(r)
push!(ind,baseindex .+ rr)
baseindex += length(r)
end

_range_indices_dest((ri_dest...,ind),rest...)
end

# rebase in source indices to the destination array
# for example if we load range 1:3 and 7:10 we will write to ranges 1:3 and 4:7
range_indices_dest(ri...) = _range_indices_dest((),ri...)

function batchgetindex(a::TA,indices::Vararg{Union{Int,Colon,AbstractRange{<:Integer},Vector{Int}},N}) where TA <: AbstractArray{T,N} where {T,N}
sz_source = size(a)
ri = to_range_list.(indices,sz_source)
sz_dest = _shape_after_slice(sz_source,indices...)
ri_dest = range_indices_dest(ri...)

@debug "transform vector of indices to ranges" ri_dest ri

dest = Array{eltype(a),length(sz_dest)}(undef,sz_dest)
for R in CartesianIndices(length.(ri))
ind_source = ntuple(i -> ri[i][R[i]],N)
ind_dest = ntuple(i -> ri_dest[i][R[i]],length(ri_dest))
dest[ind_dest...] = a[ind_source...]
end
return dest
end



function batchgetindex(a, i...)
indvec = create_indexvector(a, i)
return disk_getindex_batch(a, indvec)
Expand Down Expand Up @@ -171,6 +256,8 @@ function _readblock!(A::AbstractArray, A_ret, r::AbstractVector...)
mi, ma = extrema(ids)
return largest_jump > cs && length(ids) / (ma - mi) < 0.5
end
# What TODO?: necessary to avoid infinite recursion
need_batch = false
if any(need_batch)
A_ret .= batchgetindex(A, r...)
else
Expand Down
23 changes: 19 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ function test_getindex(a)
@test a[20:-1:9] == 20:-1:9
@test a[[3, 5, 8]] == [3, 5, 8]
@test a[2:4:14] == [2, 6, 10, 14]
# Test that readblock was called exactly onces for every getindex
@test getindex_count(a) == 16

@testset "allow_scalar" begin
DiskArrays.allow_scalar(false)
@test_throws ErrorException a[2, 3, 1]
Expand Down Expand Up @@ -493,7 +492,8 @@ end
#Index with range stride much larger than chunk size
a = _DiskArray(reshape(1:100, 20, 5, 1); chunksize=(1, 5, 1))
@test a[1:9:20, :, 1] == trueparent(a)[1:9:20, :, 1]
@test getindex_count(a) == 3
# now getindex_count(a) == 1
#@test getindex_count(a) == 3

b = _DiskArray(zeros(4, 5, 1); chunksize=(4, 1, 1))
b[[1, 4], [2, 5], 1] = ones(2, 2)
Expand All @@ -505,6 +505,21 @@ end
mask[4, 3] = true
b[mask] = fill(2.0, 4)
@test setindex_count(b) == 4


# issue #131

data = rand(1:99,10,10,10)
a1 = _DiskArray(data);

for (ind,expected_count) in [
( ([1,2],[2,3],:), 1 ),
( ([1,2,9,10],:,:), 2 ),
]
a1.getindex_count[] = 0
@test a1[ind...] == data[ind...]
@test getindex_count(a1) == expected_count
end
end

@testset "generator" begin
Expand Down Expand Up @@ -648,7 +663,7 @@ end
a1 .= v2
@test all(Array(a1) .== (4:27) * vec(3:18)')

# TODO Chunks that don't align at all - need to work out
# TODO Chunks that don't align at all - need to work out
# how to choose the smallest chunks to read twice, and when
# to just ignore the chunks and load the whole array.
# a2 .= v1
Expand Down

0 comments on commit 9bb0ffd

Please sign in to comment.