diff --git a/NEWS.md b/NEWS.md index 7b50874313bee..8710b6db73d7c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,11 @@ New language features * Argument splatting (`x...`) can now be used in calls to the `new` pseudo-function in constructors ([#30577]). + * Objects created by calling `skipmissing` on an array can now be indexed using indices + from the parent at non-missing positions. This allows functions such as + `findall`, `findfirst`, `argmin`/`argmax` and `findmin`/`findmax` to work with these + objects, returning the index of matching non-missing elements in the parent ([#31008]). + Multi-threading changes ----------------------- diff --git a/base/missing.jl b/base/missing.jl index 384771ca6ed40..5b47d9695817f 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -153,6 +153,9 @@ float(A::AbstractArray{Missing}) = A skipmissing(itr) Return an iterator over the elements in `itr` skipping [`missing`](@ref) values. +The returned object can be indexed using indices of `itr` if the latter is indexable. +Indices corresponding to missing values are not valid: they are skipped by [`keys`](@ref) +and [`eachindex`](@ref), and a `MissingException` is thrown when trying to use them. Use [`collect`](@ref) to obtain an `Array` containing the non-`missing` values in `itr`. Note that even if `itr` is a multidimensional array, the result will always @@ -161,9 +164,27 @@ of the input. # Examples ```jldoctest -julia> sum(skipmissing([1, missing, 2])) +julia> x = skipmissing([1, missing, 2]) +Base.SkipMissing{Array{Union{Missing, Int64},1}}(Union{Missing, Int64}[1, missing, 2]) + +julia> sum(x) +3 + +julia> x[1] +1 + +julia> x[2] +ERROR: MissingException: the value at index (2,) is missing +[...] + +julia> argmax(x) 3 +julia> collect(keys(x)) +2-element Array{Int64,1}: + 1 + 3 + julia> collect(skipmissing([1, missing, 2])) 2-element Array{Int64,1}: 1 @@ -196,6 +217,17 @@ function iterate(itr::SkipMissing, state...) 
item, state end +IndexStyle(::Type{<:SkipMissing{T}}) where {T} = IndexStyle(T) +eachindex(itr::SkipMissing) = + Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, eachindex(itr.x)) +keys(itr::SkipMissing) = + Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, keys(itr.x)) +@propagate_inbounds function getindex(itr::SkipMissing, I...) + v = itr.x[I...] + v === missing && throw(MissingException("the value at index $I is missing")) + v +end + # Optimized mapreduce implementation # The generic method is faster when !(eltype(A) >: Missing) since it does not need # additional loops to identify the two first non-missing values of each block diff --git a/doc/src/manual/missing.md b/doc/src/manual/missing.md index 647434f4b7d25..fbe70686cbe5b 100644 --- a/doc/src/manual/missing.md +++ b/doc/src/manual/missing.md @@ -294,20 +294,52 @@ julia> sum(skipmissing([1, missing])) This convenience function returns an iterator which filters out `missing` values efficiently. It can therefore be used with any function which supports iterators -```jldoctest; setup = :(using Statistics) -julia> maximum(skipmissing([3, missing, 2, 1])) +```jldoctest skipmissing; setup = :(using Statistics) +julia> x = skipmissing([3, missing, 2, 1]) +Base.SkipMissing{Array{Union{Missing, Int64},1}}(Union{Missing, Int64}[3, missing, 2, 1]) + +julia> maximum(x) 3 -julia> mean(skipmissing([3, missing, 2, 1])) +julia> mean(x) 2.0 -julia> mapreduce(sqrt, +, skipmissing([3, missing, 2, 1])) +julia> mapreduce(sqrt, +, x) 4.146264369941973 ``` +Objects created by calling `skipmissing` on an array can be indexed using indices +from the parent array. Indices corresponding to missing values are not valid for +these objects and an error is thrown when trying to use them (they are also skipped +by `keys` and `eachindex`) +```jldoctest skipmissing +julia> x[1] +3 + +julia> x[2] +ERROR: MissingException: the value at index (2,) is missing +[...] 
+``` + +This allows functions which operate on indices to work in combination with `skipmissing`. +This is notably the case for search and find functions, which return indices that are +valid for the object returned by `skipmissing` and are also the indices of the +matching entries *in the parent array* +```jldoctest skipmissing +julia> findall(==(1), x) +1-element Array{Int64,1}: + 4 + +julia> findfirst(!iszero, x) +1 + +julia> argmax(x) +1 +``` + Use [`collect`](@ref) to extract non-`missing` values and store them in an array -```jldoctest -julia> collect(skipmissing([3, missing, 2, 1])) +```jldoctest skipmissing +julia> collect(x) 3-element Array{Int64,1}: 3 2 diff --git a/test/missing.jl b/test/missing.jl index 29a5ae3fcb562..4eef0c331b73f 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -366,6 +366,50 @@ end @test collect(x) == [1, 2, 4] @test collect(x) isa Vector{Int} + @testset "indexing" begin + x = skipmissing([1, missing, 2, missing, missing]) + @test collect(eachindex(x)) == collect(keys(x)) == [1, 3] + @test x[1] === 1 + @test x[3] === 2 + @test_throws MissingException x[2] + @test_throws BoundsError x[6] + @test findfirst(==(2), x) == 3 + @test findall(==(2), x) == [3] + @test argmin(x) == 1 + @test findmin(x) == (1, 1) + @test argmax(x) == 3 + @test findmax(x) == (2, 3) + + x = skipmissing([missing 2; 1 missing]) + @test collect(eachindex(x)) == [2, 3] + @test collect(keys(x)) == [CartesianIndex(2, 1), CartesianIndex(1, 2)] + @test x[2] === x[2, 1] === 1 + @test x[3] === x[1, 2] === 2 + @test_throws MissingException x[1] + @test_throws MissingException x[1, 1] + @test_throws BoundsError x[5] + @test_throws BoundsError x[3, 1] + @test findfirst(==(2), x) == CartesianIndex(1, 2) + @test findall(==(2), x) == [CartesianIndex(1, 2)] + @test argmin(x) == CartesianIndex(2, 1) + @test findmin(x) == (1, CartesianIndex(2, 1)) + @test argmax(x) == CartesianIndex(1, 2) + @test findmax(x) == (2, CartesianIndex(1, 2)) + + for x in (skipmissing([]), 
skipmissing([missing, missing])) + @test isempty(collect(eachindex(x))) + @test isempty(collect(keys(x))) + @test_throws BoundsError x[3] + @test_throws BoundsError x[3, 1] + @test findfirst(==(2), x) === nothing + @test isempty(findall(==(2), x)) + @test_throws ArgumentError argmin(x) + @test_throws ArgumentError findmin(x) + @test_throws ArgumentError argmax(x) + @test_throws ArgumentError findmax(x) + end + end + @testset "mapreduce" begin # Vary size to test splitting blocks with several configurations of missing values for T in (Int, Float64),