From f24bc594a1ea2921657fd7b2ec98d5772cc5dafc Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 2 Feb 2018 01:25:12 -0500 Subject: [PATCH 01/16] More thorough aliasing detection in nonscalar copy, broadcast and assignment. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit puts together the beginnings of an "interface" for detecting and avoiding aliasing between arrays before performing some mutating operation on one of them. `A′ = unalias(dest, A)` is the main entry point; it checks to see if `dest` and `A` might alias one another, and then either returns `A` or a copy of `A`. When it returns a copy of `A`, it absolutely must preserve the same type. As such, this function is designed to be specialized on `A`, typically like: Base.unalias(dest, A::T) = mightalias(dest, A) ? typeof(A)(unalias(dest, A.a), unalias(dest, A.b), ...) : A `mightalias(A, B)` is a quick and rough check to see if two arrays alias eachother. Instead of requiring every array to know about every other array type, it simply asks both arrays for their `dataids` and then looks for an empty intersection between every pair. Finally, `dataids(A)` returns a tuple of ranges that describe something about the region of memory the array occupies. It could be simply `(objectid(A):objectid(A),)` or for an array with multiple fields, `(dataids(A.a)..., dataids(A.b)..., ...)`. --- base/abstractarray.jl | 44 ++++++++++++++ base/array.jl | 16 ++--- base/broadcast.jl | 8 ++- base/multidimensional.jl | 22 +++---- base/reshapedarray.jl | 3 + base/subarray.jl | 9 ++- stdlib/SparseArrays/src/higherorderfns.jl | 8 ++- stdlib/SparseArrays/src/sparsematrix.jl | 5 ++ stdlib/SparseArrays/src/sparsevector.jl | 4 ++ stdlib/SparseArrays/test/higherorderfns.jl | 70 +++++++++++++++++++++- test/arrayops.jl | 49 ++++++++++++++- 11 files changed, 208 insertions(+), 30 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index d8df97bb1f879..734df8a7606a7 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1048,6 +1048,50 @@ function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) whe r end +## rudimentary aliasing detection ## +""" + unalias(dest, A) + +Return either A or a copy of A, in a rough effort to prevent modifications to `dest` from +affecting the returned object. No guarantees are provided, and each custom array must +opt-into aliasing detection by overloading this method by specializing on the second argument. + +This function must return an object of exactly the same type as A for performance and type-stability. + +See also `mightalias` and `dataids`. +""" +unalias(dest, A) = A +unalias(dest, A::Array) = mightalias(dest, A) ? copy(A) : A +@inline unalias(dest, As::Tuple) = (unalias(dest, As[1]), unalias(dest, tail(As))...) +unalias(dest, As::Tuple{}) = () + +""" + mightalias(A::AbstractArray, B::AbstractArray) + +Perform a rudimentary test to check if arrays A and B might share the same memory. + +Defaults to false unless `dataids` is specialized for both `A` and `B`. +""" +mightalias(A::AbstractArray, B::AbstractArray) = dataidsoverlap(dataids(A), dataids(B)) +mightalias(x, y) = false + +dataidsoverlap(a::AbstractRange, b::AbstractRange) = max(first(a),first(b)) <= min(last(a),last(b)) +dataidsoverlap(as::Tuple, bs::Tuple) = any(b->dataidsoverlap(as[1], b), bs) || dataidsoverlap(tail(as), bs) +dataidsoverlap(as::Tuple{Any}, bs::Tuple{Any}) = dataidsoverlap(as[1], bs[1]) +dataidsoverlap(as::Tuple{Any,Any}, bs::Tuple{Any}) = dataidsoverlap(as[1], bs[1]) || dataidsoverlap(as[2], bs[1]) +dataidsoverlap(as::Tuple{Any}, bs::Tuple{Any,Any}) = dataidsoverlap(as[1], bs[1]) || dataidsoverlap(as[1], bs[2]) +dataidsoverlap(as::Tuple{}, bs::Tuple) = false +dataidsoverlap(as::Tuple, bs::Tuple{}) = false +dataidsoverlap(::Tuple{}, ::Tuple{}) = false + +""" + dataids(A::AbstractArray) -> Tuple{Vararg{AbstractRange}} + +Return a tuple containing rough information about the memory region(s) the array `A` references. +""" +dataids(A::AbstractArray) = () +dataids(A::Array) = (UInt(pointer(A, 1)):UInt(pointer(A, lastindex(A))),) + ## get (getindex with a default value) ## RangeVecIntList{A<:AbstractVector{Int}} = Union{Tuple{Vararg{Union{AbstractRange, AbstractVector{Int}}}}, diff --git a/base/array.jl b/base/array.jl index 458dab4ef1356..6bb1be47e3267 100644 --- a/base/array.jl +++ b/base/array.jl @@ -693,22 +693,18 @@ function setindex! end # These are redundant with the abstract fallbacks but needed for bootstrap function setindex!(A::Array, x, I::AbstractVector{Int}) @_propagate_inbounds_meta - A === I && (I = copy(I)) - for i in I + J = unalias(A, I) + for i in J A[i] = x end return A end -function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int}) +function setindex!(A::Array, Y::AbstractArray, J::AbstractVector{Int}) @_propagate_inbounds_meta - @boundscheck setindex_shape_check(X, length(I)) + @boundscheck setindex_shape_check(Y, length(J)) + X = unalias(A, Y) + I = unalias(A, J) count = 1 - if X === A - X = copy(X) - I===A && (I = X::typeof(I)) - elseif I === A - I = copy(I) - end for i in I @inbounds x = X[count] A[i] = x diff --git a/base/broadcast.jl b/base/broadcast.jl index 6e317640e53d6..2fca203eb8a61 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -5,7 +5,7 @@ module Broadcast using Base.Cartesian using Base: Indices, OneTo, linearindices, tail, to_shape, _msk_end, unsafe_bitgetindex, bitcache_chunks, bitcache_size, dumpbitcache, - isoperator, promote_typejoin + isoperator, promote_typejoin, unalias import Base: broadcast, broadcast! export BroadcastStyle, broadcast_indices, broadcast_similar, broadcast_getindex, broadcast_setindex!, dotview, @__dot__ @@ -473,9 +473,11 @@ end end # This indirection allows size-dependent implementations. -@inline function _broadcast!(f, C, A, Bs::Vararg{Any,N}) where N +@inline function _broadcast!(f, C, _A, _Bs::Vararg{Any,N}) where N shape = broadcast_indices(C) - @boundscheck check_broadcast_indices(shape, A, Bs...) + @boundscheck check_broadcast_indices(shape, _A, _Bs...) + A = unalias(C, _A) + Bs = unalias(C, _Bs) keeps, Idefaults = map_newindexer(shape, A, Bs) iter = CartesianIndices(shape) _broadcast!(f, C, keeps, Idefaults, A, Bs, Val(N), iter) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 2576140929370..ba548396fbc7d 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -675,8 +675,8 @@ _iterable(v) = Iterators.repeated(v) @generated function _unsafe_setindex!(::IndexStyle, A::AbstractArray, x, I::Union{Real,AbstractArray}...) N = length(I) quote - X = _iterable(x) - @nexprs $N d->(I_d = I[d]) + X = _iterable(unalias(A, x)) + @nexprs $N d->(I_d = unalias(A, I[d])) idxlens = @ncall $N index_lengths I @ncall $N setindex_shape_check X (d->idxlens[d]) Xs = start(X) @@ -688,8 +688,6 @@ _iterable(v) = Iterators.repeated(v) end end -## - # see discussion in #18364 ... we try not to widen type of the resulting array # from cumsum or cumprod, but in some cases (+, Bool) we may not have a choice. rcum_promote_type(op, ::Type{T}, ::Type{S}) where {T,S<:Number} = promote_op(op, T, S) @@ -1156,8 +1154,9 @@ julia> y """ copyto!(dest, src) -function copyto!(dest::AbstractArray{T,N}, src::AbstractArray{T,N}) where {T,N} - @boundscheck checkbounds(dest, axes(src)...) +function copyto!(dest::AbstractArray{T,N}, source::AbstractArray{T,N}) where {T,N} + checkbounds(dest, axes(source)...) + src = unalias(dest, source) for I in eachindex(IndexStyle(src,dest), src) @inbounds dest[I] = src[I] end @@ -1165,15 +1164,16 @@ function copyto!(dest::AbstractArray{T,N}, src::AbstractArray{T,N}) where {T,N} end function copyto!(dest::AbstractArray{T1,N}, Rdest::CartesianIndices{N}, - src::AbstractArray{T2,N}, Rsrc::CartesianIndices{N}) where {T1,T2,N} + source::AbstractArray{T2,N}, Rsrc::CartesianIndices{N}) where {T1,T2,N} isempty(Rdest) && return dest if size(Rdest) != size(Rsrc) throw(ArgumentError("source and destination must have same size (got $(size(Rsrc)) and $(size(Rdest)))")) end - @boundscheck checkbounds(dest, first(Rdest)) - @boundscheck checkbounds(dest, last(Rdest)) - @boundscheck checkbounds(src, first(Rsrc)) - @boundscheck checkbounds(src, last(Rsrc)) + checkbounds(dest, first(Rdest)) + checkbounds(dest, last(Rdest)) + checkbounds(source, first(Rsrc)) + checkbounds(source, last(Rsrc)) + src = unalias(dest, source) ΔI = first(Rdest) - first(Rsrc) if @generated quote diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index e7749b9a97960..e496321605d15 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -180,6 +180,9 @@ parent(A::ReshapedArray) = A.parent parentindices(A::ReshapedArray) = map(s->1:s, size(parent(A))) reinterpret(::Type{T}, A::ReshapedArray, dims::Dims) where {T} = reinterpret(T, parent(A), dims) +unalias(dest, A::ReshapedArray) = mightalias(dest, A) ? typeof(A)(unalias(dest, A.parent), A.dims, A.mi) : A +dataids(A::ReshapedArray) = dataids(A.parent) + @inline ind2sub_rs(::Tuple{}, i::Int) = i @inline ind2sub_rs(strds, i) = _ind2sub_rs(strds, i - 1) @inline _ind2sub_rs(::Tuple{}, ind) = (ind + 1,) diff --git a/base/subarray.jl b/base/subarray.jl index c4c3ae0c503a8..ac64a7c03aea8 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -94,6 +94,13 @@ From an array view `A`, returns the corresponding indices in the parent. """ parentindices(a::AbstractArray) = ntuple(i->OneTo(size(a,i)), ndims(a)) +## Aliasing detection +unalias(dest, A::SubArray) = mightalias(dest, A) ? typeof(A)(unalias(dest, A.parent), unalias(dest, A.indices), A.offset1, A.stride1) : A +dataids(A::SubArray) = (dataids(A.parent)..., _indicesids(A.indices...)...) +_indicesids(x::AbstractArray, xs...) = (dataids(x)..., _indicesids(xs...)...) +_indicesids(x, xs...) = _indicesids(xs...) # Skip non-array indices +_indicesids() = () + ## SubArray creation # We always assume that the dimensionality of the parent matches the number of # indices that end up getting passed to it, so we store the parent as a @@ -135,7 +142,7 @@ julia> A # Note A has changed even though we modified b """ function view(A::AbstractArray, I::Vararg{Any,N}) where {N} @_inline_meta - J = to_indices(A, I) + J = unalias(A, to_indices(A, I)) @boundscheck checkbounds(A, J...) unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...) end diff --git a/stdlib/SparseArrays/src/higherorderfns.jl b/stdlib/SparseArrays/src/higherorderfns.jl index 4e7cc6a88656a..5e41b2028c8d2 100644 --- a/stdlib/SparseArrays/src/higherorderfns.jl +++ b/stdlib/SparseArrays/src/higherorderfns.jl @@ -1004,10 +1004,12 @@ broadcast(f, ::PromoteToSparse, ::Nothing, ::Nothing, As::Vararg{Any,N}) where { # For broadcast! with ::Any inputs, we need a layer of indirection to determine whether # the inputs can be promoted to SparseVecOrMat. If it's just SparseVecOrMat and scalars, # we can handle it here, otherwise see below for the promotion machinery. -function broadcast!(f::Tf, dest::SparseVecOrMat, ::SPVM, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N} - if f isa typeof(identity) && N == 0 && Base.axes(dest) == Base.axes(A) - return copyto!(dest, A) +function broadcast!(f::Tf, dest::SparseVecOrMat, ::SPVM, _A::SparseVecOrMat, _Bs::Vararg{SparseVecOrMat,N}) where {Tf,N} + if f isa typeof(identity) && N == 0 && Base.axes(dest) == Base.axes(_A) + return copyto!(dest, _A) end + A = Base.unalias(dest, _A) + Bs = Base.unalias(dest, _Bs) _aresameshape(dest, A, Bs...) && return _noshapecheck_map!(f, dest, A, Bs...) Base.Broadcast.check_broadcast_indices(axes(dest), A, Bs...) fofzeros = f(_zeros_eltypes(A, Bs...)...) diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index 168871503da70..0f28e21c9c5a4 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -260,6 +260,11 @@ function copy(ra::ReshapedArray{<:Any,2,<:SparseMatrixCSC}) return SparseMatrixCSC(mS, nS, colptr, rowval, nzval) end +## Alias detection +using Base: dataids, unalias, mightalias +@inline Base.dataids(S::SparseMatrixCSC) = (dataids(S.colptr)..., dataids(S.rowval)..., dataids(S.nzval)...) +Base.unalias(dest, S::SparseMatrixCSC) = mightalias(dest, S) ? typeof(S)(S.m, S.n, unalias(dest, S.colptr), unalias(dest, S.rowval), unalias(dest, S.nzval)) : S + ## Constructors copy(S::SparseMatrixCSC) = diff --git a/stdlib/SparseArrays/src/sparsevector.jl b/stdlib/SparseArrays/src/sparsevector.jl index fae9d203cb770..845855a19f227 100644 --- a/stdlib/SparseArrays/src/sparsevector.jl +++ b/stdlib/SparseArrays/src/sparsevector.jl @@ -92,6 +92,10 @@ similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer) where {TvNew, similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer, n::Integer) where {TvNew,TiNew} = _sparsesimilar(S, TvNew, TiNew, (m, n)) +## Alias detection +using Base: dataids, unalias, mightalias +@inline Base.dataids(S::SparseVector) = (dataids(S.nzind)..., dataids(S.nzval)...) +Base.unalias(dest, S::SparseVector)::typeof(S) = mightalias(dest, S) ? typeof(S)(S.n, unalias(dest, S.nzind), unalias(dest, S.nzval)) : S ### Construct empty sparse vector diff --git a/stdlib/SparseArrays/test/higherorderfns.jl b/stdlib/SparseArrays/test/higherorderfns.jl index 536eddb48e657..fdbb120519de9 100644 --- a/stdlib/SparseArrays/test/higherorderfns.jl +++ b/stdlib/SparseArrays/test/higherorderfns.jl @@ -554,4 +554,72 @@ end @test spzeros(1,2) .* spzeros(0,1) == zeros(0,2) end -end # module \ No newline at end of file +@testset "aliasing and indexed assignment or broadcast!" begin + A = sparsevec([0, 0, 1, 1]) + B = sparsevec([1, 1, 0, 0]) + A .+= B + @test A == sparse([1,1,1,1]) + + A = sprandn(10, 10, 0.1) + fA = Array(A) + b = randn(10); + broadcast!(/, A, A, b) + @test A == fA ./ Array(b) + + a = sparse([1,3,5]) + b = sparse([3,1,2]) + a[b] = a + @test a == [3,5,1] + a = sparse([3,2,1]) + a[a] = [4,5,6] + @test a == [6,5,4] + + A = sparse([1,2,3,4]) + V = view(A, A) + @test V == A + V[1] = 2 + @test V == A == [2,2,3,4] + V[1] = 2^30 + @test V == A == [2^30, 2, 3, 4] + + A = sparse([2,1,4,3]) + V = view(A, :) + A[V] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = sparse([2,1,4,3]) + R = reshape(view(A, :), 2, 2) + A[R] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = sparse([2,1,4,3]) + R = reshape(A, 2, 2) + A[R] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + # And broadcasting + a = sparse([1,3,5]) + b = sparse([3,1,2]) + a[b] .= a + @test a == [3,5,1] + a = sparse([3,2,1]) + a[a] .= [4,5,6] + @test a == [6,5,4] + + A = sparse([2,1,4,3]) + V = view(A, :) + A[V] .= (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = sparse([2,1,4,3]) + R = reshape(view(A, :), 2, 2) + A[R] .= reshape((1:4) .+ 2^30, 2, 2) + @test A == [2,1,4,3] .+ 2^30 + + A = sparse([2,1,4,3]) + R = reshape(A, 2, 2) + A[R] .= reshape((1:4) .+ 2^30, 2, 2) + @test A == [2,1,4,3] .+ 2^30 +end + +end # module diff --git a/test/arrayops.jl b/test/arrayops.jl index 59addb462c6a8..a1f9c21140741 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -1060,7 +1060,7 @@ end @test a == [8,3,8] end -@testset "assigning an array into itself" begin +@testset "assigning an array into itself and other aliasing issues" begin a = [1,3,5] b = [3,1,2] a[b] = a @@ -1068,6 +1068,53 @@ end a = [3,2,1] a[a] = [4,5,6] @test a == [6,5,4] + + A = [1,2,3,4] + V = view(A, A) + @test V == A + V[1] = 2 + @test V == A == [2,2,3,4] + V[1] = 2^30 + @test V == A == [2^30, 2, 3, 4] + + A = [2,1,4,3] + V = view(A, :) + A[V] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = [2,1,4,3] + R = reshape(view(A, :), 2, 2) + A[R] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = [2,1,4,3] + R = reshape(A, 2, 2) + A[R] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + # And broadcasting + a = [1,3,5] + b = [3,1,2] + a[b] .= a + @test a == [3,5,1] + a = [3,2,1] + a[a] .= [4,5,6] + @test a == [6,5,4] + + A = [2,1,4,3] + V = view(A, :) + A[V] .= (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = [2,1,4,3] + R = reshape(view(A, :), 2, 2) + A[R] .= reshape((1:4) .+ 2^30, 2, 2) + @test A == [2,1,4,3] .+ 2^30 + + A = [2,1,4,3] + R = reshape(A, 2, 2) + A[R] .= reshape((1:4) .+ 2^30, 2, 2) + @test A == [2,1,4,3] .+ 2^30 end @testset "lexicographic comparison" begin From 1e8ac8f1efc441c4c8a17daeb3a34567b5ec4262 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 6 Feb 2018 12:54:17 -0500 Subject: [PATCH 02/16] Skip aliasing checks in one-argument broadcast! --- base/broadcast.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/base/broadcast.jl b/base/broadcast.jl index 2fca203eb8a61..33e72dbef9a98 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -484,6 +484,16 @@ end return C end +# In the one-argument case, we don't need to worry about aliasing as we only make one pass +@inline function _broadcast!(f, C, A) + shape = broadcast_indices(C) + @boundscheck check_broadcast_indices(shape, A) + keeps, Idefaults = map_newindexer(shape, A, ()) + iter = CartesianIndices(shape) + _broadcast!(f, C, keeps, Idefaults, A, (), Val(0), iter) + return C +end + # broadcast with element type adjusted on-the-fly. This widens the element type of # B as needed (allocating a new container and copying previously-computed values) to # accommodate any incompatible new elements. From 901514b9142813261116ad0a6949cd50c5957b37 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 6 Feb 2018 12:56:34 -0500 Subject: [PATCH 03/16] Fixup docstrings from review --- base/abstractarray.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 734df8a7606a7..1df52078c4efa 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1052,13 +1052,13 @@ end """ unalias(dest, A) -Return either A or a copy of A, in a rough effort to prevent modifications to `dest` from +Return either `A` or a copy of `A`, in a rough effort to prevent modifications to `dest` from affecting the returned object. No guarantees are provided, and each custom array must opt-into aliasing detection by overloading this method by specializing on the second argument. -This function must return an object of exactly the same type as A for performance and type-stability. +This function must return an object of exactly the same type as `A` for performance and type-stability. -See also `mightalias` and `dataids`. +See also [`mightalias`](@ref) and [`dataids`](@ref). """ unalias(dest, A) = A unalias(dest, A::Array) = mightalias(dest, A) ? copy(A) : A @@ -1068,7 +1068,7 @@ unalias(dest, As::Tuple{}) = () """ mightalias(A::AbstractArray, B::AbstractArray) -Perform a rudimentary test to check if arrays A and B might share the same memory. +Perform a rudimentary test to check if arrays `A` and `B` might share the same memory. Defaults to false unless `dataids` is specialized for both `A` and `B`. """ From 534bf915a55c347776bb4ff9911bea4b22fbd23a Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 6 Feb 2018 13:18:44 -0500 Subject: [PATCH 04/16] Add support for dealiasing Adjoint and Transpose And fixup the assumption that `broadcast!(f, C, A)` is safe --- base/broadcast.jl | 5 ++++- stdlib/LinearAlgebra/src/adjtrans.jl | 4 ++++ stdlib/LinearAlgebra/test/adjtrans.jl | 13 +++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/base/broadcast.jl b/base/broadcast.jl index 33e72dbef9a98..752017612a883 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -484,10 +484,13 @@ end return C end -# In the one-argument case, we don't need to worry about aliasing as we only make one pass +# In the one-argument case, we can avoid de-aliasing `A` from `C` if +# `A === C`. Otherwise `A` might be something like `transpose(C)` or +# another such re-ordering that won't iterate the two safely. @inline function _broadcast!(f, C, A) shape = broadcast_indices(C) @boundscheck check_broadcast_indices(shape, A) + A !== C && (A = unalias(C, A)) keeps, Idefaults = map_newindexer(shape, A, ()) iter = CartesianIndices(shape) _broadcast!(f, C, keeps, Idefaults, A, (), Val(0), iter) diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index 45063f983268a..1d428f4a9b597 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -47,6 +47,10 @@ end Adjoint(A) = Adjoint{Base.promote_op(adjoint,eltype(A)),typeof(A)}(A) Transpose(A) = Transpose{Base.promote_op(transpose,eltype(A)),typeof(A)}(A) +using Base: unalias, mightalias, dataids +Base.unalias(dest, A::Union{Adjoint,Transpose}) = mightalias(dest, A) ? typeof(A)(unalias(dest, A.parent)) : A +Base.dataids(A::Union{Adjoint,Transpose}) = dataids(A.parent) + # wrapping lowercase quasi-constructors """ adjoint(A) diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl index 0d5f3362c4ee5..9dd5b068a5dcd 100644 --- a/stdlib/LinearAlgebra/test/adjtrans.jl +++ b/stdlib/LinearAlgebra/test/adjtrans.jl @@ -447,4 +447,17 @@ end @test adjoint!(b, a) === b end +@testset "aliasing with adjoint and transpose" begin + A = collect(reshape(1:25, 5, 5)) .+ rand.().*im + B = copy(A) + B .= B' + @test B == A' + B = copy(A) + B .= transpose(B) + @test B == transpose(A) + B = copy(A) + B .= B .* B' + @test B == A .* A' +end + end # module TestAdjointTranspose From 0fde1371cc96fd9889c94042490b894b1874f246 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Mon, 12 Feb 2018 23:19:18 -0600 Subject: [PATCH 05/16] Refactor API to use `map(x->unalias(dest, x), xs)` instead of... a builtin mapper method. Using varargs would require de-structuring the one arg case: `(a,) = unalias(dest, a)`. --- base/abstractarray.jl | 2 -- base/array.jl | 16 ++++++++-------- base/broadcast.jl | 12 ++++++------ base/multidimensional.jl | 30 +++++++++++++++--------------- base/subarray.jl | 2 +- 5 files changed, 30 insertions(+), 32 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 1df52078c4efa..f1b91cb8e0035 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1062,8 +1062,6 @@ See also [`mightalias`](@ref) and [`dataids`](@ref). """ unalias(dest, A) = A unalias(dest, A::Array) = mightalias(dest, A) ? copy(A) : A -@inline unalias(dest, As::Tuple) = (unalias(dest, As[1]), unalias(dest, tail(As))...) -unalias(dest, As::Tuple{}) = () """ mightalias(A::AbstractArray, B::AbstractArray) diff --git a/base/array.jl b/base/array.jl index 6bb1be47e3267..96a2c9f4196b4 100644 --- a/base/array.jl +++ b/base/array.jl @@ -693,20 +693,20 @@ function setindex! end # These are redundant with the abstract fallbacks but needed for bootstrap function setindex!(A::Array, x, I::AbstractVector{Int}) @_propagate_inbounds_meta - J = unalias(A, I) - for i in J + I′ = unalias(A, I) + for i in I′ A[i] = x end return A end -function setindex!(A::Array, Y::AbstractArray, J::AbstractVector{Int}) +function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int}) @_propagate_inbounds_meta - @boundscheck setindex_shape_check(Y, length(J)) - X = unalias(A, Y) - I = unalias(A, J) + @boundscheck setindex_shape_check(X, length(I)) + X′ = unalias(A, X) + I′ = unalias(A, I) count = 1 - for i in I - @inbounds x = X[count] + for i in I′ + @inbounds x = X′[count] A[i] = x count += 1 end diff --git a/base/broadcast.jl b/base/broadcast.jl index 752017612a883..7f3e70f99c69b 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -473,14 +473,14 @@ end end # This indirection allows size-dependent implementations. -@inline function _broadcast!(f, C, _A, _Bs::Vararg{Any,N}) where N +@inline function _broadcast!(f, C, A, Bs::Vararg{Any,N}) where N shape = broadcast_indices(C) - @boundscheck check_broadcast_indices(shape, _A, _Bs...) - A = unalias(C, _A) - Bs = unalias(C, _Bs) - keeps, Idefaults = map_newindexer(shape, A, Bs) + @boundscheck check_broadcast_indices(shape, A, Bs...) + A′ = unalias(C, A) + Bs′ = map(B->unalias(C, B), Bs) + keeps, Idefaults = map_newindexer(shape, A′, Bs′) iter = CartesianIndices(shape) - _broadcast!(f, C, keeps, Idefaults, A, Bs, Val(N), iter) + _broadcast!(f, C, keeps, Idefaults, A′, Bs′, Val(N), iter) return C end diff --git a/base/multidimensional.jl b/base/multidimensional.jl index ba548396fbc7d..513a6b19676f8 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -675,13 +675,13 @@ _iterable(v) = Iterators.repeated(v) @generated function _unsafe_setindex!(::IndexStyle, A::AbstractArray, x, I::Union{Real,AbstractArray}...) N = length(I) quote - X = _iterable(unalias(A, x)) + x′ = _iterable(unalias(A, x)) @nexprs $N d->(I_d = unalias(A, I[d])) idxlens = @ncall $N index_lengths I - @ncall $N setindex_shape_check X (d->idxlens[d]) - Xs = start(X) + @ncall $N setindex_shape_check x′ (d->idxlens[d]) + xs = start(x′) @inbounds @nloops $N i d->I_d begin - v, Xs = next(X, Xs) + v, xs = next(x′, xs) @ncall $N setindex! A v i end A @@ -1154,36 +1154,36 @@ julia> y """ copyto!(dest, src) -function copyto!(dest::AbstractArray{T,N}, source::AbstractArray{T,N}) where {T,N} - checkbounds(dest, axes(source)...) - src = unalias(dest, source) - for I in eachindex(IndexStyle(src,dest), src) - @inbounds dest[I] = src[I] +function copyto!(dest::AbstractArray{T,N}, src::AbstractArray{T,N}) where {T,N} + checkbounds(dest, axes(src)...) + src′ = unalias(dest, src) + for I in eachindex(IndexStyle(src′,dest), src′) + @inbounds dest[I] = src′[I] end dest end function copyto!(dest::AbstractArray{T1,N}, Rdest::CartesianIndices{N}, - source::AbstractArray{T2,N}, Rsrc::CartesianIndices{N}) where {T1,T2,N} + src::AbstractArray{T2,N}, Rsrc::CartesianIndices{N}) where {T1,T2,N} isempty(Rdest) && return dest if size(Rdest) != size(Rsrc) throw(ArgumentError("source and destination must have same size (got $(size(Rsrc)) and $(size(Rdest)))")) end checkbounds(dest, first(Rdest)) checkbounds(dest, last(Rdest)) - checkbounds(source, first(Rsrc)) - checkbounds(source, last(Rsrc)) - src = unalias(dest, source) + checkbounds(src, first(Rsrc)) + checkbounds(src, last(Rsrc)) + src′ = unalias(dest, src) ΔI = first(Rdest) - first(Rsrc) if @generated quote @nloops $N i (n->Rsrc.indices[n]) begin - @inbounds @nref($N,dest,n->i_n+ΔI[n]) = @nref($N,src,i) + @inbounds @nref($N,dest,n->i_n+ΔI[n]) = @nref($N,src′,i) end end else for I in Rsrc - @inbounds dest[I + ΔI] = src[I] + @inbounds dest[I + ΔI] = src′[I] end end dest diff --git a/base/subarray.jl b/base/subarray.jl index ac64a7c03aea8..b4922bdbb50df 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -142,7 +142,7 @@ julia> A # Note A has changed even though we modified b """ function view(A::AbstractArray, I::Vararg{Any,N}) where {N} @_inline_meta - J = unalias(A, to_indices(A, I)) + J = map(i->unalias(A,i), to_indices(A, I)) @boundscheck checkbounds(A, J...) unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...) end From fa809cbaef7447fc86bb326ac9c04a600f342daa Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 13 Feb 2018 11:53:52 -0600 Subject: [PATCH 06/16] fixup! missing sparse transition to new API --- stdlib/SparseArrays/src/higherorderfns.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/stdlib/SparseArrays/src/higherorderfns.jl b/stdlib/SparseArrays/src/higherorderfns.jl index 5e41b2028c8d2..abbbd9dfb92df 100644 --- a/stdlib/SparseArrays/src/higherorderfns.jl +++ b/stdlib/SparseArrays/src/higherorderfns.jl @@ -1004,18 +1004,18 @@ broadcast(f, ::PromoteToSparse, ::Nothing, ::Nothing, As::Vararg{Any,N}) where { # For broadcast! with ::Any inputs, we need a layer of indirection to determine whether # the inputs can be promoted to SparseVecOrMat. If it's just SparseVecOrMat and scalars, # we can handle it here, otherwise see below for the promotion machinery. -function broadcast!(f::Tf, dest::SparseVecOrMat, ::SPVM, _A::SparseVecOrMat, _Bs::Vararg{SparseVecOrMat,N}) where {Tf,N} - if f isa typeof(identity) && N == 0 && Base.axes(dest) == Base.axes(_A) - return copyto!(dest, _A) +function broadcast!(f::Tf, dest::SparseVecOrMat, ::SPVM, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N} + if f isa typeof(identity) && N == 0 && Base.axes(dest) == Base.axes(A) + return copyto!(dest, A) end - A = Base.unalias(dest, _A) - Bs = Base.unalias(dest, _Bs) - _aresameshape(dest, A, Bs...) && return _noshapecheck_map!(f, dest, A, Bs...) - Base.Broadcast.check_broadcast_indices(axes(dest), A, Bs...) - fofzeros = f(_zeros_eltypes(A, Bs...)...) + A′ = Base.unalias(dest, A) + Bs′ = map(B->Base.unalias(dest, B), Bs) + _aresameshape(dest, A′, Bs′...) && return _noshapecheck_map!(f, dest, A′, Bs′...) + Base.Broadcast.check_broadcast_indices(axes(dest), A′, Bs′...) + fofzeros = f(_zeros_eltypes(A′, Bs′...)...) fpreszeros = _iszero(fofzeros) - fpreszeros ? _broadcast_zeropres!(f, dest, A, Bs...) : - _broadcast_notzeropres!(f, fofzeros, dest, A, Bs...) + fpreszeros ? _broadcast_zeropres!(f, dest, A′, Bs′...) : + _broadcast_notzeropres!(f, fofzeros, dest, A′, Bs′...) return dest end function broadcast!(f::Tf, dest::SparseVecOrMat, ::SPVM, mixedsrcargs::Vararg{Any,N}) where {Tf,N} From eeebbe77ba73ce47a4972575c32dfe62144bb9a4 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 13 Feb 2018 14:56:39 -0600 Subject: [PATCH 07/16] Add support for BitArray --- base/bitarray.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/bitarray.jl b/base/bitarray.jl index e8b2aaa3b0008..1ef0096f8d4e9 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -79,6 +79,9 @@ isassigned(B::BitArray, i::Int) = 1 <= i <= length(B) IndexStyle(::Type{<:BitArray}) = IndexLinear() +dataids(B::BitArray) = (objectid(B):objectid(B),) +unalias(dest, B::BitArray) = mightalias(dest, B) ? copy(B) : B + ## aux functions ## const _msk64 = ~UInt64(0) From 2254dc5f85ff898bb88eec247cb42e80779e0404 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Wed, 14 Feb 2018 12:33:57 -0600 Subject: [PATCH 08/16] Extend broadcast's `===` aliasing exemption to varargs --- base/broadcast.jl | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/base/broadcast.jl b/base/broadcast.jl index 7f3e70f99c69b..25c7b9a73577f 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -472,31 +472,26 @@ end return dest end +# For broadcasted assignments like `broadcast!(f, A, ..., A, ...)`, where `A` +# appears on both the LHS and the RHS of the `.=`, then we know we're only +# going to make one pass through the array, and even though `A` is aliasing +# against itself, the mutations won't affect the result as the indices on the +# LHS and RHS will always match. This is not true in general, but with the `.op=` +# syntax it's fairly common for an argument to be `===` a source. +broadcast_unalias(dest, src) = dest === src ? src : unalias(dest, src) + # This indirection allows size-dependent implementations. @inline function _broadcast!(f, C, A, Bs::Vararg{Any,N}) where N shape = broadcast_indices(C) @boundscheck check_broadcast_indices(shape, A, Bs...) - A′ = unalias(C, A) - Bs′ = map(B->unalias(C, B), Bs) + A′ = broadcast_unalias(C, A) + Bs′ = map(B->broadcast_unalias(C, B), Bs) keeps, Idefaults = map_newindexer(shape, A′, Bs′) iter = CartesianIndices(shape) _broadcast!(f, C, keeps, Idefaults, A′, Bs′, Val(N), iter) return C end -# In the one-argument case, we can avoid de-aliasing `A` from `C` if -# `A === C`. Otherwise `A` might be something like `transpose(C)` or -# another such re-ordering that won't iterate the two safely. -@inline function _broadcast!(f, C, A) - shape = broadcast_indices(C) - @boundscheck check_broadcast_indices(shape, A) - A !== C && (A = unalias(C, A)) - keeps, Idefaults = map_newindexer(shape, A, ()) - iter = CartesianIndices(shape) - _broadcast!(f, C, keeps, Idefaults, A, (), Val(0), iter) - return C -end - # broadcast with element type adjusted on-the-fly. This widens the element type of # B as needed (allocating a new container and copying previously-computed values) to # accommodate any incompatible new elements. From d97d0c3925fdf6b3ae275db44395d6d79e00e083 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Wed, 14 Feb 2018 13:20:48 -0600 Subject: [PATCH 09/16] Add default StridedArray dataids definition and constrain StridedSubArray... to only look at the linear data segment of its parent that it can reach --- base/multidimensional.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 513a6b19676f8..ca9268fd9dd40 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -1085,6 +1085,10 @@ end ### from abstractarray.jl +_increasingrange(a, b) = min(a,b):max(a,b) +dataids(A::StridedArray) = (_increasingrange(UInt(pointer(A, firstindex(A))), UInt(pointer(A, lastindex(A)))),) +dataids(A::StridedSubArray) = (_increasingrange(UInt(pointer(A, firstindex(A))), UInt(pointer(A, lastindex(A)))), _indicesids(A.indices...)...) + """ fill!(A, x) From 5410652e9c586dece798785786e2727652cd986d Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 15 Feb 2018 11:26:12 -0600 Subject: [PATCH 10/16] More refined aliasing detection for views into the same parent And a more accurate StridedArray dataids definition. --- base/abstractarray.jl | 2 +- base/multidimensional.jl | 73 ++++++++++++++++++++++++++++++++++++++-- test/arrayops.jl | 23 +++++++++++++ 3 files changed, 94 insertions(+), 4 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index f1b91cb8e0035..889d229f88a14 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1088,7 +1088,7 @@ dataidsoverlap(::Tuple{}, ::Tuple{}) = false Return a tuple containing rough information about the memory region(s) the array `A` references. """ dataids(A::AbstractArray) = () -dataids(A::Array) = (UInt(pointer(A, 1)):UInt(pointer(A, lastindex(A))),) +dataids(A::Array) = (UInt(pointer(A, 1)):UInt(pointer(A, lastindex(A)))+elsize(A)-1,) ## get (getindex with a default value) ## diff --git a/base/multidimensional.jl b/base/multidimensional.jl index ca9268fd9dd40..82b399b1fbd25 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -1085,9 +1085,76 @@ end ### from abstractarray.jl -_increasingrange(a, b) = min(a,b):max(a,b) -dataids(A::StridedArray) = (_increasingrange(UInt(pointer(A, firstindex(A))), UInt(pointer(A, lastindex(A)))),) -dataids(A::StridedSubArray) = (_increasingrange(UInt(pointer(A, firstindex(A))), UInt(pointer(A, lastindex(A)))), _indicesids(A.indices...)...) +# Computes a range describing a strided array's location in memory for aliasing checks +@inline function _memory_extents(A::StridedArray) + p = UInt(pointer(A, firstindex(A))) + return _memory_extents(p, p, size(A), strides(A), elsize(A)) +end +_memory_extents(lower, upper, ::Tuple{}, ::Tuple{}, elsize) = lower:upper+elsize-1 +_memory_extents(lower, upper, ::Tuple{}, ::Tuple, elsize) = error("broken AbstractArray implementation: lengths of `size` and `strides` must match") +_memory_extents(lower, upper, ::Tuple, ::Tuple{}, elsize) = error("broken AbstractArray implementation: lengths of `size` and `strides` must match") +@inline function _memory_extents(lower, upper, size::Tuple, strides::Tuple, elsize) + size[1] == 0 && return lower:lower-1 # Empty array -> empty memory extents + offset = (size[1]-1) * strides[1] * elsize + if offset > 0 + upper += offset + else + lower += offset + end + return _memory_extents(lower, upper, tail(size), tail(strides), elsize) +end +# Quicker answers for some known types +_memory_extents(A::Array) = UInt(pointer(A, firstindex(A))):UInt(pointer(A, lastindex(A)))+elsize(A)-1 +_memory_extents(A::ReshapedArray) = _memory_extents(A.parent) + +dataids(A::StridedArray) = (_memory_extents(A),) +dataids(A::StridedSubArray) = (_memory_extents(A), _indicesids(A.indices...)...) + +# In the common case where we have two views into the same parent, aliasing checks +# are _much_ easier and more important to get right +function mightalias(A::SubArray{T,<:Any,P}, B::SubArray{T,<:Any,P}) where {T,P} + if A.parent !== B.parent + # We cannot do any better than the usual dataids check + return dataidsoverlap(dataids(A), dataids(B)) + end + # Now we know that A.parent === B.parent. This means that the indices of A + # and B are the same length and indexing into the same dimensions. We can + # just walk through them and check for overlaps: O(ndims(A)). We must finally + # ensure that A isn't used as an index into B + return _indicesmightoverlap(A.indices, B.indices) || dataidsoverlap(dataids(A), _indicesids(B.indices...)) +end + +_indicesmightoverlap(A::Tuple{}, B::Tuple{}) = true +_indicesmightoverlap(A::Tuple{}, B::Tuple) = error("malformed subarray") +_indicesmightoverlap(A::Tuple, B::Tuple{}) = error("malformed subarray") +# For ranges, it's relatively cheap to construct the intersection +@inline function _indicesmightoverlap(A::Tuple{AbstractRange, Vararg{Any}}, B::Tuple{AbstractRange, Vararg{Any}}) + !isempty(intersect(A[1], B[1])) ? _indicesmightoverlap(tail(A), tail(B)) : false +end +# But in the common AbstractUnitRange case, there's an even faster shortcut +@inline function _indicesmightoverlap(A::Tuple{AbstractUnitRange, Vararg{Any}}, B::Tuple{AbstractUnitRange, Vararg{Any}}) + max(first(A[1]),first(B[1])) <= min(last(A[1]),last(B[1])) ? _indicesmightoverlap(tail(A), tail(B)) : false +end +# And we can check scalars against eachother and scalars against arrays quite easily +@inline _indicesmightoverlap(A::Tuple{Real, Vararg{Any}}, B::Tuple{Real, Vararg{Any}}) = + A[1] == B[1] ? _indicesmightoverlap(tail(A), tail(B)) : false +@inline _indicesmightoverlap(A::Tuple{Real, Vararg{Any}}, B::Tuple{AbstractArray, Vararg{Any}}) = + A[1] in B[1] ? _indicesmightoverlap(tail(A), tail(B)) : false +@inline _indicesmightoverlap(A::Tuple{AbstractArray, Vararg{Any}}, B::Tuple{Real, Vararg{Any}}) = + B[1] in A[1] ? _indicesmightoverlap(tail(A), tail(B)) : false +# And small arrays are quick, too +@inline function _indicesmightoverlap(A::Tuple{AbstractArray, Vararg{Any}}, B::Tuple{AbstractArray, Vararg{Any}}) + if length(A[1]) == 1 + return A[1][1] in B[1] ? _indicesmightoverlap(tail(A), tail(B)) : false + elseif length(B[1]) == 1 + return B[1][1] in A[1] ? _indicesmightoverlap(tail(A), tail(B)) : false + else + # But checking larger arrays requires constructing a Set and is too much work + return true + end +end +# And in general, checking the intersection is too much work +_indicesmightoverlap(A::Tuple{Any, Vararg{Any}}, B::Tuple{Any, Vararg{Any}}) = true """ fill!(A, x) diff --git a/test/arrayops.jl b/test/arrayops.jl index a1f9c21140741..3c9a35b67d7ba 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -1117,6 +1117,29 @@ end @test A == [2,1,4,3] .+ 2^30 end +@testset "Base.mightalias unit tests" begin + using Base: mightalias + A = rand(5,4) + @test @views mightalias(A[:], A[:]) + @test @views mightalias(A[:,:], A[:,:]) + @test @views mightalias(A[1:2,1:2], A[1:2,1:2]) + @test @views !mightalias(A[3:4,1:2], A[1:2,:]) + @test @views mightalias(A[3,1:1], A) + @test @views mightalias(A[3,1:1], A[:]) + @test @views mightalias(A[3,1:1], A[:,:]) + @test @views mightalias(A, A[3,1:1]) + @test @views mightalias(A[:], A[3,1:1]) + @test @views mightalias(A[:,:], A[3,1:1]) + + B = reshape(A,10,2) + @test mightalias(A, A) + @test mightalias(A, B) + @test mightalias(B, A) + @test @views mightalias(B[:], A[:]) + @test @views mightalias(B[1:2], A[1:2]) + @test @views !mightalias(B[1:end÷2], A[end÷2+1:end]) +end + @testset "lexicographic comparison" begin @test cmp([1.0], [1]) == 0 @test cmp([1], [1.0]) == 0 From c451ecc13629a5ca225cd7700a8169e9777afbd7 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 15 Feb 2018 15:17:52 -0600 Subject: [PATCH 11/16] fix unalias support for empty arrays --- base/abstractarray.jl | 4 +++- base/multidimensional.jl | 2 +- test/arrayops.jl | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 889d229f88a14..b9f7f5f906fa5 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1088,7 +1088,9 @@ dataidsoverlap(::Tuple{}, ::Tuple{}) = false Return a tuple containing rough information about the memory region(s) the array `A` references. """ dataids(A::AbstractArray) = () -dataids(A::Array) = (UInt(pointer(A, 1)):UInt(pointer(A, lastindex(A)))+elsize(A)-1,) +_memory_extents(A::Array) = isempty(A) ? (UInt(1):UInt(0)) : + (UInt(pointer(A, firstindex(A))):UInt(pointer(A, lastindex(A)))+elsize(A)-1) +dataids(A::Array) = (_memory_extents(A),) ## get (getindex with a default value) ## diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 82b399b1fbd25..aa9b67600b3e4 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -1087,6 +1087,7 @@ end # Computes a range describing a strided array's location in memory for aliasing checks @inline function _memory_extents(A::StridedArray) + isempty(A) && return UInt(1):UInt(0) p = UInt(pointer(A, firstindex(A))) return _memory_extents(p, p, size(A), strides(A), elsize(A)) end @@ -1104,7 +1105,6 @@ _memory_extents(lower, upper, ::Tuple, ::Tuple{}, elsize) = error("broken Abstra return _memory_extents(lower, upper, tail(size), tail(strides), elsize) end # Quicker answers for some known types -_memory_extents(A::Array) = UInt(pointer(A, firstindex(A))):UInt(pointer(A, lastindex(A)))+elsize(A)-1 _memory_extents(A::ReshapedArray) = _memory_extents(A.parent) dataids(A::StridedArray) = (_memory_extents(A),) diff --git a/test/arrayops.jl b/test/arrayops.jl index 3c9a35b67d7ba..6fe522a6f4cf9 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -1130,6 +1130,8 @@ end @test @views mightalias(A, A[3,1:1]) @test @views mightalias(A[:], A[3,1:1]) @test @views mightalias(A[:,:], A[3,1:1]) + @test @views !mightalias(A, A[1:0]) + @test @views !mightalias(A[:], A[1:0]) B = reshape(A,10,2) @test mightalias(A, A) From dd1588e015a5a22589bf1561a03b696777725f51 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 16 Feb 2018 17:46:13 -0600 Subject: [PATCH 12/16] Major simplification: do not worry about array extents By defining `mightalias(::SubArray, ::SubArray)` we cover the vast majority of cases where extents are important. SubArrays are the basically only array in the standard library that "restrict" views into parents; all other wrappers largely re-shuffle the data. `dataids(::AbstractArray)` now just returns a tuple of `UInt`s that describe the identities of the mutable "blocks" that the array references. By default, this is just `(objectid(A),)`, allowing all custom arrays to detect situations with two `===` arrays. Anything more complicated requires them to define their component parts with a custom `dataids` method definition. --- base/abstractarray.jl | 74 +++++++++++++++++------ base/bitarray.jl | 3 +- base/multidimensional.jl | 40 +++--------- base/reinterpretarray.jl | 1 + base/reshapedarray.jl | 2 +- base/subarray.jl | 46 +++++--------- stdlib/LinearAlgebra/src/adjtrans.jl | 5 +- stdlib/Serialization/src/Serialization.jl | 25 ++------ stdlib/SparseArrays/src/sparsematrix.jl | 8 +-- stdlib/SparseArrays/src/sparsevector.jl | 8 +-- test/arrayops.jl | 7 ++- 11 files changed, 103 insertions(+), 116 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index b9f7f5f906fa5..187d2ab724b4e 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1048,6 +1048,32 @@ function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) whe r end +""" + parent(A) + +Returns the "parent array" of an array view type (e.g., `SubArray`), or the array itself if +it is not a view. + +# Examples +```jldoctest +julia> A = [1 2; 3 4] +2×2 Array{Int64,2}: + 1 2 + 3 4 + +julia> V = view(A, 1:2, :) +2×2 view(::Array{Int64,2}, 1:2, :) with eltype Int64: + 1 2 + 3 4 + +julia> parent(V) +2×2 Array{Int64,2}: + 1 2 + 3 4 +``` +""" +parent(a::AbstractArray) = a + ## rudimentary aliasing detection ## """ unalias(dest, A) @@ -1060,37 +1086,47 @@ This function must return an object of exactly the same type as `A` for performa See also [`mightalias`](@ref) and [`dataids`](@ref). """ -unalias(dest, A) = A -unalias(dest, A::Array) = mightalias(dest, A) ? copy(A) : A +unalias(dest, A) = mightalias(dest, A) ? copypreservingtype(A) : A + +copypreservingtype(A::Array) = copy(A) +copypreservingtype(A::AbstractArray) = (@_noinline_meta; deepcopy(A)::typeof(A)) +copypreservingtype(A) = A """ mightalias(A::AbstractArray, B::AbstractArray) -Perform a rudimentary test to check if arrays `A` and `B` might share the same memory. +Perform a conservative and rudimentary test to check if arrays `A` and `B` might share the same memory. -Defaults to false unless `dataids` is specialized for both `A` and `B`. +By default, this simply checks if either of the arrays reference the same memory +regions, as identified by their [`dataids`](@ref). """ -mightalias(A::AbstractArray, B::AbstractArray) = dataidsoverlap(dataids(A), dataids(B)) +mightalias(A::AbstractArray, B::AbstractArray) = !_isdisjoint(dataids(A), dataids(B)) mightalias(x, y) = false -dataidsoverlap(a::AbstractRange, b::AbstractRange) = max(first(a),first(b)) <= min(last(a),last(b)) -dataidsoverlap(as::Tuple, bs::Tuple) = any(b->dataidsoverlap(as[1], b), bs) || dataidsoverlap(tail(as), bs) -dataidsoverlap(as::Tuple{Any}, bs::Tuple{Any}) = dataidsoverlap(as[1], bs[1]) -dataidsoverlap(as::Tuple{Any,Any}, bs::Tuple{Any}) = dataidsoverlap(as[1], bs[1]) || dataidsoverlap(as[2], bs[1]) -dataidsoverlap(as::Tuple{Any}, bs::Tuple{Any,Any}) = dataidsoverlap(as[1], bs[1]) || dataidsoverlap(as[1], bs[2]) -dataidsoverlap(as::Tuple{}, bs::Tuple) = false -dataidsoverlap(as::Tuple, bs::Tuple{}) = false -dataidsoverlap(::Tuple{}, ::Tuple{}) = false +_isdisjoint(as::Tuple{}, bs::Tuple{}) = true +_isdisjoint(as::Tuple{}, bs::Tuple{Any}) = true +_isdisjoint(as::Tuple{}, bs::Tuple) = true +_isdisjoint(as::Tuple{Any}, bs::Tuple{}) = true +_isdisjoint(as::Tuple{Any}, bs::Tuple{Any}) = as[1] != bs[1] +_isdisjoint(as::Tuple{Any}, bs::Tuple) = !(as[1] in bs) +_isdisjoint(as::Tuple, bs::Tuple{}) = true +_isdisjoint(as::Tuple, bs::Tuple{Any}) = !(bs[1] in as) +_isdisjoint(as::Tuple, bs::Tuple) = !(as[1] in bs) && _isdisjoint(tail(as), bs) """ - dataids(A::AbstractArray) -> Tuple{Vararg{AbstractRange}} + dataids(A::AbstractArray) + +Return a tuple of `UInt`s that represent the mutable data segments of an array. -Return a tuple containing rough information about the memory region(s) the array `A` references. +Custom arrays that would like to opt-in to aliasing detection of their component +parts can specialize this method to return the concatenation of the `dataids` of +their component parts. A typical definition for an array that wraps a parent is +`dataids(C::CustomArray) = dataids(C.parent)`. """ -dataids(A::AbstractArray) = () -_memory_extents(A::Array) = isempty(A) ? (UInt(1):UInt(0)) : - (UInt(pointer(A, firstindex(A))):UInt(pointer(A, lastindex(A)))+elsize(A)-1) -dataids(A::Array) = (_memory_extents(A),) +dataids(A::AbstractArray) = (UInt(objectid(A)),) +dataids(A::Array) = (UInt(pointer(A)),) +dataids(::AbstractRange) = () +dataids(x) = () ## get (getindex with a default value) ## diff --git a/base/bitarray.jl b/base/bitarray.jl index 1ef0096f8d4e9..fe6e21e73b74f 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -79,8 +79,7 @@ isassigned(B::BitArray, i::Int) = 1 <= i <= length(B) IndexStyle(::Type{<:BitArray}) = IndexLinear() -dataids(B::BitArray) = (objectid(B):objectid(B),) -unalias(dest, B::BitArray) = mightalias(dest, B) ? copy(B) : B +copypreservingtype(B::BitArray) = copy(B) ## aux functions ## diff --git a/base/multidimensional.jl b/base/multidimensional.jl index aa9b67600b3e4..9a6f9fbeffc27 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -1085,44 +1085,24 @@ end ### from abstractarray.jl -# Computes a range describing a strided array's location in memory for aliasing checks -@inline function _memory_extents(A::StridedArray) - isempty(A) && return UInt(1):UInt(0) - p = UInt(pointer(A, firstindex(A))) - return _memory_extents(p, p, size(A), strides(A), elsize(A)) -end -_memory_extents(lower, upper, ::Tuple{}, ::Tuple{}, elsize) = lower:upper+elsize-1 -_memory_extents(lower, upper, ::Tuple{}, ::Tuple, elsize) = error("broken AbstractArray implementation: lengths of `size` and `strides` must match") -_memory_extents(lower, upper, ::Tuple, ::Tuple{}, elsize) = error("broken AbstractArray implementation: lengths of `size` and `strides` must match") -@inline function _memory_extents(lower, upper, size::Tuple, strides::Tuple, elsize) - size[1] == 0 && return lower:lower-1 # Empty array -> empty memory extents - offset = (size[1]-1) * strides[1] * elsize - if offset > 0 - upper += offset - else - lower += offset - end - return _memory_extents(lower, upper, tail(size), tail(strides), elsize) -end -# Quicker answers for some known types -_memory_extents(A::ReshapedArray) = _memory_extents(A.parent) - -dataids(A::StridedArray) = (_memory_extents(A),) -dataids(A::StridedSubArray) = (_memory_extents(A), _indicesids(A.indices...)...) - # In the common case where we have two views into the same parent, aliasing checks # are _much_ easier and more important to get right function mightalias(A::SubArray{T,<:Any,P}, B::SubArray{T,<:Any,P}) where {T,P} - if A.parent !== B.parent + if !_parentsmatch(A.parent, B.parent) # We cannot do any better than the usual dataids check - return dataidsoverlap(dataids(A), dataids(B)) + return !_isdisjoint(dataids(A), dataids(B)) end # Now we know that A.parent === B.parent. This means that the indices of A # and B are the same length and indexing into the same dimensions. We can # just walk through them and check for overlaps: O(ndims(A)). We must finally - # ensure that A isn't used as an index into B - return _indicesmightoverlap(A.indices, B.indices) || dataidsoverlap(dataids(A), _indicesids(B.indices...)) + # ensure that the indices don't alias with either parent + return _indicesmightoverlap(A.indices, B.indices) || + !_isdisjoint(dataids(A.parent), _splatmap(dataids, B.indices)) || + !_isdisjoint(dataids(B.parent), _splatmap(dataids, A.indices)) end +_parentsmatch(A::AbstractArray, B::AbstractArray) = A === B +# Two reshape(::Array)s of the same size aren't `===` because they have different headers +_parentsmatch(A::Array, B::Array) = pointer(A) == pointer(B) && size(A) == size(B) _indicesmightoverlap(A::Tuple{}, B::Tuple{}) = true _indicesmightoverlap(A::Tuple{}, B::Tuple) = error("malformed subarray") @@ -1149,7 +1129,7 @@ end elseif length(B[1]) == 1 return B[1][1] in A[1] ? _indicesmightoverlap(tail(A), tail(B)) : false else - # But checking larger arrays requires constructing a Set and is too much work + # But checking larger arrays requires O(m*n) and is too much work return true end end diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index 81a45af7bc33f..efbec54c1e7e2 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -36,6 +36,7 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N} end parent(a::ReinterpretArray) = a.parent +dataids(a::ReinterpretArray) = dataids(a.parent) function size(a::ReinterpretArray{T,N,S} where {N}) where {T,S} psize = size(a.parent) diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index e496321605d15..4cfec5168a0fc 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -180,7 +180,7 @@ parent(A::ReshapedArray) = A.parent parentindices(A::ReshapedArray) = map(s->1:s, size(parent(A))) reinterpret(::Type{T}, A::ReshapedArray, dims::Dims) where {T} = reinterpret(T, parent(A), dims) -unalias(dest, A::ReshapedArray) = mightalias(dest, A) ? typeof(A)(unalias(dest, A.parent), A.dims, A.mi) : A +copypreservingtype(A::ReshapedArray) = typeof(A)(copypreservingtype(A.parent), A.dims, A.mi) dataids(A::ReshapedArray) = dataids(A.parent) @inline ind2sub_rs(::Tuple{}, i::Int) = i diff --git a/base/subarray.jl b/base/subarray.jl index b4922bdbb50df..1f8966b05b3f1 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -59,34 +59,9 @@ similar(V::SubArray, T::Type, dims::Dims) = similar(V.parent, T, dims) sizeof(V::SubArray) = length(V) * sizeof(eltype(V)) -""" - parent(A) - -Returns the "parent array" of an array view type (e.g., `SubArray`), or the array itself if -it is not a view. - -# Examples -```jldoctest -julia> A = [1 2; 3 4] -2×2 Array{Int64,2}: - 1 2 - 3 4 - -julia> V = view(A, 1:2, :) -2×2 view(::Array{Int64,2}, 1:2, :) with eltype Int64: - 1 2 - 3 4 - -julia> parent(V) -2×2 Array{Int64,2}: - 1 2 - 3 4 -``` -""" parent(V::SubArray) = V.parent parentindices(V::SubArray) = V.indices -parent(a::AbstractArray) = a """ parentindices(A) @@ -95,11 +70,22 @@ From an array view `A`, returns the corresponding indices in the parent. parentindices(a::AbstractArray) = ntuple(i->OneTo(size(a,i)), ndims(a)) ## Aliasing detection -unalias(dest, A::SubArray) = mightalias(dest, A) ? typeof(A)(unalias(dest, A.parent), unalias(dest, A.indices), A.offset1, A.stride1) : A -dataids(A::SubArray) = (dataids(A.parent)..., _indicesids(A.indices...)...) -_indicesids(x::AbstractArray, xs...) = (dataids(x)..., _indicesids(xs...)...) -_indicesids(x, xs...) = _indicesids(xs...) # Skip non-array indices -_indicesids() = () +dataids(A::SubArray) = (dataids(A.parent)..., _splatmap(dataids, A.indices)...) +_splatmap(f, ::Tuple{}) = () +_splatmap(f, t::Tuple) = (f(t[1])..., _splatmap(f, tail(t))...) +copypreservingtype(A::SubArray) = typeof(A)(copypreservingtype(A.parent), map(copypreservingtype, A.indices), A.offset1, A.stride1) + +# When the parent is an Array we can trim the size down a bit. In the future this +# could possibly be extended to any mutable array. +function copypreservingtype(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{Real,AbstractRange,Array}}},LD} + dest = Array{T}(uninitialized, index_lengths(V.indices...)) + copyto!(dest, V) + SubArray{T,N,A,I,LD}(dest, map(_trimmedindex, V.indices), 0, Int(LD)) +end +# Transform indices to be "dense" +_trimmedindex(i::Real) = oftype(i, 1) +_trimmedindex(i::AbstractUnitRange) = i +_trimmedindex(i::AbstractArray) = oftype(i, reshape(linearindices(i), axes(i))) ## SubArray creation # We always assume that the dimensionality of the parent matches the number of diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index 1d428f4a9b597..0c8f98ef4320b 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -47,9 +47,8 @@ end Adjoint(A) = Adjoint{Base.promote_op(adjoint,eltype(A)),typeof(A)}(A) Transpose(A) = Transpose{Base.promote_op(transpose,eltype(A)),typeof(A)}(A) -using Base: unalias, mightalias, dataids -Base.unalias(dest, A::Union{Adjoint,Transpose}) = mightalias(dest, A) ? typeof(A)(unalias(dest, A.parent)) : A -Base.dataids(A::Union{Adjoint,Transpose}) = dataids(A.parent) +Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent) +Base.copypreservingtype(A::Union{Adjoint,Transpose}) = typeof(A)(Base.copypreservingtype(A.parent)) # wrapping lowercase quasi-constructors """ diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl index 061c564ea9dc0..d9ebb4ba2f330 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -9,7 +9,7 @@ module Serialization import Base: GMP, Bottom, unsafe_convert, uncompressed_ast import Core: svec, SimpleVector -using Base: ViewIndex, Slice, index_lengths, unwrap_unionall +using Base: copypreservingtype, unwrap_unionall using Core.IR export serialize, deserialize, AbstractSerializer, Serializer @@ -272,29 +272,12 @@ function serialize(s::AbstractSerializer, a::Array) end function serialize(s::AbstractSerializer, a::SubArray{T,N,A}) where {T,N,A<:Array} - b = trimmedsubarray(a) + # SubArray's copy only selects the relevant data (and reduces the size) but does not + # preserve the type of the argument. This internal function does both: + b = copypreservingtype(a) serialize_any(s, b) end -function trimmedsubarray(V::SubArray{T,N,A}) where {T,N,A<:Array} - dest = Array{eltype(V)}(uninitialized, trimmedsize(V)) - copyto!(dest, V) - _trimmedsubarray(dest, V, (), V.indices...) -end - -trimmedsize(V) = index_lengths(V.indices...) - -function _trimmedsubarray(A, V::SubArray{T,N,P,I,LD}, newindices) where {T,N,P,I,LD} - LD && return SubArray{T,N,P,I,LD}(A, newindices, Base.compute_offset1(A, 1, newindices), 1) - SubArray{T,N,P,I,LD}(A, newindices, 0, 0) -end -_trimmedsubarray(A, V, newindices, index::ViewIndex, indices...) = _trimmedsubarray(A, V, (newindices..., trimmedindex(V.parent, length(newindices)+1, index)), indices...) - -trimmedindex(P, d, i::Real) = oftype(i, 1) -trimmedindex(P, d, i::Colon) = i -trimmedindex(P, d, i::Slice) = i -trimmedindex(P, d, i::AbstractArray) = oftype(i, reshape(linearindices(i), axes(i))) - function serialize(s::AbstractSerializer, ss::String) len = sizeof(ss) if len <= 255 diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index 0f28e21c9c5a4..62154aa2d94b7 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -260,10 +260,10 @@ function copy(ra::ReshapedArray{<:Any,2,<:SparseMatrixCSC}) return SparseMatrixCSC(mS, nS, colptr, rowval, nzval) end -## Alias detection -using Base: dataids, unalias, mightalias -@inline Base.dataids(S::SparseMatrixCSC) = (dataids(S.colptr)..., dataids(S.rowval)..., dataids(S.nzval)...) -Base.unalias(dest, S::SparseMatrixCSC) = mightalias(dest, S) ? typeof(S)(S.m, S.n, unalias(dest, S.colptr), unalias(dest, S.rowval), unalias(dest, S.nzval)) : S +## Alias detection and prevention +using Base: dataids, copypreservingtype +Base.dataids(S::SparseMatrixCSC) = (dataids(S.colptr)..., dataids(S.rowval)..., dataids(S.nzval)...) +Base.copypreservingtype(S::SparseMatrixCSC) = typeof(S)(S.m, S.n, copypreservingtype(S.colptr), copypreservingtype(S.rowval), copypreservingtype(S.nzval)) ## Constructors diff --git a/stdlib/SparseArrays/src/sparsevector.jl b/stdlib/SparseArrays/src/sparsevector.jl index 845855a19f227..bf10a4f603e1d 100644 --- a/stdlib/SparseArrays/src/sparsevector.jl +++ b/stdlib/SparseArrays/src/sparsevector.jl @@ -92,10 +92,10 @@ similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer) where {TvNew, similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer, n::Integer) where {TvNew,TiNew} = _sparsesimilar(S, TvNew, TiNew, (m, n)) -## Alias detection -using Base: dataids, unalias, mightalias -@inline Base.dataids(S::SparseVector) = (dataids(S.nzind)..., dataids(S.nzval)...) -Base.unalias(dest, S::SparseVector)::typeof(S) = mightalias(dest, S) ? typeof(S)(S.n, unalias(dest, S.nzind), unalias(dest, S.nzval)) : S +## Alias detection and prevention +using Base: dataids, copypreservingtype +Base.dataids(S::SparseVector) = (dataids(S.nzind)..., dataids(S.nzval)...) +Base.copypreservingtype(S::SparseVector) = typeof(S)(S.n, copypreservingtype(S.nzind), copypreservingtype(S.nzval)) ### Construct empty sparse vector diff --git a/test/arrayops.jl b/test/arrayops.jl index 6fe522a6f4cf9..10fd27728ca60 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -1130,8 +1130,6 @@ end @test @views mightalias(A, A[3,1:1]) @test @views mightalias(A[:], A[3,1:1]) @test @views mightalias(A[:,:], A[3,1:1]) - @test @views !mightalias(A, A[1:0]) - @test @views !mightalias(A[:], A[1:0]) B = reshape(A,10,2) @test mightalias(A, A) @@ -1140,6 +1138,11 @@ end @test @views mightalias(B[:], A[:]) @test @views mightalias(B[1:2], A[1:2]) @test @views !mightalias(B[1:end÷2], A[end÷2+1:end]) + + AA = [[1],[2]] + @test @views mightalias(AA, AA[:]) + @test @views mightalias(AA[:], AA[:]) + @test @views mightalias(AA[1:1], AA[1:2]) end @testset "lexicographic comparison" begin From 2ba568843f5e1f3aaa13487fc8bf5f60f7e52998 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Wed, 21 Feb 2018 10:18:02 -0600 Subject: [PATCH 13/16] Remove hyphen in type-stability --- base/abstractarray.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 74d5679d0f4d6..55ba997144ead 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1081,7 +1081,7 @@ Return either `A` or a copy of `A`, in a rough effort to prevent modifications t affecting the returned object. No guarantees are provided, and each custom array must opt-into aliasing detection by overloading this method by specializing on the second argument. -This function must return an object of exactly the same type as `A` for performance and type-stability. +This function must return an object of exactly the same type as `A` for performance and type stability. See also [`mightalias`](@ref) and [`dataids`](@ref). """ From 636453dd99baecd12174f1fdff8f44af59b15e16 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 22 Feb 2018 11:46:25 -0500 Subject: [PATCH 14/16] Use `copy` with a method type assertion instead of `deepcopy` This will do what we want in almost all cases. We only hit this method if `A` is aliasing a mutable array... which means that `A` is mutable as well, and thus in many cases it has defined an appropriate `similar(A)` method that returns the same type. --- base/abstractarray.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 55ba997144ead..eebd4525f1df5 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1088,7 +1088,7 @@ See also [`mightalias`](@ref) and [`dataids`](@ref). unalias(dest, A) = mightalias(dest, A) ? copypreservingtype(A) : A copypreservingtype(A::Array) = copy(A) -copypreservingtype(A::AbstractArray) = (@_noinline_meta; deepcopy(A)::typeof(A)) +copypreservingtype(A::AbstractArray)::typeof(A) = (@_noinline_meta; copy(A)) copypreservingtype(A) = A """ From 5caac482ee1d22ce49fb770e771e7e1a9bed5907 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 23 Feb 2018 11:41:03 -0600 Subject: [PATCH 15/16] Avoid comparing dataids in some obvious cases --- base/abstractarray.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index eebd4525f1df5..84976e160f43a 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1085,7 +1085,9 @@ This function must return an object of exactly the same type as `A` for performa See also [`mightalias`](@ref) and [`dataids`](@ref). """ -unalias(dest, A) = mightalias(dest, A) ? copypreservingtype(A) : A +unalias(dest, A::AbstractArray) = mightalias(dest, A) ? copypreservingtype(A) : A +unalias(dest, A::AbstractRange) = A +unalias(dest, A) = A copypreservingtype(A::Array) = copy(A) copypreservingtype(A::AbstractArray)::typeof(A) = (@_noinline_meta; copy(A)) From 1ffc1358c8482523e4571cce75e225e8fe764b9d Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Mon, 26 Feb 2018 12:56:01 -0600 Subject: [PATCH 16/16] Rename copypreservingtype to unaliascopy and update docs --- base/abstractarray.jl | 52 ++++++++++++++++------- base/bitarray.jl | 2 - base/reshapedarray.jl | 2 +- base/subarray.jl | 4 +- stdlib/LinearAlgebra/src/adjtrans.jl | 2 +- stdlib/Serialization/src/Serialization.jl | 4 +- stdlib/SparseArrays/src/sparsematrix.jl | 4 +- stdlib/SparseArrays/src/sparsevector.jl | 4 +- 8 files changed, 47 insertions(+), 27 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 84976e160f43a..2ca029d8b2d16 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1075,31 +1075,53 @@ parent(a::AbstractArray) = a ## rudimentary aliasing detection ## """ - unalias(dest, A) + Base.unalias(dest, A) -Return either `A` or a copy of `A`, in a rough effort to prevent modifications to `dest` from -affecting the returned object. No guarantees are provided, and each custom array must -opt-into aliasing detection by overloading this method by specializing on the second argument. +Return either `A` or a copy of `A` in a rough effort to prevent modifications to `dest` from +affecting the returned object. No guarantees are provided. -This function must return an object of exactly the same type as `A` for performance and type stability. +Custom arrays that wrap or use fields containing arrays that might alias against other +external objects should provide a [`Base.dataids`](@ref) implementation. -See also [`mightalias`](@ref) and [`dataids`](@ref). +This function must return an object of exactly the same type as `A` for performance and type +stability. Mutable custom arrays for which [`copy(A)`](@ref) is not `typeof(A)` should +provide a [`Base.unaliascopy`](@ref) implementation. + +See also [`Base.mightalias`](@ref). """ -unalias(dest, A::AbstractArray) = mightalias(dest, A) ? copypreservingtype(A) : A +unalias(dest, A::AbstractArray) = mightalias(dest, A) ? unaliascopy(A) : A unalias(dest, A::AbstractRange) = A unalias(dest, A) = A -copypreservingtype(A::Array) = copy(A) -copypreservingtype(A::AbstractArray)::typeof(A) = (@_noinline_meta; copy(A)) -copypreservingtype(A) = A +""" + Base.unaliascopy(A) + +Make a preventative copy of `A` in an operation where `A` [`Base.mightalias`](@ref) against +another array in order to preserve consistent semantics as that other array is mutated. + +This must return an object of the same type as `A` to preserve optimal performance in the +much more common case where aliasing does not occur. By default, +`unaliascopy(A::AbstractArray)` will attempt to use [`copy(A)`](@ref), but in cases where +`copy(A)` is not a `typeof(A)`, then the array should provide a custom implementation of +`Base.unaliascopy(A)`. +""" +unaliascopy(A::Array) = copy(A) +unaliascopy(A::AbstractArray)::typeof(A) = (@_noinline_meta; _unaliascopy(A, copy(A))) +_unaliascopy(A::T, C::T) where {T} = C +_unaliascopy(A, C) = throw(ArgumentError(""" + an array of type `$(typeof(A).name)` shares memory with another argument and must + make a preventative copy of itself in order to maintain consistent semantics, + but `copy(A)` returns a new array of type `$(typeof(C))`. To fix, implement: + `Base.unaliascopy(A::$(typeof(A).name))::typeof(A)`""")) +unaliascopy(A) = A """ - mightalias(A::AbstractArray, B::AbstractArray) + Base.mightalias(A::AbstractArray, B::AbstractArray) -Perform a conservative and rudimentary test to check if arrays `A` and `B` might share the same memory. +Perform a conservative test to check if arrays `A` and `B` might share the same memory. By default, this simply checks if either of the arrays reference the same memory -regions, as identified by their [`dataids`](@ref). +regions, as identified by their [`Base.dataids`](@ref). """ mightalias(A::AbstractArray, B::AbstractArray) = !_isdisjoint(dataids(A), dataids(B)) mightalias(x, y) = false @@ -1115,14 +1137,14 @@ _isdisjoint(as::Tuple, bs::Tuple{Any}) = !(bs[1] in as) _isdisjoint(as::Tuple, bs::Tuple) = !(as[1] in bs) && _isdisjoint(tail(as), bs) """ - dataids(A::AbstractArray) + Base.dataids(A::AbstractArray) Return a tuple of `UInt`s that represent the mutable data segments of an array. Custom arrays that would like to opt-in to aliasing detection of their component parts can specialize this method to return the concatenation of the `dataids` of their component parts. A typical definition for an array that wraps a parent is -`dataids(C::CustomArray) = dataids(C.parent)`. +`Base.dataids(C::CustomArray) = dataids(C.parent)`. """ dataids(A::AbstractArray) = (UInt(objectid(A)),) dataids(A::Array) = (UInt(pointer(A)),) diff --git a/base/bitarray.jl b/base/bitarray.jl index fe6e21e73b74f..e8b2aaa3b0008 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -79,8 +79,6 @@ isassigned(B::BitArray, i::Int) = 1 <= i <= length(B) IndexStyle(::Type{<:BitArray}) = IndexLinear() -copypreservingtype(B::BitArray) = copy(B) - ## aux functions ## const _msk64 = ~UInt64(0) diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index 4cfec5168a0fc..2c94ea834c8a8 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -180,7 +180,7 @@ parent(A::ReshapedArray) = A.parent parentindices(A::ReshapedArray) = map(s->1:s, size(parent(A))) reinterpret(::Type{T}, A::ReshapedArray, dims::Dims) where {T} = reinterpret(T, parent(A), dims) -copypreservingtype(A::ReshapedArray) = typeof(A)(copypreservingtype(A.parent), A.dims, A.mi) +unaliascopy(A::ReshapedArray) = typeof(A)(unaliascopy(A.parent), A.dims, A.mi) dataids(A::ReshapedArray) = dataids(A.parent) @inline ind2sub_rs(::Tuple{}, i::Int) = i diff --git a/base/subarray.jl b/base/subarray.jl index 1f8966b05b3f1..72fc09440ee82 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -73,11 +73,11 @@ parentindices(a::AbstractArray) = ntuple(i->OneTo(size(a,i)), ndims(a)) dataids(A::SubArray) = (dataids(A.parent)..., _splatmap(dataids, A.indices)...) _splatmap(f, ::Tuple{}) = () _splatmap(f, t::Tuple) = (f(t[1])..., _splatmap(f, tail(t))...) -copypreservingtype(A::SubArray) = typeof(A)(copypreservingtype(A.parent), map(copypreservingtype, A.indices), A.offset1, A.stride1) +unaliascopy(A::SubArray) = typeof(A)(unaliascopy(A.parent), map(unaliascopy, A.indices), A.offset1, A.stride1) # When the parent is an Array we can trim the size down a bit. In the future this # could possibly be extended to any mutable array. -function copypreservingtype(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{Real,AbstractRange,Array}}},LD} +function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{Real,AbstractRange,Array}}},LD} dest = Array{T}(uninitialized, index_lengths(V.indices...)) copyto!(dest, V) SubArray{T,N,A,I,LD}(dest, map(_trimmedindex, V.indices), 0, Int(LD)) diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index 0c8f98ef4320b..7e9c4c4bfdaaf 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -48,7 +48,7 @@ Adjoint(A) = Adjoint{Base.promote_op(adjoint,eltype(A)),typeof(A)}(A) Transpose(A) = Transpose{Base.promote_op(transpose,eltype(A)),typeof(A)}(A) Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent) -Base.copypreservingtype(A::Union{Adjoint,Transpose}) = typeof(A)(Base.copypreservingtype(A.parent)) +Base.unaliascopy(A::Union{Adjoint,Transpose}) = typeof(A)(Base.unaliascopy(A.parent)) # wrapping lowercase quasi-constructors """ diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl index dcf8f374fee8d..42ec3cd38408b 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -11,7 +11,7 @@ module Serialization import Base: GMP, Bottom, unsafe_convert, uncompressed_ast import Core: svec, SimpleVector -using Base: copypreservingtype, unwrap_unionall +using Base: unaliascopy, unwrap_unionall using Core.IR export serialize, deserialize, AbstractSerializer, Serializer @@ -276,7 +276,7 @@ end function serialize(s::AbstractSerializer, a::SubArray{T,N,A}) where {T,N,A<:Array} # SubArray's copy only selects the relevant data (and reduces the size) but does not # preserve the type of the argument. This internal function does both: - b = copypreservingtype(a) + b = unaliascopy(a) serialize_any(s, b) end diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index 62154aa2d94b7..f4b8a8a549e34 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -261,9 +261,9 @@ function copy(ra::ReshapedArray{<:Any,2,<:SparseMatrixCSC}) end ## Alias detection and prevention -using Base: dataids, copypreservingtype +using Base: dataids, unaliascopy Base.dataids(S::SparseMatrixCSC) = (dataids(S.colptr)..., dataids(S.rowval)..., dataids(S.nzval)...) -Base.copypreservingtype(S::SparseMatrixCSC) = typeof(S)(S.m, S.n, copypreservingtype(S.colptr), copypreservingtype(S.rowval), copypreservingtype(S.nzval)) +Base.unaliascopy(S::SparseMatrixCSC) = typeof(S)(S.m, S.n, unaliascopy(S.colptr), unaliascopy(S.rowval), unaliascopy(S.nzval)) ## Constructors diff --git a/stdlib/SparseArrays/src/sparsevector.jl b/stdlib/SparseArrays/src/sparsevector.jl index bf10a4f603e1d..0ab3f8b153497 100644 --- a/stdlib/SparseArrays/src/sparsevector.jl +++ b/stdlib/SparseArrays/src/sparsevector.jl @@ -93,9 +93,9 @@ similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer, n::Integer) w _sparsesimilar(S, TvNew, TiNew, (m, n)) ## Alias detection and prevention -using Base: dataids, copypreservingtype +using Base: dataids, unaliascopy Base.dataids(S::SparseVector) = (dataids(S.nzind)..., dataids(S.nzval)...) -Base.copypreservingtype(S::SparseVector) = typeof(S)(S.n, copypreservingtype(S.nzind), copypreservingtype(S.nzval)) +Base.unaliascopy(S::SparseVector) = typeof(S)(S.n, unaliascopy(S.nzind), unaliascopy(S.nzval)) ### Construct empty sparse vector