From 9e10015337204c0b216011cad583d002574428ca Mon Sep 17 00:00:00 2001
From: Matt Bauman
Date: Fri, 11 Oct 2019 11:10:43 -0500
Subject: [PATCH] Simplify, simdify, and speed up BitArray broadcasts with partitions

---
 base/broadcast.jl | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/base/broadcast.jl b/base/broadcast.jl
index 16c0653be515d..ebce0afaba6c1 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -919,20 +919,19 @@ end
     length(dest) < 256 && return invoke(copyto!, Tuple{AbstractArray, Broadcasted{Nothing}}, dest, bc)
     tmp = Vector{Bool}(undef, bitcache_size)
     destc = dest.chunks
-    ind = cind = 1
+    cind = 1
     bc′ = preprocess(dest, bc)
-    @simd for I in eachindex(bc′)
-        @inbounds tmp[ind] = bc′[I]
-        ind += 1
-        if ind > bitcache_size
-            dumpbitcache(destc, cind, tmp)
-            cind += bitcache_chunks
-            ind = 1
+    for P in Iterators.partition(eachindex(bc′), bitcache_size)
+        ind = 1
+        @simd for I in P
+            @inbounds tmp[ind] = bc′[I]
+            ind += 1
+        end
+        @simd for i in ind:bitcache_size
+            @inbounds tmp[i] = false
         end
-    end
-    if ind > 1
-        @inbounds tmp[ind:bitcache_size] .= false
         dumpbitcache(destc, cind, tmp)
+        cind += bitcache_chunks
     end
     return dest
 end