Skip to content

Commit

Permalink
bugfix: reverse(eachline) should not read before the initial stream p…
Browse files Browse the repository at this point in the history
…osition
  • Loading branch information
stevengj committed Sep 18, 2021
1 parent 2a24c52 commit b72fdf3
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 14 deletions.
28 changes: 16 additions & 12 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1060,13 +1060,14 @@ isdone(itr::EachLine, state...) = eof(itr.stream)
# Reverse-order iteration for the EachLine iterator for seekable streams,
# which works by reading the stream from the end in 4kiB chunks.
function iterate(r::Iterators.Reverse{<:EachLine})
p0 = position(r.itr.stream)
seekend(r.itr.stream) # may throw if io is non-seekable
p = position(r.itr.stream)
# chunks = circular buffer of 4kiB blocks read from end of stream
chunks = empty!(Vector{Vector{UInt8}}(undef, 2)) # allocate space for 2 buffers (common case)
inewline = jnewline = 0
while p > 0 && inewline == 0 # read chunks until we find a newline or we read whole file
chunk = Vector{UInt8}(undef, min(4096, p))
while p > p0 && inewline == 0 # read chunks until we find a newline or we read whole file
chunk = Vector{UInt8}(undef, min(4096, p-p0))
p -= length(chunk)
readbytes!(seek(r.itr.stream, p), chunk)
pushfirst!(chunks, chunk)
Expand All @@ -1077,12 +1078,13 @@ function iterate(r::Iterators.Reverse{<:EachLine})
inewline = something(findprev(==(UInt8('\n')), chunk, inewline-1), 0)
end
end
return iterate(r, (p, chunks, 1, inewline, length(chunks), jnewline == 0 && !isempty(chunks) ? length(chunks[end])+1 : jnewline))
return iterate(r, (p0, p, chunks, 1, inewline, length(chunks), jnewline == 0 && !isempty(chunks) ? length(chunks[end])+1 : jnewline))
end
function iterate(r::Iterators.Reverse{<:EachLine}, state)
# state tuple: p = file position, chunks = circular array of chunk buffers,
# state tuple: p0 = initial file position, p = current position,
# chunks = circular array of chunk buffers,
# current line is from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline]
p, chunks, ichunk, inewline, jchunk, jnewline = state
p0, p, chunks, ichunk, inewline, jchunk, jnewline = state
if inewline == 0 # no newline found, remaining line = rest of chunks (if any)
isempty(chunks) && return (r.itr.ondone(); nothing)
buf = IOBuffer(sizehint = ichunk==jchunk ? jnewline : 4096)
Expand All @@ -1093,7 +1095,8 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state)
chunk = chunks[jchunk]
write(buf, view(chunk, 1:min(length(chunk), jnewline - 1 + r.itr.keep)))
empty!(chunks) # will cause next iteration to terminate
return (String(take!(buf)), state)
seekend(r.itr.stream) # reposition to end of stream for isdone
s = String(take!(buf))
else
# extract the string from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline]
jnewline = min(length(chunks[jchunk]), jnewline - 1 + r.itr.keep)
Expand All @@ -1115,7 +1118,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state)
i = jchunk
while i != ichunk
chunk = chunks[i]
p -= length(resize!(chunk, min(4096, p)))
p -= length(resize!(chunk, min(4096, p-p0)))
readbytes!(seek(r.itr.stream, p), chunk)
i = i == 1 ? length(chunks) : i - 1
end
Expand All @@ -1133,21 +1136,22 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state)
end

# read more chunks to look for a newline (should rarely happen)
if inewline == 0 && p > 0
if inewline == 0 && p > p0
ichunk = jchunk + 1
while true
chunk = Vector{UInt8}(undef, min(4096, p))
chunk = Vector{UInt8}(undef, min(4096, p-p0))
p -= length(chunk)
readbytes!(seek(r.itr.stream, p), chunk)
insert!(chunks, ichunk, chunk)
inewline = something(findlast(==(UInt8('\n')), chunk), 0)
(p == 0 || inewline > 0) && break
(p == p0 || inewline > 0) && break
end
end

return (s, (p, chunks, ichunk, inewline, jchunk, jnewline))
end
return (s, (p0, p, chunks, ichunk, inewline, jchunk, jnewline))
end
isdone(r::Iterators.Reverse{<:EachLine}, state) = isempty(state[3]) # isempty(chunks)
isdone(r::Iterators.Reverse{<:EachLine}) = isdone(r.itr)

# use reverse iteration to get end of EachLines (if possible)
last(itr::EachLine) = first(Iterators.reverse(itr))
Expand Down
19 changes: 17 additions & 2 deletions test/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ end
@test isempty(itr) # now it is empty
end

# exercise buffer code for reverse(eachline)
# more tests for reverse(eachline)
@testset "reverse(eachline)" begin
lines = vcat(repr.(1:4), ' '^50000 .* repr.(5:10), repr.(11:10^5))
for lines in (lines, reverse(lines)), finalnewline in (true, false)
Expand All @@ -639,8 +639,23 @@ end
@test last(eachline(seekstart(buf))) == last(lines)
@test last(eachline(seekstart(buf)),10^4) == last(lines,10^4)
@test last(eachline(seekstart(buf)),length(lines)*2) == lines
@test reverse!(collect(Iterators.reverse(eachline(seek(buf, sum(sizeof, lines[1:100]) + 100))))) == lines[101:end]
@test isempty(Iterators.reverse(eachline(buf)))
end

@test isempty(collect(Iterators.reverse(eachline(seekstart(IOBuffer())))))
let rempty = Iterators.reverse(eachline(IOBuffer()))
@test isempty(rempty)
@test isempty(collect(rempty))
end

let buf = IOBuffer("foo\nbar")
@test readline(buf) == "foo"
r = Iterators.reverse(eachline(buf))
line, state = iterate(r)
@test line == "bar"
@test Base.isdone(r, state)
@test Base.isdone(r)
@test isempty(r) && isempty(collect(r))
end
end

0 comments on commit b72fdf3

Please sign in to comment.