Skip to content

Commit

Permalink
fix bugs handling unusual tarballs with directory after contents (#105)
Browse files Browse the repository at this point in the history
This fixes three related bugs when processing tarballs where a directory
entry appears *after* some contents of that directory. This is unusual
since neither command-line `tar` nor `Tar.create` will produce tarballs
like this, and I only dicovered the bug when I accidentally modified
`Tar.create` to emit directory entries after emiting everything inside
of the directory. Each of `extract`, `tree_hash` and `rewrite` did this
wrong in slightly different ways previously. Now they all (hopefully) do
the right thing, and we test that they each handle a manually generated
tarball with this kind of unusual ordering.

(cherry picked from commit 9316af8)
  • Loading branch information
StefanKarpinski committed May 4, 2021
1 parent 2972f5e commit 0906dfa
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 4 deletions.
4 changes: 3 additions & 1 deletion src/create.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ function rewrite_tarball(
end
node = node′
end
node[name] = (hdr, position(old_tar))
if !(hdr.type == :directory && get(node, name, nothing) isa Dict)
node[name] = (hdr, position(old_tar))
end
skip_data(old_tar, hdr.size)
end
write_tarball(new_tar, tree, buf=buf) do node, tar_path
Expand Down
10 changes: 7 additions & 3 deletions src/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,16 @@ function extract_tarball(
paths = read_tarball(predicate, tar; buf=buf, skeleton=skeleton) do hdr, parts
# get the file system version of the path
sys_path = reduce(joinpath, init=root, parts)
# delete anything that's there already
ispath(sys_path) && rm(sys_path, force=true, recursive=true)
# ensure dirname(sys_path) is a directory
dir = dirname(sys_path)
st = stat(dir)
if !isdir(st)
ispath(st) && rm(dir, force=true, recursive=true)
mkpath(dir)
else
st = lstat(sys_path)
hdr.type == :directory && isdir(st) && return # from callback
ispath(st) && rm(sys_path, force=true, recursive=true)
end
# create the path
if hdr.type == :directory
Expand Down Expand Up @@ -206,7 +208,9 @@ function git_tree_hash(
node = node′
end
if hdr.type == :directory
node[name] = Dict{String,Any}()
if !(get(node, name, nothing) isa Dict)
node[name] = Dict{String,Any}()
end
return
end
if hdr.type == :symlink
Expand Down
32 changes: 32 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,38 @@ if @isdefined(gtar)
end
end

@testset "directory after contents" begin
# create and hash a reference tarball
tarball, io = mktemp()
# executable files: hashing works on Windows + old Julia version
Tar.write_header(io, Tar.Header("dir/file", :file, 0o755, 0, ""))
Tar.write_header(io, Tar.Header("file", :file, 0o755, 0, ""))
close(io)
hash = Tar.tree_hash(tarball)
rm(tarball)
# create a version with directory entries after contents
tarball, io = mktemp()
Tar.write_header(io, Tar.Header("file", :file, 0o755, 0, ""))
Tar.write_header(io, Tar.Header(".", :directory, 0o755, 0, ""))
Tar.write_header(io, Tar.Header("dir/file", :file, 0o755, 0, ""))
Tar.write_header(io, Tar.Header("dir", :directory, 0o755, 0, ""))
close(io)
# check extract
tree = Tar.extract(tarball)
check_tree_hash(hash, tree)
# check tree_hash
@test Tar.tree_hash(tarball) == hash
# check rewrite
tarball′ = Tar.rewrite(tarball)
@test Tar.list(tarball′) == [
Tar.Header("dir/file", :file, 0o755, 0, "")
Tar.Header("file", :file, 0o755, 0, "")
]
# cleanup
rm(tarball′)
rm(tarball)
end

@testset "symlink attacks" begin
# not dangerous but still not allowed
tarball, io = mktemp()
Expand Down

0 comments on commit 0906dfa

Please sign in to comment.