Skip to content

Commit

Permalink
archive/tar: read/write extended pax/gnu tar archives
Browse files Browse the repository at this point in the history
Support reading pax archives and applying extended attributes
like long names, subsecond mtime resolutions, etc. Default to
writing pax archives for long file and link names.
Support reading gnu archives using the ././@LongLink extended
header for file name and link target.

Fixes #3300

R=dsymonds, dave, rogpeppe, remyoudompheng, chressie, rsc
CC=golang-dev
https://golang.org/cl/6700047
  • Loading branch information
shanemhansen authored and dsymonds committed Feb 11, 2013
1 parent fece09e commit 1068279
Show file tree
Hide file tree
Showing 7 changed files with 472 additions and 9 deletions.
9 changes: 9 additions & 0 deletions src/pkg/archive/tar/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
// References:
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
package tar

import (
Expand All @@ -33,6 +34,8 @@ const (
TypeCont = '7' // reserved
TypeXHeader = 'x' // extended header
TypeXGlobalHeader = 'g' // global extended header
TypeGNULongName = 'L' // Next file has a long name
TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name
)

// A Header represents a single header in a tar archive.
Expand All @@ -54,6 +57,12 @@ type Header struct {
ChangeTime time.Time // status change time
}

// File name constants from the tar spec.
const (
fileNameSize = 100 // Maximum number of bytes in a standard tar name.
fileNamePrefixSize = 155 // Maximum number of ustar extension bytes.
)

// sysStat, if non-nil, populates h from system-dependent fields of fi.
var sysStat func(fi os.FileInfo, h *Header) error

Expand Down
179 changes: 176 additions & 3 deletions src/pkg/archive/tar/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,16 @@ import (
"io/ioutil"
"os"
"strconv"
"strings"
"time"
)

var (
ErrHeader = errors.New("archive/tar: invalid tar header")
)

const maxNanoSecondIntSize = 9

// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
Expand All @@ -41,13 +44,183 @@ func (tr *Reader) Next() (*Header, error) {
if tr.err == nil {
tr.skipUnread()
}
if tr.err == nil {
if tr.err != nil {
return hdr, tr.err
}
hdr = tr.readHeader()
if hdr == nil {
return hdr, tr.err
}
// Check for PAX/GNU header.
switch hdr.Typeflag {
case TypeXHeader:
// PAX extended header
headers, err := parsePAX(tr)
if err != nil {
return nil, err
}
// We actually read the whole file,
// but this skips alignment padding
tr.skipUnread()
hdr = tr.readHeader()
mergePAX(hdr, headers)
return hdr, nil
case TypeGNULongName:
// We have a GNU long name header. Its contents are the real file name.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
hdr, err := tr.Next()
hdr.Name = cString(realname)
return hdr, err
case TypeGNULongLink:
// We have a GNU long link header.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
hdr, err := tr.Next()
hdr.Linkname = cString(realname)
return hdr, err
}
return hdr, tr.err
}

// Parse bytes as a NUL-terminated C-style string.
// mergePAX merges well known headers according to PAX standard.
// In general headers with the same name as those found
// in the header struct overwrite those found in the header
// struct with higher precision or longer values. Esp. useful
// for name and linkname fields.
func mergePAX(hdr *Header, headers map[string]string) error {
for k, v := range headers {
switch k {
case "path":
hdr.Name = v
case "linkpath":
hdr.Linkname = v
case "gname":
hdr.Gname = v
case "uname":
hdr.Uname = v
case "uid":
uid, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Uid = int(uid)
case "gid":
gid, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Gid = int(gid)
case "atime":
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.AccessTime = t
case "mtime":
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.ModTime = t
case "ctime":
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.ChangeTime = t
case "size":
size, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Size = int64(size)
}

}
return nil
}

// parsePAXTime takes a string of the form %d.%d as described in
// the PAX specification.
func parsePAXTime(t string) (time.Time, error) {
buf := []byte(t)
pos := bytes.IndexByte(buf, '.')
var seconds, nanoseconds int64
var err error
if pos == -1 {
seconds, err = strconv.ParseInt(t, 10, 0)
if err != nil {
return time.Time{}, err
}
} else {
seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
if err != nil {
return time.Time{}, err
}
nano_buf := string(buf[pos+1:])
// Pad as needed before converting to a decimal.
// For example .030 -> .030000000 -> 30000000 nanoseconds
if len(nano_buf) < maxNanoSecondIntSize {
// Right pad
nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
} else if len(nano_buf) > maxNanoSecondIntSize {
// Right truncate
nano_buf = nano_buf[:maxNanoSecondIntSize]
}
nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
if err != nil {
return time.Time{}, err
}
}
ts := time.Unix(seconds, nanoseconds)
return ts, nil
}

// parsePAX parses PAX headers.
// If an extended header (type 'x') is invalid, ErrHeader is returned
func parsePAX(r io.Reader) (map[string]string, error) {
buf, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
headers := make(map[string]string)
// Each record is constructed as
// "%d %s=%s\n", length, keyword, value
for len(buf) > 0 {
// or the header was empty to start with.
var sp int
// The size field ends at the first space.
sp = bytes.IndexByte(buf, ' ')
if sp == -1 {
return nil, ErrHeader
}
// Parse the first token as a decimal integer.
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
if err != nil {
return nil, ErrHeader
}
// Extract everything between the decimal and the n -1 on the
// beginning to to eat the ' ', -1 on the end to skip the newline.
var record []byte
record, buf = buf[sp+1:n-1], buf[n:]
// The first equals is guaranteed to mark the end of the key.
// Everything else is value.
eq := bytes.IndexByte(record, '=')
if eq == -1 {
return nil, ErrHeader
}
key, value := record[:eq], record[eq+1:]
headers[string(key)] = string(value)
}
return headers, nil
}

// cString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func cString(b []byte) string {
n := 0
Expand Down Expand Up @@ -85,7 +258,7 @@ func (tr *Reader) octal(b []byte) int64 {
return int64(x)
}

// Skip any unread bytes in the existing file entry, as well as any alignment padding.
// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
func (tr *Reader) skipUnread() {
nr := tr.nb + tr.pad // number of bytes to skip
tr.nb, tr.pad = 0, 0
Expand Down
106 changes: 105 additions & 1 deletion src/pkg/archive/tar/reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"fmt"
"io"
"os"
"reflect"
"strings"
"testing"
"time"
)
Expand Down Expand Up @@ -108,6 +110,38 @@ var untarTests = []*untarTest{
},
},
},
{
file: "testdata/pax.tar",
headers: []*Header{
{
Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
Mode: 0664,
Uid: 1000,
Gid: 1000,
Uname: "shane",
Gname: "shane",
Size: 7,
ModTime: time.Unix(1350244992, 23960108),
ChangeTime: time.Unix(1350244992, 23960108),
AccessTime: time.Unix(1350244992, 23960108),
Typeflag: TypeReg,
},
{
Name: "a/b",
Mode: 0777,
Uid: 1000,
Gid: 1000,
Uname: "shane",
Gname: "shane",
Size: 0,
ModTime: time.Unix(1350266320, 910238425),
ChangeTime: time.Unix(1350266320, 910238425),
AccessTime: time.Unix(1350266320, 910238425),
Typeflag: TypeSymlink,
Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
},
},
},
}

func TestReader(t *testing.T) {
Expand All @@ -133,7 +167,7 @@ testLoop:
}
hdr, err := tr.Next()
if err == io.EOF {
break
continue testLoop
}
if hdr != nil || err != nil {
t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err)
Expand Down Expand Up @@ -260,3 +294,73 @@ func TestNonSeekable(t *testing.T) {
t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread)
}
}

func TestParsePAXHeader(t *testing.T) {
paxTests := [][3]string{
{"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths
{"a", "a=name", "9 a=name\n"}, // Test case involving multiple acceptable length
{"mtime", "mtime=1350244992.023960108", "30 mtime=1350244992.023960108\n"}}
for _, test := range paxTests {
key, expected, raw := test[0], test[1], test[2]
reader := bytes.NewBuffer([]byte(raw))
headers, err := parsePAX(reader)
if err != nil {
t.Errorf("Couldn't parse correctly formatted headers: %v", err)
continue
}
if strings.EqualFold(headers[key], expected) {
t.Errorf("mtime header incorrectly parsed: got %s, wanted %s", headers[key], expected)
continue
}
trailer := make([]byte, 100)
n, err := reader.Read(trailer)
if err != io.EOF || n != 0 {
t.Error("Buffer wasn't consumed")
}
}
badHeader := bytes.NewBuffer([]byte("3 somelongkey="))
if _, err := parsePAX(badHeader); err != ErrHeader {
t.Fatal("Unexpected success when parsing bad header")
}
}

func TestParsePAXTime(t *testing.T) {
// Some valid PAX time values
timestamps := map[string]time.Time{
"1350244992.023960108": time.Unix(1350244992, 23960108), // The commoon case
"1350244992.02396010": time.Unix(1350244992, 23960100), // Lower precision value
"1350244992.0239601089": time.Unix(1350244992, 23960108), // Higher precision value
"1350244992": time.Unix(1350244992, 0), // Low precision value
}
for input, expected := range timestamps {
ts, err := parsePAXTime(input)
if err != nil {
t.Fatal(err)
}
if !ts.Equal(expected) {
t.Fatal("Time parsing failure %s %s", ts, expected)
}
}
}

func TestMergePAX(t *testing.T) {
hdr := new(Header)
// Test a string, integer, and time based value.
headers := map[string]string{
"path": "a/b/c",
"uid": "1000",
"mtime": "1350244992.023960108",
}
err := mergePAX(hdr, headers)
if err != nil {
t.Fatal(err)
}
want := &Header{
Name: "a/b/c",
Uid: 1000,
ModTime: time.Unix(1350244992, 23960108),
}
if !reflect.DeepEqual(hdr, want) {
t.Errorf("incorrect merge: got %+v, want %+v", hdr, want)
}
}
Binary file added src/pkg/archive/tar/testdata/pax.tar
Binary file not shown.
Binary file added src/pkg/archive/tar/testdata/ustar.tar
Binary file not shown.
Loading

0 comments on commit 1068279

Please sign in to comment.