From 32273f949ddcc40d480aa15be4ef9a2a06dfe73f Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Wed, 28 Jun 2023 16:43:43 +1000 Subject: [PATCH 1/4] fix: cache offsets for sequential reads --- traversal/selector/matcher.go | 2 +- traversal/selector/matcher_util.go | 30 +++++++++++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/traversal/selector/matcher.go b/traversal/selector/matcher.go index ff2596c1..94c5e26a 100644 --- a/traversal/selector/matcher.go +++ b/traversal/selector/matcher.go @@ -55,7 +55,7 @@ func (s Slice) Slice(n datamodel.Node) (datamodel.Node, error) { return nil, err } - sr := io.NewSectionReader(readerat{rdr}, s.From, s.To-s.From) + sr := io.NewSectionReader(&readerat{rdr, 0}, s.From, s.To-s.From) return basicnode.NewBytesFromReader(sr), nil } bytes, err := n.AsBytes() diff --git a/traversal/selector/matcher_util.go b/traversal/selector/matcher_util.go index 237c5e71..39e62ec0 100644 --- a/traversal/selector/matcher_util.go +++ b/traversal/selector/matcher_util.go @@ -1,20 +1,28 @@ package selector -import "io" +import ( + "io" +) type readerat struct { - io.ReadSeeker + rs io.ReadSeeker + off int64 } -// ReadAt provides the io.ReadAt method over a ReadSeeker. -// This implementation does not support concurrent calls to `ReadAt`, -// as specified by the ReaderAt interface, and so must only be used -// in non-concurrent use cases. -func (r readerat) ReadAt(p []byte, off int64) (n int, err error) { - // TODO: consider keeping track of current offset. - _, err = r.Seek(off, 0) +// ReadAt provides the io.ReadAt method over a ReadSeeker. It will track the +// current offset and seek if necessary. +func (r *readerat) ReadAt(p []byte, off int64) (n int, err error) { + if off != r.off { + _, err = r.rs.Seek(off, 0) + if err != nil { + return 0, err + } + r.off = off + } + c, err := r.rs.Read(p) if err != nil { - return 0, err + return c, err } - return r.Read(p) + r.off += int64(c) + return c, nil } From 211944d98ee896b96ca103fe48eb3d2a8475be95 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Thu, 29 Jun 2023 19:47:29 +1000 Subject: [PATCH 2/4] feat: remove hard-error when slice matcher reaches non-string/bytes node --- traversal/selector/matcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/traversal/selector/matcher.go b/traversal/selector/matcher.go index 94c5e26a..97e16b78 100644 --- a/traversal/selector/matcher.go +++ b/traversal/selector/matcher.go @@ -73,7 +73,7 @@ func (s Slice) Slice(n datamodel.Node) (datamodel.Node, error) { return basicnode.NewBytes(bytes[from:to]), nil default: - return nil, fmt.Errorf("selector slice rejected on %s: subset match must be over string or bytes", n.Kind()) + return nil, nil } } From 54aa838eb563b36036c9711ae3e523f5a1038232 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 4 Jul 2023 12:38:46 +1000 Subject: [PATCH 3/4] feat(test): add matcher/slice selector test cases --- node/tests/byteSpecs.go | 1 - traversal/selector/matcher.go | 10 +- traversal/selector/matcher_test.go | 307 +++++++++++++++++++++++++++++ traversal/selector/matcher_util.go | 3 +- 4 files changed, 312 insertions(+), 9 deletions(-) create mode 100644 traversal/selector/matcher_test.go diff --git a/node/tests/byteSpecs.go b/node/tests/byteSpecs.go index 69bf494b..6d54de7a 100644 --- a/node/tests/byteSpecs.go +++ b/node/tests/byteSpecs.go @@ -30,6 +30,5 @@ func SpecTestBytes(t *testing.T, np datamodel.NodePrototype) { qt.Check(t, err, qt.IsNil) qt.Check(t, bytes, qt.DeepEquals, []byte("asdf")) } - }) } diff --git a/traversal/selector/matcher.go b/traversal/selector/matcher.go index 97e16b78..ff9d5ccd 100644 --- a/traversal/selector/matcher.go +++ b/traversal/selector/matcher.go @@ -40,11 +40,11 @@ func (s Slice) Slice(n datamodel.Node) (datamodel.Node, error) { return nil, err } to = s.To - if len(str) < int(to) { + if int64(len(str)) < to { to = int64(len(str)) } from = s.From - if len(str) < int(from) { + if int64(len(str)) < from { from = int64(len(str)) } return basicnode.NewString(str[from:to]), nil @@ -54,7 +54,6 @@ func (s Slice) Slice(n datamodel.Node) (datamodel.Node, error) { if err != nil { return nil, err } - sr := io.NewSectionReader(&readerat{rdr, 0}, s.From, s.To-s.From) return basicnode.NewBytesFromReader(sr), nil } @@ -63,14 +62,13 @@ func (s Slice) Slice(n datamodel.Node) (datamodel.Node, error) { return nil, err } to = s.To - if len(bytes) < int(to) { + if int64(len(bytes)) < to { to = int64(len(bytes)) } from = s.From - if len(bytes) < int(from) { + if int64(len(bytes)) < from { from = int64(len(bytes)) } - return basicnode.NewBytes(bytes[from:to]), nil default: return nil, nil diff --git a/traversal/selector/matcher_test.go b/traversal/selector/matcher_test.go new file mode 100644 index 00000000..1c803d5b --- /dev/null +++ b/traversal/selector/matcher_test.go @@ -0,0 +1,307 @@ +package selector_test + +import ( + "fmt" + "io" + "math" + "testing" + + qt "github.com/frankban/quicktest" + + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/fluent/qp" + "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/traversal" + "github.com/ipld/go-ipld-prime/traversal/selector" +) + +func TestSubsetMatch(t *testing.T) { + expectedString := "foobarbaz!" + nodes := []struct { + name string + node datamodel.Node + }{ + {"stringNode", basicnode.NewString(expectedString)}, + {"bytesNode", basicnode.NewBytes([]byte(expectedString))}, + {"largeBytesNode", &MultiByteNode{ + Bytes: [][]byte{ + []byte("foo"), + []byte("bar"), + []byte("baz"), + []byte("!"), + }, + }}, + } + + // selector for a slice of the value of the "bipbop" field within a map + mkRangeSelector := func(from int64, to int64) (datamodel.Node, error) { + return qp.BuildMap(basicnode.Prototype.Map, 1, func(na datamodel.MapAssembler) { + qp.MapEntry(na, selector.SelectorKey_ExploreFields, qp.Map(1, func(na datamodel.MapAssembler) { + qp.MapEntry(na, selector.SelectorKey_Fields, qp.Map(1, func(na datamodel.MapAssembler) { + qp.MapEntry(na, "bipbop", qp.Map(1, func(na datamodel.MapAssembler) { + qp.MapEntry(na, selector.SelectorKey_Matcher, qp.Map(1, func(na datamodel.MapAssembler) { + qp.MapEntry(na, selector.SelectorKey_Subset, qp.Map(1, func(na datamodel.MapAssembler) { + qp.MapEntry(na, selector.SelectorKey_From, qp.Int(from)) + qp.MapEntry(na, selector.SelectorKey_To, qp.Int(to)) + })) + })) + })) + })) + })) + }) + } + + for _, tc := range []struct { + from int64 + to int64 + exp string + }{ + {0, math.MaxInt64, expectedString}, + {0, int64(len(expectedString)), expectedString}, + {0, 0, ""}, + {0, 1, "f"}, + {0, 2, "fo"}, + {0, 3, "foo"}, + {0, 4, "foob"}, + {1, 4, "oob"}, + {2, 4, "ob"}, + {3, 4, "b"}, + {4, 4, ""}, + {4, math.MaxInt64, "arbaz!"}, + {4, int64(len(expectedString)), "arbaz!"}, + {4, int64(len(expectedString) - 1), "arbaz"}, + {0, int64(len(expectedString) - 1), expectedString[0 : len(expectedString)-1]}, + {0, int64(len(expectedString) - 2), expectedString[0 : len(expectedString)-2]}, + } { + for _, variant := range nodes { + t.Run(fmt.Sprintf("%s[%d:%d]", variant.name, tc.from, tc.to), func(t *testing.T) { + selNode, err := mkRangeSelector(tc.from, tc.to) + qt.Assert(t, err, qt.IsNil) + ss, err := selector.ParseSelector(selNode) + qt.Assert(t, err, qt.IsNil) + + // node that the selector will match, with our variant node embedded in the "bipbop" field + n, err := qp.BuildMap(basicnode.Prototype.Map, 1, func(na datamodel.MapAssembler) { + qp.MapEntry(na, "bipbop", qp.Node(variant.node)) + }) + + var got datamodel.Node + qt.Assert(t, err, qt.IsNil) + err = traversal.WalkMatching(n, ss, func(prog traversal.Progress, n datamodel.Node) error { + qt.Assert(t, got, qt.IsNil) + got = n + return nil + }) + qt.Assert(t, err, qt.IsNil) + + qt.Assert(t, got, qt.IsNotNil) + qt.Assert(t, got.Kind(), qt.Equals, variant.node.Kind()) + var gotString string + switch got.Kind() { + case datamodel.Kind_String: + gotString, err = got.AsString() + qt.Assert(t, err, qt.IsNil) + case datamodel.Kind_Bytes: + byts, err := got.AsBytes() + qt.Assert(t, err, qt.IsNil) + gotString = string(byts) + } + qt.Assert(t, gotString, qt.DeepEquals, tc.exp) + }) + } + } +} + +func TestMultiByteNode_Sanity(t *testing.T) { + mbn := &MultiByteNode{ + Bytes: [][]byte{ + []byte("foo"), + []byte("bar"), + []byte("baz"), + []byte("!"), + }, + } + // Sanity check that the readseeker works. + // (This is a test of the test, not the code under test.) + + for _, rl := range []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} { + t.Run("readseeker works with read length "+qt.Format(rl), func(t *testing.T) { + rs, err := mbn.AsLargeBytes() + qt.Assert(t, err, qt.IsNil) + acc := make([]byte, 0, mbn.size()) + buf := make([]byte, rl) + for { + n, err := rs.Read(buf) + if err == io.EOF { + qt.Check(t, n, qt.Equals, 0) + break + } + qt.Assert(t, err, qt.IsNil) + acc = append(acc, buf[0:n]...) + } + qt.Assert(t, string(acc), qt.DeepEquals, "foobarbaz!") + }) + } + + t.Run("readseeker can seek and read middle bytes", func(t *testing.T) { + rs, err := mbn.AsLargeBytes() + qt.Assert(t, err, qt.IsNil) + _, err = rs.Seek(2, io.SeekStart) + qt.Assert(t, err, qt.IsNil) + buf := make([]byte, 2) + acc := make([]byte, 0, 5) + for len(acc) < 5 { + n, err := rs.Read(buf) + qt.Assert(t, err, qt.IsNil) + acc = append(acc, buf[0:n]...) + } + qt.Assert(t, string(acc), qt.DeepEquals, "obarba") + }) + + t.Run("readseeker can seek and read last byte", func(t *testing.T) { + rs, err := mbn.AsLargeBytes() + qt.Assert(t, err, qt.IsNil) + _, err = rs.Seek(-1, io.SeekEnd) + qt.Assert(t, err, qt.IsNil) + buf := make([]byte, 1) + n, err := rs.Read(buf) + qt.Assert(t, err, qt.IsNil) + qt.Check(t, n, qt.Equals, 1) + qt.Check(t, string(buf[0]), qt.Equals, "!") + }) +} + +var _ datamodel.Node = (*MultiByteNode)(nil) +var _ datamodel.LargeBytesNode = (*MultiByteNode)(nil) + +// MultiByteNode is a node that is a concatenation of multiple byte slices. +// It's not particularly sophisticated but lets us exercise LargeBytesNode as a +// path through the selectors. The novel behaviour of Read() and Seek() on the +// AsLargeBytes is similar to that which would be expected from a LBN ADL, such +// as UnixFS sharded files. +type MultiByteNode struct { + Bytes [][]byte +} + +func (mbn *MultiByteNode) Kind() datamodel.Kind { + return datamodel.Kind_Bytes +} + +func (mbn *MultiByteNode) AsBytes() ([]byte, error) { + ret := make([]byte, 0, mbn.size()) + for _, b := range mbn.Bytes { + ret = append(ret, b...) + } + return ret, nil +} + +func (mbn *MultiByteNode) size() int { + var size int + for _, b := range mbn.Bytes { + size += len(b) + } + return size +} + +func (mbn *MultiByteNode) AsLargeBytes() (io.ReadSeeker, error) { + return &mbnReadSeeker{node: mbn}, nil +} + +func (mbn *MultiByteNode) AsBool() (bool, error) { + return false, datamodel.ErrWrongKind{TypeName: "bool", MethodName: "AsBool", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn *MultiByteNode) AsInt() (int64, error) { + return 0, datamodel.ErrWrongKind{TypeName: "int", MethodName: "AsInt", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn *MultiByteNode) AsFloat() (float64, error) { + return 0, datamodel.ErrWrongKind{TypeName: "float", MethodName: "AsFloat", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn *MultiByteNode) AsString() (string, error) { + return "", datamodel.ErrWrongKind{TypeName: "string", MethodName: "AsString", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn *MultiByteNode) AsLink() (datamodel.Link, error) { + return nil, datamodel.ErrWrongKind{TypeName: "link", MethodName: "AsLink", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn *MultiByteNode) AsNode() (datamodel.Node, error) { + return nil, nil +} + +func (mbn *MultiByteNode) Size() int { + return 0 +} + +func (mbn *MultiByteNode) IsAbsent() bool { + return false +} + +func (mbn *MultiByteNode) IsNull() bool { + return false +} + +func (mbn *MultiByteNode) Length() int64 { + return 0 +} + +func (mbn *MultiByteNode) ListIterator() datamodel.ListIterator { + return nil +} + +func (mbn *MultiByteNode) MapIterator() datamodel.MapIterator { + return nil +} + +func (mbn *MultiByteNode) LookupByIndex(idx int64) (datamodel.Node, error) { + return nil, datamodel.ErrWrongKind{} +} + +func (mbn *MultiByteNode) LookupByString(key string) (datamodel.Node, error) { + return nil, datamodel.ErrWrongKind{} +} + +func (mbn *MultiByteNode) LookupByNode(key datamodel.Node) (datamodel.Node, error) { + return nil, datamodel.ErrWrongKind{} +} + +func (mbn *MultiByteNode) LookupBySegment(seg datamodel.PathSegment) (datamodel.Node, error) { + return nil, datamodel.ErrWrongKind{} +} + +func (mbn *MultiByteNode) Prototype() datamodel.NodePrototype { + return basicnode.Prototype.Bytes // not really ... but it'll do for this test +} + +type mbnReadSeeker struct { + node *MultiByteNode + offset int +} + +func (mbnrs *mbnReadSeeker) Read(p []byte) (int, error) { + var acc int + for _, byts := range mbnrs.node.Bytes { + if mbnrs.offset-acc >= len(byts) { + acc += len(byts) + continue + } + n := copy(p, byts[mbnrs.offset-acc:]) + mbnrs.offset += n + return n, nil + } + return 0, io.EOF +} + +func (mbnrs *mbnReadSeeker) Seek(offset int64, whence int) (int64, error) { + switch whence { + case io.SeekStart: + mbnrs.offset = int(offset) + case io.SeekCurrent: + mbnrs.offset += int(offset) + case io.SeekEnd: + mbnrs.offset = mbnrs.node.size() + int(offset) + } + return int64(mbnrs.offset), nil +} diff --git a/traversal/selector/matcher_util.go b/traversal/selector/matcher_util.go index 39e62ec0..cfb1bca7 100644 --- a/traversal/selector/matcher_util.go +++ b/traversal/selector/matcher_util.go @@ -13,8 +13,7 @@ type readerat struct { // current offset and seek if necessary. func (r *readerat) ReadAt(p []byte, off int64) (n int, err error) { if off != r.off { - _, err = r.rs.Seek(off, 0) - if err != nil { + if _, err = r.rs.Seek(off, io.SeekStart); err != nil { return 0, err } r.off = off From 349deb22a1fd573b1e15d387cde95d2dc9a35704 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 4 Jul 2023 17:35:56 +1000 Subject: [PATCH 4/4] chore: extract MultiByteNote to testutil package --- testutil/multibytenode.go | 147 ++++++++++++++++++++ testutil/multibytenode_test.go | 67 +++++++++ traversal/selector/matcher_test.go | 210 +---------------------------- 3 files changed, 221 insertions(+), 203 deletions(-) create mode 100644 testutil/multibytenode.go create mode 100644 testutil/multibytenode_test.go diff --git a/testutil/multibytenode.go b/testutil/multibytenode.go new file mode 100644 index 00000000..88c87e2a --- /dev/null +++ b/testutil/multibytenode.go @@ -0,0 +1,147 @@ +package testutil + +import ( + "io" + + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/node/basicnode" +) + +var _ datamodel.Node = MultiByteNode{} +var _ datamodel.LargeBytesNode = (*MultiByteNode)(nil) + +// MultiByteNode is a node that is a concatenation of multiple byte slices. +// It's not particularly sophisticated but lets us exercise LargeBytesNode in a +// non-trivial way. +// The novel behaviour of Read() and Seek() on the AsLargeBytes is similar to +// that which would be expected from a LBN ADL, such as UnixFS sharded files. +type MultiByteNode struct { + bytes [][]byte +} + +func NewMultiByteNode(bytes ...[]byte) MultiByteNode { + return MultiByteNode{bytes: bytes} +} + +func (mbn MultiByteNode) Kind() datamodel.Kind { + return datamodel.Kind_Bytes +} + +func (mbn MultiByteNode) AsBytes() ([]byte, error) { + ret := make([]byte, 0, mbn.TotalLength()) + for _, b := range mbn.bytes { + ret = append(ret, b...) + } + return ret, nil +} + +func (mbn MultiByteNode) TotalLength() int { + var size int + for _, b := range mbn.bytes { + size += len(b) + } + return size +} + +func (mbn MultiByteNode) AsLargeBytes() (io.ReadSeeker, error) { + return &mbnReadSeeker{node: mbn}, nil +} + +func (mbn MultiByteNode) AsBool() (bool, error) { + return false, datamodel.ErrWrongKind{TypeName: "bool", MethodName: "AsBool", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn MultiByteNode) AsInt() (int64, error) { + return 0, datamodel.ErrWrongKind{TypeName: "int", MethodName: "AsInt", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn MultiByteNode) AsFloat() (float64, error) { + return 0, datamodel.ErrWrongKind{TypeName: "float", MethodName: "AsFloat", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn MultiByteNode) AsString() (string, error) { + return "", datamodel.ErrWrongKind{TypeName: "string", MethodName: "AsString", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn MultiByteNode) AsLink() (datamodel.Link, error) { + return nil, datamodel.ErrWrongKind{TypeName: "link", MethodName: "AsLink", AppropriateKind: datamodel.KindSet_JustBytes} +} + +func (mbn MultiByteNode) AsNode() (datamodel.Node, error) { + return nil, nil +} + +func (mbn MultiByteNode) Size() int { + return 0 +} + +func (mbn MultiByteNode) IsAbsent() bool { + return false +} + +func (mbn MultiByteNode) IsNull() bool { + return false +} + +func (mbn MultiByteNode) Length() int64 { + return 0 +} + +func (mbn MultiByteNode) ListIterator() datamodel.ListIterator { + return nil +} + +func (mbn MultiByteNode) MapIterator() datamodel.MapIterator { + return nil +} + +func (mbn MultiByteNode) LookupByIndex(idx int64) (datamodel.Node, error) { + return nil, datamodel.ErrWrongKind{} +} + +func (mbn MultiByteNode) LookupByString(key string) (datamodel.Node, error) { + return nil, datamodel.ErrWrongKind{} +} + +func (mbn MultiByteNode) LookupByNode(key datamodel.Node) (datamodel.Node, error) { + return nil, datamodel.ErrWrongKind{} +} + +func (mbn MultiByteNode) LookupBySegment(seg datamodel.PathSegment) (datamodel.Node, error) { + return nil, datamodel.ErrWrongKind{} +} + +func (mbn MultiByteNode) Prototype() datamodel.NodePrototype { + return basicnode.Prototype.Bytes // not really ... but it'll do for this test +} + +type mbnReadSeeker struct { + node MultiByteNode + offset int +} + +func (mbnrs *mbnReadSeeker) Read(p []byte) (int, error) { + var acc int + for _, byts := range mbnrs.node.bytes { + if mbnrs.offset-acc >= len(byts) { + acc += len(byts) + continue + } + n := copy(p, byts[mbnrs.offset-acc:]) + mbnrs.offset += n + return n, nil + } + return 0, io.EOF +} + +func (mbnrs *mbnReadSeeker) Seek(offset int64, whence int) (int64, error) { + switch whence { + case io.SeekStart: + mbnrs.offset = int(offset) + case io.SeekCurrent: + mbnrs.offset += int(offset) + case io.SeekEnd: + mbnrs.offset = mbnrs.node.TotalLength() + int(offset) + } + return int64(mbnrs.offset), nil +} diff --git a/testutil/multibytenode_test.go b/testutil/multibytenode_test.go new file mode 100644 index 00000000..c3ca5e59 --- /dev/null +++ b/testutil/multibytenode_test.go @@ -0,0 +1,67 @@ +package testutil_test + +import ( + "io" + "testing" + + qt "github.com/frankban/quicktest" + + "github.com/ipld/go-ipld-prime/testutil" +) + +func TestMultiByteNode(t *testing.T) { + mbn := testutil.NewMultiByteNode( + []byte("foo"), + []byte("bar"), + []byte("baz"), + []byte("!"), + ) + // Sanity check that the readseeker works. + // (This is a test of the test, not the code under test.) + + for _, rl := range []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} { + t.Run("readseeker works with read length "+qt.Format(rl), func(t *testing.T) { + rs, err := mbn.AsLargeBytes() + qt.Assert(t, err, qt.IsNil) + acc := make([]byte, 0, mbn.TotalLength()) + buf := make([]byte, rl) + for { + n, err := rs.Read(buf) + if err == io.EOF { + qt.Check(t, n, qt.Equals, 0) + break + } + qt.Assert(t, err, qt.IsNil) + acc = append(acc, buf[0:n]...) + } + qt.Assert(t, string(acc), qt.DeepEquals, "foobarbaz!") + }) + } + + t.Run("readseeker can seek and read middle bytes", func(t *testing.T) { + rs, err := mbn.AsLargeBytes() + qt.Assert(t, err, qt.IsNil) + _, err = rs.Seek(2, io.SeekStart) + qt.Assert(t, err, qt.IsNil) + buf := make([]byte, 2) + acc := make([]byte, 0, 5) + for len(acc) < 5 { + n, err := rs.Read(buf) + qt.Assert(t, err, qt.IsNil) + acc = append(acc, buf[0:n]...) + } + qt.Assert(t, string(acc), qt.DeepEquals, "obarba") + }) + + t.Run("readseeker can seek and read last byte", func(t *testing.T) { + rs, err := mbn.AsLargeBytes() + qt.Assert(t, err, qt.IsNil) + _, err = rs.Seek(-1, io.SeekEnd) + qt.Assert(t, err, qt.IsNil) + buf := make([]byte, 1) + n, err := rs.Read(buf) + qt.Assert(t, err, qt.IsNil) + qt.Check(t, n, qt.Equals, 1) + qt.Check(t, string(buf[0]), qt.Equals, "!") + }) +} diff --git a/traversal/selector/matcher_test.go b/traversal/selector/matcher_test.go index 1c803d5b..0ddc4706 100644 --- a/traversal/selector/matcher_test.go +++ b/traversal/selector/matcher_test.go @@ -2,7 +2,6 @@ package selector_test import ( "fmt" - "io" "math" "testing" @@ -11,6 +10,7 @@ import ( "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/fluent/qp" "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/testutil" "github.com/ipld/go-ipld-prime/traversal" "github.com/ipld/go-ipld-prime/traversal/selector" ) @@ -23,14 +23,12 @@ func TestSubsetMatch(t *testing.T) { }{ {"stringNode", basicnode.NewString(expectedString)}, {"bytesNode", basicnode.NewBytes([]byte(expectedString))}, - {"largeBytesNode", &MultiByteNode{ - Bytes: [][]byte{ - []byte("foo"), - []byte("bar"), - []byte("baz"), - []byte("!"), - }, - }}, + {"largeBytesNode", testutil.NewMultiByteNode( + []byte("foo"), + []byte("bar"), + []byte("baz"), + []byte("!"), + )}, } // selector for a slice of the value of the "bipbop" field within a map @@ -111,197 +109,3 @@ func TestSubsetMatch(t *testing.T) { } } } - -func TestMultiByteNode_Sanity(t *testing.T) { - mbn := &MultiByteNode{ - Bytes: [][]byte{ - []byte("foo"), - []byte("bar"), - []byte("baz"), - []byte("!"), - }, - } - // Sanity check that the readseeker works. - // (This is a test of the test, not the code under test.) - - for _, rl := range []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} { - t.Run("readseeker works with read length "+qt.Format(rl), func(t *testing.T) { - rs, err := mbn.AsLargeBytes() - qt.Assert(t, err, qt.IsNil) - acc := make([]byte, 0, mbn.size()) - buf := make([]byte, rl) - for { - n, err := rs.Read(buf) - if err == io.EOF { - qt.Check(t, n, qt.Equals, 0) - break - } - qt.Assert(t, err, qt.IsNil) - acc = append(acc, buf[0:n]...) - } - qt.Assert(t, string(acc), qt.DeepEquals, "foobarbaz!") - }) - } - - t.Run("readseeker can seek and read middle bytes", func(t *testing.T) { - rs, err := mbn.AsLargeBytes() - qt.Assert(t, err, qt.IsNil) - _, err = rs.Seek(2, io.SeekStart) - qt.Assert(t, err, qt.IsNil) - buf := make([]byte, 2) - acc := make([]byte, 0, 5) - for len(acc) < 5 { - n, err := rs.Read(buf) - qt.Assert(t, err, qt.IsNil) - acc = append(acc, buf[0:n]...) - } - qt.Assert(t, string(acc), qt.DeepEquals, "obarba") - }) - - t.Run("readseeker can seek and read last byte", func(t *testing.T) { - rs, err := mbn.AsLargeBytes() - qt.Assert(t, err, qt.IsNil) - _, err = rs.Seek(-1, io.SeekEnd) - qt.Assert(t, err, qt.IsNil) - buf := make([]byte, 1) - n, err := rs.Read(buf) - qt.Assert(t, err, qt.IsNil) - qt.Check(t, n, qt.Equals, 1) - qt.Check(t, string(buf[0]), qt.Equals, "!") - }) -} - -var _ datamodel.Node = (*MultiByteNode)(nil) -var _ datamodel.LargeBytesNode = (*MultiByteNode)(nil) - -// MultiByteNode is a node that is a concatenation of multiple byte slices. -// It's not particularly sophisticated but lets us exercise LargeBytesNode as a -// path through the selectors. The novel behaviour of Read() and Seek() on the -// AsLargeBytes is similar to that which would be expected from a LBN ADL, such -// as UnixFS sharded files. -type MultiByteNode struct { - Bytes [][]byte -} - -func (mbn *MultiByteNode) Kind() datamodel.Kind { - return datamodel.Kind_Bytes -} - -func (mbn *MultiByteNode) AsBytes() ([]byte, error) { - ret := make([]byte, 0, mbn.size()) - for _, b := range mbn.Bytes { - ret = append(ret, b...) - } - return ret, nil -} - -func (mbn *MultiByteNode) size() int { - var size int - for _, b := range mbn.Bytes { - size += len(b) - } - return size -} - -func (mbn *MultiByteNode) AsLargeBytes() (io.ReadSeeker, error) { - return &mbnReadSeeker{node: mbn}, nil -} - -func (mbn *MultiByteNode) AsBool() (bool, error) { - return false, datamodel.ErrWrongKind{TypeName: "bool", MethodName: "AsBool", AppropriateKind: datamodel.KindSet_JustBytes} -} - -func (mbn *MultiByteNode) AsInt() (int64, error) { - return 0, datamodel.ErrWrongKind{TypeName: "int", MethodName: "AsInt", AppropriateKind: datamodel.KindSet_JustBytes} -} - -func (mbn *MultiByteNode) AsFloat() (float64, error) { - return 0, datamodel.ErrWrongKind{TypeName: "float", MethodName: "AsFloat", AppropriateKind: datamodel.KindSet_JustBytes} -} - -func (mbn *MultiByteNode) AsString() (string, error) { - return "", datamodel.ErrWrongKind{TypeName: "string", MethodName: "AsString", AppropriateKind: datamodel.KindSet_JustBytes} -} - -func (mbn *MultiByteNode) AsLink() (datamodel.Link, error) { - return nil, datamodel.ErrWrongKind{TypeName: "link", MethodName: "AsLink", AppropriateKind: datamodel.KindSet_JustBytes} -} - -func (mbn *MultiByteNode) AsNode() (datamodel.Node, error) { - return nil, nil -} - -func (mbn *MultiByteNode) Size() int { - return 0 -} - -func (mbn *MultiByteNode) IsAbsent() bool { - return false -} - -func (mbn *MultiByteNode) IsNull() bool { - return false -} - -func (mbn *MultiByteNode) Length() int64 { - return 0 -} - -func (mbn *MultiByteNode) ListIterator() datamodel.ListIterator { - return nil -} - -func (mbn *MultiByteNode) MapIterator() datamodel.MapIterator { - return nil -} - -func (mbn *MultiByteNode) LookupByIndex(idx int64) (datamodel.Node, error) { - return nil, datamodel.ErrWrongKind{} -} - -func (mbn *MultiByteNode) LookupByString(key string) (datamodel.Node, error) { - return nil, datamodel.ErrWrongKind{} -} - -func (mbn *MultiByteNode) LookupByNode(key datamodel.Node) (datamodel.Node, error) { - return nil, datamodel.ErrWrongKind{} -} - -func (mbn *MultiByteNode) LookupBySegment(seg datamodel.PathSegment) (datamodel.Node, error) { - return nil, datamodel.ErrWrongKind{} -} - -func (mbn *MultiByteNode) Prototype() datamodel.NodePrototype { - return basicnode.Prototype.Bytes // not really ... but it'll do for this test -} - -type mbnReadSeeker struct { - node *MultiByteNode - offset int -} - -func (mbnrs *mbnReadSeeker) Read(p []byte) (int, error) { - var acc int - for _, byts := range mbnrs.node.Bytes { - if mbnrs.offset-acc >= len(byts) { - acc += len(byts) - continue - } - n := copy(p, byts[mbnrs.offset-acc:]) - mbnrs.offset += n - return n, nil - } - return 0, io.EOF -} - -func (mbnrs *mbnReadSeeker) Seek(offset int64, whence int) (int64, error) { - switch whence { - case io.SeekStart: - mbnrs.offset = int(offset) - case io.SeekCurrent: - mbnrs.offset += int(offset) - case io.SeekEnd: - mbnrs.offset = mbnrs.node.size() + int(offset) - } - return int64(mbnrs.offset), nil -}