From f076b1ef460765220ec72067ea3bbc2ac3976623 Mon Sep 17 00:00:00 2001 From: Peter Rabbitson Date: Tue, 21 Jan 2020 16:05:24 +0100 Subject: [PATCH 1/6] Fix preexisting tests - they would never fail as written --- parse_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/parse_test.go b/parse_test.go index f82aba5..f819199 100644 --- a/parse_test.go +++ b/parse_test.go @@ -15,8 +15,8 @@ func TestParseRabin(t *testing.T) { t.Errorf(err.Error()) } _, err = parseRabinString(r, chk2) - if err == ErrRabinMin { - t.Log("it should be ErrRabinMin here.") + if err != ErrRabinMin { + t.Fatalf("Expected an 'ErrRabinMin' error, got: %#v", err) } } @@ -26,11 +26,11 @@ func TestParseSize(t *testing.T) { size1 := "size-0" size2 := "size-32" _, err := FromString(r, size1) - if err == ErrSize { - t.Log("it should be ErrSize here.") + if err != ErrSize { + t.Fatalf("Expected an 'ErrSize' error, got: %#v", err) } _, err = FromString(r, size2) - if err == ErrSize { + if err != nil { t.Fatal(err) } } From 397f536c853dd5e6145e26c0720a736f18c2e4f2 Mon Sep 17 00:00:00 2001 From: Peter Rabbitson Date: Tue, 21 Jan 2020 16:57:58 +0100 Subject: [PATCH 2/6] Add various sanity checks for size specifications --- parse.go | 30 +++++++++++++++++++++++++++++- splitting.go | 3 --- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/parse.go b/parse.go index 5d472b7..59656bf 100644 --- a/parse.go +++ b/parse.go @@ -8,9 +8,25 @@ import ( "strings" ) +const ( + // DefaultBlockSize is the chunk size that splitters produce (or aim to). + DefaultBlockSize int64 = 1024 * 256 + + // 1 MB, on-wire block size for "datablocks ( unixfs, etc )" + // copy of https://github.com/ipfs/go-unixfs/blob/v0.2.3/importer/helpers/helpers.go#L8 + BlockSizeLimit int = 1048576 + + // in case we are using raw-leaves: this would match BlockSizeLimit, but we can't assume that + // be conservative and substract the PB wraping size of a full DAG-PB+UnixFS node describing 1M + // (2b(type2/file)+4b(data-field:3-byte-len-delimited)+4b(size-field:3-byte-varint))+(4b(DAG-type-1:3-byte-len-delimited)) + // FIXME - this calculation will need an update for CBOR + BlockPayloadLimit int = (BlockSizeLimit - (2 + 4 + 4 + 4)) +) + var ( ErrRabinMin = errors.New("rabin min must be greater than 16") - ErrSize = errors.New("chunker size muster greater than 0") + ErrSize = errors.New("chunker size must be greater than 0") + ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum block payload size of %d", BlockPayloadLimit) ) // FromString returns a Splitter depending on the given string: @@ -28,6 +44,8 @@ func FromString(r io.Reader, chunker string) (Splitter, error) { return nil, err } else if size <= 0 { return nil, ErrSize + } else if size > BlockPayloadLimit { + return nil, ErrSizeMax } return NewSizeSplitter(r, int64(size)), nil @@ -51,6 +69,8 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) { size, err := strconv.Atoi(parts[1]) if err != nil { return nil, err + } else if int(float32(size)*1.5) > BlockPayloadLimit { // FIXME - there is probably a better way to bubble up this calculation from NewRabin() + return nil, ErrSizeMax } return NewRabin(r, uint64(size)), nil case 4: @@ -84,6 +104,14 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) { return nil, err } + if min >= avg { + return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg") + } else if avg >= max { + return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max") + } else if max > BlockPayloadLimit { + return nil, ErrSizeMax + } + return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil default: return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'") diff --git a/splitting.go b/splitting.go index 2b23739..a137820 100644 --- a/splitting.go +++ b/splitting.go @@ -13,9 +13,6 @@ import ( var log = logging.Logger("chunk") -// DefaultBlockSize is the chunk size that splitters produce (or aim to). -var DefaultBlockSize int64 = 1024 * 256 - // A Splitter reads bytes from a Reader and creates "chunks" (byte slices) // that can be used to build DAG nodes. type Splitter interface { From 65d7a6e9ab7c927a2f596590b6eabe00051368e9 Mon Sep 17 00:00:00 2001 From: Peter Rabbitson Date: Tue, 21 Jan 2020 16:58:17 +0100 Subject: [PATCH 3/6] Test all the things --- parse_test.go | 70 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/parse_test.go b/parse_test.go index f819199..242d582 100644 --- a/parse_test.go +++ b/parse_test.go @@ -2,35 +2,79 @@ package chunk import ( "bytes" + "fmt" "testing" ) +const ( + testTwoThirdsOfBlockPayloadLimit = 2 * (float32(BlockPayloadLimit) / float32(3)) +) + func TestParseRabin(t *testing.T) { - max := 1000 - r := bytes.NewReader(randBuf(t, max)) - chk1 := "rabin-18-25-32" - chk2 := "rabin-15-23-31" - _, err := parseRabinString(r, chk1) + r := bytes.NewReader(randBuf(t, 1000)) + + _, err := FromString(r, "rabin-18-25-32") if err != nil { t.Errorf(err.Error()) } - _, err = parseRabinString(r, chk2) + + _, err = FromString(r, "rabin-15-23-31") if err != ErrRabinMin { t.Fatalf("Expected an 'ErrRabinMin' error, got: %#v", err) } + + _, err = FromString(r, "rabin-20-20-21") + if err == nil || err.Error() != "incorrect format: rabin-min must be smaller than rabin-avg" { + t.Fatalf("Expected an arg-out-of-order error, got: %#v", err) + } + + _, err = FromString(r, "rabin-19-21-21") + if err == nil || err.Error() != "incorrect format: rabin-avg must be smaller than rabin-max" { + t.Fatalf("Expected an arg-out-of-order error, got: %#v", err) + } + + _, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", BlockPayloadLimit)) + if err != nil { + t.Fatalf("Expected success, got: %#v", err) + } + + _, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", 1+BlockPayloadLimit)) + if err != ErrSizeMax { + t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) + } + + _, err = FromString(r, fmt.Sprintf("rabin-%.0f", testTwoThirdsOfBlockPayloadLimit)) + if err != nil { + t.Fatalf("Expected success, got: %#v", err) + } + + _, err = FromString(r, fmt.Sprintf("rabin-%.0f", 1+testTwoThirdsOfBlockPayloadLimit)) + if err != ErrSizeMax { + t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) + } + } func TestParseSize(t *testing.T) { - max := 1000 - r := bytes.NewReader(randBuf(t, max)) - size1 := "size-0" - size2 := "size-32" - _, err := FromString(r, size1) + r := bytes.NewReader(randBuf(t, 1000)) + + _, err := FromString(r, "size-0") if err != ErrSize { t.Fatalf("Expected an 'ErrSize' error, got: %#v", err) } - _, err = FromString(r, size2) + + _, err = FromString(r, "size-32") + if err != nil { + t.Fatalf("Expected success, got: %#v", err) + } + + _, err = FromString(r, fmt.Sprintf("size-%d", BlockPayloadLimit)) if err != nil { - t.Fatal(err) + t.Fatalf("Expected success, got: %#v", err) + } + + _, err = FromString(r, fmt.Sprintf("size-%d", 1+BlockPayloadLimit)) + if err != ErrSizeMax { + t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) } } From 5f9fd98c2afd218261ae9405a8f5d647498a2f88 Mon Sep 17 00:00:00 2001 From: Peter Rabbitson Date: Tue, 21 Jan 2020 17:03:50 +0100 Subject: [PATCH 4/6] Remove last pieces of gx --- .travis.yml | 4 ---- Makefile | 18 ------------------ README.md | 2 -- package.json | 47 ----------------------------------------------- 4 files changed, 71 deletions(-) delete mode 100644 Makefile delete mode 100644 package.json diff --git a/.travis.yml b/.travis.yml index 4cfe98c..f32dfd1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,11 +9,8 @@ go: env: global: - GOTFLAGS="-race" - matrix: - - BUILD_DEPTYPE=gx - BUILD_DEPTYPE=gomod - # disable travis install install: - true @@ -24,7 +21,6 @@ script: cache: directories: - - $GOPATH/src/gx - $GOPATH/pkg/mod - $HOME/.cache/go-build diff --git a/Makefile b/Makefile deleted file mode 100644 index 73f2841..0000000 --- a/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -all: deps -gx: - go get github.com/whyrusleeping/gx - go get github.com/whyrusleeping/gx-go -deps: gx - gx --verbose install --global - gx-go rewrite -test: deps - gx test -v -race -coverprofile=coverage.txt -covermode=atomic . -rw: - gx-go rewrite -rwundo: - gx-go rewrite --undo -publish: rwundo - gx publish -.PHONY: all gx deps test rw rwundo publish - - diff --git a/README.md b/README.md index 84161c5..7b1b5b2 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,6 @@ The package provides a `SizeSplitter` which creates chunks of equal size and it > go get github.com/ipfs/go-ipfs-chunker ``` -It uses [Gx](https://github.com/whyrusleeping/gx) to manage dependencies. You can use `make all` to build it with the `gx` dependencies. - ## Usage ``` diff --git a/package.json b/package.json deleted file mode 100644 index d53c0e7..0000000 --- a/package.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "author": "hsanjuan", - "bugs": { - "url": "https://github.com/ipfs/go-ipfs-chunker" - }, - "gx": { - "dvcsimport": "github.com/ipfs/go-ipfs-chunker" - }, - "gxDependencies": [ - { - "hash": "QmbkT7eMTyXfpeyB3ZMxxcxg7XH8t6uXp49jqzz4HB7BGF", - "name": "go-log", - "version": "1.5.9" - }, - { - "author": "whyrusleeping", - "hash": "QmZooytqEoUwQjv7KzH4d3xyJnyvD3AWJaCDMYt5pbCtua", - "name": "chunker", - "version": "0.0.1" - }, - { - "author": "whyrusleeping", - "hash": "QmNohiVssaPw3KVLZik59DBVGTSm2dGvYT9eoXt5DQ36Yz", - "name": "go-ipfs-util", - "version": "1.2.9" - }, - { - "author": "stebalien", - "hash": "QmYYLnAzR28nAQ4U5MFniLprnktu6eTFKibeNt96V21EZK", - "name": "go-block-format", - "version": "0.2.2" - }, - { - "author": "Stebalien", - "hash": "QmQDvJoB6aJWN3sjr3xsgXqKCXf4jU5zdMXpDMsBkYVNqa", - "name": "go-buffer-pool", - "version": "0.1.3" - } - ], - "gxVersion": "0.12.1", - "language": "go", - "license": "MIT", - "name": "go-ipfs-chunker", - "releaseCmd": "git commit -a -m \"gx publish $VERSION\"", - "version": "0.1.6" -} - From 4414aa269d6baa18717e34aa87f9a72d4948ff4b Mon Sep 17 00:00:00 2001 From: Peter Rabbitson Date: Tue, 21 Jan 2020 21:06:23 +0100 Subject: [PATCH 5/6] Address block/chunk logical mismatch, name things correctly --- parse.go | 21 ++++++++------------- parse_test.go | 14 +++++++------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/parse.go b/parse.go index 59656bf..dee8304 100644 --- a/parse.go +++ b/parse.go @@ -12,21 +12,16 @@ const ( // DefaultBlockSize is the chunk size that splitters produce (or aim to). DefaultBlockSize int64 = 1024 * 256 - // 1 MB, on-wire block size for "datablocks ( unixfs, etc )" - // copy of https://github.com/ipfs/go-unixfs/blob/v0.2.3/importer/helpers/helpers.go#L8 - BlockSizeLimit int = 1048576 - - // in case we are using raw-leaves: this would match BlockSizeLimit, but we can't assume that - // be conservative and substract the PB wraping size of a full DAG-PB+UnixFS node describing 1M - // (2b(type2/file)+4b(data-field:3-byte-len-delimited)+4b(size-field:3-byte-varint))+(4b(DAG-type-1:3-byte-len-delimited)) - // FIXME - this calculation will need an update for CBOR - BlockPayloadLimit int = (BlockSizeLimit - (2 + 4 + 4 + 4)) + // No leaf block should contain more than 1MiB of payload data ( wrapping overhead aside ) + // This effectively mandates the maximum chunk size + // See discussion at https://github.com/ipfs/go-ipfs-chunker/pull/21#discussion_r369124879 for background + ChunkSizeLimit int = 1048576 ) var ( ErrRabinMin = errors.New("rabin min must be greater than 16") ErrSize = errors.New("chunker size must be greater than 0") - ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum block payload size of %d", BlockPayloadLimit) + ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum chunk size of %d", ChunkSizeLimit) ) // FromString returns a Splitter depending on the given string: @@ -44,7 +39,7 @@ func FromString(r io.Reader, chunker string) (Splitter, error) { return nil, err } else if size <= 0 { return nil, ErrSize - } else if size > BlockPayloadLimit { + } else if size > ChunkSizeLimit { return nil, ErrSizeMax } return NewSizeSplitter(r, int64(size)), nil @@ -69,7 +64,7 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) { size, err := strconv.Atoi(parts[1]) if err != nil { return nil, err - } else if int(float32(size)*1.5) > BlockPayloadLimit { // FIXME - there is probably a better way to bubble up this calculation from NewRabin() + } else if int(float32(size)*1.5) > ChunkSizeLimit { // FIXME - this will be addressed in a subsequent PR return nil, ErrSizeMax } return NewRabin(r, uint64(size)), nil @@ -108,7 +103,7 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) { return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg") } else if avg >= max { return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max") - } else if max > BlockPayloadLimit { + } else if max > ChunkSizeLimit { return nil, ErrSizeMax } diff --git a/parse_test.go b/parse_test.go index 242d582..237a2b4 100644 --- a/parse_test.go +++ b/parse_test.go @@ -7,7 +7,7 @@ import ( ) const ( - testTwoThirdsOfBlockPayloadLimit = 2 * (float32(BlockPayloadLimit) / float32(3)) + testTwoThirdsOfChunkLimit = 2 * (float32(ChunkSizeLimit) / float32(3)) ) func TestParseRabin(t *testing.T) { @@ -33,22 +33,22 @@ func TestParseRabin(t *testing.T) { t.Fatalf("Expected an arg-out-of-order error, got: %#v", err) } - _, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", BlockPayloadLimit)) + _, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", ChunkSizeLimit)) if err != nil { t.Fatalf("Expected success, got: %#v", err) } - _, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", 1+BlockPayloadLimit)) + _, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", 1+ChunkSizeLimit)) if err != ErrSizeMax { t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) } - _, err = FromString(r, fmt.Sprintf("rabin-%.0f", testTwoThirdsOfBlockPayloadLimit)) + _, err = FromString(r, fmt.Sprintf("rabin-%.0f", testTwoThirdsOfChunkLimit)) if err != nil { t.Fatalf("Expected success, got: %#v", err) } - _, err = FromString(r, fmt.Sprintf("rabin-%.0f", 1+testTwoThirdsOfBlockPayloadLimit)) + _, err = FromString(r, fmt.Sprintf("rabin-%.0f", 1+testTwoThirdsOfChunkLimit)) if err != ErrSizeMax { t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) } @@ -68,12 +68,12 @@ func TestParseSize(t *testing.T) { t.Fatalf("Expected success, got: %#v", err) } - _, err = FromString(r, fmt.Sprintf("size-%d", BlockPayloadLimit)) + _, err = FromString(r, fmt.Sprintf("size-%d", ChunkSizeLimit)) if err != nil { t.Fatalf("Expected success, got: %#v", err) } - _, err = FromString(r, fmt.Sprintf("size-%d", 1+BlockPayloadLimit)) + _, err = FromString(r, fmt.Sprintf("size-%d", 1+ChunkSizeLimit)) if err != ErrSizeMax { t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) } From a144aabc0513b209b93755fbffe0895140d68728 Mon Sep 17 00:00:00 2001 From: Peter Rabbitson Date: Wed, 22 Jan 2020 16:59:08 +0100 Subject: [PATCH 6/6] Remove extra missed hidden .gx state --- .gx/lastpubver | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .gx/lastpubver diff --git a/.gx/lastpubver b/.gx/lastpubver deleted file mode 100644 index caa3a7f..0000000 --- a/.gx/lastpubver +++ /dev/null @@ -1 +0,0 @@ -0.1.6: QmYmZ81dU5nnmBFy5MmktXLZpt8QCWhRJd6M1uxVF6vke8