From 67f98377342a3edaf9c2decd68943c5590af5891 Mon Sep 17 00:00:00 2001 From: Jakub Sztandera Date: Mon, 7 Oct 2019 18:06:02 +0200 Subject: [PATCH 1/4] Introduce buzhash chunker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It has the same properties as Rabin but is much faster. Benchmark results: ``` name speed Buzhash2/1K-4 1.71GB/s ± 4% Buzhash2/1M-4 1.15GB/s ± 2% Buzhash2/16M-4 892MB/s ± 1% Buzhash2/100M-4 904MB/s ± 2% Rabin/1K-4 13.8MB/s ± 3% Rabin/1M-4 171MB/s ± 3% Rabin/16M-4 182MB/s ± 4% Rabin/100M-4 182MB/s ± 3% Default/1K-4 1.74GB/s ± 4% Default/1M-4 3.22GB/s ± 2% Default/16M-4 3.88GB/s ± 2% Default/100M-4 4.21GB/s ± 6% ``` License: MIT Signed-off-by: Jakub Sztandera --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 636c62adcda..321a14d3e2d 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/ipfs/go-filestore v0.0.2 github.com/ipfs/go-fs-lock v0.0.1 github.com/ipfs/go-ipfs-blockstore v0.1.0 - github.com/ipfs/go-ipfs-chunker v0.0.1 + github.com/ipfs/go-ipfs-chunker v0.0.3 github.com/ipfs/go-ipfs-cmds v0.1.1 github.com/ipfs/go-ipfs-config v0.0.11 github.com/ipfs/go-ipfs-ds-help v0.0.1 diff --git a/go.sum b/go.sum index c999c92f0ac..15b0ee26b0c 100644 --- a/go.sum +++ b/go.sum @@ -183,6 +183,8 @@ github.com/ipfs/go-ipfs-blocksutil v0.0.1 h1:Eh/H4pc1hsvhzsQoMEP3Bke/aW5P5rVM1IW github.com/ipfs/go-ipfs-blocksutil v0.0.1/go.mod h1:Yq4M86uIOmxmGPUHv/uI7uKqZNtLb449gwKqXjIsnRk= github.com/ipfs/go-ipfs-chunker v0.0.1 h1:cHUUxKFQ99pozdahi+uSC/3Y6HeRpi9oTeUHbE27SEw= github.com/ipfs/go-ipfs-chunker v0.0.1/go.mod h1:tWewYK0we3+rMbOh7pPFGDyypCtvGcBFymgY4rSDLAw= +github.com/ipfs/go-ipfs-chunker v0.0.3 h1:UuXhKoxvxl/vGhie+WXFRZYCwpZjbKF2SzD1Tvxif1I= +github.com/ipfs/go-ipfs-chunker v0.0.3/go.mod h1:RkGJorerOQNTDPgmX7HtJ5YzVQqaIYdzI/hrCHty5Kc= github.com/ipfs/go-ipfs-cmds v0.1.1 h1:H9/BLf5rcsULHMj/x8gC0e5o+raYhqk1OQsfzbGMNM4= github.com/ipfs/go-ipfs-cmds v0.1.1/go.mod h1:k1zMXcOLtljA9iAnZHddbH69yVm5+weRL0snmMD/rK0= github.com/ipfs/go-ipfs-config v0.0.11 h1:5/4nas2CQXiKr2/MLxU24GDGTBvtstQIQezuk7ltOQQ= From 6f4f9b4cbba6c752e416bfe433de8063385900b3 Mon Sep 17 00:00:00 2001 From: Jakub Sztandera Date: Mon, 7 Oct 2019 18:17:01 +0200 Subject: [PATCH 2/4] Add test for buzhash License: MIT Signed-off-by: Jakub Sztandera --- test/sharness/t0040-add-and-cat.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/sharness/t0040-add-and-cat.sh b/test/sharness/t0040-add-and-cat.sh index edd3e120714..b6c1f500e76 100755 --- a/test/sharness/t0040-add-and-cat.sh +++ b/test/sharness/t0040-add-and-cat.sh @@ -193,6 +193,16 @@ test_add_cat_file() { test_expect_code 1 ipfs add -Q --chunker rabin-12-512-1024 mountdir/hello.txt ' + test_expect_success "ipfs add --chunker buzhash suceeds" ' + ipfs add --chunker buzhash mountdir/hello.txt >actual + ' + + test_expect_success "ipfs add --chunker buzhahs output looks good" ' + HASH="QmVr26fY1tKyspEJBniVhqxQeEjhF78XerGiqWAwraVLQH" && + echo "added $HASH hello.txt" >expected && + test_cmp expected actual + ' + test_expect_success "ipfs add on hidden file succeeds" ' echo "Hello Worlds!" >mountdir/.hello.txt && ipfs add mountdir/.hello.txt >actual From 76d2ad404ddb129cf15af79c494d81a6086e6f93 Mon Sep 17 00:00:00 2001 From: Jakub Sztandera Date: Mon, 7 Oct 2019 19:57:43 +0200 Subject: [PATCH 3/4] Add buzhash to `ipfs add` docs License: MIT Signed-off-by: Jakub Sztandera --- core/commands/add.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/commands/add.go b/core/commands/add.go index 0ad74ac95b1..c517487005c 100644 --- a/core/commands/add.go +++ b/core/commands/add.go @@ -11,7 +11,7 @@ import ( "github.com/ipfs/go-ipfs/core/commands/cmdenv" cmds "github.com/ipfs/go-ipfs-cmds" - "github.com/ipfs/go-ipfs-files" + files "github.com/ipfs/go-ipfs-files" coreiface "github.com/ipfs/interface-go-ipfs-core" "github.com/ipfs/interface-go-ipfs-core/options" mh "github.com/multiformats/go-multihash" @@ -80,9 +80,9 @@ how to break files into blocks. Blocks with same content can be deduplicated. Different chunking strategies will produce different hashes for the same file. The default is a fixed block size of 256 * 1024 bytes, 'size-262144'. Alternatively, you can use the -Rabin fingerprint chunker for content defined chunking by specifying -rabin-[min]-[avg]-[max] (where min/avg/max refer to the desired -chunk sizes in bytes), e.g. 'rabin-262144-524288-1048576'. +Buzhash or Rabin fingerprint chunker for content defined chunking by +specifying buzhash or rabin-[min]-[avg]-[max] (where min/avg/max refer +to the desired chunk sizes in bytes), e.g. 'rabin-262144-524288-1048576'. The following examples use very small byte sizes to demonstrate the properties of the different chunkers on a small file. You'll likely @@ -120,7 +120,7 @@ You can now check what blocks have been created by: cmds.BoolOption(trickleOptionName, "t", "Use trickle-dag format for dag generation."), cmds.BoolOption(onlyHashOptionName, "n", "Only chunk and hash - do not write to disk."), cmds.BoolOption(wrapOptionName, "w", "Wrap files with a directory object."), - cmds.StringOption(chunkerOptionName, "s", "Chunking algorithm, size-[bytes] or rabin-[min]-[avg]-[max]").WithDefault("size-262144"), + cmds.StringOption(chunkerOptionName, "s", "Chunking algorithm, size-[bytes], rabin-[min]-[avg]-[max] or buzhash").WithDefault("size-262144"), cmds.BoolOption(pinOptionName, "Pin this object when adding.").WithDefault(true), cmds.BoolOption(rawLeavesOptionName, "Use raw blocks for leaf nodes. (experimental)"), cmds.BoolOption(noCopyOptionName, "Add the file using filestore. Implies raw-leaves. (experimental)"), From 4391ef92e53aedc52ec4fce0ca04938c125ca75b Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 8 Oct 2019 08:34:55 +0900 Subject: [PATCH 4/4] fix(sharness): spelling --- test/sharness/t0040-add-and-cat.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sharness/t0040-add-and-cat.sh b/test/sharness/t0040-add-and-cat.sh index b6c1f500e76..74408fbb60f 100755 --- a/test/sharness/t0040-add-and-cat.sh +++ b/test/sharness/t0040-add-and-cat.sh @@ -197,7 +197,7 @@ test_add_cat_file() { ipfs add --chunker buzhash mountdir/hello.txt >actual ' - test_expect_success "ipfs add --chunker buzhahs output looks good" ' + test_expect_success "ipfs add --chunker buzhash output looks good" ' HASH="QmVr26fY1tKyspEJBniVhqxQeEjhF78XerGiqWAwraVLQH" && echo "added $HASH hello.txt" >expected && test_cmp expected actual