diff --git a/core/commands/refs.go b/core/commands/refs.go index 6f84f8fb144..77348a0834d 100644 --- a/core/commands/refs.go +++ b/core/commands/refs.go @@ -10,11 +10,11 @@ import ( cmds "github.com/ipfs/go-ipfs/commands" "github.com/ipfs/go-ipfs/core" e "github.com/ipfs/go-ipfs/core/commands/e" - path "gx/ipfs/QmdMPBephdLYNESkruDX2hcDTgFYhoCt4LimWhgnomSdV2/go-path" - "gx/ipfs/QmSP88ryZkHSRn1fnngAaV2Vcn63WUJzAavnRM9CVdU1Ky/go-ipfs-cmdkit" + cmdkit "gx/ipfs/QmSP88ryZkHSRn1fnngAaV2Vcn63WUJzAavnRM9CVdU1Ky/go-ipfs-cmdkit" ipld "gx/ipfs/QmX5CsuHyVZeTLxgRSYkgLSDQKb9UjE8xnhQzCEJWWWFsC/go-ipld-format" cid "gx/ipfs/QmZFbDTY9jfSBms2MchvYM9oYRbAF19K7Pby47yDBfpPrb/go-cid" + path "gx/ipfs/QmdMPBephdLYNESkruDX2hcDTgFYhoCt4LimWhgnomSdV2/go-path" ) // KeyList is a general type for outputting lists of keys @@ -64,6 +64,7 @@ NOTE: List all references recursively by using the flag '-r'. cmdkit.BoolOption("edges", "e", "Emit edge format: ` -> `."), cmdkit.BoolOption("unique", "u", "Omit duplicate refs from output."), cmdkit.BoolOption("recursive", "r", "Recursively list links of child nodes."), + cmdkit.IntOption("max-depth", "Only for recursive refs, limits fetch and listing to the given depth").WithDefault(-1), }, Run: func(req cmds.Request, res cmds.Response) { ctx := req.Context() @@ -85,6 +86,16 @@ NOTE: List all references recursively by using the flag '-r'. return } + maxDepth, _, err := req.Option("max-depth").Int() + if err != nil { + res.SetError(err, cmdkit.ErrNormal) + return + } + + if !recursive { + maxDepth = 1 // write only direct refs + } + format, _, err := req.Option("format").String() if err != nil { res.SetError(err, cmdkit.ErrNormal) @@ -119,12 +130,12 @@ NOTE: List all references recursively by using the flag '-r'. 
defer close(out) rw := RefWriter{ - out: out, - DAG: n.DAG, - Ctx: ctx, - Unique: unique, - PrintFmt: format, - Recursive: recursive, + out: out, + DAG: n.DAG, + Ctx: ctx, + Unique: unique, + PrintFmt: format, + MaxDepth: maxDepth, } for _, o := range objs { @@ -231,86 +242,127 @@ type RefWriter struct { DAG ipld.DAGService Ctx context.Context - Unique bool - Recursive bool - PrintFmt string + Unique bool + MaxDepth int + PrintFmt string - seen *cid.Set + seen map[string]int } // WriteRefs writes refs of the given object to the underlying writer. func (rw *RefWriter) WriteRefs(n ipld.Node) (int, error) { - if rw.Recursive { - return rw.writeRefsRecursive(n) - } - return rw.writeRefsSingle(n) + return rw.writeRefsRecursive(n, 0) + } -func (rw *RefWriter) writeRefsRecursive(n ipld.Node) (int, error) { +func (rw *RefWriter) writeRefsRecursive(n ipld.Node, depth int) (int, error) { nc := n.Cid() var count int for i, ng := range ipld.GetDAG(rw.Ctx, rw.DAG, n) { lc := n.Links()[i].Cid - if rw.skip(lc) { + goDeeper, shouldWrite := rw.visit(lc, depth+1) // The children are at depth+1 + + // Avoid "Get()" on the node and continue with the next Link. + // We can do this if: + // - We printed it before (thus it was already seen and + // fetched with Get()) + // - AND we must not go deeper. + // This is an optimization for pruned branches which have been + // visited before. + if !shouldWrite && !goDeeper { continue } - if err := rw.WriteEdge(nc, lc, n.Links()[i].Name); err != nil { - return count, err - } - + // We must Get() the node because: + // - it is new (never written) + // - OR we need to go deeper. + // This ensures printed refs are always fetched.
nd, err := ng.Get(rw.Ctx) if err != nil { return count, err } - c, err := rw.writeRefsRecursive(nd) - count += c - if err != nil { - return count, err - } - } - return count, nil -} - -func (rw *RefWriter) writeRefsSingle(n ipld.Node) (int, error) { - c := n.Cid() - - if rw.skip(c) { - return 0, nil - } - - count := 0 - for _, l := range n.Links() { - lc := l.Cid - if rw.skip(lc) { - continue + // Write this node if not done before (or !Unique) + if shouldWrite { + if err := rw.WriteEdge(nc, lc, n.Links()[i].Name); err != nil { + return count, err + } + count++ } - if err := rw.WriteEdge(c, lc, l.Name); err != nil { - return count, err + // Keep going deeper. This happens: + // - On unexplored branches + // - On branches not explored deep enough + // Note when !Unique, branches are always considered + // unexplored and only depth limits apply. + if goDeeper { + c, err := rw.writeRefsRecursive(nd, depth+1) + count += c + if err != nil { + return count, err + } } - count++ } + return count, nil } -// skip returns whether to skip a cid -func (rw *RefWriter) skip(c *cid.Cid) bool { +// visit returns two values: +// - the first boolean is true if we should keep traversing the DAG +// - the second boolean is true if we should print the CID +// +// visit will do branch pruning depending on rw.MaxDepth, previously visited +// cids and whether rw.Unique is set. i.e. rw.Unique = false and +// rw.MaxDepth = -1 disables any pruning. But setting rw.Unique to true will +// prune already visited branches at the cost of keeping a set of visited +// CIDs in memory. +func (rw *RefWriter) visit(c *cid.Cid, depth int) (bool, bool) { + atMaxDepth := rw.MaxDepth >= 0 && depth == rw.MaxDepth + overMaxDepth := rw.MaxDepth >= 0 && depth > rw.MaxDepth + + // Shortcut when we are over max depth. In practice, this + // only applies when calling refs with --max-depth=0, as root's + // children are already over max depth. Otherwise nothing should + // hit this.
+ if overMaxDepth { + return false, false + } + + // We can shortcut right away if we don't need unique output: + // - we keep traversing when not atMaxDepth + // - always print if !rw.Unique { - return false + return !atMaxDepth, true } + // Unique == true from this point. + // Thus, we keep track of seen Cids, and their depth. if rw.seen == nil { - rw.seen = cid.NewSet() + rw.seen = make(map[string]int) } - - has := rw.seen.Has(c) - if !has { - rw.seen.Add(c) + key := string(c.Bytes()) + oldDepth, ok := rw.seen[key] + + // Unique == true && depth < MaxDepth (or unlimited) from this point + + // Branch pruning cases: + // - We saw the Cid before and either: + // - Depth is unlimited (MaxDepth = -1) + // - We saw it higher (smaller depth) in the DAG (means we must have + // explored deep enough before) + // Because we saw the CID, we don't print it again. + if ok && (rw.MaxDepth < 0 || oldDepth <= depth) { + return false, false } - return has + + // Final case, we must keep exploring the DAG from this CID + // (unless we hit the depth limit). + // We note down its depth because it was either not seen + // or is lower than last time. + // We print if it was not seen. + rw.seen[key] = depth + return !atMaxDepth, !ok } // Write one edge diff --git a/test/sharness/t0095-refs.sh b/test/sharness/t0095-refs.sh new file mode 100755 index 00000000000..67dcdfaba85 --- /dev/null +++ b/test/sharness/t0095-refs.sh @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 Protocol Labs, Inc +# MIT Licensed; see the LICENSE file in this repository. +# + +test_description="Test 'ipfs refs' command" + +. lib/test-lib.sh + +test_init_ipfs +test_launch_ipfs_daemon --offline + +# This file performs tests with the following directory +# structure. 
+# +# L0- _______ A_________ +# / | \ \ +# L1- B C D 1.txt +# / \ | | +# L2- D 1.txt B 2.txt +# | / \ +# L3- 2.txt D 1.txt +# | +# L4- 2.txt +# +# 'ipfs add -r A' output: +# +# added QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v A/1.txt +# added QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v A/B/1.txt +# added QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 A/B/D/2.txt +# added QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v A/C/B/1.txt +# added QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 A/C/B/D/2.txt +# added QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 A/D/2.txt +# added QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS A/B/D +# added QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa A/B +# added QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS A/C/B/D +# added QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa A/C/B +# added QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH A/C +# added QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS A/D +# added QmU6xujRsYzcrkocuR3fhfnkZBB8eyUFFq4WKRGw2aS15h A +# +# 'ipfs refs -r QmU6xujRsYzcrkocuR3fhfnkZBB8eyUFFq4WKRGw2aS15h' sample output +# that shows visit order in a stable go-ipfs version: +# +# QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v - 1.txt +# QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa - B (A/B) +# QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v - 1.txt (A/B/1.txt) +# QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS - D (A/B/D) +# QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 - 2.txt (A/B/D/2.txt) +# QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH - C (A/C) +# QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa - B (A/C/B) +# QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v - 1.txt (A/C/B/1.txt) +# QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS - D (A/C/B/D) +# QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 - 2.txt (A/C/B/D/2.txt) +# QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS - D (A/D) +# QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 - 2.txt (A/D/2.txt) + + +refsroot=QmU6xujRsYzcrkocuR3fhfnkZBB8eyUFFq4WKRGw2aS15h + 
+test_expect_success "create and add folders for refs" ' + mkdir -p A/B/D A/C/B/D A/D + echo "1" > A/1.txt + echo "1" > A/B/1.txt + echo "1" > A/C/B/1.txt + echo "2" > A/B/D/2.txt + echo "2" > A/C/B/D/2.txt + echo "2" > A/D/2.txt + root=$(ipfs add -r -Q A) + [[ "$root" == "$refsroot" ]] +' + +test_expect_success "ipfs refs -r" ' + cat < expected.txt +QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v +QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa +QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v +QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS +QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 +QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH +QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa +QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v +QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS +QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 +QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS +QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 +EOF + + ipfs refs -r $refsroot > refsr.txt + test_cmp expected.txt refsr.txt +' + +# Unique is like above but removing duplicates +test_expect_success "ipfs refs -r --unique" ' + cat < expected.txt +QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v +QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa +QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS +QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 +QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH +EOF + + ipfs refs -r --unique $refsroot > refsr.txt + test_cmp expected.txt refsr.txt +' + +# First level is 1.txt, B, C, D +test_expect_success "ipfs refs" ' + cat < expected.txt +QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v +QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa +QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH +QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS +EOF + ipfs refs $refsroot > refs.txt + test_cmp expected.txt refs.txt +' + +# max-depth=0 should return an empty list +test_expect_success "ipfs refs -r --max-depth=0" ' + cat < expected.txt +EOF + ipfs refs -r --max-depth=0 $refsroot > refs.txt + test_cmp 
expected.txt refs.txt +' + +# max-depth=1 should be equivalent to running without -r +test_expect_success "ipfs refs -r --max-depth=1" ' + ipfs refs -r --max-depth=1 $refsroot > refsr.txt + ipfs refs $refsroot > refs.txt + test_cmp refsr.txt refs.txt +' + +# We should see the depth limit engage at level 2 +test_expect_success "ipfs refs -r --max-depth=2" ' + cat < expected.txt +QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v +QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa +QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v +QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS +QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH +QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa +QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS +QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 +EOF + ipfs refs -r --max-depth=2 $refsroot > refsr.txt + test_cmp refsr.txt expected.txt +' + +# Here branch pruning and re-exploration come into play. +# At first it sees D at level 2 and does not go deeper. +# But then, after doing C, it sees D at level 1 and goes deeper +# so that it outputs the hash for 2.txt (-q61). +# We also see that C/B is pruned as it's been shown before. +# +# Excerpt from diagram above: +# +# L0- _______ A_________ +# / | \ \ +# L1- B C D 1.txt +# / \ | | +# L2- D 1.txt B 2.txt +test_expect_success "ipfs refs -r --unique --max-depth=2" ' + cat < expected.txt +QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v +QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa +QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS +QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH +QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 +EOF + ipfs refs -r --unique --max-depth=2 $refsroot > refsr.txt + test_cmp refsr.txt expected.txt +' + +test_done