Skip to content

Commit

Permalink
Merge pull request #5337 from ipfs/feat/depth-limited-refs
Browse files Browse the repository at this point in the history
Feat: depth limited refs -r
  • Loading branch information
Stebalien committed Aug 27, 2018
2 parents 93c4f19 + fe89e2e commit 66b54d9
Show file tree
Hide file tree
Showing 2 changed files with 283 additions and 55 deletions.
162 changes: 107 additions & 55 deletions core/commands/refs.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ import (
cmds "github.com/ipfs/go-ipfs/commands"
"github.com/ipfs/go-ipfs/core"
e "github.com/ipfs/go-ipfs/core/commands/e"
path "gx/ipfs/QmdMPBephdLYNESkruDX2hcDTgFYhoCt4LimWhgnomSdV2/go-path"

"gx/ipfs/QmSP88ryZkHSRn1fnngAaV2Vcn63WUJzAavnRM9CVdU1Ky/go-ipfs-cmdkit"
cmdkit "gx/ipfs/QmSP88ryZkHSRn1fnngAaV2Vcn63WUJzAavnRM9CVdU1Ky/go-ipfs-cmdkit"
ipld "gx/ipfs/QmX5CsuHyVZeTLxgRSYkgLSDQKb9UjE8xnhQzCEJWWWFsC/go-ipld-format"
cid "gx/ipfs/QmZFbDTY9jfSBms2MchvYM9oYRbAF19K7Pby47yDBfpPrb/go-cid"
path "gx/ipfs/QmdMPBephdLYNESkruDX2hcDTgFYhoCt4LimWhgnomSdV2/go-path"
)

// KeyList is a general type for outputting lists of keys
Expand Down Expand Up @@ -64,6 +64,7 @@ NOTE: List all references recursively by using the flag '-r'.
cmdkit.BoolOption("edges", "e", "Emit edge format: `<from> -> <to>`."),
cmdkit.BoolOption("unique", "u", "Omit duplicate refs from output."),
cmdkit.BoolOption("recursive", "r", "Recursively list links of child nodes."),
cmdkit.IntOption("max-depth", "Only for recursive refs, limits fetch and listing to the given depth").WithDefault(-1),
},
Run: func(req cmds.Request, res cmds.Response) {
ctx := req.Context()
Expand All @@ -85,6 +86,16 @@ NOTE: List all references recursively by using the flag '-r'.
return
}

maxDepth, _, err := req.Option("max-depth").Int()
if err != nil {
res.SetError(err, cmdkit.ErrNormal)
return
}

if !recursive {
maxDepth = 1 // write only direct refs
}

format, _, err := req.Option("format").String()
if err != nil {
res.SetError(err, cmdkit.ErrNormal)
Expand Down Expand Up @@ -119,12 +130,12 @@ NOTE: List all references recursively by using the flag '-r'.
defer close(out)

rw := RefWriter{
out: out,
DAG: n.DAG,
Ctx: ctx,
Unique: unique,
PrintFmt: format,
Recursive: recursive,
out: out,
DAG: n.DAG,
Ctx: ctx,
Unique: unique,
PrintFmt: format,
MaxDepth: maxDepth,
}

for _, o := range objs {
Expand Down Expand Up @@ -231,86 +242,127 @@ type RefWriter struct {
DAG ipld.DAGService
Ctx context.Context

Unique bool
Recursive bool
PrintFmt string
Unique bool
MaxDepth int
PrintFmt string

seen *cid.Set
seen map[string]int
}

// WriteRefs writes refs of the given object to the underlying writer.
func (rw *RefWriter) WriteRefs(n ipld.Node) (int, error) {
if rw.Recursive {
return rw.writeRefsRecursive(n)
}
return rw.writeRefsSingle(n)
return rw.writeRefsRecursive(n, 0)

}

func (rw *RefWriter) writeRefsRecursive(n ipld.Node) (int, error) {
func (rw *RefWriter) writeRefsRecursive(n ipld.Node, depth int) (int, error) {
nc := n.Cid()

var count int
for i, ng := range ipld.GetDAG(rw.Ctx, rw.DAG, n) {
lc := n.Links()[i].Cid
if rw.skip(lc) {
goDeeper, shouldWrite := rw.visit(lc, depth+1) // The children are at depth+1

// Avoid "Get()" on the node and continue with next Link.
// We can do this if:
// - We printed it before (thus it was already seen and
// fetched with Get()
// - AND we must not go deeper.
// This is an optimization for pruned branches which have been
// visited before.
if !shouldWrite && !goDeeper {
continue
}

if err := rw.WriteEdge(nc, lc, n.Links()[i].Name); err != nil {
return count, err
}

// We must Get() the node because:
// - it is new (never written)
// - OR we need to go deeper.
// This ensures printed refs are always fetched.
nd, err := ng.Get(rw.Ctx)
if err != nil {
return count, err
}

c, err := rw.writeRefsRecursive(nd)
count += c
if err != nil {
return count, err
}
}
return count, nil
}

func (rw *RefWriter) writeRefsSingle(n ipld.Node) (int, error) {
c := n.Cid()

if rw.skip(c) {
return 0, nil
}

count := 0
for _, l := range n.Links() {
lc := l.Cid
if rw.skip(lc) {
continue
// Write this node if not done before (or !Unique)
if shouldWrite {
if err := rw.WriteEdge(nc, lc, n.Links()[i].Name); err != nil {
return count, err
}
count++
}

if err := rw.WriteEdge(c, lc, l.Name); err != nil {
return count, err
// Keep going deeper. This happens:
// - On unexplored branches
// - On branches not explored deep enough
// Note when !Unique, branches are always considered
// unexplored and only depth limits apply.
if goDeeper {
c, err := rw.writeRefsRecursive(nd, depth+1)
count += c
if err != nil {
return count, err
}
}
count++
}

return count, nil
}

// skip returns whether to skip a cid
func (rw *RefWriter) skip(c *cid.Cid) bool {
// visit returns two values:
// - the first boolean is true if we should keep traversing the DAG
// - the second boolean is true if we should print the CID
//
// visit will do branch pruning depending on rw.MaxDepth, previously visited
// cids and whether rw.Unique is set. i.e. rw.Unique = false and
// rw.MaxDepth = -1 disables any pruning. But setting rw.Unique to true will
// prune already visited branches at the cost of keeping as set of visited
// CIDs in memory.
func (rw *RefWriter) visit(c *cid.Cid, depth int) (bool, bool) {
atMaxDepth := rw.MaxDepth >= 0 && depth == rw.MaxDepth
overMaxDepth := rw.MaxDepth >= 0 && depth > rw.MaxDepth

// Shortcut when we are over max depth. In practice, this
// only applies when calling refs with --maxDepth=0, as root's
// children are already over max depth. Otherwise nothing should
// hit this.
if overMaxDepth {
return false, false
}

// We can shortcut right away if we don't need unique output:
// - we keep traversing when not atMaxDepth
// - always print
if !rw.Unique {
return false
return !atMaxDepth, true
}

// Unique == true from this point.
// Thus, we keep track of seen Cids, and their depth.
if rw.seen == nil {
rw.seen = cid.NewSet()
rw.seen = make(map[string]int)
}

has := rw.seen.Has(c)
if !has {
rw.seen.Add(c)
key := string(c.Bytes())
oldDepth, ok := rw.seen[key]

// Unique == true && depth < MaxDepth (or unlimited) from this point

// Branch pruning cases:
// - We saw the Cid before and either:
// - Depth is unlimited (MaxDepth = -1)
// - We saw it higher (smaller depth) in the DAG (means we must have
// explored deep enough before)
// Because we saw the CID, we don't print it again.
if ok && (rw.MaxDepth < 0 || oldDepth <= depth) {
return false, false
}
return has

// Final case, we must keep exploring the DAG from this CID
// (unless we hit the depth limit).
// We note down its depth because it was either not seen
// or is lower than last time.
// We print if it was not seen.
rw.seen[key] = depth
return !atMaxDepth, !ok
}

// Write one edge
Expand Down
Loading

0 comments on commit 66b54d9

Please sign in to comment.