Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ipfs dag stat command #7553

Merged
merged 5 commits into from
Aug 17, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/commands/commands_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ func TestROCommands(t *testing.T) {
"/dag",
"/dag/get",
"/dag/resolve",
"/dag/stat",
"/dns",
"/get",
"/ls",
Expand Down Expand Up @@ -99,6 +100,7 @@ func TestCommands(t *testing.T) {
"/dag/put",
"/dag/import",
"/dag/resolve",
"/dag/stat",
"/dht",
"/dht/findpeer",
"/dht/findprovs",
Expand Down
115 changes: 115 additions & 0 deletions core/commands/dag/dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"time"

"github.com/ipfs/go-ipfs/core/commands/cmdenv"
"github.com/ipfs/go-ipfs/core/commands/e"
"github.com/ipfs/go-ipfs/core/coredag"
iface "github.com/ipfs/interface-go-ipfs-core"

Expand All @@ -19,6 +20,7 @@ import (
files "github.com/ipfs/go-ipfs-files"
ipld "github.com/ipfs/go-ipld-format"
mdag "github.com/ipfs/go-merkledag"
traverse "github.com/ipfs/go-merkledag/traverse"
ipfspath "github.com/ipfs/go-path"
"github.com/ipfs/interface-go-ipfs-core/options"
path "github.com/ipfs/interface-go-ipfs-core/path"
Expand Down Expand Up @@ -54,6 +56,7 @@ to deprecate and replace the existing 'ipfs object' command moving forward.
"resolve": DagResolveCmd,
"import": DagImportCmd,
"export": DagExportCmd,
"stat": DagStatCmd,
},
}

Expand Down Expand Up @@ -668,3 +671,115 @@ The output of blocks happens in strict DAG-traversal, first-seen, order.
},
},
}

// DagStat holds the aggregate statistics collected while walking a DAG.
type DagStat struct {
	// Size is the sum of the raw byte lengths of all counted blocks.
	Size uint64
	// NumBlocks is the number of blocks that were counted.
	NumBlocks int64
}

// String renders the statistics on a single line, for example
// "Size: 15, NumBlocks: 1".
func (ds *DagStat) String() string {
	const layout = "Size: %d, NumBlocks: %d"
	return fmt.Sprintf(layout, ds.Size, ds.NumBlocks)
}

var DagStatCmd = &cmds.Command{
Helptext: cmds.HelpText{
Tagline: "Gets stats for a DAG",
ShortDescription: `
'ipfs dag stat' fetches a dag and returns various statistics about the DAG.
Statistics include size and number of blocks.

Note: This command skips duplicate blocks in reporting both size and the number of blocks
aschmahmann marked this conversation as resolved.
Show resolved Hide resolved
`,
},
Arguments: []cmds.Argument{
cmds.StringArg("root", true, false, "CID of a DAG root to get statistics for").EnableStdin(),
},
Options: []cmds.Option{
cmds.BoolOption(progressOptionName, "p", "Return progressive data while reading through the DAG").WithDefault(true),
},
Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error {
progressive := req.Options[progressOptionName].(bool)

api, err := cmdenv.GetApi(env, req)
if err != nil {
return err
}

rp, err := api.ResolvePath(req.Context, path.New(req.Arguments[0]))
if err != nil {
return err
}

if len(rp.Remainder()) > 0 {
return fmt.Errorf("cannot return size for anything other than a DAG with a root CID")
}
Comment on lines +713 to +715
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is supportable, but didn't seem necessary for an initial release of this command. Open to counter opinions.


obj, err := api.Dag().Get(req.Context, rp.Cid())
if err != nil {
return err
}

dagstats := &DagStat{}
err = traverse.Traverse(obj, traverse.Options{
DAG: api.Dag(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably try to create a bitswap session here, but I'm fine punting that if necessary (this API will usually be called on local DAGs, I assume).

Order: traverse.DFSPre,
Func: func(current traverse.State) error {
dagstats.Size += uint64(len(current.Node.RawData()))
dagstats.NumBlocks++

if progressive {
if err := res.Emit(dagstats); err != nil {
return err
}
}
return nil
},
ErrFunc: nil,
SkipDuplicates: true,
})
if err != nil {
return fmt.Errorf("error traversing DAG: %w", err)
}

if !progressive {
if err := res.Emit(dagstats); err != nil {
return err
}
}

return nil
},
Type: DagStat{},
PostRun: cmds.PostRunMap{
// CLI post-processing: drain the response stream, echoing every update to
// stderr terminated by a carriage return, and forward only the most
// recently received value to the emitter once the stream ends.
cmds.CLI: func(res cmds.Response, re cmds.ResponseEmitter) error {
	var latest *DagStat
	for {
		next, err := res.Next()
		if err == io.EOF {
			// End of stream: stop reading and emit the last value seen.
			break
		}
		if err != nil {
			return err
		}

		stat, ok := next.(*DagStat)
		if !ok {
			return e.TypeErr(stat, next)
		}

		latest = stat
		fmt.Fprintf(os.Stderr, "%v\r", stat)
	}
	return re.Emit(latest)
},
},
Encoders: cmds.EncoderMap{
cmds.Text: cmds.MakeTypedEncoder(func(req *cmds.Request, w io.Writer, event *DagStat) error {
_, err := fmt.Fprintf(
w,
"%v\n",
event,
)
Comment on lines +770 to +782
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like the pattern we use for emitting updates as well as a final result, but would like some confirmation that this is reasonable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is... I just wish we had a better way.

return err
}),
},
}
1 change: 1 addition & 0 deletions core/commands/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ var rootROSubcommands = map[string]*cmds.Command{
Subcommands: map[string]*cmds.Command{
"get": dag.DagGetCmd,
"resolve": dag.DagResolveCmd,
"stat": dag.DagStatCmd,
aschmahmann marked this conversation as resolved.
Show resolved Hide resolved
},
},
"resolve": ResolveCmd,
Expand Down
35 changes: 35 additions & 0 deletions test/sharness/t0053-dag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,41 @@ test_dag_cmd() {
test_cmp resolve_obj_exp resolve_obj &&
test_cmp resolve_data_exp resolve_data
'

test_expect_success "dag stat of simple IPLD object" '
Comment on lines +271 to +272
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a few tests here, do we need more before shipping this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems reasonable.

ipfs dag stat $NESTED_HASH > actual_stat_inner_ipld_obj &&
echo "Size: 15, NumBlocks: 1" > exp_stat_inner_ipld_obj &&
test_cmp exp_stat_inner_ipld_obj actual_stat_inner_ipld_obj &&
ipfs dag stat $HASH > actual_stat_ipld_obj &&
echo "Size: 61, NumBlocks: 2" > exp_stat_ipld_obj &&
test_cmp exp_stat_ipld_obj actual_stat_ipld_obj
'

# Adds the output of `echo "1234"` as an unpinned file and expects
# `ipfs dag stat` to report it as a single block with a total size of 13.
test_expect_success "dag stat of simple UnixFS object" '
BASIC_UNIXFS=$(echo "1234" | ipfs add --pin=false -q) &&
ipfs dag stat $BASIC_UNIXFS > actual_stat_basic_unixfs &&
echo "Size: 13, NumBlocks: 1" > exp_stat_basic_unixfs &&
test_cmp exp_stat_basic_unixfs actual_stat_basic_unixfs
'

# The multiblock file is just 10000000 copies of the character "1".
# Because most of its data is repeated, and `ipfs dag stat` counts duplicate
# blocks only once, it should report a small number of blocks (3 expected).
test_expect_success "dag stat of multiblock UnixFS object" '
MULTIBLOCK_UNIXFS=$(printf "1%.0s" {1..10000000} | ipfs add --pin=false -q) &&
ipfs dag stat $MULTIBLOCK_UNIXFS > actual_stat_multiblock_unixfs &&
echo "Size: 302582, NumBlocks: 3" > exp_stat_multiblock_unixfs &&
test_cmp exp_stat_multiblock_unixfs actual_stat_multiblock_unixfs
'

# Builds a directory containing a small file and a 10000000-character file of
# repeated "1"s, adds it recursively without pinning, and expects
# `ipfs dag stat` on the directory root to report 5 blocks totalling 302705.
# Every setup command is joined with && so that a failure in any step fails
# the test instead of being silently ignored.
test_expect_success "dag stat of directory of UnixFS objects" '
  mkdir -p unixfsdir &&
  echo "1234" > unixfsdir/small.txt &&
  printf "1%.0s" {1..10000000} > unixfsdir/many1s.txt &&
  DIRECTORY_UNIXFS=$(ipfs add -r --pin=false -Q unixfsdir) &&
  ipfs dag stat $DIRECTORY_UNIXFS > actual_stat_directory_unixfs &&
  echo "Size: 302705, NumBlocks: 5" > exp_stat_directory_unixfs &&
  test_cmp exp_stat_directory_unixfs actual_stat_directory_unixfs
'
}

# should work offline
Expand Down