diff --git a/core/commands/commands_test.go b/core/commands/commands_test.go
index 940b9e0f43f..9584ff70786 100644
--- a/core/commands/commands_test.go
+++ b/core/commands/commands_test.go
@@ -25,6 +25,7 @@ func TestROCommands(t *testing.T) {
 		"/dag",
 		"/dag/get",
 		"/dag/resolve",
+		"/dag/stat",
 		"/dns",
 		"/get",
 		"/ls",
@@ -99,6 +100,7 @@ func TestCommands(t *testing.T) {
 		"/dag/put",
 		"/dag/import",
 		"/dag/resolve",
+		"/dag/stat",
 		"/dht",
 		"/dht/findpeer",
 		"/dht/findprovs",
diff --git a/core/commands/dag/dag.go b/core/commands/dag/dag.go
index 85588be2f14..4df6571cc83 100644
--- a/core/commands/dag/dag.go
+++ b/core/commands/dag/dag.go
@@ -10,6 +10,7 @@ import (
 	"time"
 
 	"github.com/ipfs/go-ipfs/core/commands/cmdenv"
+	"github.com/ipfs/go-ipfs/core/commands/e"
 	"github.com/ipfs/go-ipfs/core/coredag"
 	iface "github.com/ipfs/interface-go-ipfs-core"
 
@@ -19,6 +20,7 @@ import (
 	files "github.com/ipfs/go-ipfs-files"
 	ipld "github.com/ipfs/go-ipld-format"
 	mdag "github.com/ipfs/go-merkledag"
+	traverse "github.com/ipfs/go-merkledag/traverse"
 	ipfspath "github.com/ipfs/go-path"
 	"github.com/ipfs/interface-go-ipfs-core/options"
 	path "github.com/ipfs/interface-go-ipfs-core/path"
@@ -54,6 +56,7 @@ to deprecate and replace the existing 'ipfs object' command moving forward.
 		"resolve": DagResolveCmd,
 		"import":  DagImportCmd,
 		"export":  DagExportCmd,
+		"stat":    DagStatCmd,
 	},
 }
 
@@ -668,3 +671,126 @@ The output of blocks happens in strict DAG-traversal, first-seen, order.
 		},
 	},
 }
+
+// DagStat holds the statistics reported by 'ipfs dag stat'. Size is the sum of
+// the raw block lengths and NumBlocks is the number of blocks visited.
+type DagStat struct {
+	Size      uint64
+	NumBlocks int64
+}
+
+// String formats the statistics as "Size: <bytes>, NumBlocks: <count>".
+func (s *DagStat) String() string {
+	return fmt.Sprintf("Size: %d, NumBlocks: %d", s.Size, s.NumBlocks)
+}
+
+// DagStatCmd implements 'ipfs dag stat': it resolves the root argument,
+// traverses the DAG depth-first while skipping duplicate blocks, and emits the
+// running totals (after every block when progress is enabled, once otherwise).
+var DagStatCmd = &cmds.Command{
+	Helptext: cmds.HelpText{
+		Tagline: "Gets stats for a DAG",
+		ShortDescription: `
+'ipfs dag stat' fetches a dag and returns various statistics about the DAG.
+Statistics include size and number of blocks.
+
+Note: This command skips duplicate blocks in reporting both size and the number of blocks
+`,
+	},
+	Arguments: []cmds.Argument{
+		cmds.StringArg("root", true, false, "CID of a DAG root to get statistics for").EnableStdin(),
+	},
+	Options: []cmds.Option{
+		cmds.BoolOption(progressOptionName, "p", "Return progressive data while reading through the DAG").WithDefault(true),
+	},
+	Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error {
+		progressive := req.Options[progressOptionName].(bool)
+
+		api, err := cmdenv.GetApi(env, req)
+		if err != nil {
+			return err
+		}
+
+		rp, err := api.ResolvePath(req.Context, path.New(req.Arguments[0]))
+		if err != nil {
+			return err
+		}
+
+		// Paths with a non-empty remainder are rejected.
+		if len(rp.Remainder()) > 0 {
+			return fmt.Errorf("cannot return size for anything other than a DAG with a root CID")
+		}
+
+		nodeGetter := mdag.NewSession(req.Context, api.Dag())
+		obj, err := nodeGetter.Get(req.Context, rp.Cid())
+		if err != nil {
+			return err
+		}
+
+		dagstats := &DagStat{}
+		err = traverse.Traverse(obj, traverse.Options{
+			DAG:   nodeGetter,
+			Order: traverse.DFSPre,
+			Func: func(current traverse.State) error {
+				dagstats.Size += uint64(len(current.Node.RawData()))
+				dagstats.NumBlocks++
+
+				if progressive {
+					if err := res.Emit(dagstats); err != nil {
+						return err
+					}
+				}
+				return nil
+			},
+			ErrFunc:        nil,
+			SkipDuplicates: true,
+		})
+		if err != nil {
+			return fmt.Errorf("error traversing DAG: %w", err)
+		}
+
+		if !progressive {
+			if err := res.Emit(dagstats); err != nil {
+				return err
+			}
+		}
+
+		return nil
+	},
+	Type: DagStat{},
+	PostRun: cmds.PostRunMap{
+		cmds.CLI: func(res cmds.Response, re cmds.ResponseEmitter) error {
+			var dagStats *DagStat
+			for {
+				v, err := res.Next()
+				if err != nil {
+					if err == io.EOF {
+						break
+					}
+					return err
+				}
+
+				out, ok := v.(*DagStat)
+				if !ok {
+					return e.TypeErr(out, v)
+				}
+				dagStats = out
+				// Progress goes to stderr; the trailing \r lets the next
+				// update overwrite this one.
+				fmt.Fprintf(os.Stderr, "%v\r", out)
+			}
+			// Only the last value received is passed on to the emitter.
+			return re.Emit(dagStats)
+		},
+	},
+	Encoders: cmds.EncoderMap{
+		cmds.Text: cmds.MakeTypedEncoder(func(req *cmds.Request, w io.Writer, event *DagStat) error {
+			_, err := fmt.Fprintf(
+				w,
+				"%v\n",
+				event,
+			)
+			return err
+		}),
+	},
+}
diff --git a/core/commands/root.go b/core/commands/root.go
index 5d00770f006..b9a8dc40909 100644
--- a/core/commands/root.go
+++ b/core/commands/root.go
@@ -191,6 +191,7 @@ var rootROSubcommands = map[string]*cmds.Command{
 		Subcommands: map[string]*cmds.Command{
 			"get":     dag.DagGetCmd,
 			"resolve": dag.DagResolveCmd,
+			"stat":    dag.DagStatCmd,
 		},
 	},
 	"resolve": ResolveCmd,
diff --git a/test/sharness/t0053-dag.sh b/test/sharness/t0053-dag.sh
index de35a58fd1c..2225f79ec4d 100755
--- a/test/sharness/t0053-dag.sh
+++ b/test/sharness/t0053-dag.sh
@@ -268,6 +268,41 @@ test_dag_cmd() {
     test_cmp resolve_obj_exp resolve_obj &&
     test_cmp resolve_data_exp resolve_data
   '
+
+  test_expect_success "dag stat of simple IPLD object" '
+    ipfs dag stat $NESTED_HASH > actual_stat_inner_ipld_obj &&
+    echo "Size: 15, NumBlocks: 1" > exp_stat_inner_ipld_obj &&
+    test_cmp exp_stat_inner_ipld_obj actual_stat_inner_ipld_obj &&
+    ipfs dag stat $HASH > actual_stat_ipld_obj &&
+    echo "Size: 61, NumBlocks: 2" > exp_stat_ipld_obj &&
+    test_cmp exp_stat_ipld_obj actual_stat_ipld_obj
+  '
+
+  test_expect_success "dag stat of simple UnixFS object" '
+    BASIC_UNIXFS=$(echo "1234" | ipfs add --pin=false -q) &&
+    ipfs dag stat $BASIC_UNIXFS > actual_stat_basic_unixfs &&
+    echo "Size: 13, NumBlocks: 1" > exp_stat_basic_unixfs &&
+    test_cmp exp_stat_basic_unixfs actual_stat_basic_unixfs
+  '
+
+  # The multiblock file is just 10000000 copies of the number 1
+  # As most of its data is replicated it should have a small number of blocks
+  test_expect_success "dag stat of multiblock UnixFS object" '
+    MULTIBLOCK_UNIXFS=$(printf "1%.0s" {1..10000000} | ipfs add --pin=false -q) &&
+    ipfs dag stat $MULTIBLOCK_UNIXFS > actual_stat_multiblock_unixfs &&
+    echo "Size: 302582, NumBlocks: 3" > exp_stat_multiblock_unixfs &&
+    test_cmp exp_stat_multiblock_unixfs actual_stat_multiblock_unixfs
+  '
+
+  test_expect_success "dag stat of directory of UnixFS objects" '
+    mkdir -p unixfsdir &&
+    echo "1234" > unixfsdir/small.txt &&
+    printf "1%.0s" {1..10000000} > unixfsdir/many1s.txt &&
+    DIRECTORY_UNIXFS=$(ipfs add -r --pin=false -Q unixfsdir) &&
+    ipfs dag stat $DIRECTORY_UNIXFS > actual_stat_directory_unixfs &&
+    echo "Size: 302705, NumBlocks: 5" > exp_stat_directory_unixfs &&
+    test_cmp exp_stat_directory_unixfs actual_stat_directory_unixfs
+  '
 }
 
 # should work offline