From 934b8b0ddccda805eb99b7229032e9cd2f866e7c Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Fri, 8 Jun 2018 12:21:32 -0300 Subject: [PATCH] unixfs: integrate `pb.Data` into `FSNode` To avoid duplicating fields and making the code easier to follow. Remove all of `FSNode` previous fields in favor on a single `pb.Data` structure that is not exported. Accessor methods are added only for the necessary internal fields. This takes up more memory, `pb.Data` is always created inside `FSNode` and it stays there instead of just being created and destroyed during the (un)marshal operations. The removed fields `Data`, `blocksizes` and `Type` had a direct counterpart in the embedded `pb.Data` structure, in contrast (only) the `subtotal` field doesn't have one, it was used as a temporary accumulator to track the `Filesize`, which is now being kept updated on every modification (to ensure the entire `FSNode` is always at a valid state), so `subtotal` could just be removed without the addition of any other field (this temporary accumulator was obscuring how `Filesize` was computed). To keep `Filesize` up to date a method was added (`UpdateFilesize()`) to adjust its value in the two places where the file size could be modified, when changing its data (in `SetData()`, accessor method added) and when adding or removing child nodes (in `AddBlockSize()` and `RemoveBlockSize()`). A constructor method was added (`NewFSNode()`) to initialize the required fields, like `Type` which is explicitly set, this deprecates the previous methodology of just calling `new(FSNode)` and relying in the default value of `pb.Data_DataType` (`Data_Raw`) to avoid an explicit assignment. Also, `Filesize` is initialized to avoid being left with a `nil` value before marshaling empty nodes, which would result in a different hash from previous versions, to be backwards compatible. Previous versions of `GetBytes()` always set the `Filesize` value, even though it is reflected as an `optional` field in the `.proto` file (this may be an inaccurate field rule). Without the duplicated fields the functions `GetBytes()` and `FSNodeFromBytes()` are now reduced to simple `Marshal()` and `Unmarshal()` operations respectively. License: MIT Signed-off-by: Lucas Molas --- importer/helpers/dagbuilder.go | 4 +- importer/helpers/helpers.go | 4 +- mfs/file.go | 2 +- mfs/mfs_test.go | 2 +- unixfs/mod/dagmodifier.go | 2 +- unixfs/unixfs.go | 92 +++++++++++++++++++++++----------- unixfs/unixfs_test.go | 5 +- 7 files changed, 72 insertions(+), 39 deletions(-) diff --git a/importer/helpers/dagbuilder.go b/importer/helpers/dagbuilder.go index 3945b5ac81d..962c69dc808 100644 --- a/importer/helpers/dagbuilder.go +++ b/importer/helpers/dagbuilder.go @@ -116,7 +116,7 @@ func (db *DagBuilderHelper) GetDagServ() ipld.DAGService { func (db *DagBuilderHelper) NewUnixfsNode() *UnixfsNode { n := &UnixfsNode{ node: new(dag.ProtoNode), - ufmt: &ft.FSNode{Type: ft.TFile}, + ufmt: ft.NewFSNode(ft.TFile), } n.SetPrefix(db.prefix) return n @@ -161,7 +161,7 @@ func (db *DagBuilderHelper) NewLeaf(data []byte) (*UnixfsNode, error) { func (db *DagBuilderHelper) newUnixfsBlock() *UnixfsNode { n := &UnixfsNode{ node: new(dag.ProtoNode), - ufmt: &ft.FSNode{Type: ft.TRaw}, + ufmt: ft.NewFSNode(ft.TRaw), } n.SetPrefix(db.prefix) return n diff --git a/importer/helpers/helpers.go b/importer/helpers/helpers.go index 4e4390ca998..65c24b46dc2 100644 --- a/importer/helpers/helpers.go +++ b/importer/helpers/helpers.go @@ -77,7 +77,7 @@ func (n *UnixfsNode) Set(other *UnixfsNode) { n.raw = other.raw n.rawnode = other.rawnode if other.ufmt != nil { - n.ufmt.Data = other.ufmt.Data + n.ufmt.SetData(other.ufmt.GetData()) } } @@ -127,7 +127,7 @@ func (n *UnixfsNode) RemoveChild(index int, dbh *DagBuilderHelper) { // SetData stores data in this node. func (n *UnixfsNode) SetData(data []byte) { - n.ufmt.Data = data + n.ufmt.SetData(data) } // FileSize returns the total file size of this tree (including children) diff --git a/mfs/file.go b/mfs/file.go index 3839a279d91..14b0a65db10 100644 --- a/mfs/file.go +++ b/mfs/file.go @@ -60,7 +60,7 @@ func (fi *File) Open(flags int, sync bool) (FileDescriptor, error) { return nil, err } - switch fsn.Type { + switch fsn.GetType() { default: return nil, fmt.Errorf("unsupported fsnode type for 'file'") case ft.TSymlink: diff --git a/mfs/mfs_test.go b/mfs/mfs_test.go index 0fdeed8e567..6ea8cd7caf0 100644 --- a/mfs/mfs_test.go +++ b/mfs/mfs_test.go @@ -852,7 +852,7 @@ func TestFlushing(t *testing.T) { t.Fatal(err) } - if fsnode.Type != ft.TDirectory { + if fsnode.GetType() != ft.TDirectory { t.Fatal("root wasnt a directory") } diff --git a/unixfs/mod/dagmodifier.go b/unixfs/mod/dagmodifier.go index 12781b21965..f308982958e 100644 --- a/unixfs/mod/dagmodifier.go +++ b/unixfs/mod/dagmodifier.go @@ -526,7 +526,7 @@ func dagTruncate(ctx context.Context, n ipld.Node, size uint64, ds ipld.DAGServi var cur uint64 end := 0 var modified ipld.Node - ndata := new(ft.FSNode) + ndata := ft.NewFSNode(ft.TRaw) for i, lnk := range nd.Links() { child, err := lnk.GetNode(ctx, ds) if err != nil { diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index 654de7ff82e..3ba01fb0fc4 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -139,67 +139,101 @@ func DataSize(data []byte) (uint64, error) { } } -// An FSNode represents a filesystem object. +// An FSNode represents a filesystem object using the UnixFS specification. +// +// The `NewFSNode` constructor should be used instead of just calling `new(FSNode)` +// to guarantee that the required (`Type` and `Filesize`) fields in the `format` +// structure are initialized before marshaling (in `GetBytes()`). type FSNode struct { - Data []byte - // total data size for each child - blocksizes []uint64 - - // running sum of blocksizes - subtotal uint64 - - // node type of this node - Type pb.Data_DataType + // UnixFS format defined as a protocol buffers message. + format pb.Data } // FSNodeFromBytes unmarshal a protobuf message onto an FSNode. func FSNodeFromBytes(b []byte) (*FSNode, error) { - pbn := new(pb.Data) - err := proto.Unmarshal(b, pbn) + n := new(FSNode) + err := proto.Unmarshal(b, &n.format) if err != nil { return nil, err } - n := new(FSNode) - n.Data = pbn.Data - n.blocksizes = pbn.Blocksizes - n.subtotal = pbn.GetFilesize() - uint64(len(n.Data)) - n.Type = pbn.GetType() return n, nil } +// NewFSNode creates a new FSNode structure with the given `dataType`. +// +// It initializes the (required) `Type` field (that doesn't have a `Set()` +// accessor so it must be specified at creation), otherwise the `Marshal()` +// method in `GetBytes()` would fail (`required field "Type" not set`). +// +// It also initializes the `Filesize` pointer field to ensure its value +// is never nil before marshaling, this is not a required field but it is +// done to be backwards compatible with previous `go-ipfs` versions hash. +// (If it wasn't initialized there could be cases where `Filesize` could +// have been left at nil, when the `FSNode` was created but no data or +// child nodes were set to adjust it, as is the case in `NewLeaf()`.) +func NewFSNode(dataType pb.Data_DataType) *FSNode { + n := new(FSNode) + n.format.Type = &dataType + + // Initialize by `Filesize` by updating it with a dummy (zero) value. + n.UpdateFilesize(0) + + return n +} + // AddBlockSize adds the size of the next child block of this node func (n *FSNode) AddBlockSize(s uint64) { - n.subtotal += s - n.blocksizes = append(n.blocksizes, s) + n.UpdateFilesize(int64(s)) + n.format.Blocksizes = append(n.format.Blocksizes, s) } // RemoveBlockSize removes the given child block's size. func (n *FSNode) RemoveBlockSize(i int) { - n.subtotal -= n.blocksizes[i] - n.blocksizes = append(n.blocksizes[:i], n.blocksizes[i+1:]...) + n.UpdateFilesize(-int64(n.format.Blocksizes[i])) + n.format.Blocksizes = append(n.format.Blocksizes[:i], n.format.Blocksizes[i+1:]...) } // GetBytes marshals this node as a protobuf message. func (n *FSNode) GetBytes() ([]byte, error) { - pbn := new(pb.Data) - pbn.Type = &n.Type - pbn.Filesize = proto.Uint64(uint64(len(n.Data)) + n.subtotal) - pbn.Blocksizes = n.blocksizes - pbn.Data = n.Data - return proto.Marshal(pbn) + return proto.Marshal(&n.format) } // FileSize returns the total size of this tree. That is, the size of // the data in this node plus the size of all its children. func (n *FSNode) FileSize() uint64 { - return uint64(len(n.Data)) + n.subtotal + return n.format.GetFilesize() } // NumChildren returns the number of child blocks of this node func (n *FSNode) NumChildren() int { - return len(n.blocksizes) + return len(n.format.Blocksizes) +} + +// GetData retrieves the `Data` field from the internal `format`. +func (n *FSNode) GetData() []byte { + return n.format.GetData() +} + +// SetData sets the `Data` field from the internal `format` +// updating its `Filesize`. +func (n *FSNode) SetData(newData []byte) { + n.UpdateFilesize(int64(len(newData) - len(n.GetData()))) + n.format.Data = newData +} + +// UpdateFilesize updates the `Filesize` field from the internal `format` +// by a signed difference (`filesizeDiff`). +// TODO: Add assert to check for `Filesize` > 0? +func (n *FSNode) UpdateFilesize(filesizeDiff int64) { + n.format.Filesize = proto.Uint64(uint64( + int64(n.format.GetFilesize()) + filesizeDiff)) +} + +// GetType retrieves the `Type` field from the internal `format`. +func (n *FSNode) GetType() pb.Data_DataType { + return n.format.GetType() } // Metadata is used to store additional FSNode information. diff --git a/unixfs/unixfs_test.go b/unixfs/unixfs_test.go index 6edc2ca0bb8..967ee0ca87f 100644 --- a/unixfs/unixfs_test.go +++ b/unixfs/unixfs_test.go @@ -10,14 +10,13 @@ import ( ) func TestFSNode(t *testing.T) { - fsn := new(FSNode) - fsn.Type = TFile + fsn := NewFSNode(TFile) for i := 0; i < 16; i++ { fsn.AddBlockSize(100) } fsn.RemoveBlockSize(15) - fsn.Data = make([]byte, 128) + fsn.SetData(make([]byte, 128)) b, err := fsn.GetBytes() if err != nil {