-
Notifications
You must be signed in to change notification settings - Fork 306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ostree repo commit: Speed up composing trees with --tree=ref
#1643
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,8 @@ | |
#include "otutil.h" | ||
#include "ostree.h" | ||
|
||
#include "ostree-core-private.h" | ||
|
||
/** | ||
* SECTION:ostree-mutable-tree | ||
* @title: In-memory modifiable filesystem tree | ||
|
@@ -38,6 +40,15 @@ | |
* programmatically. | ||
*/ | ||
|
||
typedef enum { | ||
MTREE_STATE_WHOLE, | ||
|
||
/* MTREE_STATE_LAZY allows us to not read files and subdirs from the objects | ||
* on disk until they're actually needed - often they won't be needed at | ||
* all. */ | ||
MTREE_STATE_LAZY | ||
} OstreeMutableTreeState; | ||
|
||
/** | ||
* OstreeMutableTree: | ||
* | ||
|
@@ -55,6 +66,8 @@ struct OstreeMutableTree | |
* it sets parent = NULL on all its children (see remove_child_mtree) */ | ||
OstreeMutableTree *parent; | ||
|
||
OstreeMutableTreeState state; | ||
|
||
/* This is the checksum of the Dirtree object that corresponds to the current | ||
* contents of this directory. contents_checksum can be NULL if the SHA was | ||
* never calculated or contents of this mtree or any subdirectory has been | ||
|
@@ -71,7 +84,14 @@ struct OstreeMutableTree | |
* and xattrs of this directory. This can be NULL. */ | ||
char *metadata_checksum; | ||
|
||
/* const char* filename -> const char* checksum */ | ||
/* ======== Valid for state LAZY: =========== */ | ||
|
||
/* The repo so we can look up the checksums. */ | ||
OstreeRepo *repo; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's going to result in a lot of refs on the repo. 🤔(In Rust we'd just have the mutable tree have a lifetime bounded by that of the repo) But eh, it's fine for now, if we ever profile and find this is an issue we can perhaps just have the ref be maintained by the toplevel mutable tree, and children chase up to the root. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Not too many I think. There's only one per LAZY I agree that it's a shame to churn the repo refcount, but it's unlikely to be worth fixing. |
||
|
||
/* ======== Valid for state WHOLE: ========== */ | ||
|
||
/* const char* filename -> const char* checksum. */ | ||
GHashTable *files; | ||
|
||
/* const char* filename -> OstreeMutableTree* subtree */ | ||
|
@@ -80,8 +100,6 @@ struct OstreeMutableTree | |
|
||
G_DEFINE_TYPE (OstreeMutableTree, ostree_mutable_tree, G_TYPE_OBJECT) | ||
|
||
static void invalidate_contents_checksum (OstreeMutableTree *self); | ||
|
||
static void | ||
ostree_mutable_tree_finalize (GObject *object) | ||
{ | ||
|
@@ -95,6 +113,8 @@ ostree_mutable_tree_finalize (GObject *object) | |
g_hash_table_destroy (self->files); | ||
g_hash_table_destroy (self->subdirs); | ||
|
||
g_clear_object (&self->repo); | ||
|
||
G_OBJECT_CLASS (ostree_mutable_tree_parent_class)->finalize (object); | ||
} | ||
|
||
|
@@ -117,7 +137,6 @@ insert_child_mtree (OstreeMutableTree *self, const gchar* name, | |
g_assert_null (child->parent); | ||
g_hash_table_insert (self->subdirs, g_strdup (name), child); | ||
child->parent = self; | ||
invalidate_contents_checksum (self); | ||
} | ||
|
||
static void | ||
|
@@ -139,6 +158,7 @@ ostree_mutable_tree_init (OstreeMutableTree *self) | |
g_free, g_free); | ||
self->subdirs = g_hash_table_new_full (g_str_hash, g_str_equal, | ||
g_free, remove_child_mtree); | ||
self->state = MTREE_STATE_WHOLE; | ||
} | ||
|
||
static void | ||
|
@@ -153,6 +173,83 @@ invalidate_contents_checksum (OstreeMutableTree *self) | |
} | ||
} | ||
|
||
/* Go from state LAZY to state WHOLE by reading the tree from disk */ | ||
static gboolean | ||
_ostree_mutable_tree_make_whole (OstreeMutableTree *self, | ||
GCancellable *cancellable, | ||
GError **error) | ||
{ | ||
if (self->state == MTREE_STATE_WHOLE) | ||
return TRUE; | ||
|
||
g_assert_cmpuint (self->state, ==, MTREE_STATE_LAZY); | ||
g_assert_nonnull (self->repo); | ||
g_assert_nonnull (self->contents_checksum); | ||
g_assert_nonnull (self->metadata_checksum); | ||
g_assert_cmpuint (g_hash_table_size (self->files), ==, 0); | ||
g_assert_cmpuint (g_hash_table_size (self->subdirs), ==, 0); | ||
|
||
g_autoptr(GVariant) dirtree = NULL; | ||
if (!ostree_repo_load_variant (self->repo, OSTREE_OBJECT_TYPE_DIR_TREE, | ||
self->contents_checksum, &dirtree, error)) | ||
return FALSE; | ||
|
||
{ | ||
g_autoptr(GVariant) dir_file_contents = g_variant_get_child_value (dirtree, 0); | ||
GVariantIter viter; | ||
g_variant_iter_init (&viter, dir_file_contents); | ||
const char *fname; | ||
GVariant *contents_csum_v = NULL; | ||
while (g_variant_iter_loop (&viter, "(&s@ay)", &fname, &contents_csum_v)) | ||
{ | ||
char tmp_checksum[OSTREE_SHA256_STRING_LEN + 1]; | ||
_ostree_checksum_inplace_from_bytes_v (contents_csum_v, tmp_checksum); | ||
g_hash_table_insert (self->files, g_strdup (fname), | ||
g_strdup (tmp_checksum)); | ||
} | ||
} | ||
|
||
/* Process subdirectories */ | ||
{ | ||
g_autoptr(GVariant) dir_subdirs = g_variant_get_child_value (dirtree, 1); | ||
const char *dname; | ||
GVariant *subdirtree_csum_v = NULL; | ||
GVariant *subdirmeta_csum_v = NULL; | ||
GVariantIter viter; | ||
g_variant_iter_init (&viter, dir_subdirs); | ||
while (g_variant_iter_loop (&viter, "(&s@ay@ay)", &dname, | ||
&subdirtree_csum_v, &subdirmeta_csum_v)) | ||
{ | ||
char subdirtree_checksum[OSTREE_SHA256_STRING_LEN+1]; | ||
_ostree_checksum_inplace_from_bytes_v (subdirtree_csum_v, subdirtree_checksum); | ||
char subdirmeta_checksum[OSTREE_SHA256_STRING_LEN+1]; | ||
_ostree_checksum_inplace_from_bytes_v (subdirmeta_csum_v, subdirmeta_checksum); | ||
insert_child_mtree (self, dname, ostree_mutable_tree_new_from_checksum ( | ||
self->repo, subdirtree_checksum, subdirmeta_checksum)); | ||
} | ||
} | ||
|
||
g_clear_object (&self->repo); | ||
self->state = MTREE_STATE_WHOLE; | ||
return TRUE; | ||
} | ||
|
||
/* _ostree_mutable_tree_make_whole can fail if state == MTREE_STATE_LAZY, but | ||
* we have getters that preceed the existence of MTREE_STATE_LAZY which can't | ||
* return errors. So instead this function will fail and print a warning. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm. One approach I've taken in other places for things like this is to cache the error, changing the object into an "error state". Have all API functions be "noops", and return the error at the next opportunity. We might also be able to add a new API to check for and consume that error? Not a hard requirement right now though. |
||
static void | ||
_assert_ostree_mutable_tree_make_whole (OstreeMutableTree *self) | ||
{ | ||
GError *err = NULL; | ||
if (!_ostree_mutable_tree_make_whole (self, NULL, &err)) | ||
{ | ||
g_warning ("Failed to make lazy OstreeMutableTree whole and no way to " | ||
"report error to caller. Error was: %s", | ||
err ? err->message : "unknown"); | ||
g_return_if_reached (); | ||
} | ||
} | ||
|
||
void | ||
ostree_mutable_tree_set_metadata_checksum (OstreeMutableTree *self, | ||
const char *checksum) | ||
|
@@ -183,6 +280,8 @@ ostree_mutable_tree_set_contents_checksum (OstreeMutableTree *self, | |
"already has a checksum set. Old checksum %s, new checksum %s", | ||
self->contents_checksum, checksum); | ||
|
||
_assert_ostree_mutable_tree_make_whole (self); | ||
|
||
g_free (self->contents_checksum); | ||
self->contents_checksum = g_strdup (checksum); | ||
} | ||
|
@@ -215,6 +314,9 @@ ostree_mutable_tree_replace_file (OstreeMutableTree *self, | |
if (!ot_util_filename_validate (name, error)) | ||
goto out; | ||
|
||
if (!_ostree_mutable_tree_make_whole (self, NULL, error)) | ||
goto out; | ||
|
||
if (g_hash_table_lookup (self->subdirs, name)) | ||
{ | ||
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED, | ||
|
@@ -256,6 +358,9 @@ ostree_mutable_tree_ensure_dir (OstreeMutableTree *self, | |
if (!ot_util_filename_validate (name, error)) | ||
goto out; | ||
|
||
if (!_ostree_mutable_tree_make_whole (self, NULL, error)) | ||
goto out; | ||
|
||
if (g_hash_table_lookup (self->files, name)) | ||
{ | ||
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED, | ||
|
@@ -267,6 +372,7 @@ ostree_mutable_tree_ensure_dir (OstreeMutableTree *self, | |
if (!ret_dir) | ||
{ | ||
ret_dir = ostree_mutable_tree_new (); | ||
invalidate_contents_checksum (self); | ||
insert_child_mtree (self, name, g_object_ref (ret_dir)); | ||
} | ||
|
||
|
@@ -287,6 +393,9 @@ ostree_mutable_tree_lookup (OstreeMutableTree *self, | |
g_autoptr(OstreeMutableTree) ret_subdir = NULL; | ||
g_autofree char *ret_file_checksum = NULL; | ||
|
||
if (!_ostree_mutable_tree_make_whole (self, NULL, error)) | ||
goto out; | ||
|
||
ret_subdir = ot_gobject_refz (g_hash_table_lookup (self->subdirs, name)); | ||
if (!ret_subdir) | ||
{ | ||
|
@@ -328,6 +437,9 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self, | |
OstreeMutableTree *subdir = self; /* nofree */ | ||
g_autoptr(OstreeMutableTree) ret_parent = NULL; | ||
|
||
if (!_ostree_mutable_tree_make_whole (self, NULL, error)) | ||
goto out; | ||
|
||
g_assert (metadata_checksum != NULL); | ||
|
||
if (!self->metadata_checksum) | ||
|
@@ -348,6 +460,7 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self, | |
next = g_hash_table_lookup (subdir->subdirs, name); | ||
if (!next) | ||
{ | ||
invalidate_contents_checksum (subdir); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be cleaner to just put this call in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't do that because we call |
||
next = ostree_mutable_tree_new (); | ||
ostree_mutable_tree_set_metadata_checksum (next, metadata_checksum); | ||
insert_child_mtree (subdir, g_strdup (name), next); | ||
|
@@ -364,6 +477,71 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self, | |
return ret; | ||
} | ||
|
||
const char empty_tree_csum[] = "6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d"; | ||
|
||
/** | ||
* ostree_mutable_tree_fill_empty_from_dirtree: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Possibly a better name would be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I would agree with that (keeping it private for now). |
||
* | ||
* Merges @self with the tree given by @contents_checksum and | ||
* @metadata_checksum, but only if it's possible without writing new objects to | ||
* the @repo. We can do this if either @self is empty, the tree given by | ||
* @contents_checksum is empty or if both trees already have the same | ||
* @contents_checksum. | ||
* | ||
* Returns: @TRUE if merge was successful, @FALSE if it was not possible. | ||
* | ||
* This function enables optimisations when composing trees. The provided | ||
* checksums are not loaded or checked when this function is called. Instead | ||
* the contents will be loaded only when needed. | ||
*/ | ||
gboolean | ||
ostree_mutable_tree_fill_empty_from_dirtree (OstreeMutableTree *self, | ||
OstreeRepo *repo, | ||
const char *contents_checksum, | ||
const char *metadata_checksum) | ||
{ | ||
g_return_val_if_fail (repo, FALSE); | ||
g_return_val_if_fail (contents_checksum, FALSE); | ||
g_return_val_if_fail (metadata_checksum, FALSE); | ||
|
||
switch (self->state) | ||
{ | ||
case MTREE_STATE_LAZY: | ||
{ | ||
if (g_strcmp0 (contents_checksum, self->contents_checksum) == 0 || | ||
g_strcmp0 (empty_tree_csum, self->contents_checksum) == 0) | ||
break; | ||
|
||
if (g_strcmp0 (empty_tree_csum, contents_checksum) == 0) | ||
{ | ||
/* Adding an empty tree to a full one - stick with the old contents */ | ||
contents_checksum = self->contents_checksum; | ||
break; | ||
} | ||
else | ||
return FALSE; | ||
} | ||
case MTREE_STATE_WHOLE: | ||
if (g_hash_table_size (self->files) == 0 && | ||
g_hash_table_size (self->subdirs) == 0) | ||
break; | ||
/* We're not empty - can't convert to a LAZY tree */ | ||
return FALSE; | ||
default: | ||
g_assert_not_reached (); | ||
} | ||
|
||
self->state = MTREE_STATE_LAZY; | ||
g_set_object (&self->repo, repo); | ||
ostree_mutable_tree_set_metadata_checksum (self, metadata_checksum); | ||
if (g_strcmp0 (self->contents_checksum, contents_checksum) != 0) | ||
{ | ||
invalidate_contents_checksum (self); | ||
self->contents_checksum = g_strdup (contents_checksum); | ||
} | ||
return TRUE; | ||
} | ||
|
||
/** | ||
* ostree_mutable_tree_walk: | ||
* @self: Tree | ||
|
@@ -392,6 +570,8 @@ ostree_mutable_tree_walk (OstreeMutableTree *self, | |
else | ||
{ | ||
OstreeMutableTree *subdir; | ||
if (!_ostree_mutable_tree_make_whole (self, NULL, error)) | ||
return FALSE; | ||
|
||
subdir = g_hash_table_lookup (self->subdirs, split_path->pdata[start]); | ||
if (!subdir) | ||
|
@@ -410,6 +590,7 @@ ostree_mutable_tree_walk (OstreeMutableTree *self, | |
GHashTable * | ||
ostree_mutable_tree_get_subdirs (OstreeMutableTree *self) | ||
{ | ||
_assert_ostree_mutable_tree_make_whole (self); | ||
return self->subdirs; | ||
} | ||
|
||
|
@@ -422,6 +603,7 @@ ostree_mutable_tree_get_subdirs (OstreeMutableTree *self) | |
GHashTable * | ||
ostree_mutable_tree_get_files (OstreeMutableTree *self) | ||
{ | ||
_assert_ostree_mutable_tree_make_whole (self); | ||
return self->files; | ||
} | ||
|
||
|
@@ -435,3 +617,27 @@ ostree_mutable_tree_new (void) | |
{ | ||
return (OstreeMutableTree*)g_object_new (OSTREE_TYPE_MUTABLE_TREE, NULL); | ||
} | ||
|
||
/** | ||
* ostree_mutable_tree_new_from_checksum: | ||
* @repo: The repo which contains the objects refered by the checksums. | ||
* @contents_checksum: dirtree checksum | ||
* @metadata_checksum: dirmeta checksum | ||
* | ||
* Creates a new OstreeMutableTree with the contents taken from the given repo | ||
* and checksums. The data will be loaded from the repo lazily as needed. | ||
* | ||
* Returns: (transfer full): A new tree | ||
*/ | ||
OstreeMutableTree * | ||
ostree_mutable_tree_new_from_checksum (OstreeRepo *repo, | ||
const char *contents_checksum, | ||
const char *metadata_checksum) | ||
{ | ||
OstreeMutableTree* out = (OstreeMutableTree*)g_object_new (OSTREE_TYPE_MUTABLE_TREE, NULL); | ||
out->state = MTREE_STATE_LAZY; | ||
out->repo = g_object_ref (repo); | ||
out->contents_checksum = g_strdup (contents_checksum); | ||
out->metadata_checksum = g_strdup (metadata_checksum); | ||
return out; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This sounds like it might be a good fit for weak refs: https://developer.gnome.org/gobject/stable/gobject-The-Base-Object-Type.html#g-object-add-weak-pointer. So then your hash table value free function can just be
g_object_unref
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did consider this. I figured it's not worth the overhead both in the source and at runtime. The difference would be:
OstreeMutableTree
instances that are part of the same hierarchy from different threads anyway as we have no synchronisation aroundcontents_checksum
. We could add more synchronisation here but it hardly seems worth it.shared_ptr
withmake_shared
)They're minor points but I think the current implementation is nicer and we avoid a bunch of error handling for codepaths that will never be taken. I'm not wed to the current implementation so I can change it to use weak_refs if you'd like.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is just a quick comment, I haven't fully reviewed this patch yet.
Yeah, GLib's weak refs use a global mutex. I don't know if that really matters here (probably not) - bigger perf wins would probably be having a more efficient string-tree structure.
Having non-owning pointers for trees is pretty standard I believe.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just looked at Rust's BTree for example, and there the parent pointer is a raw pointer. It kind of has to be in Rust of course, otherwise it'd be cyclic.