Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

staticdata: handle cycles in datatypes #52752

Merged
merged 1 commit into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 84 additions & 63 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,6 @@ static arraylist_t deser_sym;
static htable_t external_objects;

static htable_t serialization_order; // to break cycles, mark all objects that are serialized
static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them.
static htable_t nullptrs;
// FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization
// must be "toplevel" in this queue. For types, parameters and field types must appear
Expand Down Expand Up @@ -485,6 +484,7 @@ typedef struct {
arraylist_t relocs_list; // a list of (location, target) pairs, see description at top
arraylist_t gctags_list; // "
arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated
arraylist_t uniquing_super; // a list of datatypes, used in super fields, that need to be marked in uniquing_types once they are reached, for handling unique-ing of them on deserialization
arraylist_t uniquing_objs; // a list of locations that reference non-types that must be de-duplicated
arraylist_t fixup_types; // a list of locations of types requiring (re)caching
arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching
Expand Down Expand Up @@ -757,14 +757,13 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
{
jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate);
const jl_datatype_layout_t *layout = t->layout;

if (!recursive)
goto done_fields;

if (s->incremental && jl_is_datatype(v) && immediate) {
jl_datatype_t *dt = (jl_datatype_t*)v;
// ensure super is queued (though possibly not yet handled, since it may have cycles)
jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1);
// ensure all type parameters are recached
jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1);
if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance)) {
Expand All @@ -773,7 +772,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
// (it may get serialized from elsewhere though)
record_field_change(&dt->instance, jl_nothing);
}
immediate = 0; // do not handle remaining fields immediately (just field types remains)
goto done_fields; // for now
}
if (s->incremental && jl_is_method_instance(v)) {
jl_method_instance_t *mi = (jl_method_instance_t*)v;
Expand Down Expand Up @@ -829,11 +828,9 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
}
}


if (immediate) // must be things that can be recursively handled, and valid as type parameters
assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v));

const jl_datatype_layout_t *layout = t->layout;
if (layout->npointers == 0) {
// bitstypes do not require recursion
}
Expand Down Expand Up @@ -896,22 +893,35 @@ done_fields: ;

// We've encountered an item we need to cache
void **bp = ptrhash_bp(&serialization_order, v);
assert(*bp != (void*)(uintptr_t)-1);
if (s->incremental) {
void **bp2 = ptrhash_bp(&unique_ready, v);
if (*bp2 == HT_NOTFOUND)
assert(*bp == (void*)(uintptr_t)-2);
else if (*bp != (void*)(uintptr_t)-2)
return;
}
else {
assert(*bp == (void*)(uintptr_t)-2);
}
assert(*bp == (void*)(uintptr_t)-2);
arraylist_push(&serialization_queue, (void*) v);
size_t idx = serialization_queue.len - 1;
assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize");

*bp = (void*)((char*)HT_NOTFOUND + 1 + idx);

// DataType is very unusual, in that some of the fields need to be pre-order, and some
// (notably super) must not be (even if `jl_queue_for_serialization_` would otherwise
// try to promote itself to be immediate)
if (s->incremental && jl_is_datatype(v) && immediate && recursive) {
jl_datatype_t *dt = (jl_datatype_t*)v;
void **bp = ptrhash_bp(&serialization_order, (void*)dt->super);
if (*bp != (void*)-2) {
// if super is already on the stack of things to handle when this returns, do
// not try to handle it now
jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, immediate);
}
immediate = 0;
char *data = (char*)jl_data_ptr(v);
size_t i, np = layout->npointers;
for (i = 0; i < np; i++) {
uint32_t ptr = jl_ptr_offset(t, i);
if (ptr * sizeof(jl_value_t*) == offsetof(jl_datatype_t, super))
continue; // skip the super field, since it might not be quite validly ordered
int mutabl = 1;
jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl);
jl_queue_for_serialization_(s, fld, 1, immediate);
}
}
}

static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED
Expand All @@ -930,28 +940,19 @@ static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, i
}

void **bp = ptrhash_bp(&serialization_order, v);
if (*bp == HT_NOTFOUND) {
*bp = (void*)(uintptr_t)(immediate ? -2 : -1);
}
else {
if (!s->incremental || !immediate || !recursive)
return;
void **bp2 = ptrhash_bp(&unique_ready, v);
if (*bp2 == HT_NOTFOUND)
*bp2 = v; // now is unique_ready
else {
assert(*bp != (void*)(uintptr_t)-1);
return; // already was unique_ready
}
assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then
if (*bp == (void*)(uintptr_t)-1)
*bp = (void*)(uintptr_t)-2; // now immediate
}
assert(!immediate || *bp != (void*)(uintptr_t)-2);
if (*bp == HT_NOTFOUND)
*bp = (void*)(uintptr_t)-1; // now enqueued
else if (!s->incremental || !immediate || !recursive || *bp != (void*)(uintptr_t)-1)
return;

if (immediate)
if (immediate) {
*bp = (void*)(uintptr_t)-2; // now immediate
jl_insert_into_serialization_queue(s, v, recursive, immediate);
else
}
else {
arraylist_push(&object_worklist, (void*)v);
}
}

// Do a pre-order traversal of the to-serialize worklist, in the identical order
Expand Down Expand Up @@ -1101,8 +1102,10 @@ static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t o
if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) {
if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld)))
arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset);
else
else if (jl_is_method_instance(fld))
arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset);
else
assert(0 && "unknown object type with needs_uniquing set");
}
}

Expand Down Expand Up @@ -1301,7 +1304,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
write_pointerfield(s, (jl_value_t*)mi->sparam_vals);
continue;
}
else if (!jl_is_datatype(v)) {
else if (jl_is_datatype(v)) {
for (size_t i = 0; i < s->uniquing_super.len; i++) {
if (s->uniquing_super.items[i] == (void*)v) {
s->uniquing_super.items[i] = arraylist_pop(&s->uniquing_super);
arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(reloc_offset|3));
}
}
}
else {
assert(jl_is_datatype_singleton(t) && "unreachable");
}
}
Expand Down Expand Up @@ -1698,6 +1709,9 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
ios_write(s->const_data, (char*)&dyn, sizeof(jl_fielddescdyn_t));
}
}
void *superidx = ptrhash_get(&serialization_order, dt->super);
if (s->incremental && superidx != HT_NOTFOUND && (char*)superidx - 1 - (char*)HT_NOTFOUND > item && needs_uniquing((jl_value_t*)dt->super))
arraylist_push(&s->uniquing_super, dt->super);
}
else if (jl_is_typename(v)) {
assert(f == s->s);
Expand Down Expand Up @@ -1741,6 +1755,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
}
}
}
assert(s->uniquing_super.len == 0);
}

// In deserialization, create Symbols and set up the
Expand Down Expand Up @@ -2562,7 +2577,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2));
}
htable_new(&serialization_order, 25000);
htable_new(&unique_ready, 0);
htable_new(&nullptrs, 0);
arraylist_new(&object_worklist, 0);
arraylist_new(&serialization_queue, 0);
Expand All @@ -2587,6 +2601,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
arraylist_new(&s.relocs_list, 0);
arraylist_new(&s.gctags_list, 0);
arraylist_new(&s.uniquing_types, 0);
arraylist_new(&s.uniquing_super, 0);
arraylist_new(&s.uniquing_objs, 0);
arraylist_new(&s.fixup_types, 0);
arraylist_new(&s.fixup_objs, 0);
Expand Down Expand Up @@ -2838,6 +2853,11 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
arraylist_free(&object_worklist);
arraylist_free(&serialization_queue);
arraylist_free(&layout_table);
arraylist_free(&s.uniquing_types);
arraylist_free(&s.uniquing_super);
arraylist_free(&s.uniquing_objs);
arraylist_free(&s.fixup_types);
arraylist_free(&s.fixup_objs);
arraylist_free(&s.ccallable_list);
arraylist_free(&s.memowner_list);
arraylist_free(&s.memref_list);
Expand All @@ -2849,7 +2869,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
if (worklist)
htable_free(&external_objects);
htable_free(&serialization_order);
htable_free(&unique_ready);
htable_free(&nullptrs);
htable_free(&symbol_table);
htable_free(&fptr_to_id);
Expand Down Expand Up @@ -3220,31 +3239,43 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
uintptr_t item = (uintptr_t)s.uniquing_types.items[i];
// check whether we are operating on the typetag
// (needing to ignore GC bits) or a regular field
int tag = (item & 1) == 1;
// check whether this is a gvar index
int gvar = (item & 2) == 2;
// and check whether this is a gvar index
int tag = (item & 3);
item &= ~(uintptr_t)3;
uintptr_t *pfld;
jl_value_t **obj, *newobj;
if (gvar) {
if (tag == 3) {
obj = (jl_value_t**)(image_base + item);
pfld = NULL;
for (size_t i = 0; i < delay_list.len; i += 2) {
if (obj == (jl_value_t **)delay_list.items[i + 0]) {
pfld = (uintptr_t*)delay_list.items[i + 1];
delay_list.items[i + 1] = arraylist_pop(&delay_list);
delay_list.items[i + 0] = arraylist_pop(&delay_list);
break;
}
}
assert(pfld);
}
else if (tag == 2) {
if (image->gvars_base == NULL)
continue;
item >>= 2;
assert(item < s.gvar_record->size / sizeof(reloc_t));
pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item);
obj = *(jl_value_t***)pfld;
assert(tag == 0);
}
else {
pfld = (uintptr_t*)(image_base + item);
if (tag)
if (tag == 1)
obj = (jl_value_t**)jl_typeof(jl_valueof(pfld));
else
obj = *(jl_value_t***)pfld;
if ((char*)obj > (char*)pfld) {
// this must be the super field
assert(tag == 0);
arraylist_push(&delay_list, pfld);
arraylist_push(&delay_list, obj);
arraylist_push(&delay_list, pfld);
ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid
*pfld = (uintptr_t)NULL;
continue;
Expand Down Expand Up @@ -3294,25 +3325,14 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
assert(newobj && newobj != jl_nothing);
arraylist_push(&cleanup_list, (void*)obj);
}
if (tag)
if (tag == 1)
*pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE;
else
*pfld = (uintptr_t)newobj;
assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
assert(jl_typetagis(obj, otyp));
}
// A few fields (reached via super) might be self-recursive. This is rare, but handle them now.
// They cannot be instances though, since the type must fully exist before the singleton field can be allocated
for (size_t i = 0; i < delay_list.len; ) {
uintptr_t *pfld = (uintptr_t*)delay_list.items[i++];
jl_value_t **obj = (jl_value_t **)delay_list.items[i++];
assert(jl_is_datatype(obj));
jl_datatype_t *dt = (jl_datatype_t*)obj[0];
assert(jl_is_datatype(dt));
jl_value_t *newobj = (jl_value_t*)dt;
*pfld = (uintptr_t)newobj;
assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
}
assert(delay_list.len == 0);
arraylist_free(&delay_list);
// now that all the fields of dt are assigned and unique, copy them into
// their final newdt memory location: this ensures we do not accidentally
Expand Down Expand Up @@ -3360,11 +3380,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
for (size_t i = 0; i < s.uniquing_objs.len; i++) {
uintptr_t item = (uintptr_t)s.uniquing_objs.items[i];
// check whether this is a gvar index
int gvar = (item & 2) == 2;
int tag = (item & 3);
assert(tag == 0 || tag == 2);
item &= ~(uintptr_t)3;
uintptr_t *pfld;
jl_value_t **obj, *newobj;
if (gvar) {
if (tag == 2) {
if (image->gvars_base == NULL)
continue;
item >>= 2;
Expand Down
13 changes: 6 additions & 7 deletions src/staticdata_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,15 @@ int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeo
jl_datatype_t *dt = (jl_datatype_t*)t;
assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?");
jl_datatype_t *super = dt->super;
// check if super is news, since then we must be new also
// (it is also possible that super is indeterminate now, wait for `t`
// to be resolved, then will be determined later and fixed up by the
// delay_list, for this and any other references to it).
while (super != jl_any_type) {
assert(super);
// fast-path: check if super is in news, since then we must be new also
// (it is also possible that super is indeterminate or NULL right now,
// because it is still waiting for `t` to be resolved; in that case it
// will be determined as soon as possible afterwards).
while (super != NULL && super != jl_any_type) {
if (ptrhash_has(news, (void*)super))
return 1;
if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg))
break; // fast-path for rejection of super
break; // the rest must all be non-new
// otherwise super might be something that was not cached even though a later supertype might be
// for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}`
super = super->super;
Expand Down
6 changes: 5 additions & 1 deletion test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ precompile_test_harness(false) do dir
d = den(a)
return h
end
abstract type AbstractAlgebraMap{A} end
struct GAPGroupHomomorphism{A, B} <: AbstractAlgebraMap{GAPGroupHomomorphism{B, A}} end
end
""")
write(Foo2_file,
Expand All @@ -130,7 +132,7 @@ precompile_test_harness(false) do dir
write(Foo_file,
"""
module $Foo_module
import $FooBase_module, $FooBase_module.typeA
import $FooBase_module, $FooBase_module.typeA, $FooBase_module.GAPGroupHomomorphism
import $Foo2_module: $Foo2_module, override, overridenc
import $FooBase_module.hash
import Test
Expand Down Expand Up @@ -213,6 +215,8 @@ precompile_test_harness(false) do dir
Base.convert(::Type{Some{Value18343}}, ::Value18343{Some}) = 2
Base.convert(::Type{Ref}, ::Value18343{T}) where {T} = 3

const GAPType1 = GAPGroupHomomorphism{Nothing, Nothing}
const GAPType2 = GAPGroupHomomorphism{1, 2}

# issue #28297
mutable struct Result
Expand Down