Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix: schema evolution fix-to-var reads #5321

Merged
merged 2 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions test/src/unit-cppapi-schema-evolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ TEST_CASE(

TEST_CASE(
"C++ API: SchemaEvolution, drop fixed attribute and add back as var-sized",
"[!mayfail][cppapi][schema][evolution][add][drop]") {
"[cppapi][schema][evolution][add][drop]") {
test::VFSTestSetup vfs_test_setup;
Context ctx{vfs_test_setup.ctx()};
auto array_uri{
Expand Down Expand Up @@ -850,6 +850,8 @@ TEST_CASE(
}
query_w.submit_and_finalize();
array_w.close();
uint64_t fragment_write_ts = tiledb_timestamp_now_ms() + 1;
std::this_thread::sleep_for(std::chrono::milliseconds(100));

// Evolve schema to drop attribute "a"
ArraySchemaEvolution schema_evolution = ArraySchemaEvolution(ctx);
Expand All @@ -868,17 +870,35 @@ TEST_CASE(

// Read the array
std::string buffer;
std::vector<uint64_t> offsets = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
std::vector<uint64_t> offsets(10);
Array array_r(ctx, array_uri, TILEDB_READ);
Subarray subarray(ctx, array_r);
subarray.add_range(0, 1, 10);
Subarray subarray_r(ctx, array_r);
subarray_r.add_range(0, 1, 10);
Query query_r(ctx, array_r, TILEDB_READ);
query_r.set_layout(layout)
.set_subarray(subarray)
.set_subarray(subarray_r)
.set_data_buffer("a", buffer)
.set_offsets_buffer("a", offsets);
query_r.submit();
array_r.close();
Comment on lines 871 to 883
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we CHECK something here?

Copy link
Member

@kounelisagis kounelisagis Nov 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CHECK(buffer.size() == 0) and CHECK(offsets.size() == 10) work.
Should we add them?


// Read the original array
std::vector<int> a_data(10);
array_r.open(TILEDB_READ, fragment_write_ts);
Subarray subarray_r2(ctx, array_r);
subarray_r2.add_range(0, 1, 10);
Query query_r2(ctx, array_r, TILEDB_READ);
query_r2.set_layout(layout)
.set_subarray(subarray_r2)
.set_data_buffer("a", a_data);
query_r2.submit();
array_r.close();
auto result_num = (int)query_r2.result_buffer_elements()["a"].second;
CHECK(result_num == 10);
a_data.resize(result_num);
CHECK_THAT(
a_data,
Catch::Matchers::Equals(std::vector<int>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}));
}

TEST_CASE(
Expand Down
16 changes: 15 additions & 1 deletion tiledb/sm/query/readers/reader_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ bool ReaderBase::skip_field(
return true;
}

// If an attribute exists but has changed from fixed->var or var->fixed after
// schema evolution, ignore for this fragment's tile offsets
if ((array_schema_.var_size(name) && !schema->var_size(name)) ||
(!array_schema_.var_size(name) && schema->var_size(name))) {
return true;
}

// If the fragment doesn't include timestamps
if (timestamps_not_present(name, frag_idx)) {
return true;
Expand Down Expand Up @@ -1135,7 +1142,14 @@ uint64_t ReaderBase::get_attribute_tile_size(

tile_size += fragment_metadata_[f]->tile_size(name, t);

if (array_schema_.var_size(name)) {
/**
* Note: There is a distinct case in which a schema may evolve from
* fixed-sized to var-sized. In this scenario, the LoadedFragmentMetadata
* contains fixed tiles. The tile_var_size should be calculated iff
* both the current _and_ loaded attributes are var-sized.
*/
if (array_schema_.var_size(name) &&
fragment_metadata_[f]->array_schema()->var_size(name)) {
tile_size +=
fragment_metadata_[f]->loaded_metadata()->tile_var_size(name, t);
}
Expand Down
Loading