Skip to content

Commit

Permalink
[fix](partial update) fix data correctness risk when load delete sign…
Browse files Browse the repository at this point in the history
… data into a table with sequence col (#32574)
  • Loading branch information
zhannngchen authored and Doris-Extras committed Mar 22, 2024
1 parent 55b7f7f commit d3bdda6
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 10 deletions.
12 changes: 7 additions & 5 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,10 +537,12 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
return st;
}

// if the delete sign is marked, it means that the value columns of the row
// will not be read. So we don't need to read the missing values from the previous rows.
// But we still need to mark the previous row on delete bitmap
if (have_delete_sign) {
// 1. if the delete sign is marked, it means that the value columns of the row will not
// be read. So we don't need to read the missing values from the previous rows.
// 2. the one exception is when there are sequence columns in the table, we need to read
// the sequence columns, otherwise it may cause the merge-on-read based compaction
// policy to produce incorrect results
if (have_delete_sign && !_tablet_schema->has_sequence_col()) {
has_default_or_nullable = true;
use_default_or_null_flag.emplace_back(true);
} else {
Expand Down Expand Up @@ -726,7 +728,7 @@ Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f
(delete_sign_column_data != nullptr &&
delete_sign_column_data[read_index[idx + segment_start_pos]] != 0)) {
for (auto i = 0; i < cids_missing.size(); ++i) {
// if the column has default value, fiil it with default value
// if the column has default value, fill it with default value
// otherwise, if the column is nullable, fill it with null value
const auto& tablet_column = _tablet_schema->column(cids_missing[i]);
if (tablet_column.has_default_value()) {
Expand Down
12 changes: 7 additions & 5 deletions be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,10 +468,12 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
return st;
}

// if the delete sign is marked, it means that the value columns of the row
// will not be read. So we don't need to read the missing values from the previous rows.
// But we still need to mark the previous row on delete bitmap
if (have_delete_sign) {
// 1. if the delete sign is marked, it means that the value columns of the row will not
// be read. So we don't need to read the missing values from the previous rows.
// 2. the one exception is when there are sequence columns in the table, we need to read
// the sequence columns, otherwise it may cause the merge-on-read based compaction
// policy to produce incorrect results
if (have_delete_sign && !_tablet_schema->has_sequence_col()) {
has_default_or_nullable = true;
use_default_or_null_flag.emplace_back(true);
} else {
Expand Down Expand Up @@ -656,7 +658,7 @@ Status VerticalSegmentWriter::_fill_missing_columns(
(delete_sign_column_data != nullptr &&
delete_sign_column_data[read_index[idx + segment_start_pos]] != 0)) {
for (auto i = 0; i < missing_cids.size(); ++i) {
// if the column has default value, fiil it with default value
// if the column has default value, fill it with default value
// otherwise, if the column is nullable, fill it with null value
const auto& tablet_column = _tablet_schema->column(missing_cids[i]);
if (tablet_column.has_default_value()) {
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ suite("test_primary_key_partial_update_seq_col_delete", "p0") {
select * from ${tableName} order by id;
"""

sql "SET show_hidden_columns=true"

sql "sync"
qt_partial_update_without_seq_hidden_columns """
select * from ${tableName} order by id;
"""

sql "SET show_hidden_columns=false"
// provide the sequence column this time, should update according to the
// given sequence values
streamLoad {
Expand Down

0 comments on commit d3bdda6

Please sign in to comment.