Skip to content

Commit

Permalink
[improve](group commit) Group commit support max filter ratio when rows is less than value in config (#28139)
Browse files Browse the repository at this point in the history
  • Loading branch information
mymeiyi authored Dec 12, 2023
1 parent d25cbdd commit 45b2dba
Show file tree
Hide file tree
Showing 21 changed files with 475 additions and 36 deletions.
3 changes: 2 additions & 1 deletion be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1100,10 +1100,11 @@ DEFINE_Int16(bitmap_serialize_version, "1");
DEFINE_String(group_commit_replay_wal_dir, "./wal");
DEFINE_Int32(group_commit_replay_wal_retry_num, "10");
DEFINE_Int32(group_commit_replay_wal_retry_interval_seconds, "5");
DEFINE_Bool(wait_internal_group_commit_finish, "false");

// the number of threads in the group commit insert thread pool
DEFINE_Int32(group_commit_insert_threads, "10");
DEFINE_Int32(group_commit_memory_rows_for_max_filter_ratio, "10000");
DEFINE_Bool(wait_internal_group_commit_finish, "false");

DEFINE_mInt32(scan_thread_nice_value, "0");
DEFINE_mInt32(tablet_schema_cache_recycle_interval, "86400");
Expand Down
3 changes: 2 additions & 1 deletion be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1173,10 +1173,11 @@ DECLARE_Int16(bitmap_serialize_version);
DECLARE_String(group_commit_replay_wal_dir);
DECLARE_Int32(group_commit_replay_wal_retry_num);
DECLARE_Int32(group_commit_replay_wal_retry_interval_seconds);
DECLARE_Bool(wait_internal_group_commit_finish);

// This config can be set to limit thread number in group commit insert thread pool.
DECLARE_mInt32(group_commit_insert_threads);
DECLARE_mInt32(group_commit_memory_rows_for_max_filter_ratio);
DECLARE_Bool(wait_internal_group_commit_finish);

// The configuration item is used to lower the priority of the scanner thread,
// typically employed to ensure CPU scheduling for write operations.
Expand Down
6 changes: 2 additions & 4 deletions be/src/runtime/group_commit_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,7 @@ void LoadBlockQueue::cancel(const Status& st) {

Status GroupCommitTable::get_first_block_load_queue(
int64_t table_id, int64_t base_schema_version, const UniqueId& load_id,
std::shared_ptr<vectorized::Block> block, std::shared_ptr<LoadBlockQueue>& load_block_queue,
int be_exe_version) {
std::shared_ptr<LoadBlockQueue>& load_block_queue, int be_exe_version) {
DCHECK(table_id == _table_id);
{
std::unique_lock l(_lock);
Expand Down Expand Up @@ -425,7 +424,6 @@ void GroupCommitMgr::stop() {
Status GroupCommitMgr::get_first_block_load_queue(int64_t db_id, int64_t table_id,
int64_t base_schema_version,
const UniqueId& load_id,
std::shared_ptr<vectorized::Block> block,
std::shared_ptr<LoadBlockQueue>& load_block_queue,
int be_exe_version) {
std::shared_ptr<GroupCommitTable> group_commit_table;
Expand All @@ -439,7 +437,7 @@ Status GroupCommitMgr::get_first_block_load_queue(int64_t db_id, int64_t table_i
group_commit_table = _table_map[table_id];
}
return group_commit_table->get_first_block_load_queue(table_id, base_schema_version, load_id,
block, load_block_queue, be_exe_version);
load_block_queue, be_exe_version);
}

Status GroupCommitMgr::get_load_block_queue(int64_t table_id, const TUniqueId& instance_id,
Expand Down
2 changes: 0 additions & 2 deletions be/src/runtime/group_commit_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ class GroupCommitTable {
_all_block_queues_bytes(all_block_queue_bytes) {};
Status get_first_block_load_queue(int64_t table_id, int64_t base_schema_version,
const UniqueId& load_id,
std::shared_ptr<vectorized::Block> block,
std::shared_ptr<LoadBlockQueue>& load_block_queue,
int be_exe_version);
Status get_load_block_queue(const TUniqueId& instance_id,
Expand Down Expand Up @@ -142,7 +141,6 @@ class GroupCommitMgr {
std::shared_ptr<LoadBlockQueue>& load_block_queue);
Status get_first_block_load_queue(int64_t db_id, int64_t table_id, int64_t base_schema_version,
const UniqueId& load_id,
std::shared_ptr<vectorized::Block> block,
std::shared_ptr<LoadBlockQueue>& load_block_queue,
int be_exe_version);

Expand Down
9 changes: 9 additions & 0 deletions be/src/runtime/stream_load/stream_load_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,15 @@ Status StreamLoadExecutor::execute_plan_fragment(std::shared_ptr<StreamLoadConte
ctx->number_loaded_rows);
}
} else {
if (ctx->group_commit && status->is<DATA_QUALITY_ERROR>()) {
ctx->number_total_rows = state->num_rows_load_total();
ctx->number_loaded_rows = state->num_rows_load_success();
ctx->number_filtered_rows = state->num_rows_load_filtered();
ctx->number_unselected_rows = state->num_rows_load_unselected();
if (ctx->number_filtered_rows > 0 && !state->get_error_log_file_path().empty()) {
ctx->error_url = to_load_error_http_path(state->get_error_log_file_path());
}
}
LOG(WARNING) << "fragment execute failed"
<< ", query_id=" << UniqueId(ctx->put_result.params.params.query_id)
<< ", err_msg=" << status->to_string() << ", " << ctx->brief();
Expand Down
61 changes: 47 additions & 14 deletions be/src/vec/sink/group_commit_block_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Status GroupCommitBlockSink::init(const TDataSink& t_sink) {
_base_schema_version = table_sink.base_schema_version;
_group_commit_mode = table_sink.group_commit_mode;
_load_id = table_sink.load_id;
_max_filter_ratio = table_sink.max_filter_ratio;
return Status::OK();
}

Expand Down Expand Up @@ -84,18 +85,28 @@ Status GroupCommitBlockSink::open(RuntimeState* state) {
}

Status GroupCommitBlockSink::close(RuntimeState* state, Status close_status) {
if (_load_block_queue) {
_load_block_queue->remove_load_id(_load_id);
}
RETURN_IF_ERROR(DataSink::close(state, close_status));
RETURN_IF_ERROR(close_status);
// wait to wal
int64_t total_rows = state->num_rows_load_total();
int64_t loaded_rows = state->num_rows_load_total();
state->update_num_rows_load_filtered(_block_convertor->num_filtered_rows() + total_rows -
loaded_rows);
state->set_num_rows_load_total(loaded_rows + state->num_rows_load_unselected() +
state->num_rows_load_filtered());
state->update_num_rows_load_filtered(_block_convertor->num_filtered_rows() + total_rows -
loaded_rows);
if (!_is_block_appended) {
// if the ratio of filtered rows exceeds max_filter_ratio, return an error status directly
int64_t num_selected_rows =
state->num_rows_load_total() - state->num_rows_load_unselected();
if (num_selected_rows > 0 &&
(double)state->num_rows_load_filtered() / num_selected_rows > _max_filter_ratio) {
return Status::DataQualityError("too many filtered rows");
}
RETURN_IF_ERROR(_add_blocks());
}
if (_load_block_queue) {
_load_block_queue->remove_load_id(_load_id);
}
// wait to wal
auto st = Status::OK();
if (_load_block_queue && (_load_block_queue->wait_internal_group_commit_finish ||
_group_commit_mode == TGroupCommitMode::SYNC_MODE)) {
Expand Down Expand Up @@ -148,6 +159,8 @@ Status GroupCommitBlockSink::_add_block(RuntimeState* state,
if (block->rows() == 0) {
return Status::OK();
}
// the insert group commit tvf always accepts nullable columns, so we should convert
// the non-nullable columns to nullable columns
for (int i = 0; i < block->columns(); ++i) {
if (block->get_by_position(i).type->is_nullable()) {
continue;
Expand All @@ -166,22 +179,42 @@ Status GroupCommitBlockSink::_add_block(RuntimeState* state,
}
std::shared_ptr<vectorized::Block> output_block = vectorized::Block::create_shared();
output_block->swap(cur_mutable_block->to_block());
if (!_is_block_appended && state->num_rows_load_total() + state->num_rows_load_unselected() +
state->num_rows_load_filtered() <=
config::group_commit_memory_rows_for_max_filter_ratio) {
_blocks.emplace_back(output_block);
} else {
if (!_is_block_appended) {
RETURN_IF_ERROR(_add_blocks());
}
RETURN_IF_ERROR(_load_block_queue->add_block(
output_block, _group_commit_mode != TGroupCommitMode::SYNC_MODE));
}
return Status::OK();
}

// Hands every block buffered in _blocks to the load block queue, fetching the
// queue from the group commit manager first when _load_block_queue is still null.
// Returns InternalError("be is stopping") when the WAL manager is not running;
// errors from get_first_block_load_queue() and add_block() are propagated by
// RETURN_IF_ERROR. On success sets _is_block_appended and empties _blocks.
// NOTE(review): this listing interleaves two variants of the queue lookup
// (`state->...` and `_state->...`) plus an add_block() call on `output_block`;
// `state`, `block` and `output_block` are not declared in this function —
// presumably they are the pre-change lines of the diff. TODO confirm.
Status GroupCommitBlockSink::_add_blocks() {
// expected to be called only while no block has been appended yet
DCHECK(_is_block_appended == false);
// copy the hi/lo parts of _load_id into a thrift TUniqueId
TUniqueId load_id;
load_id.__set_hi(_load_id.hi);
load_id.__set_lo(_load_id.lo);
if (_load_block_queue == nullptr) {
if (state->exec_env()->wal_mgr()->is_running()) {
RETURN_IF_ERROR(state->exec_env()->group_commit_mgr()->get_first_block_load_queue(
_db_id, _table_id, _base_schema_version, load_id, block, _load_block_queue,
state->be_exec_version()));
state->set_import_label(_load_block_queue->label);
state->set_wal_id(_load_block_queue->txn_id);
if (_state->exec_env()->wal_mgr()->is_running()) {
RETURN_IF_ERROR(_state->exec_env()->group_commit_mgr()->get_first_block_load_queue(
_db_id, _table_id, _base_schema_version, load_id, _load_block_queue,
_state->be_exec_version()));
// record the queue's label and txn id on the runtime state
_state->set_import_label(_load_block_queue->label);
_state->set_wal_id(_load_block_queue->txn_id);
} else {
return Status::InternalError("be is stopping");
}
}
RETURN_IF_ERROR(_load_block_queue->add_block(
output_block, _group_commit_mode != TGroupCommitMode::SYNC_MODE));
// forward the buffered blocks in the order they were stored
for (auto it = _blocks.begin(); it != _blocks.end(); ++it) {
RETURN_IF_ERROR(_load_block_queue->add_block(
*it, _group_commit_mode != TGroupCommitMode::SYNC_MODE));
}
// from now on blocks are no longer buffered in _blocks
_is_block_appended = true;
_blocks.clear();
return Status::OK();
}

Expand Down
5 changes: 5 additions & 0 deletions be/src/vec/sink/group_commit_block_sink.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class GroupCommitBlockSink : public DataSink {

private:
Status _add_block(RuntimeState* state, std::shared_ptr<vectorized::Block> block);
Status _add_blocks();

vectorized::VExprContextSPtrs _output_vexpr_ctxs;

Expand All @@ -65,6 +66,10 @@ class GroupCommitBlockSink : public DataSink {
TGroupCommitMode::type _group_commit_mode;
UniqueId _load_id;
std::shared_ptr<LoadBlockQueue> _load_block_queue;
// blocks buffered in memory so the max filter ratio can be checked before they are queued
std::vector<std::shared_ptr<vectorized::Block>> _blocks;
bool _is_block_appended = false;
double _max_filter_ratio = 0.0;
};

} // namespace vectorized
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,7 @@ private DataSink createDataSink() throws AnalysisException {
if (isGroupCommitStreamLoadSql) {
sink = new GroupCommitBlockSink((OlapTable) targetTable, olapTuple,
targetPartitionIds, analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert(),
ConnectContext.get().getSessionVariable().getGroupCommit());
ConnectContext.get().getSessionVariable().getGroupCommit(), 0);
} else {
sink = new OlapTableSink((OlapTable) targetTable, olapTuple, targetPartitionIds,
analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@

public class GroupCommitBlockSink extends OlapTableSink {
private String groupCommit;
private double maxFilterRatio;

/**
 * Creates a group commit sink; the table, tuple descriptor, partition ids and
 * single-replica flag are passed unchanged to the {@code OlapTableSink} constructor.
 *
 * @param groupCommit    group commit mode string, parsed by parseGroupCommit() in toThrift()
 * @param maxFilterRatio value written to the thrift sink via setMaxFilterRatio() in toThrift()
 *
 * NOTE(review): this listing shows two parameter-list continuation lines (with and
 * without maxFilterRatio) - presumably the old and new lines of the diff; confirm.
 */
public GroupCommitBlockSink(OlapTable dstTable, TupleDescriptor tupleDescriptor, List<Long> partitionIds,
boolean singleReplicaLoad, String groupCommit) {
boolean singleReplicaLoad, String groupCommit, double maxFilterRatio) {
super(dstTable, tupleDescriptor, partitionIds, singleReplicaLoad);
this.groupCommit = groupCommit;
this.maxFilterRatio = maxFilterRatio;
}

protected TDataSinkType getDataSinkType() {
Expand All @@ -45,6 +47,7 @@ protected TDataSink toThrift() {
TGroupCommitMode groupCommitMode = parseGroupCommit(groupCommit);
Preconditions.checkNotNull(groupCommitMode, "Group commit is: " + groupCommit);
tDataSink.olap_table_sink.setGroupCommitMode(groupCommitMode);
tDataSink.olap_table_sink.setMaxFilterRatio(maxFilterRatio);
return tDataSink;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,12 @@ public GroupCommitPlanner(Database db, OlapTable table, List<String> targetColum
}
streamLoadPutRequest
.setDb(db.getFullName())
.setMaxFilterRatio(1)
.setMaxFilterRatio(ConnectContext.get().getSessionVariable().enableInsertStrict ? 0 : 1)
.setTbl(table.getName())
.setFileType(TFileType.FILE_STREAM).setFormatType(TFileFormatType.FORMAT_CSV_PLAIN)
.setMergeType(TMergeType.APPEND).setThriftRpcTimeoutMs(5000).setLoadId(queryId)
.setTrimDoubleQuotes(true).setGroupCommitMode(groupCommit);
.setTrimDoubleQuotes(true).setGroupCommitMode(groupCommit)
.setStrictMode(ConnectContext.get().getSessionVariable().enableInsertStrict);
StreamLoadTask streamLoadTask = StreamLoadTask.fromTStreamLoadPutRequest(streamLoadPutRequest);
StreamLoadPlanner planner = new StreamLoadPlanner(db, table, streamLoadTask);
// Will using load id as query id in fragment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@ public TExecPlanFragmentParams plan(TUniqueId loadId, int fragmentInstanceIdInde
OlapTableSink olapTableSink;
if (taskInfo instanceof StreamLoadTask && ((StreamLoadTask) taskInfo).getGroupCommit() != null) {
olapTableSink = new GroupCommitBlockSink(destTable, tupleDesc, partitionIds,
Config.enable_single_replica_load, ((StreamLoadTask) taskInfo).getGroupCommit());
Config.enable_single_replica_load, ((StreamLoadTask) taskInfo).getGroupCommit(),
taskInfo.getMaxFilterRatio());
} else {
olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds, Config.enable_single_replica_load);
}
Expand Down Expand Up @@ -481,7 +482,8 @@ public TPipelineFragmentParams planForPipeline(TUniqueId loadId, int fragmentIns
OlapTableSink olapTableSink;
if (taskInfo instanceof StreamLoadTask && ((StreamLoadTask) taskInfo).getGroupCommit() != null) {
olapTableSink = new GroupCommitBlockSink(destTable, tupleDesc, partitionIds,
Config.enable_single_replica_load, ((StreamLoadTask) taskInfo).getGroupCommit());
Config.enable_single_replica_load, ((StreamLoadTask) taskInfo).getGroupCommit(),
taskInfo.getMaxFilterRatio());
} else {
olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds, Config.enable_single_replica_load);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import com.google.protobuf.ProtocolStringList;
import lombok.Setter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -1891,7 +1892,9 @@ private void handleInsertStmt() throws Exception {
List<InternalService.PDataRow> rows = groupCommitPlanner.getRows(nativeInsertStmt);
PGroupCommitInsertResponse response = groupCommitPlanner.executeGroupCommitInsert(context, rows);
TStatusCode code = TStatusCode.findByValue(response.getStatus().getStatusCode());
if (code == TStatusCode.DATA_QUALITY_ERROR) {
ProtocolStringList errorMsgsList = response.getStatus().getErrorMsgsList();
if (code == TStatusCode.DATA_QUALITY_ERROR && !errorMsgsList.isEmpty() && errorMsgsList.get(0)
.contains("schema version not match")) {
LOG.info("group commit insert failed. stmt: {}, backend id: {}, status: {}, "
+ "schema version: {}, retry: {}", insertStmt.getOrigStmt().originStmt,
groupCommitPlanner.getBackend().getId(),
Expand Down
1 change: 1 addition & 0 deletions gensrc/thrift/DataSinks.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ struct TOlapTableSink {
// used by GroupCommitBlockSink
21: optional i64 base_schema_version
22: optional TGroupCommitMode group_commit_mode
23: optional double max_filter_ratio
}

struct TDataSink {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 a 10
2 \N -1
3 a 10
9 a \N

-- !sql --
1 a 10
2 \N -1
3 a 10
6 a \N
7 a \N
9 a \N

-- !sql --
1 a 21
1 a 21
2 b 22
2 b 22
3 c 23
3 c 23
4 d \N

-- !sql --
1 a 21
1 a 21
2 b 22
2 b 22
3 c 23
3 c 23
4 d \N
4 d \N

4 changes: 4 additions & 0 deletions regression-test/data/insert_p0/test_group_commit_10.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1,a,21
2,b,22
3,c,23
4,d,a
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
6 f 60
7 e 70
8 f 80
10 a 10
11 a 11
12 a \N

-- !sql --
2402288
Expand Down
Loading

0 comments on commit 45b2dba

Please sign in to comment.