@@ -49,6 +49,7 @@ Status GroupCommitBlockSink::init(const TDataSink& t_sink) {
49
49
_base_schema_version = table_sink.base_schema_version ;
50
50
_group_commit_mode = table_sink.group_commit_mode ;
51
51
_load_id = table_sink.load_id ;
52
+ _max_filter_ratio = table_sink.max_filter_ratio ;
52
53
return Status::OK ();
53
54
}
54
55
@@ -84,18 +85,28 @@ Status GroupCommitBlockSink::open(RuntimeState* state) {
84
85
}
85
86
86
87
Status GroupCommitBlockSink::close (RuntimeState* state, Status close_status) {
87
- if (_load_block_queue) {
88
- _load_block_queue->remove_load_id (_load_id);
89
- }
90
88
RETURN_IF_ERROR (DataSink::close (state, close_status));
91
89
RETURN_IF_ERROR (close_status);
92
- // wait to wal
93
90
int64_t total_rows = state->num_rows_load_total ();
94
91
int64_t loaded_rows = state->num_rows_load_total ();
95
- state->update_num_rows_load_filtered (_block_convertor->num_filtered_rows () + total_rows -
96
- loaded_rows);
97
92
state->set_num_rows_load_total (loaded_rows + state->num_rows_load_unselected () +
98
93
state->num_rows_load_filtered ());
94
+ state->update_num_rows_load_filtered (_block_convertor->num_filtered_rows () + total_rows -
95
+ loaded_rows);
96
+ if (!_is_block_appended) {
97
+ // if the ratio of filtered rows exceeds max_filter_ratio, return an error status directly
98
+ int64_t num_selected_rows =
99
+ state->num_rows_load_total () - state->num_rows_load_unselected ();
100
+ if (num_selected_rows > 0 &&
101
+ (double )state->num_rows_load_filtered () / num_selected_rows > _max_filter_ratio) {
102
+ return Status::DataQualityError (" too many filtered rows" );
103
+ }
104
+ RETURN_IF_ERROR (_add_blocks ());
105
+ }
106
+ if (_load_block_queue) {
107
+ _load_block_queue->remove_load_id (_load_id);
108
+ }
109
+ // wait to wal
99
110
auto st = Status::OK ();
100
111
if (_load_block_queue && (_load_block_queue->wait_internal_group_commit_finish ||
101
112
_group_commit_mode == TGroupCommitMode::SYNC_MODE)) {
@@ -137,6 +148,8 @@ Status GroupCommitBlockSink::_add_block(RuntimeState* state,
137
148
if (block->rows () == 0 ) {
138
149
return Status::OK ();
139
150
}
151
+ // the insert group commit tvf always accepts nullable columns, so we should convert
152
+ // the non-nullable columns to nullable columns
140
153
for (int i = 0 ; i < block->columns (); ++i) {
141
154
if (block->get_by_position (i).type ->is_nullable ()) {
142
155
continue ;
@@ -155,16 +168,35 @@ Status GroupCommitBlockSink::_add_block(RuntimeState* state,
155
168
}
156
169
std::shared_ptr<vectorized::Block> output_block = vectorized::Block::create_shared ();
157
170
output_block->swap (cur_mutable_block->to_block ());
171
+ if (!_is_block_appended && state->num_rows_load_total () + state->num_rows_load_unselected () +
172
+ state->num_rows_load_filtered () <=
173
+ config::group_commit_memory_rows_for_max_filter_ratio) {
174
+ _blocks.emplace_back (output_block);
175
+ } else {
176
+ if (!_is_block_appended) {
177
+ RETURN_IF_ERROR (_add_blocks ());
178
+ }
179
+ RETURN_IF_ERROR (_load_block_queue->add_block (output_block));
180
+ }
181
+ return Status::OK ();
182
+ }
183
+
184
// Flushes every block buffered in `_blocks` into `_load_block_queue`.
//
// If `_load_block_queue` is still null, it is first obtained from the group
// commit manager via get_first_block_load_queue(), and the queue's label and
// txn_id are recorded on `_state` as the import label and WAL id.
//
// Precondition: `_is_block_appended` is false (checked with DCHECK).
// Postcondition on success: `_is_block_appended` is true and `_blocks` is empty.
//
// Returns Status::OK() on success. The calls wrapped in RETURN_IF_ERROR
// presumably return their error status early (macro defined elsewhere).
// NOTE(review): if so, a failure while adding the buffered blocks leaves
// `_is_block_appended` false and `_blocks` uncleared — confirm callers do not
// retry in a way that would enqueue the same blocks twice.
//
// NOTE(review): this span is a rendered diff. Lines starting with `-` are the
// removed statements of the previous `_add_block` body (they used the `state`
// and `block` parameters); lines starting with `+` are the new implementation,
// which uses the `_state` member and no longer passes a block when fetching
// the queue.
+ Status GroupCommitBlockSink::_add_blocks () {
185
+ DCHECK (_is_block_appended == false );
158
186
// Build a TUniqueId from the hi/lo parts of `_load_id`.
TUniqueId load_id;
159
187
load_id.__set_hi (_load_id.hi );
160
188
load_id.__set_lo (_load_id.lo );
161
189
// Acquire the load block queue only if it has not been set yet.
if (_load_block_queue == nullptr ) {
162
- RETURN_IF_ERROR (state->exec_env ()->group_commit_mgr ()->get_first_block_load_queue (
163
- _db_id, _table_id, _base_schema_version, load_id, block, _load_block_queue));
164
- state->set_import_label (_load_block_queue->label );
165
- state->set_wal_id (_load_block_queue->txn_id );
190
+ RETURN_IF_ERROR (_state->exec_env ()->group_commit_mgr ()->get_first_block_load_queue (
191
+ _db_id, _table_id, _base_schema_version, load_id, _load_block_queue));
192
+ _state->set_import_label (_load_block_queue->label );
193
+ _state->set_wal_id (_load_block_queue->txn_id );
194
+ }
195
// Hand the buffered blocks to the queue in the order they were stored.
+ for (auto it = _blocks.begin (); it != _blocks.end (); ++it) {
196
+ RETURN_IF_ERROR (_load_block_queue->add_block (*it));
166
197
}
167
- RETURN_IF_ERROR (_load_block_queue->add_block (output_block));
198
// Mark the buffer as flushed and empty it.
+ _is_block_appended = true ;
199
+ _blocks.clear ();
168
200
return Status::OK ();
169
201
}
170
202
0 commit comments