Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Heap reading optimization and fixes #366

Merged
merged 2 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/pgduckdb/scan/heap_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class HeapReader {
OffsetNumber m_current_tuple_index;
int m_page_tuples_left;
HeapTupleData m_tuple;
BufferAccessStrategy m_buffer_access_strategy;
};

} // namespace pgduckdb
28 changes: 11 additions & 17 deletions include/pgduckdb/scan/postgres_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,36 +24,30 @@ class PostgresScanGlobalState {
TupleDesc m_tuple_desc;
std::mutex m_lock; // Lock for one replacement scan
bool m_count_tuples_only;
duckdb::map<duckdb::idx_t, duckdb::column_t> m_read_columns_ids;
duckdb::map<duckdb::idx_t, duckdb::column_t> m_output_columns_ids;
duckdb::TableFilterSet *m_filters = nullptr;
/* Postgres column id to duckdb output vector idx */
std::vector<duckdb::pair<duckdb::column_t, duckdb::idx_t>> m_input_columns;
std::vector<duckdb::TableFilter *> m_column_filters;
/* Duckdb output vector idx with information about postgres column id */
duckdb::vector<duckdb::pair<duckdb::idx_t, duckdb::column_t>> m_output_columns;
std::atomic<std::uint32_t> m_total_row_count;
duckdb::map<int, Datum> m_relation_missing_attrs;
};

class PostgresScanLocalState {
public:
PostgresScanLocalState(const PostgresScanGlobalState *psgs) : m_output_vector_size(0), m_exhausted_scan(false) {
if (psgs->m_count_tuples_only) {
values = nullptr;
nulls = nullptr;
} else {
PostgresScanLocalState(const PostgresScanGlobalState *psgs)
: m_output_vector_size(0), m_exhausted_scan(false), values(nullptr), nulls(nullptr) {
if (!psgs->m_count_tuples_only) {
/* FIXME: all calls to duckdb_malloc/duckdb_free should be changed in future */
const auto s = psgs->m_read_columns_ids.size();
const auto s = psgs->m_input_columns.size();
values = (Datum *)duckdb_malloc(sizeof(Datum) * s);
nulls = (bool *)duckdb_malloc(sizeof(bool) * s);
}
}

~PostgresScanLocalState() {
if (values) {
duckdb_free(values);
values = nullptr;
}
if (nulls) {
duckdb_free(nulls);
nulls = nullptr;
}
duckdb_free(values);
Y-- marked this conversation as resolved.
Show resolved Hide resolved
duckdb_free(nulls);
}

int m_output_vector_size;
Expand Down
39 changes: 19 additions & 20 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1088,45 +1088,44 @@ InsertTupleIntoChunk(duckdb::DataChunk &output, duckdb::shared_ptr<PostgresScanG
*/

/* Read heap tuple with all required columns. */
for (auto const &[columnIdx, valueIdx] : scan_global_state->m_read_columns_ids) {
values[valueIdx] =
HeapTupleFetchNextColumnDatum(scan_global_state->m_tuple_desc, tuple, heap_tuple_read_state, columnIdx + 1,
&nulls[valueIdx], scan_global_state->m_relation_missing_attrs);
if (scan_global_state->m_filters &&
(scan_global_state->m_filters->filters.find(valueIdx) != scan_global_state->m_filters->filters.end())) {
auto &filter = scan_global_state->m_filters->filters[valueIdx];
const auto valid_tuple = ApplyValueFilter(*filter, values[valueIdx], nulls[valueIdx],
scan_global_state->m_tuple_desc->attrs[columnIdx].atttypid);
for (auto const &[attr_id, input_column_idx] : scan_global_state->m_input_columns) {
values[input_column_idx] =
HeapTupleFetchNextColumnDatum(scan_global_state->m_tuple_desc, tuple, heap_tuple_read_state, attr_id + 1,
&nulls[input_column_idx], scan_global_state->m_relation_missing_attrs);
if (scan_global_state->m_column_filters[input_column_idx]) {
auto &filter = scan_global_state->m_column_filters[input_column_idx];
const auto valid_tuple = ApplyValueFilter(*filter, values[input_column_idx], nulls[input_column_idx],
scan_global_state->m_tuple_desc->attrs[attr_id].atttypid);
if (!valid_tuple) {
return;
}
}

}

/* Write tuple columns in output vector. */
for (idx_t idx = 0; idx < scan_global_state->m_output_columns_ids.size(); idx++) {
auto &result = output.data[idx];
idx_t output_column_idx = scan_global_state->m_read_columns_ids[scan_global_state->m_output_columns_ids[idx]];
if (nulls[output_column_idx]) {
int i = 0;
for (auto const &[input_column_idx, attr_id] : scan_global_state->m_output_columns) {
auto &result = output.data[i];
if (nulls[input_column_idx]) {
auto &array_mask = duckdb::FlatVector::Validity(result);
array_mask.SetInvalid(scan_local_state->m_output_vector_size);
} else {
auto attr = scan_global_state->m_tuple_desc->attrs[scan_global_state->m_output_columns_ids[idx]];
auto attr = scan_global_state->m_tuple_desc->attrs[attr_id];
if (attr.attlen == -1) {
bool should_free = false;
values[output_column_idx] =
DetoastPostgresDatum(reinterpret_cast<varlena *>(values[output_column_idx]), &should_free);
ConvertPostgresToDuckValue(attr.atttypid, values[output_column_idx], result,
values[input_column_idx] = DetoastPostgresDatum(
reinterpret_cast<varlena *>(values[input_column_idx]), &should_free);
ConvertPostgresToDuckValue(attr.atttypid, values[input_column_idx], result,
scan_local_state->m_output_vector_size);
if (should_free) {
duckdb_free(reinterpret_cast<void *>(values[output_column_idx]));
duckdb_free(reinterpret_cast<void *>(values[input_column_idx]));
}
} else {
ConvertPostgresToDuckValue(attr.atttypid, values[output_column_idx], result,
ConvertPostgresToDuckValue(attr.atttypid, values[input_column_idx], result,
scan_local_state->m_output_vector_size);
}
}
i++;
}

scan_local_state->m_output_vector_size++;
Expand Down
10 changes: 7 additions & 3 deletions src/scan/heap_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,19 @@ HeapReader::HeapReader(Relation rel, duckdb::shared_ptr<HeapReaderGlobalState> h
m_tuple.t_data = NULL;
m_tuple.t_tableOid = RelationGetRelid(m_rel);
ItemPointerSetInvalid(&m_tuple.t_self);
DuckdbProcessLock::GetLock().lock();
m_buffer_access_strategy = GetAccessStrategy(BAS_BULKREAD);
DuckdbProcessLock::GetLock().unlock();
}

HeapReader::~HeapReader() {
DuckdbProcessLock::GetLock().lock();
/* If execution is interrupted and buffer is still opened close it now */
if (m_buffer != InvalidBuffer) {
DuckdbProcessLock::GetLock().lock();
UnlockReleaseBuffer(m_buffer);
DuckdbProcessLock::GetLock().unlock();
}
FreeAccessStrategy(m_buffer_access_strategy);
DuckdbProcessLock::GetLock().unlock();
}

Page
Expand Down Expand Up @@ -97,7 +101,7 @@ HeapReader::ReadPageTuples(duckdb::DataChunk &output) {
block = m_block_number;

m_buffer = PostgresFunctionGuard<Buffer>(ReadBufferExtended, m_rel, MAIN_FORKNUM, block, RBM_NORMAL,
GetAccessStrategy(BAS_BULKREAD));
m_buffer_access_strategy);

PostgresFunctionGuard(LockBuffer, m_buffer, BUFFER_LOCK_SHARE);

Expand Down
29 changes: 22 additions & 7 deletions src/scan/postgres_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,25 @@ PostgresScanGlobalState::InitGlobalState(duckdb::TableFunctionInitInput &input)
return;
}

/* We need ordered columns ids for reading tuple. */
/* We need ordered columns id for reading tuple. */
duckdb::map<duckdb::column_t, duckdb::idx_t> ordered_input_columns;
for (duckdb::idx_t i = 0; i < input.column_ids.size(); i++) {
m_read_columns_ids[input.column_ids[i]] = i;
ordered_input_columns[input.column_ids[i]] = i;
}

auto table_filters = input.filters.get();
m_column_filters.resize(input.column_ids.size(), 0);

for (auto const &[attr_id, column_idx] : ordered_input_columns) {
duckdb::TableFilter *column_filter = nullptr;
if (table_filters) {
auto column_filter_it = table_filters->filters.find(column_idx);
if (column_filter_it != table_filters->filters.end()) {
column_filter = column_filter_it->second.get();
}
}
m_input_columns.emplace_back(duckdb::make_pair(attr_id, column_idx));
m_column_filters[column_idx] = column_filter;
}

/* We need to check do we consider projection_ids or column_ids list to be used
Expand All @@ -52,21 +68,20 @@ PostgresScanGlobalState::InitGlobalState(duckdb::TableFunctionInitInput &input)
*/
if (input.CanRemoveFilterColumns()) {
for (duckdb::idx_t i = 0; i < input.projection_ids.size(); i++) {
m_output_columns_ids[i] = input.column_ids[input.projection_ids[i]];
m_output_columns.emplace_back(
duckdb::make_pair(input.projection_ids[i], input.column_ids[input.projection_ids[i]]));
}
} else {
for (duckdb::idx_t i = 0; i < input.column_ids.size(); i++) {
m_output_columns_ids[i] = input.column_ids[i];
m_output_columns.emplace_back(duckdb::make_pair(i, input.column_ids[i]));
}
}

m_filters = input.filters.get();
}

void
PostgresScanGlobalState::InitRelationMissingAttrs(TupleDesc tuple_desc) {
std::lock_guard<std::mutex> lock(DuckdbProcessLock::GetLock());
for(int attnum = 0; attnum < tuple_desc->natts; attnum++) {
for (int attnum = 0; attnum < tuple_desc->natts; attnum++) {
bool is_null = false;
Datum attr = getmissingattr(tuple_desc, attnum + 1, &is_null);
/* Add missing attr datum if not null*/
Expand Down