From 2a808cabada4a8e099e89c3e320a1b775fbbc540 Mon Sep 17 00:00:00 2001 From: Jan Nidzwetzki Date: Fri, 22 Dec 2023 15:04:35 +0100 Subject: [PATCH] Fix use of freed path in decompression sort logic In the function add_chunk_sorted_paths, we create sorted versions of the decompress paths. We construct a sort node and place it on top of the decompressed chunk to do this. However, the decompress chunk path will be also added to the relation via add_path. This function can recycle the provided path if better paths are already known. Therefore, we need our own private copy for the sorted paths. --- .unreleased/fix_6465 | 1 + .../nodes/decompress_chunk/decompress_chunk.c | 15 ++++- .../transparent_decompression_queries.out | 56 +++++++++++++++++++ .../transparent_decompress_chunk-16.out | 2 +- .../sql/transparent_decompression_queries.sql | 45 +++++++++++++++ 5 files changed, 115 insertions(+), 4 deletions(-) create mode 100644 .unreleased/fix_6465 diff --git a/.unreleased/fix_6465 b/.unreleased/fix_6465 new file mode 100644 index 00000000000..3d677dd1c33 --- /dev/null +++ b/.unreleased/fix_6465 @@ -0,0 +1 @@ +Fixes: #6465 Fix use of freed path in decompression sort logic diff --git a/tsl/src/nodes/decompress_chunk/decompress_chunk.c b/tsl/src/nodes/decompress_chunk/decompress_chunk.c index 326c2a92bd1..9063fcad7bd 100644 --- a/tsl/src/nodes/decompress_chunk/decompress_chunk.c +++ b/tsl/src/nodes/decompress_chunk/decompress_chunk.c @@ -627,7 +627,7 @@ can_batch_sorted_merge(PlannerInfo *root, CompressionInfo *info, Chunk *chunk) */ static void add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht, Index ht_relid, - Path *decompress_chunk_path, Path *compressed_path) + Path *path, Path *compressed_path) { if (root->query_pathkeys == NIL) return; @@ -636,6 +636,14 @@ add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht, if (!IsA(compressed_path, Path)) return; + /* Copy the decompress chunk path because the original can be recycled in add_path, and our + * sorted path must be independent. */ + if (!ts_is_decompress_chunk_path(path)) + return; + + DecompressChunkPath *decompress_chunk_path = + copy_decompress_chunk_path((DecompressChunkPath *) path); + /* Iterate over the sort_pathkeys and generate all possible useful sortings */ List *useful_pathkeys = NIL; ListCell *lc; @@ -660,12 +668,13 @@ add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht, useful_pathkeys = lappend(useful_pathkeys, pathkey); /* Create the sorted path for these useful_pathkeys */ - if (!pathkeys_contained_in(useful_pathkeys, decompress_chunk_path->pathkeys)) + if (!pathkeys_contained_in(useful_pathkeys, + decompress_chunk_path->custom_path.path.pathkeys)) { Path *sorted_path = (Path *) create_sort_path(root, chunk_rel, - decompress_chunk_path, + &decompress_chunk_path->custom_path.path, list_copy(useful_pathkeys), /* useful_pathkeys is modified in each iteration */ root->limit_tuples); diff --git a/tsl/test/expected/transparent_decompression_queries.out b/tsl/test/expected/transparent_decompression_queries.out index bb52d8b06a1..625a758ee9f 100644 --- a/tsl/test/expected/transparent_decompression_queries.out +++ b/tsl/test/expected/transparent_decompression_queries.out @@ -219,3 +219,59 @@ SELECT * FROM pseudo WHERE now() IS NOT NULL; Sat Jan 01 00:00:00 2000 PST (1 row) +-- ensure needed decompression paths underneath a sort node +-- are not recycled by PostgreSQL +SET random_page_cost = 4.0; +DROP TABLE IF EXISTS options_data; +NOTICE: table "options_data" does not exist, skipping +CREATE TABLE IF NOT EXISTS options_data ( + date date NOT NULL, + contract_code text NOT NULL, + expiry_code text NOT NULL, + option_type character(1) NOT NULL, + strike real NOT NULL, + price double precision, + source text NOT NULL, + meta text +); +SELECT create_hypertable('options_data', 'date', chunk_time_interval => interval '1 day'); + create_hypertable +--------------------------- + (7,public,options_data,t) +(1 row) + +INSERT INTO options_data (date, contract_code, expiry_code, option_type, strike, price, source, meta) +SELECT + date_series, + 'CONTRACT' || date_series::text, + 'EXPIRY' || date_series::text, + CASE WHEN random() < 0.5 THEN 'C' ELSE 'P' END, -- Randomly choose 'C' or 'P' for option_type + random() * 100, -- Random strike value between 0 and 100 + random() * 100, -- Random price value between 0 and 100 + 'SOURCE' || date_series::text, + 'META' || date_series::text +FROM generate_series( + '2023-12-01 00:00:00', + '2023-12-05 00:00:00', + INTERVAL '10 minute' +) AS date_series; +ALTER TABLE options_data SET (timescaledb.compress, + timescaledb.compress_segmentby = 'contract_code, expiry_code, option_type, strike, source', + timescaledb.compress_orderby = 'date DESC'); +SELECT compress_chunk(ch) FROM show_chunks('options_data', older_than=> '2023-12-05') ch; + compress_chunk +----------------------------------------- + _timescaledb_internal._hyper_7_13_chunk + _timescaledb_internal._hyper_7_14_chunk + _timescaledb_internal._hyper_7_15_chunk + _timescaledb_internal._hyper_7_16_chunk +(4 rows) + +ANALYZE options_data; +SELECT max(date) AS date FROM options_data WHERE contract_code='CONTRACT2023-12-03 04:00:00+01'; + date +------ + +(1 row) + +RESET random_page_cost; diff --git a/tsl/test/shared/expected/transparent_decompress_chunk-16.out b/tsl/test/shared/expected/transparent_decompress_chunk-16.out index dffc57c6373..de0a06d7aab 100644 --- a/tsl/test/shared/expected/transparent_decompress_chunk-16.out +++ b/tsl/test/shared/expected/transparent_decompress_chunk-16.out @@ -713,7 +713,7 @@ QUERY PLAN -> Sort Sort Key: m3."time" -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk m3 - -> Parallel Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_2 + -> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_2 Filter: (device_id = 3) -> Custom Scan (DecompressChunk) on _hyper_X_X_chunk m1 -> Seq Scan on compress_hyper_X_X_chunk diff --git a/tsl/test/sql/transparent_decompression_queries.sql b/tsl/test/sql/transparent_decompression_queries.sql index ee091c0ba05..ca9a2ddc360 100644 --- a/tsl/test/sql/transparent_decompression_queries.sql +++ b/tsl/test/sql/transparent_decompression_queries.sql @@ -93,3 +93,48 @@ SELECT compress_chunk(show_chunks('pseudo')); SELECT * FROM pseudo WHERE now() IS NOT NULL; +-- ensure needed decompression paths underneath a sort node +-- are not recycled by PostgreSQL +SET random_page_cost = 4.0; +DROP TABLE IF EXISTS options_data; + +CREATE TABLE IF NOT EXISTS options_data ( + date date NOT NULL, + contract_code text NOT NULL, + expiry_code text NOT NULL, + option_type character(1) NOT NULL, + strike real NOT NULL, + price double precision, + source text NOT NULL, + meta text +); + +SELECT create_hypertable('options_data', 'date', chunk_time_interval => interval '1 day'); + +INSERT INTO options_data (date, contract_code, expiry_code, option_type, strike, price, source, meta) +SELECT + date_series, + 'CONTRACT' || date_series::text, + 'EXPIRY' || date_series::text, + CASE WHEN random() < 0.5 THEN 'C' ELSE 'P' END, -- Randomly choose 'C' or 'P' for option_type + random() * 100, -- Random strike value between 0 and 100 + random() * 100, -- Random price value between 0 and 100 + 'SOURCE' || date_series::text, + 'META' || date_series::text +FROM generate_series( + '2023-12-01 00:00:00', + '2023-12-05 00:00:00', + INTERVAL '10 minute' +) AS date_series; + +ALTER TABLE options_data SET (timescaledb.compress, + timescaledb.compress_segmentby = 'contract_code, expiry_code, option_type, strike, source', + timescaledb.compress_orderby = 'date DESC'); + +SELECT compress_chunk(ch) FROM show_chunks('options_data', older_than=> '2023-12-05') ch; + +ANALYZE options_data; + +SELECT max(date) AS date FROM options_data WHERE contract_code='CONTRACT2023-12-03 04:00:00+01'; + +RESET random_page_cost;