Skip to content

Commit

Permalink
Fix use of freed path in decompression sort logic
Browse files Browse the repository at this point in the history
In the function add_chunk_sorted_paths, we create sorted versions of the
decompress paths. We construct a sort node and place it on top of the
decompressed chunk to do this. However, the decompress chunk path will
be also added to the relation via add_path. This function can recycle
the provided path if better paths are already known. Therefore, we need
our own private copy for the sorted paths.
  • Loading branch information
jnidzwetzki committed Dec 23, 2023
1 parent 82557eb commit 2a808ca
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 4 deletions.
1 change: 1 addition & 0 deletions .unreleased/fix_6465
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixes: #6465 Fix use of freed path in decompression sort logic
15 changes: 12 additions & 3 deletions tsl/src/nodes/decompress_chunk/decompress_chunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ can_batch_sorted_merge(PlannerInfo *root, CompressionInfo *info, Chunk *chunk)
*/
static void
add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht, Index ht_relid,
Path *decompress_chunk_path, Path *compressed_path)
Path *path, Path *compressed_path)
{
if (root->query_pathkeys == NIL)
return;
Expand All @@ -636,6 +636,14 @@ add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht,
if (!IsA(compressed_path, Path))
return;

/* Copy the decompress chunk path because the original can be recycled in add_path, and our
* sorted path must be independent. */
if (!ts_is_decompress_chunk_path(path))
return;

DecompressChunkPath *decompress_chunk_path =
copy_decompress_chunk_path((DecompressChunkPath *) path);

/* Iterate over the sort_pathkeys and generate all possible useful sortings */
List *useful_pathkeys = NIL;
ListCell *lc;
Expand All @@ -660,12 +668,13 @@ add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht,
useful_pathkeys = lappend(useful_pathkeys, pathkey);

/* Create the sorted path for these useful_pathkeys */
if (!pathkeys_contained_in(useful_pathkeys, decompress_chunk_path->pathkeys))
if (!pathkeys_contained_in(useful_pathkeys,
decompress_chunk_path->custom_path.path.pathkeys))
{
Path *sorted_path =
(Path *) create_sort_path(root,
chunk_rel,
decompress_chunk_path,
&decompress_chunk_path->custom_path.path,
list_copy(useful_pathkeys), /* useful_pathkeys is modified
in each iteration */
root->limit_tuples);
Expand Down
56 changes: 56 additions & 0 deletions tsl/test/expected/transparent_decompression_queries.out
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,59 @@ SELECT * FROM pseudo WHERE now() IS NOT NULL;
Sat Jan 01 00:00:00 2000 PST
(1 row)

-- ensure needed decompression paths underneath a sort node
-- are not recycled by PostgreSQL
SET random_page_cost = 4.0;
DROP TABLE IF EXISTS options_data;
NOTICE: table "options_data" does not exist, skipping
CREATE TABLE IF NOT EXISTS options_data (
date date NOT NULL,
contract_code text NOT NULL,
expiry_code text NOT NULL,
option_type character(1) NOT NULL,
strike real NOT NULL,
price double precision,
source text NOT NULL,
meta text
);
SELECT create_hypertable('options_data', 'date', chunk_time_interval => interval '1 day');
create_hypertable
---------------------------
(7,public,options_data,t)
(1 row)

INSERT INTO options_data (date, contract_code, expiry_code, option_type, strike, price, source, meta)
SELECT
date_series,
'CONTRACT' || date_series::text,
'EXPIRY' || date_series::text,
CASE WHEN random() < 0.5 THEN 'C' ELSE 'P' END, -- Randomly choose 'C' or 'P' for option_type
random() * 100, -- Random strike value between 0 and 100
random() * 100, -- Random price value between 0 and 100
'SOURCE' || date_series::text,
'META' || date_series::text
FROM generate_series(
'2023-12-01 00:00:00',
'2023-12-05 00:00:00',
INTERVAL '10 minute'
) AS date_series;
ALTER TABLE options_data SET (timescaledb.compress,
timescaledb.compress_segmentby = 'contract_code, expiry_code, option_type, strike, source',
timescaledb.compress_orderby = 'date DESC');
SELECT compress_chunk(ch) FROM show_chunks('options_data', older_than=> '2023-12-05') ch;
compress_chunk
-----------------------------------------
_timescaledb_internal._hyper_7_13_chunk
_timescaledb_internal._hyper_7_14_chunk
_timescaledb_internal._hyper_7_15_chunk
_timescaledb_internal._hyper_7_16_chunk
(4 rows)

ANALYZE options_data;
SELECT max(date) AS date FROM options_data WHERE contract_code='CONTRACT2023-12-03 04:00:00+01';
date
------

(1 row)

RESET random_page_cost;
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ QUERY PLAN
-> Sort
Sort Key: m3."time"
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk m3
-> Parallel Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_2
-> Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_2
Filter: (device_id = 3)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk m1
-> Seq Scan on compress_hyper_X_X_chunk
Expand Down
45 changes: 45 additions & 0 deletions tsl/test/sql/transparent_decompression_queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,48 @@ SELECT compress_chunk(show_chunks('pseudo'));

SELECT * FROM pseudo WHERE now() IS NOT NULL;

-- ensure needed decompression paths underneath a sort node
-- are not recycled by PostgreSQL
SET random_page_cost = 4.0;
DROP TABLE IF EXISTS options_data;

CREATE TABLE IF NOT EXISTS options_data (
date date NOT NULL,
contract_code text NOT NULL,
expiry_code text NOT NULL,
option_type character(1) NOT NULL,
strike real NOT NULL,
price double precision,
source text NOT NULL,
meta text
);

SELECT create_hypertable('options_data', 'date', chunk_time_interval => interval '1 day');

INSERT INTO options_data (date, contract_code, expiry_code, option_type, strike, price, source, meta)
SELECT
date_series,
'CONTRACT' || date_series::text,
'EXPIRY' || date_series::text,
CASE WHEN random() < 0.5 THEN 'C' ELSE 'P' END, -- Randomly choose 'C' or 'P' for option_type
random() * 100, -- Random strike value between 0 and 100
random() * 100, -- Random price value between 0 and 100
'SOURCE' || date_series::text,
'META' || date_series::text
FROM generate_series(
'2023-12-01 00:00:00',
'2023-12-05 00:00:00',
INTERVAL '10 minute'
) AS date_series;

ALTER TABLE options_data SET (timescaledb.compress,
timescaledb.compress_segmentby = 'contract_code, expiry_code, option_type, strike, source',
timescaledb.compress_orderby = 'date DESC');

SELECT compress_chunk(ch) FROM show_chunks('options_data', older_than=> '2023-12-05') ch;

ANALYZE options_data;

SELECT max(date) AS date FROM options_data WHERE contract_code='CONTRACT2023-12-03 04:00:00+01';

RESET random_page_cost;

0 comments on commit 2a808ca

Please sign in to comment.