Removal of sequence number in compression
Sequence numbers were an optimization for ordering batches based on the
orderby configuration setting. They were used for ordered append and for
avoiding a sort of compressed data when it already matched the query
ordering. However, now that changes to compressed data are supported, the
bookkeeping for sequence numbers has become a burden. Removing them and
using the metadata columns for ordering removes that burden while keeping
all the existing optimizations that relied on sequence numbers in place.
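
As a rough illustration (not part of this diff), the ordering that sequence
numbers used to provide can instead come from the per-batch min/max metadata
columns. A sketch of the kind of index a compressed chunk can use for this,
assuming a single orderby column and hypothetical chunk and segmentby names:

CREATE INDEX ON _timescaledb_internal.compress_hyper_2_4_chunk -- hypothetical chunk name
(
    device_id,       -- hypothetical segmentby column
    _ts_meta_min_1,  -- per-batch minimum of the orderby column
    _ts_meta_max_1   -- per-batch maximum of the orderby column
);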
antekresic committed Sep 26, 2024
1 parent 7c5dbbb commit 0be3ba2
Showing 79 changed files with 5,891 additions and 5,446 deletions.
113 changes: 113 additions & 0 deletions sql/updates/reverse-dev.sql
@@ -1,3 +1,116 @@
-- check whether we can safely downgrade the existing compression setup
CREATE OR REPLACE FUNCTION pg_temp.add_sequence_number_metadata_column(
comp_ch_schema_name text,
comp_ch_table_name text
)
RETURNS BOOL LANGUAGE PLPGSQL AS
$BODY$
DECLARE
chunk_schema_name text;
chunk_table_name text;
index_name text;
segmentby_columns text;
BEGIN
SELECT ch.schema_name, ch.table_name INTO chunk_schema_name, chunk_table_name
FROM _timescaledb_catalog.chunk ch
INNER JOIN _timescaledb_catalog.chunk comp_ch
ON ch.compressed_chunk_id = comp_ch.id
WHERE comp_ch.schema_name = comp_ch_schema_name
AND comp_ch.table_name = comp_ch_table_name;

IF NOT FOUND THEN
RAISE USING
ERRCODE = 'feature_not_supported',
MESSAGE = 'Cannot migrate compressed chunk to version 2.16.1, chunk not found';
END IF;

-- Add sequence number column to compressed chunk
EXECUTE format('ALTER TABLE %s.%s ADD COLUMN _ts_meta_sequence_num INT DEFAULT NULL', comp_ch_schema_name, comp_ch_table_name);

-- Remove all indexes from compressed chunk
FOR index_name IN
SELECT format('%s.%s', i.schemaname, i.indexname)
FROM pg_indexes i
WHERE i.schemaname = comp_ch_schema_name
AND i.tablename = comp_ch_table_name
LOOP
EXECUTE format('DROP INDEX %s;', index_name);
END LOOP;

-- Fetch the segmentby columns from compression settings
SELECT string_agg(cs.segmentby_column, ',') INTO segmentby_columns
FROM (
SELECT unnest(segmentby)
FROM _timescaledb_catalog.compression_settings
WHERE relid = format('%s.%s', comp_ch_schema_name, comp_ch_table_name)::regclass::oid
AND segmentby IS NOT NULL
) AS cs(segmentby_column);

-- Create compressed chunk index based on sequence num metadata column
-- If there are no segmentby columns, we can skip creating the index
IF FOUND AND segmentby_columns IS NOT NULL THEN
EXECUTE format('CREATE INDEX ON %s.%s (%s, _ts_meta_sequence_num);', comp_ch_schema_name, comp_ch_table_name, segmentby_columns);
END IF;

-- Mark compressed chunk as unordered
UPDATE _timescaledb_catalog.chunk
SET status = status | 2 -- set unordered bit
WHERE schema_name = chunk_schema_name
AND table_name = chunk_table_name;

RETURN true;
END
$BODY$ SET search_path TO pg_catalog, pg_temp;

DO $$
DECLARE
chunk_count int;
chunk_record record;
BEGIN
-- If we find compressed chunks which don't have the sequence number metadata
-- column, we need to stop the downgrade and have the user run a migration
-- script to re-add the missing columns
SELECT count(*) INTO STRICT chunk_count
FROM _timescaledb_catalog.chunk ch
INNER JOIN _timescaledb_catalog.chunk uncomp_ch
ON uncomp_ch.compressed_chunk_id = ch.id
WHERE not exists (
SELECT
FROM pg_attribute att
WHERE attrelid=format('%I.%I',ch.schema_name,ch.table_name)::regclass
AND attname='_ts_meta_sequence_num')
AND NOT uncomp_ch.dropped;

-- Do the migration only if we find 10 or fewer chunks that need to be migrated
IF chunk_count > 10 THEN
RAISE USING
ERRCODE = 'feature_not_supported',
MESSAGE = 'Cannot downgrade compressed hypertables with chunks that do not contain sequence numbers. Run timescaledb--2.17-2.16.1.sql migration script before downgrading.',
DETAIL = 'Number of chunks that need to be migrated: '|| chunk_count::text;
ELSIF chunk_count > 0 THEN
FOR chunk_record IN
SELECT comp_ch.*
FROM _timescaledb_catalog.chunk ch
INNER JOIN _timescaledb_catalog.chunk comp_ch
ON ch.compressed_chunk_id = comp_ch.id
WHERE not exists (
SELECT
FROM pg_attribute att
WHERE attrelid=format('%I.%I',comp_ch.schema_name,comp_ch.table_name)::regclass
AND attname='_ts_meta_sequence_num')
AND NOT ch.dropped
LOOP
PERFORM pg_temp.add_sequence_number_metadata_column(chunk_record.schema_name, chunk_record.table_name);
RAISE LOG 'Migrated compressed chunk %.% to version 2.16.1', chunk_record.schema_name, chunk_record.table_name;
END LOOP;

RAISE LOG 'Migration successful!';
END IF;
END
$$;

DROP FUNCTION pg_temp.add_sequence_number_metadata_column(text, text);

DROP FUNCTION _timescaledb_functions.compressed_data_info(_timescaledb_internal.compressed_data);
DROP INDEX _timescaledb_catalog.compression_chunk_size_idx;
DROP FUNCTION IF EXISTS _timescaledb_functions.drop_osm_chunk(REGCLASS);
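
For context, this reverse-dev.sql check runs on the regular extension downgrade
path. Assuming an installed 2.17 build being taken back to 2.16.1, the statement
that triggers it would simply be:

-- Hypothetical downgrade session; the DO block above aborts with
-- feature_not_supported if more than 10 compressed chunks still lack
-- the _ts_meta_sequence_num column.
ALTER EXTENSION timescaledb UPDATE TO '2.16.1';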
10 changes: 10 additions & 0 deletions test/sql/updates/post.compression.sql
@@ -4,6 +4,16 @@

SELECT * FROM compress ORDER BY time DESC, small_cardinality, large_cardinality, some_double, some_int, some_custom, some_bool;

-- This recompression is necessary only for downgrades from 2.17 to 2.16.1,
-- because the downgrade migration has to add the sequence number metadata
-- column and leaves the compressed chunks marked as unordered.
-- Fully recompressing the chunks fixes the difference.
SELECT count(decompress_chunk(ch, true)) FROM show_chunks('compress') ch;
SELECT count(compress_chunk(ch, true)) FROM show_chunks('compress') ch;

-- Run this query again to confirm the data is consistent even after the recompression above
SELECT * FROM compress ORDER BY time DESC, small_cardinality, large_cardinality, some_double, some_int, some_custom, some_bool;

INSERT INTO compress(time, small_cardinality, large_cardinality, some_double, some_int, some_custom, some_bool)
SELECT g, 'QW', g::text, 2, 0, (100,4)::custom_type_for_compression, false
FROM generate_series('2019-11-01 00:00'::timestamp, '2019-12-15 00:00'::timestamp, '1 day') g;
7 changes: 6 additions & 1 deletion tsl/src/compression/api.c
@@ -33,6 +33,7 @@
#include "cache.h"
#include "chunk.h"
#include "compression.h"
#include "compression_storage.h"
#include "create.h"
#include "debug_point.h"
#include "error_utils.h"
@@ -327,6 +328,11 @@ find_chunk_to_merge_into(Hypertable *ht, Chunk *current_chunk)
if (!ts_compression_settings_equal(ht_comp_settings, prev_comp_settings))
return NULL;

/* We don't support merging chunks with sequence numbers */
if (get_attnum(prev_comp_reloid, COMPRESSION_COLUMN_METADATA_SEQUENCE_NUM_NAME) !=
InvalidAttrNumber)
return NULL;

return previous_chunk;
}

@@ -1239,7 +1245,6 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk)
compressed_chunk_rel,
compressed_rel_tupdesc->natts,
true /*need_bistate*/,
true /*reset_sequence*/,
0 /*insert options*/);

/* create an array of the segmentby column offsets in the compressed chunk */