From 179f8ed3a48c92897b61a8a8b34f6bb1d866930a Mon Sep 17 00:00:00 2001 From: chienguo <2778180460@qq.com> Date: Sun, 10 Mar 2024 21:55:55 +0800 Subject: [PATCH 1/4] add a new ratio config for data cache --- src/query/config/src/config.rs | 12 ++++++------ src/query/config/src/inner.rs | 8 ++++---- src/query/service/src/global_services.rs | 6 +++++- src/query/service/tests/it/storages/fuse/io.rs | 2 +- .../it/storages/testdata/configs_table_basic.txt | 2 +- .../common/cache_manager/src/cache_manager.rs | 10 ++++++++-- 6 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 3f136b324500..ed9dee21b751 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -2803,18 +2803,18 @@ pub struct CacheConfig { #[serde(rename = "disk")] pub disk_cache_config: DiskCacheConfig, - /// Max size of in memory table column object cache. By default it is 0 (disabled) + /// Max percentage of in memory table column object cache relative to whole memory. By default it is 0 (disabled) /// - /// CAUTION: The cached items are deserialized table column objects, may take a lot of memory. + /// CAUTION: The cached items are deserialized table column objects, ma take a lot of memory. /// /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, /// and the access pattern will benefit from caching, consider enabled this cache. #[clap( - long = "cache-table-data-deserialized-data-bytes", + long = "cache-table-data-deserialized-memory-ratio", value_name = "VALUE", default_value = "0" )] - pub table_data_deserialized_data_bytes: u64, + pub table_data_deserialized_memory_ratio: u64, // ----- the following options/args are all deprecated ---- /// Max number of cached table segment @@ -2946,7 +2946,7 @@ mod cache_config_converters { table_data_cache_population_queue_size: value .table_data_cache_population_queue_size, disk_cache_config: value.disk_cache_config.try_into()?, - table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, + table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio, }) } } @@ -2967,7 +2967,7 @@ mod cache_config_converters { table_data_cache_population_queue_size: value .table_data_cache_population_queue_size, disk_cache_config: value.disk_cache_config.into(), - table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, + table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio, table_meta_segment_count: None, } } diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 39b5fb915f4d..ac1952cbca12 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -557,13 +557,13 @@ pub struct CacheConfig { /// Storage that hold the raw data caches pub disk_cache_config: DiskCacheConfig, - /// Max size of in memory table column object cache. By default it is 0 (disabled) + /// Max percentage of in memory table column object cache relative to whole memory. By default it is 0 (disabled) /// - /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. + /// CAUTION: The cached items are deserialized table column objects, ma take a lot of memory. /// /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, /// and the access pattern will benefit from caching, consider enabled this cache. - pub table_data_deserialized_data_bytes: u64, + pub table_data_deserialized_memory_ratio: u64, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -621,7 +621,7 @@ impl Default for CacheConfig { data_cache_storage: Default::default(), table_data_cache_population_queue_size: 0, disk_cache_config: Default::default(), - table_data_deserialized_data_bytes: 0, + table_data_deserialized_memory_ratio: 0, } } } diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 7019821a764f..febdf5ec136e 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -116,7 +116,11 @@ impl GlobalServices { &config.query.share_endpoint_auth_token_file, config.query.tenant_id.to_string(), )?; - CacheManager::init(&config.cache, config.query.tenant_id.to_string())?; + CacheManager::init( + &config.cache, + &config.query.max_server_memory_usage, + config.query.tenant_id.to_string(), + )?; if let Some(addr) = config.query.cloud_control_grpc_server_address.clone() { CloudControlApiProvider::init(addr, config.query.cloud_control_grpc_timeout).await?; diff --git a/src/query/service/tests/it/storages/fuse/io.rs b/src/query/service/tests/it/storages/fuse/io.rs index aa70923b6c5c..66ae39b93dfc 100644 --- a/src/query/service/tests/it/storages/fuse/io.rs +++ b/src/query/service/tests/it/storages/fuse/io.rs @@ -48,7 +48,7 @@ async fn test_array_cache_of_nested_column_iusse_14502() -> Result<()> { let mut config = InnerConfig::default(); // memory cache is not enabled by default, let's enable it - config.cache.table_data_deserialized_data_bytes = 1024 * 1024 * 10; + config.cache.table_data_deserialized_memory_ratio = 10; let fixture = TestFixture::setup_with_config(&config).await?; fixture.create_default_database().await?; diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 5d87c838b9a1..89c552783133 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -13,7 +13,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | 'cache' | 'table_bloom_index_filter_size' | '2147483648' | '' | | 'cache' | 'table_bloom_index_meta_count' | '3000' | '' | | 'cache' | 'table_data_cache_population_queue_size' | '0' | '' | -| 'cache' | 'table_data_deserialized_data_bytes' | '0' | '' | +| 'cache' | 'table_data_deserialized_memory_ratio' | '0' | '' | | 'cache' | 'table_meta_segment_bytes' | '1073741824' | '' | | 'cache' | 'table_meta_segment_count' | 'null' | '' | | 'cache' | 'table_meta_snapshot_count' | '256' | '' | diff --git a/src/query/storages/common/cache_manager/src/cache_manager.rs b/src/query/storages/common/cache_manager/src/cache_manager.rs index 758d4fbfa47e..2b7bebf0e59e 100644 --- a/src/query/storages/common/cache_manager/src/cache_manager.rs +++ b/src/query/storages/common/cache_manager/src/cache_manager.rs @@ -58,7 +58,11 @@ pub struct CacheManager { impl CacheManager { /// Initialize the caches according to the relevant configurations. - pub fn init(config: &CacheConfig, tenant_id: impl Into) -> Result<()> { + pub fn init( + config: &CacheConfig, + max_server_memory_usage: &u64, + tenant_id: impl Into, + ) -> Result<()> { // setup table data cache let table_data_cache = { match config.data_cache_storage { @@ -94,8 +98,10 @@ impl CacheManager { }; // setup in-memory table column cache + let memory_cache_capacity = + max_server_memory_usage * config.table_data_deserialized_memory_ratio / 100; let table_column_array_cache = Self::new_in_memory_cache( - config.table_data_deserialized_data_bytes, + memory_cache_capacity, ColumnArrayMeter, "table_data_column_array", ); From 673786b92b3aa3039ff21c1dbc15fba5471c9548 Mon Sep 17 00:00:00 2001 From: chienguo <50683886+chienguo@users.noreply.github.com> Date: Mon, 11 Mar 2024 11:25:41 +0800 Subject: [PATCH 2/4] Update src/query/config/src/config.rs Co-authored-by: dantengsky --- src/query/config/src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index ed9dee21b751..80c8b5d5acef 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -2805,7 +2805,7 @@ pub struct CacheConfig { /// Max percentage of in memory table column object cache relative to whole memory. By default it is 0 (disabled) /// - /// CAUTION: The cached items are deserialized table column objects, ma take a lot of memory. + /// CAUTION: The cached items are deserialized table column objects, may take a lot of memory. /// /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, /// and the access pattern will benefit from caching, consider enabled this cache. From d0cf90a15ff523aaa556aab0fbf5941f35908da4 Mon Sep 17 00:00:00 2001 From: chienguo <50683886+chienguo@users.noreply.github.com> Date: Mon, 11 Mar 2024 11:25:50 +0800 Subject: [PATCH 3/4] Update src/query/config/src/inner.rs Co-authored-by: dantengsky --- src/query/config/src/inner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index ac1952cbca12..4bf4e0e30983 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -559,7 +559,7 @@ pub struct CacheConfig { /// Max percentage of in memory table column object cache relative to whole memory. By default it is 0 (disabled) /// - /// CAUTION: The cached items are deserialized table column objects, ma take a lot of memory. + /// CAUTION: The cached items are deserialized table column objects, may take a lot of memory. /// /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, /// and the access pattern will benefit from caching, consider enabled this cache. From 0eae24766d78b6a3f84dddb2aa5b6fea8c6d3814 Mon Sep 17 00:00:00 2001 From: chienguo <2778180460@qq.com> Date: Mon, 11 Mar 2024 15:05:14 +0800 Subject: [PATCH 4/4] add a new ratio config for data cache --- src/query/config/src/config.rs | 17 ++++++++++++++++- src/query/config/src/inner.rs | 11 ++++++++++- src/query/service/tests/it/storages/fuse/io.rs | 2 +- .../storages/testdata/configs_table_basic.txt | 1 + .../common/cache_manager/src/cache_manager.rs | 7 +++++-- 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 80c8b5d5acef..24f1c3781b43 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -2803,9 +2803,22 @@ pub struct CacheConfig { #[serde(rename = "disk")] pub disk_cache_config: DiskCacheConfig, + /// Max size of in memory table column object cache. By default it is 0 (disabled) + /// + /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. + /// + /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, + /// and the access pattern will benefit from caching, consider enabled this cache. + #[clap( + long = "cache-table-data-deserialized-data-bytes", + value_name = "VALUE", + default_value = "0" + )] + pub table_data_deserialized_data_bytes: u64, + /// Max percentage of in memory table column object cache relative to whole memory. By default it is 0 (disabled) /// - /// CAUTION: The cached items are deserialized table column objects, may take a lot of memory. + /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. /// /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, /// and the access pattern will benefit from caching, consider enabled this cache. @@ -2946,6 +2959,7 @@ mod cache_config_converters { table_data_cache_population_queue_size: value .table_data_cache_population_queue_size, disk_cache_config: value.disk_cache_config.try_into()?, + table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio, }) } @@ -2967,6 +2981,7 @@ mod cache_config_converters { table_data_cache_population_queue_size: value .table_data_cache_population_queue_size, disk_cache_config: value.disk_cache_config.into(), + table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio, table_meta_segment_count: None, } diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 4bf4e0e30983..13c911035d33 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -557,9 +557,17 @@ pub struct CacheConfig { /// Storage that hold the raw data caches pub disk_cache_config: DiskCacheConfig, + /// Max size of in memory table column object cache. By default it is 0 (disabled) + /// + /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. + /// + /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, + /// and the access pattern will benefit from caching, consider enabled this cache. + pub table_data_deserialized_data_bytes: u64, + /// Max percentage of in memory table column object cache relative to whole memory. By default it is 0 (disabled) /// - /// CAUTION: The cached items are deserialized table column objects, may take a lot of memory. + /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. /// /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, /// and the access pattern will benefit from caching, consider enabled this cache. @@ -621,6 +629,7 @@ impl Default for CacheConfig { data_cache_storage: Default::default(), table_data_cache_population_queue_size: 0, disk_cache_config: Default::default(), + table_data_deserialized_data_bytes: 0, table_data_deserialized_memory_ratio: 0, } } diff --git a/src/query/service/tests/it/storages/fuse/io.rs b/src/query/service/tests/it/storages/fuse/io.rs index 66ae39b93dfc..aa70923b6c5c 100644 --- a/src/query/service/tests/it/storages/fuse/io.rs +++ b/src/query/service/tests/it/storages/fuse/io.rs @@ -48,7 +48,7 @@ async fn test_array_cache_of_nested_column_iusse_14502() -> Result<()> { let mut config = InnerConfig::default(); // memory cache is not enabled by default, let's enable it - config.cache.table_data_deserialized_memory_ratio = 10; + config.cache.table_data_deserialized_data_bytes = 1024 * 1024 * 10; let fixture = TestFixture::setup_with_config(&config).await?; fixture.create_default_database().await?; diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 89c552783133..950377c1564c 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -13,6 +13,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | 'cache' | 'table_bloom_index_filter_size' | '2147483648' | '' | | 'cache' | 'table_bloom_index_meta_count' | '3000' | '' | | 'cache' | 'table_data_cache_population_queue_size' | '0' | '' | +| 'cache' | 'table_data_deserialized_data_bytes' | '0' | '' | | 'cache' | 'table_data_deserialized_memory_ratio' | '0' | '' | | 'cache' | 'table_meta_segment_bytes' | '1073741824' | '' | | 'cache' | 'table_meta_segment_count' | 'null' | '' | diff --git a/src/query/storages/common/cache_manager/src/cache_manager.rs b/src/query/storages/common/cache_manager/src/cache_manager.rs index 2b7bebf0e59e..f74ee7f4fb5e 100644 --- a/src/query/storages/common/cache_manager/src/cache_manager.rs +++ b/src/query/storages/common/cache_manager/src/cache_manager.rs @@ -98,8 +98,11 @@ impl CacheManager { }; // setup in-memory table column cache - let memory_cache_capacity = - max_server_memory_usage * config.table_data_deserialized_memory_ratio / 100; + let memory_cache_capacity = if config.table_data_deserialized_data_bytes != 0 { + config.table_data_deserialized_data_bytes + } else { + max_server_memory_usage * config.table_data_deserialized_memory_ratio / 100 + }; let table_column_array_cache = Self::new_in_memory_cache( memory_cache_capacity, ColumnArrayMeter,