diff --git a/Cargo.lock b/Cargo.lock index 59894b0562760..34072f72526e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5188,6 +5188,7 @@ dependencies = [ "rand 0.8.5", "risingwave_common", "risingwave_pb", + "risingwave_rpc_client", "risingwave_storage", "rust_decimal", "serde", @@ -5233,6 +5234,7 @@ dependencies = [ "risingwave_pb", "risingwave_rpc_client", "risingwave_storage", + "risingwave_stream", "serde_json", "size", "tracing", diff --git a/docs/relational_table/relational-table-schema.md b/docs/relational_table/relational-table-schema.md index 2ae01a36f08e2..e654d5c0ea48d 100644 --- a/docs/relational_table/relational-table-schema.md +++ b/docs/relational_table/relational-table-schema.md @@ -7,7 +7,7 @@ In this doc, we will take HashAgg with extreme state (`max`, `min`) or value sta [Code](https://github.com/risingwavelabs/risingwave/blob/7f9ad2240712aa0cfe3edffb4535d43b42f32cc5/src/frontend/src/optimizer/plan_node/logical_agg.rs#L144) ## Table id -For all relational table states, the keyspace must start with `table_id`. This is a globally unique id allocated in meta. Meta is responsible for traversing the Plan Tree and calculating the total number of Relational Tables needed. For example, the Hash Join Operator needs 2, one for the left table and one for the right table. The number of tables needed for Agg depends on the number of agg calls. +`table_id` is a globally unique id allocated in meta for each relational table object. Meta is responsible for traversing the Plan Tree and calculating the total number of Relational Tables needed. For example, the Hash Join Operator needs 2, one for the left table and one for the right table. The number of tables needed for Agg depends on the number of agg calls. ## Value State (Sum, Count) Query example: diff --git a/docs/state-store-overview.md b/docs/state-store-overview.md index 0184613c30ef4..e3efdb9e060ed 100644 --- a/docs/state-store-overview.md +++ b/docs/state-store-overview.md @@ -28,11 +28,11 @@ Hummock consists of a manager service on the meta node, clients on worker nodes The streaming state store has distinguished workload characteristics. -* Every streaming executor will only ***read and write its own portion of data***, which are multiple consecutive non-overlapping ranges of keys (we call it ***key space***). +* Every streaming executor will only ***read and write its own portion of data***. * Data (generally) ***won’t be shared across nodes***, so every worker node will only read and write its own data. Therefore, every Hummock API, like `get` or `scan`, only guarantees that writes on one node can be immediately read from the same node. In some cases, if we want to read data written from other nodes, we will need to ***wait for the epoch***. * Streaming data are ***committed in serial***. Based on the [barrier-based checkpoint algorithm](https://en.wikipedia.org/wiki/Chandy%E2%80%93Lamport_algorithm), the states are persisted epoch by epoch. We can tailor the write path specifically for the epoch-based checkpoint workload. -This leads to the design of Hummock, the cloud-native KV-based streaming state store. We’ll explain concepts like “epoch”, “key space” and “barrier” in the following chapters. +This leads to the design of Hummock, the cloud-native KV-based streaming state store. We’ll explain concepts like “epoch” and “barrier” in the following chapters. ## The Hummock User API @@ -119,8 +119,8 @@ For `scan`, we simply select by overlapping key range. 
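To make the `table_id` allocation described in the relational-table doc above concrete: because every relational table gets a globally unique id, prefixing state keys with that id is enough to keep each operator's rows disjoint. A minimal sketch, assuming a simple big-endian prefix encoding (not RisingWave's actual key layout):

```rust
/// Illustrative only: namespace state keys by the owning table's id.
fn state_key(table_id: u32, user_key: &[u8]) -> Vec<u8> {
    // Big-endian keeps all keys of one table contiguous under byte-wise sort.
    let mut key = table_id.to_be_bytes().to_vec();
    key.extend_from_slice(user_key);
    key
}

fn main() {
    // E.g. the two internal tables of a hash join can store the same user key
    // without colliding, because their table ids differ.
    assert_ne!(state_key(1, b"join_key"), state_key(2, b"join_key"));
}
```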
For point get, we will fi Hummock implements the following iterators: - `BlockIterator`: iterates a block of an SSTable. - `SSTableIterator`: iterates an SSTable. -- `ConcatIterator`: iterates SSTables with non-overlapping keyspaces. -- `MergeIterator`: iterates SSTables with overlapping keyspaces. +- `ConcatIterator`: iterates SSTables with non-overlapping key ranges. +- `MergeIterator`: iterates SSTables with overlapping key ranges. - `UserIterator`: wraps internal iterators and outputs user key-value with epoch <= read epoch. [iterators source code](https://github.com/risingwavelabs/risingwave/tree/main/src/storage/src/hummock/iterator) diff --git a/e2e_test/nexmark/create_sources.slt.part b/e2e_test/nexmark/create_sources.slt.part index 6b5c6cb26b608..6c651822bc613 100644 --- a/e2e_test/nexmark/create_sources.slt.part +++ b/e2e_test/nexmark/create_sources.slt.part @@ -1,5 +1,5 @@ statement ok -CREATE SOURCE person ( +CREATE MATERIALIZED SOURCE person ( "id" BIGINT, "name" VARCHAR, "email_address" VARCHAR, @@ -17,7 +17,7 @@ CREATE SOURCE person ( ) ROW FORMAT JSON; statement ok -CREATE SOURCE auction ( +CREATE MATERIALIZED SOURCE auction ( "id" BIGINT, "item_name" VARCHAR, "description" VARCHAR, diff --git a/proto/cdc_service.proto b/proto/cdc_service.proto new file mode 100644 index 0000000000000..98f82260daea8 --- /dev/null +++ b/proto/cdc_service.proto @@ -0,0 +1,47 @@ +syntax = "proto3"; + +package cdc_service; + +option optimize_for = SPEED; + +// Notes: This proto needs to be self-contained +message Status { + enum Code { + UNSPECIFIED = 0; + OK = 1; + } + Code code = 1; + string message = 2; +} + +message DbConnectorProperties { + string database_host = 1; + string database_port = 2; + string database_user = 3; + string database_password = 4; + string database_name = 5; + string table_name = 6; + string partition = 7; + string start_offset = 8; + bool include_schema_events = 9; +} + +message CdcMessage { + string payload = 1; + string partition = 2; + string offset = 3; +} + +message GetEventStreamRequest { + uint64 source_id = 1; + DbConnectorProperties properties = 2; +} + +message GetEventStreamResponse { + uint64 source_id = 1; + repeated CdcMessage events = 2; +} + +service CdcService { + rpc GetEventStream(GetEventStreamRequest) returns (stream GetEventStreamResponse); +} diff --git a/proto/hummock.proto b/proto/hummock.proto index 62986f3941c34..fb689ac59251c 100644 --- a/proto/hummock.proto +++ b/proto/hummock.proto @@ -197,9 +197,12 @@ message UnpinSnapshotBeforeResponse { common.Status status = 1; } +// When `right_exclusive=false`, it represents [left, right], of which both boundaries are inclusive. When `right_exclusive=true`, +// it represents [left, right), of which the right boundary is exclusive. message KeyRange { bytes left = 1; bytes right = 2; + bool right_exclusive = 3; } message TableOption { diff --git a/risedev.yml b/risedev.yml index 5ea94f0323cb6..0591a101e9da3 100644 --- a/risedev.yml +++ b/risedev.yml @@ -543,6 +543,9 @@ template: # If `enable-tiered-cache` is true, hummock will use data directory as file cache.
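The `KeyRange.right_exclusive` semantics documented in the hummock.proto hunk above are easy to get wrong, so here is a small self-contained sketch; this `KeyRange` is a hand-written stand-in for the generated proto type, not the real API:

```rust
/// Stand-in for the proto message, for illustration only.
struct KeyRange {
    left: Vec<u8>,
    right: Vec<u8>,
    right_exclusive: bool,
}

impl KeyRange {
    /// `[left, right]` when `right_exclusive == false`, `[left, right)` otherwise.
    fn contains(&self, key: &[u8]) -> bool {
        if key < self.left.as_slice() {
            return false;
        }
        if self.right_exclusive {
            key < self.right.as_slice()
        } else {
            key <= self.right.as_slice()
        }
    }
}

fn main() {
    let inclusive = KeyRange { left: b"a".to_vec(), right: b"m".to_vec(), right_exclusive: false };
    let exclusive = KeyRange { left: b"a".to_vec(), right: b"m".to_vec(), right_exclusive: true };
    assert!(inclusive.contains(b"m")); // the right bound is part of [a, m]
    assert!(!exclusive.contains(b"m")); // but not of [a, m)
}
```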
enable-tiered-cache: false + # RPC endpoint for source connector node + connector-source-endpoint: "127.0.0.1:61261" + # Minio instances used by this compute node provide-minio: "minio*" diff --git a/src/batch/src/executor/row_seq_scan.rs b/src/batch/src/executor/row_seq_scan.rs index 8da79eaf297bb..92920fa429d7b 100644 --- a/src/batch/src/executor/row_seq_scan.rs +++ b/src/batch/src/executor/row_seq_scan.rs @@ -405,136 +405,3 @@ impl RowSeqScanExecutor { } } } - -#[cfg(test)] -mod tests { - use std::collections::Bound::Unbounded; - - use futures::StreamExt; - use risingwave_common::catalog::{ColumnDesc, ColumnId, TableId, TableOption}; - use risingwave_common::row::Row; - use risingwave_common::types::DataType; - use risingwave_common::util::epoch::EpochPair; - use risingwave_common::util::sort_util::OrderType; - use risingwave_storage::memory::MemoryStateStore; - use risingwave_storage::table::batch_table::storage_table::StorageTable; - use risingwave_storage::table::streaming_table::state_table::StateTable; - use risingwave_storage::table::Distribution; - - use crate::executor::{Executor, RowSeqScanExecutor, ScanRange}; - - #[tokio::test] - async fn test_row_seq_scan() { - let state_store = MemoryStateStore::new(); - let column_ids = vec![ColumnId::from(0), ColumnId::from(1), ColumnId::from(2)]; - let column_descs = vec![ - ColumnDesc::unnamed(column_ids[0], DataType::Int32), - ColumnDesc::unnamed(column_ids[1], DataType::Int32), - ColumnDesc::unnamed(column_ids[2], DataType::Int32), - ]; - let pk_indices = vec![0_usize, 1_usize]; - let order_types = vec![OrderType::Ascending, OrderType::Descending]; - let mut state = StateTable::new_without_distribution( - state_store.clone(), - TableId::from(0x42), - column_descs.clone(), - order_types.clone(), - pk_indices.clone(), - ) - .await; - let column_ids_partial = vec![ColumnId::from(1), ColumnId::from(2)]; - let value_indices: Vec<usize> = vec![0, 1, 2]; - let table = StorageTable::new_partial( - state_store.clone(), - TableId::from(0x42), - column_descs.clone(), - column_ids_partial, - order_types.clone(), - pk_indices, - Distribution::fallback(), - TableOption::default(), - value_indices, - ); - let epoch = EpochPair::new_test_epoch(1); - state.init_epoch(epoch); - epoch.inc(); - - state.insert(Row(vec![ - Some(1_i32.into()), - Some(11_i32.into()), - Some(111_i32.into()), - ])); - state.insert(Row(vec![ - Some(2_i32.into()), - Some(22_i32.into()), - Some(222_i32.into()), - ])); - state.insert(Row(vec![ - Some(3_i32.into()), - Some(33_i32.into()), - Some(333_i32.into()), - ])); - - state.commit_for_test(epoch).await.unwrap(); - - let scan_range1 = ScanRange { - pk_prefix: Row(vec![Some(1_i32.into()), Some(11_i32.into())]), - next_col_bounds: (Unbounded, Unbounded), - }; - - let scan_range2 = ScanRange { - pk_prefix: Row(vec![Some(2_i32.into()), Some(22_i32.into())]), - next_col_bounds: (Unbounded, Unbounded), - }; - - let row_seq_scan_exec = RowSeqScanExecutor::new( - table.clone(), - vec![scan_range1, scan_range2], - epoch.curr, - 1024, - "row_seq_scan_exec".to_string(), - None, - ); - - let point_get_row_seq_scan_exec = Box::new(row_seq_scan_exec); - - let mut stream = point_get_row_seq_scan_exec.execute(); - let chunk = stream.next().await.unwrap().unwrap(); - - assert_eq!( - chunk.row_at(0).0.to_owned_row(), - Row(vec![Some(11_i32.into()), Some(111_i32.into())]) - ); - assert_eq!( - chunk.row_at(1).0.to_owned_row(), - Row(vec![Some(22_i32.into()), Some(222_i32.into())]) - ); - - let full_row_seq_scan_exec = RowSeqScanExecutor::new( - table, -
vec![ScanRange::full()], - epoch.curr, - 1024, - "row_seq_scan_exec".to_string(), - None, - ); - - let row_seq_scan_exec = Box::new(full_row_seq_scan_exec); - - let mut stream = row_seq_scan_exec.execute(); - let chunk = stream.next().await.unwrap().unwrap(); - - assert_eq!( - chunk.row_at(0).0.to_owned_row(), - Row(vec![Some(11_i32.into()), Some(111_i32.into())]) - ); - assert_eq!( - chunk.row_at(1).0.to_owned_row(), - Row(vec![Some(22_i32.into()), Some(222_i32.into())]) - ); - assert_eq!( - chunk.row_at(2).0.to_owned_row(), - Row(vec![Some(33_i32.into()), Some(333_i32.into())]) - ); - } -} diff --git a/src/batch/src/task/task_execution.rs b/src/batch/src/task/task_execution.rs index dc8206ebc4b81..c1ea463c3d47c 100644 --- a/src/batch/src/task/task_execution.rs +++ b/src/batch/src/task/task_execution.rs @@ -319,8 +319,7 @@ impl BatchTaskExecution { }) .await { - // Prints the entire backtrace of error. - error!("Execution failed [{:?}]: {:?}", &task_id, &e); + error!("Execution failed [{:?}]: {}", &task_id, e); let err_str = e.to_string(); *failure.lock() = Some(e); if let Err(_e) = t_1 diff --git a/src/compute/src/lib.rs b/src/compute/src/lib.rs index fe2fe60608d60..4545cd5f4a110 100644 --- a/src/compute/src/lib.rs +++ b/src/compute/src/lib.rs @@ -78,6 +78,10 @@ pub struct ComputeNodeOpts { /// Left empty to disable file cache. #[clap(long, default_value = "")] pub file_cache_dir: String, + + /// Endpoint of the connector node + #[clap(long, default_value = "127.0.0.1:60061")] + pub connector_source_endpoint: String, } use std::future::Future; diff --git a/src/compute/src/server.rs b/src/compute/src/server.rs index 19c479f2cacde..d120b61ef6a2d 100644 --- a/src/compute/src/server.rs +++ b/src/compute/src/server.rs @@ -225,6 +225,7 @@ pub async fn compute_node_serve( let stream_env = StreamEnvironment::new( source_mgr, client_addr.clone(), + opts.connector_source_endpoint, stream_config, worker_id, state_store, diff --git a/src/compute/tests/integration_tests.rs b/src/compute/tests/integration_tests.rs index c3ac691371bd5..528d2713c75a8 100644 --- a/src/compute/tests/integration_tests.rs +++ b/src/compute/tests/integration_tests.rs @@ -39,7 +39,7 @@ use risingwave_source::table_test_utils::create_table_source_desc_builder; use risingwave_source::{TableSourceManager, TableSourceManagerRef}; use risingwave_storage::memory::MemoryStateStore; use risingwave_storage::table::batch_table::storage_table::StorageTable; -use risingwave_storage::table::streaming_table::state_table::StateTable; +use risingwave_stream::common::table::state_table::StateTable; use risingwave_stream::error::StreamResult; use risingwave_stream::executor::monitor::StreamingMetrics; use risingwave_stream::executor::state_table_handler::SourceStateTableHandler; diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index cc7bb5f76a678..6a49a4e28f88a 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -46,6 +46,7 @@ rand = "0.8" rdkafka = { package = "madsim-rdkafka", version = "=0.2.8-alpha", features = ["cmake-build", "ssl-vendored", "gssapi"] } risingwave_common = { path = "../common" } risingwave_pb = { path = "../prost" } +risingwave_rpc_client = { path = "../rpc_client" } risingwave_storage = { path = "../storage" } serde = { version = "1", features = ["derive", "rc"] } serde_derive = "1" diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index 6c4b8e0c7f15d..af66a003f42b1 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ 
-29,6 +29,9 @@ use serde::{Deserialize, Serialize}; use tokio::runtime::Runtime; use tokio::sync::mpsc; +use crate::source::cdc::{ + CdcProperties, CdcSplit, CdcSplitEnumerator, CdcSplitReader, CDC_CONNECTOR, +}; use crate::source::datagen::{ DatagenProperties, DatagenSplit, DatagenSplitEnumerator, DatagenSplitReader, DATAGEN_CONNECTOR, }; @@ -91,6 +94,7 @@ pub enum SplitImpl { Kinesis(KinesisSplit), Nexmark(NexmarkSplit), Datagen(DatagenSplit), + Cdc(CdcSplit), } pub enum SplitReaderImpl { @@ -100,6 +104,7 @@ Nexmark(Box<NexmarkSplitReader>), Pulsar(Box<PulsarSplitReader>), Datagen(Box<DatagenSplitReader>), + Cdc(Box<CdcSplitReader>), } pub enum SplitEnumeratorImpl { @@ -108,6 +113,7 @@ Kinesis(KinesisSplitEnumerator), Nexmark(NexmarkSplitEnumerator), Datagen(DatagenSplitEnumerator), + Cdc(CdcSplitEnumerator), } #[derive(Clone, Debug, Deserialize)] @@ -118,6 +124,7 @@ pub enum ConnectorProperties { Nexmark(Box<NexmarkProperties>), Datagen(Box<DatagenProperties>), S3(Box<S3Properties>), + Cdc(Box<CdcProperties>), Dummy(Box<()>), } @@ -127,7 +134,8 @@ impl_connector_properties! { { Kinesis, KINESIS_CONNECTOR }, { Nexmark, NEXMARK_CONNECTOR }, { Datagen, DATAGEN_CONNECTOR }, - { S3, S3_CONNECTOR } + { S3, S3_CONNECTOR }, + { Cdc, CDC_CONNECTOR } } impl_split_enumerator! { @@ -135,7 +143,8 @@ { Pulsar, PulsarSplitEnumerator }, { Kinesis, KinesisSplitEnumerator }, { Nexmark, NexmarkSplitEnumerator }, - { Datagen, DatagenSplitEnumerator } + { Datagen, DatagenSplitEnumerator }, + { Cdc, CdcSplitEnumerator } } impl_split! { @@ -143,7 +152,8 @@ { Pulsar, PULSAR_CONNECTOR, PulsarSplit }, { Kinesis, KINESIS_CONNECTOR, KinesisSplit }, { Nexmark, NEXMARK_CONNECTOR, NexmarkSplit }, - { Datagen, DATAGEN_CONNECTOR, DatagenSplit } + { Datagen, DATAGEN_CONNECTOR, DatagenSplit }, + { Cdc, CDC_CONNECTOR, CdcSplit } } impl_split_reader! { @@ -152,6 +162,7 @@ { Kinesis, KinesisSplitReader }, { Nexmark, NexmarkSplitReader }, { Datagen, DatagenSplitReader }, + { Cdc, CdcSplitReader }, { Dummy, DummySplitReader } } @@ -251,4 +262,30 @@ mod tests { panic!("extract nexmark config failed"); } } + + #[test] + fn test_extract_cdc_properties() { + let props: HashMap<String, String> = convert_args!(hashmap!( + "connector" => "cdc", + "database.name" => "mydb", + "database.hostname" => "127.0.0.1", + "database.port" => "3306", + "database.user" => "root", + "database.password" => "123456", + "table.name" => "products", + )); + + let props = ConnectorProperties::extract(props).unwrap(); + + if let ConnectorProperties::Cdc(props) = props { + assert_eq!(props.source_id, 0); + assert_eq!(props.start_offset, ""); + assert_eq!(props.database_name, "mydb"); + assert_eq!(props.table_name, "products"); + assert_eq!(props.database_host, "127.0.0.1"); + assert_eq!(props.database_password, "123456"); + } else { + panic!("extract cdc config failed"); + } } } diff --git a/src/connector/src/source/cdc/enumerator/mod.rs b/src/connector/src/source/cdc/enumerator/mod.rs new file mode 100644 index 0000000000000..c5dfc4a655711 --- /dev/null +++ b/src/connector/src/source/cdc/enumerator/mod.rs @@ -0,0 +1,44 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
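The `impl_connector_properties!` registration above dispatches on the `connector` key of the user-supplied properties (the CDC test sets `"connector" => "cdc"`). A hand-rolled sketch of that dispatch idea, with hypothetical names rather than the macro-generated code:

```rust
use std::collections::HashMap;

// Illustrative stand-in for the variants registered via the macro.
#[derive(Debug, PartialEq)]
enum ConnectorKind {
    Kafka,
    Datagen,
    Cdc,
}

fn extract_kind(props: &HashMap<String, String>) -> Result<ConnectorKind, String> {
    match props.get("connector").map(String::as_str) {
        Some("kafka") => Ok(ConnectorKind::Kafka),
        Some("datagen") => Ok(ConnectorKind::Datagen),
        Some("cdc") => Ok(ConnectorKind::Cdc),
        Some(other) => Err(format!("unsupported connector: {other}")),
        None => Err("missing `connector` key".to_string()),
    }
}

fn main() {
    let mut props = HashMap::new();
    props.insert("connector".to_string(), "cdc".to_string());
    assert_eq!(extract_kind(&props), Ok(ConnectorKind::Cdc));
}
```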
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use async_trait::async_trait; + +use crate::source::cdc::{CdcProperties, CdcSplit}; +use crate::source::SplitEnumerator; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct CdcSplitEnumerator { + source_id: u32, +} + +#[async_trait] +impl SplitEnumerator for CdcSplitEnumerator { + type Properties = CdcProperties; + type Split = CdcSplit; + + async fn new(props: CdcProperties) -> anyhow::Result<Self> { + Ok(Self { + source_id: props.source_id, + }) + } + + async fn list_splits(&mut self) -> anyhow::Result<Vec<CdcSplit>> { + // CDC source only supports a single split + let splits = vec![CdcSplit { + source_id: self.source_id, + start_offset: None, + }]; + Ok(splits) + } +} diff --git a/src/connector/src/source/cdc/mod.rs b/src/connector/src/source/cdc/mod.rs new file mode 100644 index 0000000000000..dfdb39fae2b2a --- /dev/null +++ b/src/connector/src/source/cdc/mod.rs @@ -0,0 +1,48 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod enumerator; +pub mod source; +pub mod split; + +pub use enumerator::*; +use serde::Deserialize; +pub use source::*; +pub use split::*; + +pub const CDC_CONNECTOR: &str = "cdc"; + +#[derive(Clone, Debug, Deserialize)] +pub struct CdcProperties { + #[serde(default)] + pub connector_node_addr: String, + #[serde(default)] + pub source_id: u32, + #[serde(default)] + pub start_offset: String, + #[serde(default)] + pub partition: String, + #[serde(rename = "database.name")] + pub database_name: String, + #[serde(rename = "table.name")] + pub table_name: String, + #[serde(rename = "database.hostname")] + pub database_host: String, + #[serde(rename = "database.port")] + pub database_port: String, + #[serde(rename = "database.user")] + pub database_user: String, + #[serde(rename = "database.password")] + pub database_password: String, +} diff --git a/src/connector/src/source/cdc/source/message.rs b/src/connector/src/source/cdc/source/message.rs new file mode 100644 index 0000000000000..a039c9c78f407 --- /dev/null +++ b/src/connector/src/source/cdc/source/message.rs @@ -0,0 +1,28 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
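`CdcProperties` above relies on serde's `rename` attribute so that dotted config keys such as `database.name` land on ordinary struct fields, while `#[serde(default)]` lets internal fields stay absent in user config. A self-contained sketch of the same pattern; `DemoCdcProps` is a stand-in, not the real struct:

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct DemoCdcProps {
    #[serde(rename = "database.name")]
    database_name: String,
    #[serde(rename = "table.name")]
    table_name: String,
    // Missing in the input, filled with String::default() ("").
    #[serde(default)]
    start_offset: String,
}

fn main() -> Result<(), serde_json::Error> {
    let raw = r#"{ "database.name": "mydb", "table.name": "products" }"#;
    let props: DemoCdcProps = serde_json::from_str(raw)?;
    assert_eq!(props.database_name, "mydb");
    assert_eq!(props.table_name, "products");
    assert_eq!(props.start_offset, "");
    Ok(())
}
```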
+// See the License for the specific language governing permissions and +// limitations under the License. + +use bytes::Bytes; +use risingwave_pb::cdc_service::CdcMessage; + +use crate::source::base::SourceMessage; + +impl From<CdcMessage> for SourceMessage { + fn from(message: CdcMessage) -> Self { + SourceMessage { + payload: Some(Bytes::from(message.payload)), + offset: message.offset, + split_id: message.partition.into(), + } + } +} diff --git a/src/connector/src/source/cdc/source/mod.rs b/src/connector/src/source/cdc/source/mod.rs new file mode 100644 index 0000000000000..c4d0b0e36debc --- /dev/null +++ b/src/connector/src/source/cdc/source/mod.rs @@ -0,0 +1,20 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod message; +mod reader; + +pub use reader::*; + +pub use crate::source::cdc::split::*; diff --git a/src/connector/src/source/cdc/source/reader.rs b/src/connector/src/source/cdc/source/reader.rs new file mode 100644 index 0000000000000..f264055cb8ced --- /dev/null +++ b/src/connector/src/source/cdc/source/reader.rs @@ -0,0 +1,110 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
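The split reader that follows turns each streaming `GetEventStreamResponse` into a batch of `SourceMessage`s and silently skips empty responses. A rough standalone sketch of that batch-and-skip pattern, using stand-in data instead of the real RPC types:

```rust
use futures::stream::{self, StreamExt};

#[tokio::main]
async fn main() {
    // Stand-ins for the event batches carried by successive responses.
    let responses = vec![vec!["ev1", "ev2"], vec![], vec!["ev3"]];
    let mut batches = stream::iter(responses)
        // Empty responses are dropped rather than forwarded downstream.
        .filter(|events| futures::future::ready(!events.is_empty()));
    while let Some(batch) = batches.next().await {
        println!("yielding a batch of {} events", batch.len());
    }
}
```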
+ +use std::str::FromStr; + +use anyhow::{anyhow, Result}; +use async_trait::async_trait; +use futures::pin_mut; +use futures_async_stream::try_stream; +use itertools::Itertools; +use risingwave_common::util::addr::HostAddr; +use risingwave_pb::cdc_service::{DbConnectorProperties, GetEventStreamResponse}; +use risingwave_rpc_client::CdcClient; + +use crate::source::base::{SourceMessage, SplitReader}; +use crate::source::cdc::CdcProperties; +use crate::source::{BoxSourceStream, Column, ConnectorState, SplitImpl}; + +pub struct CdcSplitReader { + source_id: u64, + props: CdcProperties, +} + +#[async_trait] +impl SplitReader for CdcSplitReader { + type Properties = CdcProperties; + + async fn new( + props: CdcProperties, + state: ConnectorState, + _columns: Option<Vec<Column>>, + ) -> Result<Self> { + if let Some(splits) = state { + let split = splits + .into_iter() + .exactly_one() + .map_err(|e| anyhow!("failed to create cdc split reader: {e}"))?; + + if let SplitImpl::Cdc(cdc_split) = split { + return Ok(Self { + source_id: cdc_split.source_id as u64, + props, + }); + } + } + Err(anyhow!("failed to create cdc split reader: invalid state")) + } + + fn into_stream(self) -> BoxSourceStream { + self.into_stream() + } +} + +impl CdcSplitReader { + #[try_stream(boxed, ok = Vec<SourceMessage>, error = anyhow::Error)] + pub async fn into_stream(self) { + let props = &self.props; + let cdc_client = CdcClient::new(HostAddr::from_str(&props.connector_node_addr)?).await?; + let cdc_stream = cdc_client + .get_event_stream( + self.source_id, + DbConnectorProperties { + database_host: props.database_host.clone(), + database_port: props.database_port.clone(), + database_user: props.database_user.clone(), + database_password: props.database_password.clone(), + database_name: props.database_name.clone(), + table_name: props.table_name.clone(), + partition: props.partition.clone(), + start_offset: props.start_offset.clone(), + include_schema_events: false, + }, + ) + .await?; + pin_mut!(cdc_stream); + #[for_await] + for event_res in cdc_stream { + match event_res { + Ok(GetEventStreamResponse { events, .. }) => { + if events.is_empty() { + continue; + } + let mut msgs = Vec::with_capacity(events.len()); + for event in events { + msgs.push(SourceMessage::from(event)); + } + yield msgs; + } + Err(e) => { + return Err(anyhow!( + "Cdc service error: code {}, msg {}", + e.code(), + e.message() + )) + } + } + } + } +} diff --git a/src/connector/src/source/cdc/split.rs b/src/connector/src/source/cdc/split.rs new file mode 100644 index 0000000000000..98b2bd4a3f774 --- /dev/null +++ b/src/connector/src/source/cdc/split.rs @@ -0,0 +1,55 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use anyhow::anyhow; +use bytes::Bytes; +use serde::{Deserialize, Serialize}; + +use crate::source::{SplitId, SplitMetaData}; + +/// The states of a CDC split, which will be persisted to checkpoint. +/// The offset will be updated when a new chunk is received; see `StreamChunkWithState`.
+/// A CDC source only has a single split. +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)] +pub struct CdcSplit { + pub source_id: u32, + pub start_offset: Option<String>, +} + +impl SplitMetaData for CdcSplit { + fn id(&self) -> SplitId { + format!("{}", self.source_id).into() + } + + fn encode_to_bytes(&self) -> Bytes { + Bytes::from(serde_json::to_string(self).unwrap()) + } + + fn restore_from_bytes(bytes: &[u8]) -> anyhow::Result<Self> { + serde_json::from_slice(bytes).map_err(|e| anyhow!(e)) + } +} + +impl CdcSplit { + pub fn new(source_id: u32, start_offset: String) -> CdcSplit { + Self { + source_id, + start_offset: Some(start_offset), + } + } + + pub fn copy_with_offset(&self, start_offset: String) -> Self { + Self::new(self.source_id, start_offset) + } +} diff --git a/src/connector/src/source/mod.rs b/src/connector/src/source/mod.rs index 60d01b9ff4c17..acf5f2fee306a 100644 --- a/src/connector/src/source/mod.rs +++ b/src/connector/src/source/mod.rs @@ -13,6 +13,7 @@ // limitations under the License. pub mod base; +pub mod cdc; pub mod datagen; pub mod dummy_connector; pub mod filesystem; diff --git a/src/ctl/Cargo.toml b/src/ctl/Cargo.toml index feaffa23f08f7..df62662857ae1 100644 --- a/src/ctl/Cargo.toml +++ b/src/ctl/Cargo.toml @@ -25,6 +25,7 @@ risingwave_object_store = { path = "../object_store" } risingwave_pb = { path = "../prost" } risingwave_rpc_client = { path = "../rpc_client" } risingwave_storage = { path = "../storage" } +risingwave_stream = { path = "../stream" } serde_json = "1" size = "0.4" tokio = { version = "0.2", package = "madsim-tokio", features = [ diff --git a/src/ctl/src/cmd_impl/table/scan.rs b/src/ctl/src/cmd_impl/table/scan.rs index 79eb463041ac5..305e103c87d8b 100644 --- a/src/ctl/src/cmd_impl/table/scan.rs +++ b/src/ctl/src/cmd_impl/table/scan.rs @@ -23,9 +23,9 @@ use risingwave_rpc_client::MetaClient; use risingwave_storage::hummock::HummockStorage; use risingwave_storage::monitor::MonitoredStateStore; use risingwave_storage::table::batch_table::storage_table::StorageTable; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::table::Distribution; use risingwave_storage::StateStore; +use risingwave_stream::common::table::state_table::StateTable; use crate::common::HummockServiceOpts; diff --git a/src/frontend/planner_test/tests/testdata/union.yaml b/src/frontend/planner_test/tests/testdata/union.yaml index 5557b581bd432..f73a546ea001f 100644 --- a/src/frontend/planner_test/tests/testdata/union.yaml +++ b/src/frontend/planner_test/tests/testdata/union.yaml @@ -13,6 +13,11 @@ create table t1 (a int, b numeric, c bigint); create table t2 (a int, b numeric, c bigint); select * from t1 union select * from t2; + optimized_logical_plan: | + LogicalAgg { group_key: [t1.a, t1.b, t1.c], aggs: [] } + └─LogicalUnion { all: true } + ├─LogicalScan { table: t1, columns: [t1.a, t1.b, t1.c] } + └─LogicalScan { table: t2, columns: [t2.a, t2.b, t2.c] } batch_plan: | BatchExchange { order: [], dist: Single } └─BatchHashAgg { group_key: [t1.a, t1.b, t1.c], aggs: [] } @@ -26,6 +31,13 @@ create table t1 (a int, b numeric, c bigint); create table t2 (a int, b numeric, c bigint); (select * from t1 limit 1) union (select * from t2 limit 1); + optimized_logical_plan: | + LogicalAgg { group_key: [t1.a, t1.b, t1.c], aggs: [] } + └─LogicalUnion { all: true } + ├─LogicalLimit { limit: 1, offset: 0 } + | └─LogicalScan { table: t1, columns: [t1.a, t1.b, t1.c] } + └─LogicalLimit { limit: 1, offset: 0 } + └─LogicalScan { table: t2, columns: 
[t2.a, t2.b, t2.c] } batch_plan: | BatchExchange { order: [], dist: Single } └─BatchHashAgg { group_key: [t1.a, t1.b, t1.c], aggs: [] } @@ -43,6 +55,13 @@ create table t1 (a int, b numeric, c bigint); create table t2 (a int, b numeric, c bigint); select a from ((select * from t1 limit 1) union (select * from t2 limit 1)) T; + optimized_logical_plan: | + LogicalAgg { group_key: [t1.a], aggs: [] } + └─LogicalUnion { all: true } + ├─LogicalLimit { limit: 1, offset: 0 } + | └─LogicalScan { table: t1, columns: [t1.a] } + └─LogicalLimit { limit: 1, offset: 0 } + └─LogicalScan { table: t2, columns: [t2.a] } batch_plan: | BatchExchange { order: [], dist: Single } └─BatchHashAgg { group_key: [t1.a], aggs: [] } @@ -58,6 +77,12 @@ └─BatchScan { table: t2, columns: [t2.a], distribution: SomeShard } - sql: | select 1 union all select 1 + optimized_logical_plan: | + LogicalUnion { all: true } + ├─LogicalProject { exprs: [1:Int32] } + | └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } + └─LogicalProject { exprs: [1:Int32] } + └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | BatchUnion { all: true } ├─BatchProject { exprs: [1:Int32] } diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index f3a3e61ec607b..9a09b354963ec 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use itertools::Itertools; use pgwire::pg_response::{PgResponse, StatementType}; -use risingwave_common::error::ErrorCode::ProtocolError; +use risingwave_common::error::ErrorCode::{self, ProtocolError}; use risingwave_common::error::{Result, RwError}; use risingwave_pb::catalog::source::Info; use risingwave_pb::catalog::{ @@ -116,7 +116,12 @@ pub async fn handle_create_source( let (column_descs, pk_column_id_from_columns) = bind_sql_columns(stmt.columns)?; let (mut columns, pk_column_ids, row_id_index) = bind_sql_table_constraints(column_descs, pk_column_id_from_columns, stmt.constraints)?; - + if row_id_index.is_none() && !is_materialized { + return Err(ErrorCode::InvalidInputSyntax( + "Non-materialized sources do not support the PRIMARY KEY constraint, please use \"CREATE MATERIALIZED SOURCE\" instead".to_owned(), + ) + .into()); + } let with_properties = context.with_options.inner().clone(); const UPSTREAM_SOURCE_KEY: &str = "connector"; // confluent schema registry must be used with kafka diff --git a/src/frontend/src/optimizer/mod.rs b/src/frontend/src/optimizer/mod.rs index eda79c2e3aa2f..dd3ff22c90ca9 100644 --- a/src/frontend/src/optimizer/mod.rs +++ b/src/frontend/src/optimizer/mod.rs @@ -287,7 +287,7 @@ impl PlanRoot { plan = self.optimize_by_rules( plan, "Convert Distinct Aggregation".to_string(), - vec![DistinctAggRule::create()], + vec![UnionToDistinctRule::create(), DistinctAggRule::create()], ApplyOrder::TopDown, ); diff --git a/src/frontend/src/optimizer/plan_node/logical_union.rs b/src/frontend/src/optimizer/plan_node/logical_union.rs index c1580a13aad5b..7f87132d55bd3 100644 --- a/src/frontend/src/optimizer/plan_node/logical_union.rs +++ b/src/frontend/src/optimizer/plan_node/logical_union.rs @@ -107,7 +107,9 @@ impl ToBatch for LogicalUnion { let new_inputs: Result<Vec<PlanRef>> = self.inputs().iter().map(|input| input.to_batch()).collect(); let new_logical = Self::new(true, new_inputs?); - // convert union to union all + agg + // We still need to handle !all even if we already have `UnionToDistinctRule`, because a !all union + // can still be generated by index
selection, an optimization applied during `to_batch`. + // Convert union to union all + agg if !self.all { let batch_union = BatchUnion::new(new_logical).into(); Ok(BatchHashAgg::new(LogicalAgg::new( diff --git a/src/frontend/src/optimizer/rule/mod.rs b/src/frontend/src/optimizer/rule/mod.rs index 5af51a7d85502..78c8dc0d617e4 100644 --- a/src/frontend/src/optimizer/rule/mod.rs +++ b/src/frontend/src/optimizer/rule/mod.rs @@ -69,6 +69,8 @@ mod join_commute; mod over_agg_to_topn; pub use join_commute::*; pub use over_agg_to_topn::*; +mod union_to_distinct; +pub use union_to_distinct::*; #[macro_export] macro_rules! for_all_rules { @@ -94,6 +96,7 @@ ,{IndexSelectionRule} ,{OverAggToTopNRule} ,{JoinCommuteRule} + ,{UnionToDistinctRule} } }; } diff --git a/src/frontend/src/optimizer/rule/union_to_distinct.rs b/src/frontend/src/optimizer/rule/union_to_distinct.rs new file mode 100644 index 0000000000000..4a547f6b20f63 --- /dev/null +++ b/src/frontend/src/optimizer/rule/union_to_distinct.rs @@ -0,0 +1,44 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use itertools::Itertools; + +use super::{BoxedRule, Rule}; +use crate::optimizer::plan_node::{LogicalAgg, LogicalUnion, PlanTreeNode}; +use crate::optimizer::PlanRef; + +/// Convert union to distinct + union all +pub struct UnionToDistinctRule {} +impl Rule for UnionToDistinctRule { + fn apply(&self, plan: PlanRef) -> Option<PlanRef> { + let union: &LogicalUnion = plan.as_logical_union()?; + if !union.all() { + let union_all = LogicalUnion::create(true, union.inputs().into_iter().collect()); + let distinct = LogicalAgg::new( + vec![], + (0..union.base.schema.len()).collect_vec(), + union_all, + ); + Some(distinct.into()) + } else { + None + } + } +} + +impl UnionToDistinctRule { + pub fn create() -> BoxedRule { + Box::new(UnionToDistinctRule {}) + } +} diff --git a/src/meta/src/hummock/compaction/level_selector.rs b/src/meta/src/hummock/compaction/level_selector.rs index d4673190b5118..019c7bdf04761 100644 --- a/src/meta/src/hummock/compaction/level_selector.rs +++ b/src/meta/src/hummock/compaction/level_selector.rs @@ -385,6 +385,7 @@ pub mod tests { key_range: Some(KeyRange { left: iterator_test_key_of_epoch(table_prefix, left, epoch), right: iterator_test_key_of_epoch(table_prefix, right, epoch), + right_exclusive: false, }), file_size: (right - left + 1) as u64, table_ids: vec![], @@ -674,6 +675,7 @@ key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), level: 0, @@ -701,6 +703,7 @@ key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), level: 0, @@ -760,6 +763,7 @@ key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), level: 3, @@ -789,6 +793,7 @@ key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: 
false, }, internal_table_id: HashSet::default(), level: 4, diff --git a/src/meta/src/hummock/compaction/manual_compaction_picker.rs b/src/meta/src/hummock/compaction/manual_compaction_picker.rs index fef689078ffd9..e84c19550d082 100644 --- a/src/meta/src/hummock/compaction/manual_compaction_picker.rs +++ b/src/meta/src/hummock/compaction/manual_compaction_picker.rs @@ -400,6 +400,7 @@ pub mod tests { key_range: KeyRange { left: iterator_test_key_of_epoch(1, 0, 1), right: iterator_test_key_of_epoch(1, 201, 1), + right_exclusive: false, }, ..Default::default() }; @@ -486,6 +487,7 @@ pub mod tests { key_range: KeyRange { left: iterator_test_key_of_epoch(1, 101, 1), right: iterator_test_key_of_epoch(1, 199, 1), + right_exclusive: false, }, internal_table_id: HashSet::from([2]), }; @@ -614,6 +616,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), }; @@ -633,6 +636,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), }; @@ -674,6 +678,7 @@ pub mod tests { key_range: KeyRange { left: iterator_test_key_of_epoch(1, 0, 2), right: iterator_test_key_of_epoch(1, 200, 2), + right_exclusive: false, }, internal_table_id: HashSet::default(), }; @@ -721,6 +726,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), }; @@ -758,6 +764,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, // No matching internal table id. internal_table_id: HashSet::from([100]), @@ -777,6 +784,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, // Include all sub level's table ids internal_table_id: HashSet::from([1, 2, 3]), @@ -818,6 +826,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, // Only include bottom sub level's table id internal_table_id: HashSet::from([3]), @@ -858,6 +867,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, // Only include partial top sub level's table id, but the whole top sub level is // picked. @@ -899,6 +909,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, // Only include bottom sub level's table id internal_table_id: HashSet::from([3]), @@ -928,6 +939,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, // No matching internal table id. 
internal_table_id: HashSet::from([100]), @@ -948,6 +960,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, // Only include partial input level's table id internal_table_id: HashSet::from([1]), @@ -996,6 +1009,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), }; @@ -1039,6 +1053,7 @@ pub mod tests { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), }; diff --git a/src/meta/src/hummock/compaction/mod.rs b/src/meta/src/hummock/compaction/mod.rs index 33f8381d8313d..e8e53886e91a9 100644 --- a/src/meta/src/hummock/compaction/mod.rs +++ b/src/meta/src/hummock/compaction/mod.rs @@ -271,6 +271,7 @@ impl Default for ManualCompactionOption { key_range: KeyRange { left: vec![], right: vec![], + right_exclusive: false, }, internal_table_id: HashSet::default(), level: 1, diff --git a/src/meta/src/hummock/compaction/overlap_strategy.rs b/src/meta/src/hummock/compaction/overlap_strategy.rs index 453fda77b6cac..ca225e4bfa6d4 100644 --- a/src/meta/src/hummock/compaction/overlap_strategy.rs +++ b/src/meta/src/hummock/compaction/overlap_strategy.rs @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp; + use itertools::Itertools; -use risingwave_hummock_sdk::key::user_key; use risingwave_hummock_sdk::key_range::KeyRangeCommon; use risingwave_pb::hummock::{KeyRange, SstableInfo}; @@ -76,15 +77,19 @@ impl OverlapInfo for RangeOverlapInfo { Some(key_range) => { let mut tables = vec![]; let overlap_begin = others.partition_point(|table_status| { - user_key(&table_status.key_range.as_ref().unwrap().right) - < user_key(&key_range.left) + table_status + .key_range + .as_ref() + .unwrap() + .compare_right_with(&key_range.left) + == cmp::Ordering::Less }); if overlap_begin >= others.len() { return vec![]; } for table in &others[overlap_begin..] { - if user_key(&table.key_range.as_ref().unwrap().left) - > user_key(&key_range.right) + if key_range.compare_right_with(&table.key_range.as_ref().unwrap().left) + == cmp::Ordering::Less { break; } @@ -122,5 +127,5 @@ impl OverlapStrategy for RangeOverlapStrategy { fn check_table_overlap(key_range: &KeyRange, table: &SstableInfo) -> bool { let other = table.key_range.as_ref().unwrap(); - key_range.user_key_overlap(other) + key_range.sstable_overlap(other) } diff --git a/src/meta/src/hummock/compactor_manager.rs b/src/meta/src/hummock/compactor_manager.rs index c869f516ae045..a25ced274601a 100644 --- a/src/meta/src/hummock/compactor_manager.rs +++ b/src/meta/src/hummock/compactor_manager.rs @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; +use std::ops::Deref; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use std::time::SystemTime; use fail::fail_point; +use itertools::Itertools; use parking_lot::RwLock; use risingwave_hummock_sdk::compact::CompactorRuntimeConfig; use risingwave_hummock_sdk::{HummockCompactionTaskId, HummockContextId}; @@ -257,23 +259,64 @@ impl CompactorManager { self.task_heartbeats.write().remove(&context_id).is_some() } - pub fn get_expired_tasks(&self) -> Vec<(HummockContextId, CompactTask)> { + pub fn get_expired_tasks( + &self, + split_cancel: Vec<HummockCompactionTaskId>, + ) -> Vec<(HummockContextId, CompactTask)> { let now = SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .expect("Clock may have gone backwards") .as_secs(); - let mut cancellable_tasks = vec![]; + let cancellable_tasks; { let guard = self.task_heartbeats.read(); - for (context_id, heartbeats) in guard.iter() { + let task_heartbeats = guard.deref(); + let mut split_cancel_tasks = + Self::get_group_split_expired_tasks(task_heartbeats, &split_cancel); + for (task_id, info) in Self::get_heartbeat_expired_tasks(task_heartbeats, now) { + split_cancel_tasks.insert(task_id, info); + } + cancellable_tasks = split_cancel_tasks.into_values().collect_vec(); + } + cancellable_tasks + } + + fn get_group_split_expired_tasks( + task_heartbeats: &HashMap< + HummockContextId, + HashMap<HummockCompactionTaskId, TaskHeartbeat>, + >, + split_cancel: &[HummockCompactionTaskId], + ) -> BTreeMap<HummockCompactionTaskId, (HummockContextId, CompactTask)> { + let mut ret = BTreeMap::new(); + for (context_id, heartbeats) in task_heartbeats { + { + for TaskHeartbeat { task, .. } in heartbeats.values() { + if split_cancel.binary_search(&task.task_id).is_ok() { + ret.insert(task.get_task_id(), (*context_id, task.clone())); + } + } + } + } + ret + } + + fn get_heartbeat_expired_tasks( + task_heartbeats: &HashMap< + HummockContextId, + HashMap<HummockCompactionTaskId, TaskHeartbeat>, + >, + now: u64, + ) -> Vec<(HummockCompactionTaskId, (HummockContextId, CompactTask))> { + let mut cancellable_tasks = vec![]; + for (context_id, heartbeats) in task_heartbeats { + { + for TaskHeartbeat { + expire_at, task, .. + } in heartbeats.values() { - for TaskHeartbeat { - expire_at, task, .. - } in heartbeats.values() - { - if *expire_at < now { - cancellable_tasks.push((*context_id, task.clone())); - } + if *expire_at < now { + cancellable_tasks.push((task.get_task_id(), (*context_id, task.clone()))); } } } @@ -388,7 +431,7 @@ mod tests { // Ensure task is expired.
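Note that `get_group_split_expired_tasks` above probes `split_cancel` with `binary_search`, which is only correct on sorted input; that is why the caller in the hummock manager (further below) sorts and dedups the list before passing it in. A tiny sketch of the invariant:

```rust
fn main() {
    let mut split_cancel: Vec<u64> = vec![42, 7, 42, 13];
    // `binary_search` assumes sorted data, and `dedup` only removes
    // *adjacent* duplicates, so it has to run after the sort.
    split_cancel.sort();
    split_cancel.dedup(); // [7, 13, 42]
    assert!(split_cancel.binary_search(&13).is_ok());
    assert!(split_cancel.binary_search(&99).is_err());
}
```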
tokio::time::sleep(Duration::from_secs(2)).await; - let expired = compactor_manager.get_expired_tasks(); + let expired = compactor_manager.get_expired_tasks(vec![]); assert_eq!(expired.len(), 1); assert_eq!(expired[0].0, context_id); @@ -401,7 +444,7 @@ num_ssts_uploaded: 0, }], ); - assert_eq!(compactor_manager.get_expired_tasks().len(), 1); + assert_eq!(compactor_manager.get_expired_tasks(vec![]).len(), 1); // Mimic compaction heartbeat with invalid task id compactor_manager.update_task_heartbeats( @@ -412,7 +455,7 @@ num_ssts_uploaded: 1, }], ); - assert_eq!(compactor_manager.get_expired_tasks().len(), 1); + assert_eq!(compactor_manager.get_expired_tasks(vec![]).len(), 1); // Mimic effective compaction heartbeat compactor_manager.update_task_heartbeats( @@ -423,7 +466,7 @@ num_ssts_uploaded: 1, }], ); - assert_eq!(compactor_manager.get_expired_tasks().len(), 0); + assert_eq!(compactor_manager.get_expired_tasks(vec![]).len(), 0); assert!(compactor_manager.purge_heartbeats_for_context(context_id)); // Test add diff --git a/src/meta/src/hummock/level_handler.rs b/src/meta/src/hummock/level_handler.rs index 890e42f9e2144..781fb46d1b1d4 100644 --- a/src/meta/src/hummock/level_handler.rs +++ b/src/meta/src/hummock/level_handler.rs @@ -15,14 +15,14 @@ use std::collections::HashMap; use itertools::Itertools; -use risingwave_hummock_sdk::HummockSstableId; +use risingwave_hummock_sdk::{HummockCompactionTaskId, HummockSstableId}; use risingwave_pb::hummock::level_handler::RunningCompactTask; use risingwave_pb::hummock::{Level, SstableInfo}; #[derive(Clone, Debug, PartialEq)] pub struct LevelHandler { level: u32, - compacting_files: HashMap<HummockSstableId, u64>, + compacting_files: HashMap<HummockSstableId, HummockCompactionTaskId>, pending_tasks: Vec<RunningCompactTask>, } @@ -55,6 +55,13 @@ impl LevelHandler { self.compacting_files.contains_key(sst_id) } + pub fn pending_task_id_by_sst( + &self, + sst_id: &HummockSstableId, + ) -> Option<HummockCompactionTaskId> { + self.compacting_files.get(sst_id).cloned() + } + pub fn is_level_pending_compact(&self, level: &Level) -> bool { level .table_infos diff --git a/src/meta/src/hummock/manager/compaction_group_manager.rs b/src/meta/src/hummock/manager/compaction_group_manager.rs index d290aa5a4969a..223763be06d2d 100644 --- a/src/meta/src/hummock/manager/compaction_group_manager.rs +++ b/src/meta/src/hummock/manager/compaction_group_manager.rs @@ -394,6 +394,7 @@ impl CompactionGroupManagerInner { if compaction_group_id_set.len() > old_id_cnt { hummock_manager .sync_group( + None, versioning, &Self::gen_compaction_group_snapshot( &compaction_groups, @@ -450,6 +451,7 @@ let (hummock_manager, versioning) = empty_group_vacuum_context.unwrap(); hummock_manager .sync_group( + None, versioning, &Self::gen_compaction_group_snapshot( &compaction_groups, @@ -519,6 +521,7 @@ if compaction_group_id_set.len() > old_id_cnt { hummock_manager .sync_group( + None, versioning, &Self::gen_compaction_group_snapshot( &compaction_groups, @@ -567,6 +570,7 @@ let mut trx_wrapper = Some(trx); hummock_manager .sync_group( + None, versioning, &Self::gen_compaction_group_snapshot( &compaction_groups, @@ -613,6 +617,7 @@ let mut trx_wrapper = Some(trx); hummock_manager .sync_group( + None, versioning, &Self::gen_compaction_group_snapshot(&compaction_groups, compaction_group_ids), &mut trx_wrapper, diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index 
61e8afed8b5c9..042f45b08d5b5 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -105,6 +105,7 @@ pub struct HummockManager { // CompactionGroupId compaction_request_channel: parking_lot::RwLock<Option<CompactionRequestChannelRef>>, compaction_resume_notifier: parking_lot::RwLock<Option<Arc<Notify>>>, + compaction_tasks_to_cancel: parking_lot::Mutex<Vec<HummockCompactionTaskId>>, compactor_manager: CompactorManagerRef, } @@ -270,6 +271,7 @@ compaction_group_manager, compaction_request_channel: parking_lot::RwLock::new(None), compaction_resume_notifier: parking_lot::RwLock::new(None), + compaction_tasks_to_cancel: parking_lot::Mutex::new(vec![]), compactor_manager, latest_snapshot: ArcSwap::from_pointee(HummockSnapshot { committed_epoch: INVALID_EPOCH, @@ -303,8 +305,14 @@ return; } } + let mut split_cancel = { + let mut manager_cancel = hummock_manager.compaction_tasks_to_cancel.lock(); + manager_cancel.drain(..).collect_vec() + }; + split_cancel.sort(); + split_cancel.dedup(); // TODO: add metrics to track expired tasks - for (context_id, mut task) in compactor_manager.get_expired_tasks() { + for (context_id, mut task) in compactor_manager.get_expired_tasks(split_cancel) { tracing::info!("Task with task_id {} with context_id {context_id} has expired due to lack of visible progress", task.task_id); if let Some(compactor) = compactor_manager.get_compactor(context_id) { // Forcefully cancel the task so that it terminates early on the compactor @@ -1225,6 +1233,7 @@ async fn sync_group<'a>( &'a self, + compaction: Option<&'a Compaction>, versioning: &'a mut Versioning, compaction_groups: &HashMap<CompactionGroupId, CompactionGroup>, trx_extern_part: &mut Option<Transaction>, @@ -1329,6 +1338,7 @@ } } let mut new_groups = vec![]; + let mut tasks_to_cancel = vec![]; // these `group_id`s must be unique for ( group_id, @@ -1366,7 +1376,21 @@ *group_id, member_table_ids, ); - for (id, divide_ver) in split_id_vers { + if !split_id_vers.is_empty() && let Some(parent_compact_status) = + compaction.and_then(|compaction| compaction.compaction_statuses.get(parent_group_id)) + { + for (sst_id, _, level_idx) in &split_id_vers { + if let Some(level_handler) = parent_compact_status + .level_handlers + .get(*level_idx as usize) + { + if let Some(task_id) = level_handler.pending_task_id_by_sst(sst_id) { + tasks_to_cancel.push(task_id); + } + } + } + } + for (id, divide_ver, _) in split_id_vers { match branched_ssts.get_mut(id) { Some(mut entry) => { *entry.get_mut(parent_group_id).unwrap() += 1; @@ -1382,6 +1406,8 @@ } } } + tasks_to_cancel.sort(); + tasks_to_cancel.dedup(); new_version_delta.max_committed_epoch = new_hummock_version.max_committed_epoch; commit_multi_var!( @@ -1412,6 +1438,8 @@ for compaction_group_id in deleted_compaction_groups { remove_compaction_group_in_sst_stat(&self.metrics, compaction_group_id); } + let mut manager_cancel = self.compaction_tasks_to_cancel.lock(); + manager_cancel.append(&mut tasks_to_cancel); Ok(None) } diff --git a/src/meta/src/hummock/test_utils.rs b/src/meta/src/hummock/test_utils.rs index 44070d1478004..0ab26f4996594 100644 --- a/src/meta/src/hummock/test_utils.rs +++ b/src/meta/src/hummock/test_utils.rs @@ -146,6 +146,7 @@ pub fn generate_test_tables(epoch: u64, sst_ids: Vec) -> Vec MetaResult { - let properties = ConnectorProperties::extract(source.properties.clone())?; + let mut properties = ConnectorProperties::extract(source.properties.clone())?; + if let ConnectorProperties::Cdc(prop) = &mut properties { + prop.as_mut().source_id = source.id; + } let enumerator = 
SplitEnumeratorImpl::create(properties).await?; let splits = Arc::new(Mutex::new(SharedSplitMap { splits: None })); Ok(Self { diff --git a/src/prost/build.rs b/src/prost/build.rs index 696595dfe0790..83be6521ebf62 100644 --- a/src/prost/build.rs +++ b/src/prost/build.rs @@ -38,6 +38,7 @@ fn main() -> Result<(), Box<dyn Error>> { "source", "monitor_service", "health", + "cdc_service", ]; let protos: Vec<String> = proto_files .iter() diff --git a/src/prost/src/lib.rs b/src/prost/src/lib.rs index 9b8337541a432..ce0d0d9ae70fb 100644 --- a/src/prost/src/lib.rs +++ b/src/prost/src/lib.rs @@ -71,6 +71,12 @@ pub mod monitor_service; #[rustfmt::skip] #[cfg_attr(madsim, path = "sim/health.rs")] pub mod health; +#[rustfmt::skip] +#[cfg_attr(madsim, path = "sim/cdc_service.rs")] +pub mod cdc_service; +#[rustfmt::skip] +#[path = "cdc_service.serde.rs"] +pub mod cdc_service_serde; #[rustfmt::skip] #[path = "catalog.serde.rs"] diff --git a/src/risedevtool/src/service_config.rs b/src/risedevtool/src/service_config.rs index 13df81197479e..db17ae6435c87 100644 --- a/src/risedevtool/src/service_config.rs +++ b/src/risedevtool/src/service_config.rs @@ -38,6 +38,7 @@ pub struct ComputeNodeConfig { pub provide_compactor: Option<Vec<String>>, pub user_managed: bool, pub enable_in_memory_kv_state_backend: bool, + pub connector_source_endpoint: String, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] diff --git a/src/risedevtool/src/task/compute_node_service.rs b/src/risedevtool/src/task/compute_node_service.rs index 11995cf8cdf57..cb2d01456448a 100644 --- a/src/risedevtool/src/task/compute_node_service.rs +++ b/src/risedevtool/src/task/compute_node_service.rs @@ -59,7 +59,9 @@ impl ComputeNodeService { .arg("--metrics-level") .arg("1") .arg("--async-stack-trace") - .arg(&config.async_stack_trace); + .arg(&config.async_stack_trace) + .arg("--connector-source-endpoint") + .arg(&config.connector_source_endpoint); let provide_jaeger = config.provide_jaeger.as_ref().unwrap(); match provide_jaeger.len() { diff --git a/src/rpc_client/src/cdc_client.rs b/src/rpc_client/src/cdc_client.rs new file mode 100644 index 0000000000000..36be8da06fd03 --- /dev/null +++ b/src/rpc_client/src/cdc_client.rs @@ -0,0 +1,66 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::time::Duration; + +use async_trait::async_trait; +use risingwave_common::config::MAX_CONNECTION_WINDOW_SIZE; +use risingwave_common::util::addr::HostAddr; +use risingwave_pb::cdc_service::cdc_service_client::CdcServiceClient; +use risingwave_pb::cdc_service::*; +use tonic::transport::{Channel, Endpoint}; +use tonic::Streaming; + +use crate::error::Result; +use crate::RpcClient; + +#[derive(Clone)] +pub struct CdcClient(CdcServiceClient<Channel>); + +impl CdcClient { + pub async fn new(host_addr: HostAddr) -> Result<Self> { + let channel = Endpoint::from_shared(format!("http://{}", &host_addr))? 
+ .initial_connection_window_size(MAX_CONNECTION_WINDOW_SIZE) + .connect_timeout(Duration::from_secs(5)) + .connect() + .await?; + Ok(Self(CdcServiceClient::new(channel))) + } + + pub async fn get_event_stream( + &self, + source_id: u64, + props: DbConnectorProperties, + ) -> Result<Streaming<GetEventStreamResponse>> { + Ok(self + .0 + .to_owned() + .get_event_stream(GetEventStreamRequest { + source_id, + properties: Some(props), + }) + .await + .inspect_err(|_| { + tracing::error!("failed to create event stream for CDC source {}", source_id) + })? + .into_inner()) + } +} + +#[async_trait] +impl RpcClient for CdcClient { + async fn new_client(host_addr: HostAddr) -> Result<Self> { + Self::new(host_addr).await + } +} diff --git a/src/rpc_client/src/lib.rs b/src/rpc_client/src/lib.rs index af86172adb03f..e622c67b362f0 100644 --- a/src/rpc_client/src/lib.rs +++ b/src/rpc_client/src/lib.rs @@ -42,11 +42,13 @@ use tokio::sync::Mutex; pub mod error; use error::{Result, RpcError}; +mod cdc_client; mod compute_client; mod hummock_meta_client; mod meta_client; mod stream_client; +pub use cdc_client::CdcClient; pub use compute_client::{ComputeClient, ComputeClientPool, ComputeClientPoolRef}; pub use hummock_meta_client::HummockMetaClient; pub use meta_client::MetaClient; diff --git a/src/source/src/manager.rs b/src/source/src/manager.rs index 05b4e3f47b3dd..8035ae841356e 100644 --- a/src/source/src/manager.rs +++ b/src/source/src/manager.rs @@ -203,9 +203,11 @@ pub struct SourceDescBuilder { properties: HashMap<String, String>, info: ProstSourceInfo, source_manager: TableSourceManagerRef, + connector_node_addr: String, } impl SourceDescBuilder { + #[allow(clippy::too_many_arguments)] pub fn new( source_id: TableId, row_id_index: Option, @@ -214,6 +216,7 @@ properties: HashMap<String, String>, info: ProstSourceInfo, source_manager: TableSourceManagerRef, + connector_node_addr: String, ) -> Self { Self { source_id, @@ -223,6 +226,7 @@ properties, info, source_manager, + connector_node_addr, } } @@ -286,7 +290,14 @@ "source should have at least one pk column" ); - let config = ConnectorProperties::extract(self.properties.clone()) + // store the connector node address in the properties for later use + let mut source_props: HashMap<String, String> = + HashMap::from_iter(self.properties.clone().into_iter()); + source_props.insert( + "connector_node_addr".to_string(), + self.connector_node_addr.clone(), + ); + let config = ConnectorProperties::extract(source_props) .map_err(|e| RwError::from(ConnectorError(e.into())))?; let source = SourceImpl::Connector(ConnectorSource { @@ -350,6 +361,7 @@ properties: Default::default(), info, source_manager, + connector_node_addr: "127.0.0.1:60061".to_string(), } } } @@ -365,8 +377,6 @@ mod tests { use risingwave_pb::catalog::{ColumnIndex, StreamSourceInfo, TableSourceInfo}; use risingwave_pb::plan_common::ColumnCatalog; use risingwave_pb::stream_plan::source_node::Info; - use risingwave_storage::memory::MemoryStateStore; - use risingwave_storage::Keyspace; use crate::*; @@ -401,6 +411,7 @@ properties, Info::StreamSource(info), mem_source_manager, + Default::default(), ); let source = source_builder.build().await; @@ -441,8 +452,6 @@ let pk_column_ids = vec![1]; let info = TableSourceInfo {}; - let _keyspace = Keyspace::table_root(MemoryStateStore::new(), table_id); - let mem_source_manager: TableSourceManagerRef = Arc::new(TableSourceManager::default()); let mut source_builder = SourceDescBuilder::new( table_id, @@ -452,6 +461,7 @@ mod tests 
{ Default::default(), Info::TableSource(info), mem_source_manager.clone(), + Default::default(), ); let res = source_builder.build().await; assert!(res.is_ok()); diff --git a/src/source/src/table.rs b/src/source/src/table.rs index 78d246dfdb67d..6e4928da8ff8c 100644 --- a/src/source/src/table.rs +++ b/src/source/src/table.rs @@ -175,15 +175,10 @@ mod tests { use risingwave_common::array::{Array, I64Array, Op}; use risingwave_common::column_nonnull; use risingwave_common::types::DataType; - use risingwave_storage::memory::MemoryStateStore; - use risingwave_storage::Keyspace; use super::*; fn new_source() -> TableSource { - let store = MemoryStateStore::new(); - let _keyspace = Keyspace::table_root(store, Default::default()); - TableSource::new(vec![ColumnDesc::unnamed( ColumnId::from(0), DataType::Int64, diff --git a/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs b/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs index 7bfa5c7bef807..e153a05d190ef 100644 --- a/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs +++ b/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs @@ -120,7 +120,7 @@ pub trait HummockVersionExt { parent_group_id: CompactionGroupId, group_id: CompactionGroupId, member_table_ids: &HashSet, - ) -> Vec<(HummockSstableId, u64)>; + ) -> Vec<(HummockSstableId, u64, u32)>; fn apply_version_delta(&mut self, version_delta: &HummockVersionDelta); fn build_compaction_group_info(&self) -> HashMap; @@ -252,7 +252,7 @@ impl HummockVersionExt for HummockVersion { parent_group_id: CompactionGroupId, group_id: CompactionGroupId, member_table_ids: &HashSet, - ) -> Vec<(HummockSstableId, u64)> { + ) -> Vec<(HummockSstableId, u64, u32)> { let mut split_id_vers = vec![]; if parent_group_id == StaticCompactionGroupId::NewCompactionGroup as CompactionGroupId || !self.levels.contains_key(&parent_group_id) @@ -273,7 +273,11 @@ impl HummockVersionExt for HummockVersion { .any(|table_id| member_table_ids.contains(table_id)) { table_info.divide_version += 1; - split_id_vers.push((table_info.get_id(), table_info.get_divide_version())); + split_id_vers.push(( + table_info.get_id(), + table_info.get_divide_version(), + 0, + )); let mut branch_table_info = table_info.clone(); branch_table_info.table_ids = table_info .table_ids @@ -291,6 +295,7 @@ impl HummockVersionExt for HummockVersion { } } for (z, level) in parent_levels.levels.iter_mut().enumerate() { + let level_idx = level.get_level_idx(); for table_info in &mut level.table_infos { if table_info .get_table_ids() @@ -298,7 +303,11 @@ impl HummockVersionExt for HummockVersion { .any(|table_id| member_table_ids.contains(table_id)) { table_info.divide_version += 1; - split_id_vers.push((table_info.get_id(), table_info.get_divide_version())); + split_id_vers.push(( + table_info.get_id(), + table_info.get_divide_version(), + level_idx, + )); let mut branch_table_info = table_info.clone(); branch_table_info.table_ids = table_info .table_ids diff --git a/src/storage/hummock_sdk/src/key_range.rs b/src/storage/hummock_sdk/src/key_range.rs index 697c313990817..2ababf5d46c76 100644 --- a/src/storage/hummock_sdk/src/key_range.rs +++ b/src/storage/hummock_sdk/src/key_range.rs @@ -16,24 +16,30 @@ use std::cmp; use bytes::Bytes; -use super::key; use super::key_cmp::KeyComparator; +use crate::user_key; #[derive(PartialEq, Eq, Clone, Debug)] pub struct KeyRange { pub left: Bytes, pub right: Bytes, + pub right_exclusive: bool, } impl KeyRange { pub fn new(left: Bytes, right: Bytes) -> Self { - 
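Note how `SourceDescBuilder::build` forwards the connector node address: rather than extending every connector's typed config, it merges the address into the untyped properties map under a well-known key. A self-contained sketch of that pattern (the key name `connector_node_addr` comes from the diff; everything else is illustrative):

```rust
use std::collections::HashMap;

// Sketch: merge a builder-level address into the untyped connector properties.
fn with_connector_node_addr(
    properties: &HashMap<String, String>,
    connector_node_addr: &str,
) -> HashMap<String, String> {
    let mut props = properties.clone();
    // The connector layer later reads this well-known key back out.
    props.insert(
        "connector_node_addr".to_string(),
        connector_node_addr.to_string(),
    );
    props
}

fn main() {
    let base = HashMap::from([("connector".to_string(), "cdc".to_string())]);
    let merged = with_connector_node_addr(&base, "127.0.0.1:60061");
    assert_eq!(merged["connector_node_addr"], "127.0.0.1:60061");
}
```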
Self { left, right } + Self { + left, + right, + right_exclusive: false, + } } pub fn inf() -> Self { Self { left: Bytes::new(), right: Bytes::new(), + right_exclusive: false, } } @@ -51,7 +57,11 @@ impl KeyRange { pub trait KeyRangeCommon { fn full_key_overlap(&self, other: &Self) -> bool; fn full_key_extend(&mut self, other: &Self); - fn user_key_overlap(&self, other: &Self) -> bool; + fn sstable_overlap(&self, other: &Self) -> bool; + fn compare_right_with(&self, full_key: &[u8]) -> std::cmp::Ordering { + self.compare_right_with_user_key(user_key(full_key)) + } + fn compare_right_with_user_key(&self, ukey: &[u8]) -> std::cmp::Ordering; } #[macro_export] @@ -83,16 +93,26 @@ macro_rules! impl_key_range_common { == cmp::Ordering::Greater) { self.right = other.right.clone(); + self.right_exclusive = other.right_exclusive; } } - fn user_key_overlap(&self, other: &Self) -> bool { + fn sstable_overlap(&self, other: &Self) -> bool { (self.end_bound_inf() || other.start_bound_inf() - || key::user_key(&self.right) >= key::user_key(&other.left)) + || self.compare_right_with(&other.left) != std::cmp::Ordering::Less) && (other.end_bound_inf() || self.start_bound_inf() - || key::user_key(&other.right) >= key::user_key(&self.left)) + || other.compare_right_with(&self.left) != std::cmp::Ordering::Less) + } + + fn compare_right_with_user_key(&self, ukey: &[u8]) -> std::cmp::Ordering { + let ret = user_key(&self.right).cmp(ukey); + if ret == cmp::Ordering::Equal && self.right_exclusive { + cmp::Ordering::Less + } else { + ret + } } } }; @@ -144,16 +164,18 @@ impl From for risingwave_pb::hummock::KeyRange { risingwave_pb::hummock::KeyRange { left: kr.left.to_vec(), right: kr.right.to_vec(), + right_exclusive: kr.right_exclusive, } } } impl From<&risingwave_pb::hummock::KeyRange> for KeyRange { fn from(kr: &risingwave_pb::hummock::KeyRange) -> Self { - KeyRange::new( - Bytes::copy_from_slice(&kr.left), - Bytes::copy_from_slice(&kr.right), - ) + KeyRange { + left: Bytes::copy_from_slice(&kr.left), + right: Bytes::copy_from_slice(&kr.right), + right_exclusive: kr.right_exclusive, + } } } diff --git a/src/storage/hummock_sdk/src/lib.rs b/src/storage/hummock_sdk/src/lib.rs index 1a18d98b26662..33d23048c135b 100644 --- a/src/storage/hummock_sdk/src/lib.rs +++ b/src/storage/hummock_sdk/src/lib.rs @@ -33,6 +33,7 @@ use risingwave_pb::hummock::SstableInfo; use crate::compaction_group::StaticCompactionGroupId; use crate::key::user_key; +use crate::key_range::KeyRangeCommon; use crate::table_stats::TableStats; pub mod compact; @@ -153,9 +154,12 @@ impl SstIdRange { pub fn can_concat(ssts: &[impl Deref]) -> bool { let len = ssts.len(); for i in 0..len - 1 { - if user_key(&ssts[i].get_key_range().as_ref().unwrap().right).cmp(user_key( - &ssts[i + 1].get_key_range().as_ref().unwrap().left, - )) != Ordering::Less + if ssts[i] + .key_range + .as_ref() + .unwrap() + .compare_right_with(&ssts[i + 1].key_range.as_ref().unwrap().left) + != Ordering::Less { return false; } diff --git a/src/storage/hummock_sdk/src/prost_key_range.rs b/src/storage/hummock_sdk/src/prost_key_range.rs index e962dcf64715a..72b9eafab6612 100644 --- a/src/storage/hummock_sdk/src/prost_key_range.rs +++ b/src/storage/hummock_sdk/src/prost_key_range.rs @@ -17,7 +17,7 @@ use std::cmp; use risingwave_pb::hummock::KeyRange; use crate::key_range::KeyRangeCommon; -use crate::{impl_key_range_common, key, key_range_cmp, KeyComparator}; +use crate::{impl_key_range_common, key_range_cmp, user_key, KeyComparator}; impl_key_range_common!(KeyRange); @@ -34,11 
+34,16 @@ impl KeyRangeExt for KeyRange { Self { left: vec![], right: vec![], + right_exclusive: false, } } fn new(left: Vec, right: Vec) -> Self { - Self { left, right } + Self { + left, + right, + right_exclusive: false, + } } fn compare(&self, other: &Self) -> cmp::Ordering { diff --git a/src/storage/hummock_test/src/compactor_tests.rs b/src/storage/hummock_test/src/compactor_tests.rs index aa9589f6c16c1..a0d511418c781 100644 --- a/src/storage/hummock_test/src/compactor_tests.rs +++ b/src/storage/hummock_test/src/compactor_tests.rs @@ -13,7 +13,7 @@ // limitations under the License. #[cfg(test)] -mod tests { +pub(crate) mod tests { use std::collections::{BTreeSet, HashMap}; use std::ops::Bound; @@ -56,14 +56,13 @@ mod tests { use risingwave_storage::store::{ ReadOptions, StateStoreReadExt, StateStoreWrite, WriteOptions, }; - use risingwave_storage::Keyspace; use crate::test_utils::{ get_test_notification_client, HummockV2MixedStateStore, HummockV2MixedStateStore as HummockStorage, }; - async fn get_hummock_storage( + pub(crate) async fn get_hummock_storage( hummock_meta_client: Arc, notification_client: impl NotificationClient, ) -> HummockStorage { @@ -416,7 +415,7 @@ mod tests { assert!(compact_task.is_none()); } - async fn flush_and_commit( + pub(crate) async fn flush_and_commit( hummock_meta_client: &Arc, storage: &HummockStorage, epoch: u64, @@ -431,7 +430,6 @@ mod tests { async fn prepare_data( hummock_meta_client: Arc, - keyspace: &Keyspace, storage: &HummockStorage, existing_table_id: u32, keys_per_epoch: usize, @@ -443,7 +441,7 @@ mod tests { let val = Bytes::from(b"0"[..].repeat(1 << 10)); // 1024 Byte value for idx in 0..kv_count { epoch += 1; - let mut local = keyspace.start_write_batch(WriteOptions { + let mut local = storage.local.start_write_batch(WriteOptions { epoch, table_id: existing_table_id.into(), }); @@ -514,16 +512,8 @@ mod tests { existing_table_id, ) .await; - let keyspace = Keyspace::table_root(storage.clone(), TableId::new(existing_table_id)); - prepare_data( - hummock_meta_client.clone(), - &keyspace, - &storage, - existing_table_id, - 1, - ) - .await; + prepare_data(hummock_meta_client.clone(), &storage, existing_table_id, 1).await; // Mimic dropping table unregister_table_ids_from_compaction_group(&hummock_manager_ref, &[existing_table_id]) @@ -627,7 +617,6 @@ mod tests { } else { existing_table_ids }; - let keyspace = Keyspace::table_root(storage.clone(), TableId::new(table_id)); register_table_ids_to_compaction_group( &hummock_manager_ref, &[table_id], @@ -635,7 +624,7 @@ mod tests { ) .await; epoch += 1; - let mut local = keyspace.start_write_batch(WriteOptions { + let mut local = storage.start_write_batch(WriteOptions { epoch, table_id: TableId::from(table_id), }); @@ -789,7 +778,6 @@ mod tests { let base_epoch = Epoch::now(); let mut epoch: u64 = base_epoch.0; let millisec_interval_epoch: u64 = (1 << 16) * 100; - let keyspace = Keyspace::table_root(storage.clone(), TableId::new(existing_table_id)); register_table_ids_to_compaction_group( &hummock_manager_ref, &[existing_table_id], @@ -800,7 +788,7 @@ mod tests { for _ in 0..kv_count { epoch += millisec_interval_epoch; epoch_set.insert(epoch); - let mut local = keyspace.start_write_batch(WriteOptions { + let mut local = storage.start_write_batch(WriteOptions { epoch, table_id: TableId::from(existing_table_id), }); @@ -963,10 +951,9 @@ mod tests { let base_epoch = Epoch::now(); let mut epoch: u64 = base_epoch.0; let millisec_interval_epoch: u64 = (1 << 16) * 100; - let keyspace = 
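The heart of the `right_exclusive` change is `compare_right_with_user_key`: an exclusive right bound that compares `Equal` to a probe key must report `Less`, i.e. the key lies outside the range. A standalone sketch of that rule over plain byte slices (simplified; the real code compares user keys extracted from encoded full keys):

```rust
use std::cmp::Ordering;

struct KeyRange<'a> {
    right: &'a [u8],
    right_exclusive: bool,
}

// Sketch of `compare_right_with_user_key`: order the right bound against a
// probe key, demoting `Equal` to `Less` when the bound is exclusive.
fn compare_right_with(range: &KeyRange<'_>, key: &[u8]) -> Ordering {
    match range.right.cmp(key) {
        Ordering::Equal if range.right_exclusive => Ordering::Less,
        ord => ord,
    }
}

fn main() {
    let r = KeyRange { right: b"kkk", right_exclusive: true };
    // The probe sits exactly on the exclusive bound, so it is outside the
    // range and a point get may skip this SST entirely.
    assert_eq!(compare_right_with(&r, b"kkk"), Ordering::Less);

    let r = KeyRange { right: b"kkk", right_exclusive: false };
    assert_eq!(compare_right_with(&r, b"kkk"), Ordering::Equal);
}
```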
Keyspace::table_root(storage.clone(), TableId::new(existing_table_id)); register_table_ids_to_compaction_group( &hummock_manager_ref, - &[keyspace.table_id().table_id], + &[existing_table_id], StaticCompactionGroupId::StateDefault.into(), ) .await; @@ -974,9 +961,9 @@ mod tests { for _ in 0..kv_count { epoch += millisec_interval_epoch; epoch_set.insert(epoch); - let mut local = keyspace.start_write_batch(WriteOptions { + let mut local = storage.start_write_batch(WriteOptions { epoch, - table_id: keyspace.table_id(), + table_id: TableId::new(existing_table_id), }); let ramdom_key = [key_prefix, &rand::thread_rng().gen::<[u8; 32]>()].concat(); @@ -1121,16 +1108,8 @@ mod tests { ) .await; - let keyspace = Keyspace::table_root(storage.clone(), TableId::new(existing_table_id)); - prepare_data( - hummock_meta_client.clone(), - &keyspace, - &storage, - existing_table_id, - 2, - ) - .await; - let mut local = keyspace.start_write_batch(WriteOptions { + prepare_data(hummock_meta_client.clone(), &storage, existing_table_id, 2).await; + let mut local = storage.start_write_batch(WriteOptions { epoch: 130, table_id: existing_table_id.into(), }); diff --git a/src/storage/hummock_test/src/hummock_read_version_tests.rs b/src/storage/hummock_test/src/hummock_read_version_tests.rs index d2fa84285a2d6..64498711ea223 100644 --- a/src/storage/hummock_test/src/hummock_read_version_tests.rs +++ b/src/storage/hummock_test/src/hummock_read_version_tests.rs @@ -145,6 +145,7 @@ async fn test_read_version_basic() { key_range: Some(KeyRange { left: key_with_epoch(iterator_test_user_key_of(1).encode(), 1), right: key_with_epoch(iterator_test_user_key_of(2).encode(), 2), + right_exclusive: false, }), file_size: 1, table_ids: vec![0], @@ -158,6 +159,7 @@ async fn test_read_version_basic() { key_range: Some(KeyRange { left: key_with_epoch(iterator_test_user_key_of(3).encode(), 3), right: key_with_epoch(iterator_test_user_key_of(3).encode(), 3), + right_exclusive: false, }), file_size: 1, table_ids: vec![0], diff --git a/src/storage/hummock_test/src/sync_point_tests.rs b/src/storage/hummock_test/src/sync_point_tests.rs index 6a139d359f761..46a423d3a4367 100644 --- a/src/storage/hummock_test/src/sync_point_tests.rs +++ b/src/storage/hummock_test/src/sync_point_tests.rs @@ -12,19 +12,40 @@ // See the License for the specific language governing permissions and // limitations under the License. 
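With `Keyspace` removed, the tests open write batches directly on the state store and carry the table id in `WriteOptions`. A minimal sketch of the new call shape using the in-memory store, mirroring the APIs exercised by the updated tests (epoch and table id values are illustrative):

```rust
use bytes::Bytes;
use risingwave_storage::memory::MemoryStateStore;
use risingwave_storage::storage_value::StorageValue;
use risingwave_storage::store::{StateStoreWrite, WriteOptions};

// Sketch of the post-Keyspace write path: the batch is opened on the store
// itself and the table id rides in WriteOptions.
async fn write_one(store: &MemoryStateStore) {
    let mut batch = store.start_write_batch(WriteOptions {
        epoch: 1,
        table_id: Default::default(),
    });
    batch.put(Bytes::from("key"), StorageValue::new_put("value"));
    batch.ingest().await.expect("ingest should succeed");
}
```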
+use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::Duration; +use bytes::Bytes; +use risingwave_common::catalog::hummock::CompactionFilterFlag; +use risingwave_common::catalog::TableId; +use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockVersionExt; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; -use risingwave_meta::hummock::test_utils::{add_ssts, setup_compute_env}; -use risingwave_meta::hummock::{start_local_notification_receiver, MockHummockMetaClient}; +use risingwave_hummock_sdk::key::{next_key, user_key}; +use risingwave_meta::hummock::compaction::compaction_config::CompactionConfigBuilder; +use risingwave_meta::hummock::compaction::ManualCompactionOption; +use risingwave_meta::hummock::test_utils::{ + add_ssts, setup_compute_env, setup_compute_env_with_config, +}; +use risingwave_meta::hummock::{ + start_local_notification_receiver, HummockManagerRef, MockHummockMetaClient, +}; use risingwave_meta::manager::LocalNotification; +use risingwave_meta::storage::MemStore; use risingwave_pb::common::WorkerNode; use risingwave_pb::hummock::compact_task::TaskStatus; use risingwave_rpc_client::HummockMetaClient; +use risingwave_storage::hummock::compactor::{Compactor, CompactorContext}; use risingwave_storage::hummock::SstableIdManager; +use risingwave_storage::storage_value::StorageValue; +use risingwave_storage::store::{ReadOptions, StateStoreWrite, WriteOptions}; use serial_test::serial; +use super::compactor_tests::tests::{ + flush_and_commit, get_hummock_storage, prepare_compactor_and_filter, +}; +use crate::test_utils::get_test_notification_client; + #[tokio::test] #[serial] async fn test_syncpoints_sstable_id_manager() { @@ -177,3 +198,180 @@ async fn test_syncpoints_test_local_notification_receiver() { shutdown_sender.send(()).unwrap(); join_handle.await.unwrap(); } + +pub async fn compact_once( + hummock_manager_ref: HummockManagerRef, + compact_ctx: Arc, +) { + // 2. get compact task + let manual_compcation_option = ManualCompactionOption { + level: 0, + ..Default::default() + }; + // 2. get compact task + let mut compact_task = hummock_manager_ref + .manual_get_compact_task( + StaticCompactionGroupId::StateDefault.into(), + manual_compcation_option, + ) + .await + .unwrap() + .unwrap(); + compact_task.gc_delete_keys = false; + let compactor = hummock_manager_ref.get_idle_compactor().await.unwrap(); + hummock_manager_ref + .assign_compaction_task(&compact_task, compactor.context_id()) + .await + .unwrap(); + + let compaction_filter_flag = CompactionFilterFlag::STATE_CLEAN; + compact_task.compaction_filter_mask = compaction_filter_flag.bits(); + // 3. 
compact + let (_tx, rx) = tokio::sync::oneshot::channel(); + Compactor::compact(compact_ctx, compact_task.clone(), rx).await; +} + +#[tokio::test] +#[cfg(feature = "sync_point")] +#[serial] +async fn test_syncpoints_get_in_delete_range_boundary() { + let config = CompactionConfigBuilder::new() + .level0_tier_compact_file_number(1) + .max_bytes_for_level_base(4096) + .build(); + let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = + setup_compute_env_with_config(8080, config).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager_ref.clone(), + worker_node.id, + )); + let storage = get_hummock_storage( + hummock_meta_client.clone(), + get_test_notification_client(env, hummock_manager_ref.clone(), worker_node.clone()), + ) + .await; + let existing_table_id: u32 = 1; + let compact_ctx = Arc::new( + prepare_compactor_and_filter( + &storage, + &hummock_meta_client, + hummock_manager_ref.clone(), + existing_table_id, + ) + .await, + ); + + let compactor_manager = hummock_manager_ref.compactor_manager_ref_for_test(); + compactor_manager.add_compactor(worker_node.id, u64::MAX); + + // 1. add sstables + let val0 = Bytes::from(b"0"[..].repeat(1 << 10)); // 1024 Byte value + let val1 = Bytes::from(b"1"[..].repeat(1 << 10)); // 1024 Byte value + let mut local = storage.local.start_write_batch(WriteOptions { + epoch: 100, + table_id: existing_table_id.into(), + }); + let mut start_key = b"aaa".to_vec(); + for _ in 0..10 { + local.put(&start_key, StorageValue::new_put(val0.clone())); + start_key = next_key(&start_key); + } + local.put(b"ggg", StorageValue::new_put(val0.clone())); + local.put(b"hhh", StorageValue::new_put(val0.clone())); + local.put(b"kkk", StorageValue::new_put(val0.clone())); + local.ingest().await.unwrap(); + flush_and_commit(&hummock_meta_client, &storage, 100).await; + compact_once(hummock_manager_ref.clone(), compact_ctx.clone()).await; + let mut local = storage.local.start_write_batch(WriteOptions { + epoch: 101, + table_id: existing_table_id.into(), + }); + local.put(b"aaa", StorageValue::new_put(val1.clone())); + local.put(b"bbb", StorageValue::new_put(val1.clone())); + local.delete_range(b"ggg", b"hhh"); + local.ingest().await.unwrap(); + flush_and_commit(&hummock_meta_client, &storage, 101).await; + compact_once(hummock_manager_ref.clone(), compact_ctx.clone()).await; + let mut local = storage.local.start_write_batch(WriteOptions { + epoch: 102, + table_id: existing_table_id.into(), + }); + local.put(b"hhh", StorageValue::new_put(val1.clone())); + local.put(b"iii", StorageValue::new_put(val1.clone())); + local.delete_range(b"jjj", b"kkk"); + local.ingest().await.unwrap(); + flush_and_commit(&hummock_meta_client, &storage, 102).await; + // move this two file to the same level. + compact_once(hummock_manager_ref.clone(), compact_ctx.clone()).await; + + let mut local = storage.local.start_write_batch(WriteOptions { + epoch: 103, + table_id: existing_table_id.into(), + }); + local.put(b"lll", StorageValue::new_put(val1.clone())); + local.put(b"mmm", StorageValue::new_put(val1.clone())); + local.ingest().await.unwrap(); + flush_and_commit(&hummock_meta_client, &storage, 103).await; + // move this two file to the same level. + compact_once(hummock_manager_ref.clone(), compact_ctx.clone()).await; + + // 4. 
get the latest version and check + let version = hummock_manager_ref.get_current_version().await; + let base_level = &version + .get_compaction_group_levels(StaticCompactionGroupId::StateDefault.into()) + .levels[4]; + assert_eq!(base_level.table_infos.len(), 3); + assert!( + base_level.table_infos[0] + .key_range + .as_ref() + .unwrap() + .right_exclusive + ); + assert_eq!( + user_key(&base_level.table_infos[0].key_range.as_ref().unwrap().right), + user_key(&base_level.table_infos[1].key_range.as_ref().unwrap().left), + ); + storage.wait_version(version).await; + let read_options = ReadOptions { + ignore_range_tombstone: false, + check_bloom_filter: true, + prefix_hint: None, + table_id: TableId::from(existing_table_id), + retention_seconds: None, + }; + let get_result = storage + .get(b"hhh", 120, read_options.clone()) + .await + .unwrap(); + assert_eq!(get_result.unwrap(), val1); + let get_result = storage + .get(b"ggg", 120, read_options.clone()) + .await + .unwrap(); + assert!(get_result.is_none()); + let get_result = storage + .get(b"aaa", 120, read_options.clone()) + .await + .unwrap(); + assert_eq!(get_result.unwrap(), val1); + let get_result = storage + .get(b"aab", 120, read_options.clone()) + .await + .unwrap(); + assert_eq!(get_result.unwrap(), val0); + let skip_flag = Arc::new(AtomicBool::new(false)); + let skip_flag_hook = skip_flag.clone(); + sync_point::hook("HUMMOCK_V2::GET::SKIP_BY_NO_FILE", move || { + let flag = skip_flag_hook.clone(); + async move { + flag.store(true, Ordering::Release); + } + }); + let get_result = storage + .get(b"kkk", 120, read_options.clone()) + .await + .unwrap(); + assert_eq!(get_result.unwrap(), val0); + assert!(skip_flag.load(Ordering::Acquire)); +} diff --git a/src/storage/src/hummock/compactor/compactor_runner.rs b/src/storage/src/hummock/compactor/compactor_runner.rs index c5058af2bb03d..721fb5df7ee7e 100644 --- a/src/storage/src/hummock/compactor/compactor_runner.rs +++ b/src/storage/src/hummock/compactor/compactor_runner.rs @@ -66,6 +66,7 @@ impl CompactorRunner { let key_range = KeyRange { left: Bytes::copy_from_slice(task.splits[split_index].get_left()), right: Bytes::copy_from_slice(task.splits[split_index].get_right()), + right_exclusive: true, }; let compactor = Compactor::new( context.context.clone(), diff --git a/src/storage/src/hummock/sstable/builder.rs b/src/storage/src/hummock/sstable/builder.rs index ce9802979116a..597ea890b8ac5 100644 --- a/src/storage/src/hummock/sstable/builder.rs +++ b/src/storage/src/hummock/sstable/builder.rs @@ -256,6 +256,7 @@ impl SstableBuilder { self.finalize_last_table_stats(); self.build_block().await?; + let mut right_exclusive = false; let meta_offset = self.writer.data_len() as u64; for tombstone in &self.range_tombstones { assert!(!tombstone.end_user_key.is_empty()); @@ -270,6 +271,7 @@ impl SstableBuilder { largest_key = FullKey::from_user_key(tombstone.end_user_key.clone(), HummockEpoch::MAX) .encode(); + right_exclusive = true; } if smallest_key.is_empty() || KeyComparator::encoded_greater_than_unencoded( @@ -310,6 +312,7 @@ impl SstableBuilder { key_range: Some(risingwave_pb::hummock::KeyRange { left: meta.smallest_key.clone(), right: meta.largest_key.clone(), + right_exclusive, }), file_size: meta.estimated_size as u64, table_ids: self.table_ids.into_iter().collect(), diff --git a/src/storage/src/hummock/sstable/mod.rs b/src/storage/src/hummock/sstable/mod.rs index 6a21b5ba7a8b9..f884d23a502e9 100644 --- a/src/storage/src/hummock/sstable/mod.rs +++ b/src/storage/src/hummock/sstable/mod.rs 
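The assertions above hinge on `delete_range` laying down a half-open tombstone: `delete_range(b"ggg", b"hhh")` at epoch 101 deletes `ggg` but leaves `hhh`, which epoch 102 then rewrites to `val1`. A tiny self-contained sketch of the half-open containment check (illustrative only):

```rust
use std::ops::Range;

// Illustrative: a range tombstone [start, end) covers `start` but not `end`.
fn covered_by_tombstone(key: &[u8], tombstone: &Range<&[u8]>) -> bool {
    tombstone.start <= key && key < tombstone.end
}

fn main() {
    let tombstone = b"ggg".as_slice()..b"hhh".as_slice();
    assert!(covered_by_tombstone(b"ggg", &tombstone)); // get(b"ggg") -> None
    assert!(!covered_by_tombstone(b"hhh", &tombstone)); // "hhh" survives epoch 101
}
```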
@@ -169,6 +169,7 @@ impl Sstable { key_range: Some(KeyRange { left: self.meta.smallest_key.clone(), right: self.meta.largest_key.clone(), + right_exclusive: false, }), file_size: self.meta.estimated_size as u64, table_ids: vec![], diff --git a/src/storage/src/hummock/state_store_v1.rs b/src/storage/src/hummock/state_store_v1.rs index 80ecbe4b4a5cf..672d361137c02 100644 --- a/src/storage/src/hummock/state_store_v1.rs +++ b/src/storage/src/hummock/state_store_v1.rs @@ -30,6 +30,7 @@ use risingwave_hummock_sdk::key::{ bound_table_key_range, map_table_key_range, next_key, user_key, FullKey, TableKey, TableKeyRange, UserKey, }; +use risingwave_hummock_sdk::key_range::KeyRangeCommon; use risingwave_hummock_sdk::{can_concat, HummockReadEpoch}; use risingwave_pb::hummock::LevelType; use tokio::sync::oneshot; @@ -162,14 +163,11 @@ impl HummockStorageV1 { continue; } table_info_idx = table_info_idx.saturating_sub(1); - let ord = user_key( - &level.table_infos[table_info_idx] - .key_range - .as_ref() - .unwrap() - .right, - ) - .cmp(encoded_user_key.as_ref()); + let ord = level.table_infos[table_info_idx] + .key_range + .as_ref() + .unwrap() + .compare_right_with_user_key(&encoded_user_key); // the case that the key falls into the gap between two ssts if ord == Ordering::Less { continue; @@ -502,11 +500,6 @@ impl StateStoreWrite for HummockStorageV1 { /// * Ordered. KV pairs will be directly written to the table, so it must be ordered. /// * Locally unique. There should not be two or more operations on the same key in one write /// batch. - /// * Globally unique. The streaming operators should ensure that different operators won't - /// operate on the same key. The operator operating on one keyspace should always wait for all - /// changes to be committed before reading and writing new keys to the engine. That is because - /// that the table with lower epoch might be committed after a table with higher epoch has - /// been committed. If such case happens, the outcome is non-predictable. 
fn ingest_batch( &self, kv_pairs: Vec<(Bytes, StorageValue)>, diff --git a/src/storage/src/hummock/store/version.rs b/src/storage/src/hummock/store/version.rs index 27e250f78bff5..87abc0fa62984 100644 --- a/src/storage/src/hummock/store/version.rs +++ b/src/storage/src/hummock/store/version.rs @@ -28,8 +28,10 @@ use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::key::{ bound_table_key_range, user_key, FullKey, TableKey, TableKeyRange, UserKey, }; +use risingwave_hummock_sdk::key_range::KeyRangeCommon; use risingwave_hummock_sdk::{can_concat, HummockEpoch}; use risingwave_pb::hummock::{HummockVersionDelta, LevelType, SstableInfo}; +use sync_point::sync_point; use super::memtable::{ImmId, ImmutableMemtable}; use super::state_store::StagingDataIterator; @@ -441,16 +443,14 @@ impl HummockVersionReader { continue; } table_info_idx = table_info_idx.saturating_sub(1); - let ord = user_key( - &level.table_infos[table_info_idx] - .key_range - .as_ref() - .unwrap() - .right, - ) - .cmp(encoded_user_key.as_ref()); + let ord = level.table_infos[table_info_idx] + .key_range + .as_ref() + .unwrap() + .compare_right_with_user_key(&encoded_user_key); // the case that the key falls into the gap between two ssts if ord == Ordering::Less { + sync_point!("HUMMOCK_V2::GET::SKIP_BY_NO_FILE"); continue; } diff --git a/src/storage/src/hummock/test_utils.rs b/src/storage/src/hummock/test_utils.rs index 4d285dfa6c8bf..af8f2d38deda7 100644 --- a/src/storage/src/hummock/test_utils.rs +++ b/src/storage/src/hummock/test_utils.rs @@ -101,6 +101,7 @@ pub fn gen_dummy_sst_info( key_range: Some(KeyRange { left: FullKey::for_test(table_id, min_table_key, epoch).encode(), right: FullKey::for_test(table_id, max_table_key, epoch).encode(), + right_exclusive: false, }), file_size, table_ids: vec![], @@ -172,6 +173,7 @@ pub async fn put_sst( key_range: Some(KeyRange { left: meta.smallest_key.clone(), right: meta.largest_key.clone(), + right_exclusive: false, }), file_size: meta.estimated_size as u64, table_ids: vec![], diff --git a/src/storage/src/keyspace.rs b/src/storage/src/keyspace.rs deleted file mode 100644 index dac01e5a8a99a..0000000000000 --- a/src/storage/src/keyspace.rs +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2022 Singularity Data -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::future::Future; -use std::ops::{Bound, RangeBounds}; - -use bytes::Bytes; -use risingwave_common::catalog::TableId; -use risingwave_hummock_sdk::key::FullKey; - -use crate::error::StorageResult; -use crate::store::{ReadOptions, StateStoreRead, StateStoreReadExt, StateStoreWrite, WriteOptions}; -use crate::write_batch::KeySpaceWriteBatch; -use crate::StateStoreIter; - -/// Provides API to read key-value pairs of a prefix in the storage backend. -#[derive(Clone)] -pub struct Keyspace { - store: S, - - /// Encoded representation for all segments. 
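The `sync_point!("HUMMOCK_V2::GET::SKIP_BY_NO_FILE")` inserted in `version.rs` is what `test_syncpoints_get_in_delete_range_boundary` hooks to prove a `get` skipped an SST whose exclusive right bound equals the probe key. A sketch of the hook-and-assert pattern, mirroring the test above (`observe_skip_path` is hypothetical):

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

// Sketch: assert that an instrumented code path actually fired.
async fn observe_skip_path() {
    let fired = Arc::new(AtomicBool::new(false));
    let fired_in_hook = fired.clone();
    sync_point::hook("HUMMOCK_V2::GET::SKIP_BY_NO_FILE", move || {
        let flag = fired_in_hook.clone();
        async move {
            flag.store(true, Ordering::Release);
        }
    });
    // ... drive the read path under test here ...
    assert!(fired.load(Ordering::Acquire));
}
```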
- prefix: Vec, - - table_id: TableId, -} - -// TODO: remove storage interface from keyspace, and and call it directly in storage_table -impl Keyspace { - /// Creates a shared root [`Keyspace`] for all executors of the same operator. - /// - /// By design, all executors of the same operator should share the same keyspace in order to - /// support scaling out, and ensure not to overlap with each other. So we use `table_id` - /// here. - - /// Creates a root [`Keyspace`] for a table. - pub fn table_root(store: S, id: TableId) -> Self { - let prefix = id.table_id().to_be_bytes().to_vec(); - Self { - store, - prefix, - table_id: id, - } - } - - /// Concatenates this keyspace and the given key to produce a prefixed key. - pub fn prefixed_key(&self, key: impl AsRef<[u8]>) -> Vec { - [self.prefix.as_slice(), key.as_ref()].concat() - } - - /// Gets the underlying state store. - pub fn state_store(&self) -> &S { - &self.store - } - - pub fn table_id(&self) -> TableId { - self.table_id - } -} - -impl Keyspace { - /// Gets from the keyspace with the `prefixed_key` of given key. - /// The returned value is based on a snapshot corresponding to the given `epoch`. - pub async fn get( - &self, - key: impl AsRef<[u8]>, - epoch: u64, - read_options: ReadOptions, - ) -> StorageResult> { - self.store.get(key.as_ref(), epoch, read_options).await - } - - /// Scans `limit` keys from the keyspace and get their values. - /// If `limit` is None, all keys of the given prefix will be scanned. - /// The returned values are based on a snapshot corresponding to the given `epoch`. - pub async fn scan( - &self, - epoch: u64, - limit: Option, - read_options: ReadOptions, - ) -> StorageResult, Bytes)>> { - self.scan_with_range::<_, &[u8]>(.., epoch, limit, read_options) - .await - } - - /// Scans `limit` keys from the given `range` in this keyspace and get their values. - /// If `limit` is None, all keys of the given prefix will be scanned. - /// The returned values are based on a snapshot corresponding to the given `epoch`. - /// - /// **Note**: the `range` should not be prepended with the prefix of this keyspace. - pub async fn scan_with_range( - &self, - range: R, - epoch: u64, - limit: Option, - read_options: ReadOptions, - ) -> StorageResult, Bytes)>> - where - R: RangeBounds, - B: AsRef<[u8]>, - { - let range = to_owned_range(range); - let pairs = self.store.scan(range, epoch, limit, read_options).await?; - let pairs = pairs - .into_iter() - .map(|(k, v)| (k.user_key.table_key.0, v)) - .collect(); - Ok(pairs) - } - - /// Gets an iterator of this keyspace. - /// The returned iterator will iterate data from a snapshot corresponding to the given `epoch`. - pub async fn iter( - &self, - epoch: u64, - read_options: ReadOptions, - ) -> StorageResult> { - self.iter_with_range::<_, &[u8]>(.., epoch, read_options) - .await - } - - /// Gets an iterator of the given `range` in this keyspace. - /// The returned iterator will iterate data from a snapshot corresponding to the given `epoch`. - /// - /// **Note**: the `range` should not be prepended with the prefix of this keyspace. 
- pub async fn iter_with_range( - &self, - range: R, - epoch: u64, - read_options: ReadOptions, - ) -> StorageResult> - where - R: RangeBounds, - B: AsRef<[u8]>, - { - let range = to_owned_range(range); - let iter = self.store.iter(range, epoch, read_options).await?; - let extract_table_key_iter = ExtractTableKeyIterator { iter }; - - Ok(extract_table_key_iter) - } -} - -impl Keyspace { - pub fn start_write_batch(&self, option: WriteOptions) -> KeySpaceWriteBatch<'_, S> { - let write_batch = self.store.start_write_batch(option); - write_batch.prefixify() - } -} - -fn to_owned_range(range: R) -> (Bound>, Bound>) -where - R: RangeBounds, - B: AsRef<[u8]>, -{ - ( - range.start_bound().map(|b| b.as_ref().to_vec()), - range.end_bound().map(|b| b.as_ref().to_vec()), - ) -} - -pub struct ExtractTableKeyIterator>, Bytes)> + 'static> { - iter: I, -} - -impl>, Bytes)>> StateStoreIter - for ExtractTableKeyIterator -{ - type Item = (Vec, Bytes); - - type NextFuture<'a> = - impl Future>> + Send + 'a; - - fn next(&mut self) -> Self::NextFuture<'_> { - async move { - Ok(self - .iter - .next() - .await? - .map(|(key, value)| (key.user_key.table_key.0, value))) - } - } -} diff --git a/src/storage/src/lib.rs b/src/storage/src/lib.rs index 6449b90549998..8781152692d6d 100644 --- a/src/storage/src/lib.rs +++ b/src/storage/src/lib.rs @@ -44,7 +44,6 @@ #![expect(clippy::result_large_err, reason = "FIXME: HummockError is large")] pub mod hummock; -pub mod keyspace; pub mod memory; pub mod monitor; pub mod panic_store; @@ -61,7 +60,6 @@ pub mod write_batch; #[cfg(feature = "failpoints")] mod storage_failpoints; -pub use keyspace::Keyspace; pub use store::{StateStore, StateStoreIter}; pub use store_impl::StateStoreImpl; diff --git a/src/storage/src/store.rs b/src/storage/src/store.rs index f48d8aa4696f1..d3abda5714451 100644 --- a/src/storage/src/store.rs +++ b/src/storage/src/store.rs @@ -42,6 +42,12 @@ pub trait StateStoreIterExt: StateStoreIter { + Send + 'a; + fn map(self, f: F) -> StateStoreMapIter + where + Self: Sized, + B: Send, + F: FnMut(Self::Item) -> B; + fn collect(&mut self, limit: Option) -> Self::CollectFuture<'_>; } @@ -49,6 +55,15 @@ impl StateStoreIterExt for I { type CollectFuture<'a> = impl Future::Item>>> + Send + 'a; + fn map(self, f: F) -> StateStoreMapIter + where + Self: Sized, + B: Send, + F: FnMut(Self::Item) -> B, + { + StateStoreMapIter { iter: self, f } + } + fn collect(&mut self, limit: Option) -> Self::CollectFuture<'_> { async move { let mut kvs = Vec::with_capacity(limit.unwrap_or_default()); @@ -65,6 +80,26 @@ impl StateStoreIterExt for I { } } +pub struct StateStoreMapIter { + iter: I, + f: F, +} + +impl StateStoreIter for StateStoreMapIter +where + B: Send, + I: StateStoreIter, + F: FnMut(I::Item) -> B + StaticSendSync, +{ + type Item = B; + + type NextFuture<'a> = impl Future>> + Send + 'a; + + fn next(&mut self) -> Self::NextFuture<'_> { + async move { Ok(self.iter.next().await?.map(&mut self.f)) } + } +} + #[macro_export] macro_rules! 
define_state_store_read_associated_type { () => { diff --git a/src/storage/src/table/batch_table/mod.rs b/src/storage/src/table/batch_table/mod.rs index f7c29968c42fa..468d404b6a969 100644 --- a/src/storage/src/table/batch_table/mod.rs +++ b/src/storage/src/table/batch_table/mod.rs @@ -14,5 +14,3 @@ pub mod iter_utils; pub mod storage_table; -#[cfg(test)] -pub mod test_batch_table; diff --git a/src/storage/src/table/batch_table/storage_table.rs b/src/storage/src/table/batch_table/storage_table.rs index e48ab1dbc8065..917b73028a30b 100644 --- a/src/storage/src/table/batch_table/storage_table.rs +++ b/src/storage/src/table/batch_table/storage_table.rs @@ -34,21 +34,23 @@ use tracing::trace; use super::iter_utils; use crate::error::{StorageError, StorageResult}; -use crate::keyspace::ExtractTableKeyIterator; use crate::row_serde::row_serde_util::{ parse_raw_key_to_vnode_and_key, serialize_pk, serialize_pk_with_vnode, }; use crate::row_serde::{find_columns_by_ids, ColumnMapping}; use crate::store::ReadOptions; use crate::table::{compute_vnode, Distribution, TableIter}; -use crate::{Keyspace, StateStore, StateStoreIter}; +use crate::{StateStore, StateStoreIter}; /// [`StorageTable`] is the interface accessing relational data in KV(`StateStore`) with /// row-based encoding format, and is used in batch mode. #[derive(Clone)] pub struct StorageTable { - /// The keyspace that the pk and value of the original table has. - keyspace: Keyspace, + /// Id for this table. + table_id: TableId, + + /// State store backend. + store: S, /// The schema of the output columns, i.e., this table VIEWED BY some executor like /// RowSeqScanExecutor. @@ -196,9 +198,9 @@ impl StorageTable { }) .collect_vec(); - let keyspace = Keyspace::table_root(store, table_id); Self { - keyspace, + table_id, + store, schema, pk_serializer, mapping: Arc::new(mapping), @@ -243,10 +245,7 @@ impl StorageTable { wait_epoch: HummockReadEpoch, ) -> StorageResult> { let epoch = wait_epoch.get_epoch(); - self.keyspace - .state_store() - .try_wait_epoch(wait_epoch) - .await?; + self.store.try_wait_epoch(wait_epoch).await?; let serialized_pk = serialize_pk_with_vnode(pk, &self.pk_serializer, self.compute_vnode_by_pk(pk)); assert!(pk.size() <= self.pk_indices.len()); @@ -259,13 +258,9 @@ impl StorageTable { check_bloom_filter: self.dist_key_indices == key_indices, retention_seconds: self.table_option.retention_seconds, ignore_range_tombstone: false, - table_id: self.keyspace.table_id(), + table_id: self.table_id, }; - if let Some(value) = self - .keyspace - .get(&serialized_pk, epoch, read_options) - .await? - { + if let Some(value) = self.store.get(&serialized_pk, epoch, read_options).await? 
{ let full_row = self.row_deserializer.deserialize(value)?; let result_row = self.mapping.project(full_row); Ok(Some(result_row)) @@ -337,10 +332,10 @@ impl StorageTable { check_bloom_filter, ignore_range_tombstone: false, retention_seconds: self.table_option.retention_seconds, - table_id: self.keyspace.table_id(), + table_id: self.table_id, }; let iter = StorageTableIterInner::::new( - &self.keyspace, + &self.store, self.mapping.clone(), self.row_deserializer.clone(), raw_key_range, @@ -450,7 +445,7 @@ impl StorageTable { { trace!( "iter_with_pk_bounds dist_key_indices table_id {} not match prefix pk_prefix {:?} dist_key_indices {:?} pk_prefix_indices {:?}", - self.keyspace.table_id(), + self.table_id, pk_prefix, self.dist_key_indices, pk_prefix_indices @@ -464,7 +459,7 @@ impl StorageTable { trace!( "iter_with_pk_bounds table_id {} prefix_hint {:?} start_key: {:?}, end_key: {:?} pk_prefix {:?} dist_key_indices {:?} pk_prefix_indices {:?}" , - self.keyspace.table_id(), + self.table_id, prefix_hint, start_key, end_key, @@ -505,7 +500,7 @@ impl StorageTable { /// [`StorageTableIterInner`] iterates on the storage table. struct StorageTableIterInner { /// An iterator that returns raw bytes from storage. - iter: ExtractTableKeyIterator, + iter: S::Iter, mapping: Arc, @@ -515,7 +510,7 @@ struct StorageTableIterInner { impl StorageTableIterInner { /// If `wait_epoch` is true, it will wait for the given epoch to be committed before iteration. async fn new( - keyspace: &Keyspace, + store: &S, mapping: Arc, row_deserializer: Arc, raw_key_range: R, @@ -527,10 +522,12 @@ impl StorageTableIterInner { B: AsRef<[u8]> + Send, { let raw_epoch = epoch.get_epoch(); - keyspace.state_store().try_wait_epoch(epoch).await?; - let iter = keyspace - .iter_with_range(raw_key_range, raw_epoch, read_options) - .await?; + let range = ( + raw_key_range.start_bound().map(|b| b.as_ref().to_vec()), + raw_key_range.end_bound().map(|b| b.as_ref().to_vec()), + ); + store.try_wait_epoch(epoch).await?; + let iter = store.iter(range, raw_epoch, read_options).await?; let iter = Self { iter, mapping, @@ -541,9 +538,12 @@ impl StorageTableIterInner { /// Yield a row with its primary key. #[try_stream(ok = (Vec, Row), error = StorageError)] - async fn into_stream(mut self) { - while let Some((raw_key, value)) = self - .iter + async fn into_stream(self) { + use crate::store::StateStoreIterExt; + + // No need for table id and epoch. + let mut iter = self.iter.map(|(k, v)| (k.user_key.table_key.0, v)); + while let Some((raw_key, value)) = iter .next() .verbose_stack_trace("storage_table_iter_next") .await? diff --git a/src/storage/src/table/mod.rs b/src/storage/src/table/mod.rs index db78af4d9cce2..8870bec8fd30b 100644 --- a/src/storage/src/table/mod.rs +++ b/src/storage/src/table/mod.rs @@ -108,7 +108,7 @@ pub trait TableIter: Send { } /// Get vnode value with `indices` on the given `row`. -fn compute_vnode(row: &Row, indices: &[usize], vnodes: &Bitmap) -> VirtualNode { +pub fn compute_vnode(row: &Row, indices: &[usize], vnodes: &Bitmap) -> VirtualNode { let vnode = if indices.is_empty() { DEFAULT_VNODE } else { diff --git a/src/storage/src/table/streaming_table/mod.rs b/src/storage/src/table/streaming_table/mod.rs index 74e6c0b2f9808..6e866329a3523 100644 --- a/src/storage/src/table/streaming_table/mod.rs +++ b/src/storage/src/table/streaming_table/mod.rs @@ -13,7 +13,3 @@ // limitations under the License. 
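`StateStoreIterExt::map` is a small adapter that rewrites items lazily as they are pulled, which is how the table iterators now strip the table id and epoch (`iter.map(|(k, v)| (k.user_key.table_key.0, v))`) without a dedicated wrapper type per call site. The same shape in plain synchronous Rust, as a self-contained sketch:

```rust
// Sketch: the StateStoreMapIter shape in plain Rust. The item is rewritten
// lazily on each `next`, with no intermediate collection.
struct MapIter<I, F> {
    iter: I,
    f: F,
}

impl<I, F, B> Iterator for MapIter<I, F>
where
    I: Iterator,
    F: FnMut(I::Item) -> B,
{
    type Item = B;

    fn next(&mut self) -> Option<B> {
        self.iter.next().map(&mut self.f)
    }
}

fn main() {
    // Stand-in for stripping key metadata: ((table, key), value) -> (key, value).
    let kv = vec![(("t1", b"k".to_vec()), 1u32)];
    let stripped = MapIter {
        iter: kv.into_iter(),
        f: |((_table, key), value)| (key, value),
    };
    assert_eq!(stripped.collect::<Vec<_>>(), vec![(b"k".to_vec(), 1u32)]);
}
```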
pub mod mem_table; -pub mod state_table; - -#[cfg(test)] -pub mod test_streaming_table; diff --git a/src/storage/src/write_batch.rs b/src/storage/src/write_batch.rs index 519c0c6014b8b..cae61d9d8f1f7 100644 --- a/src/storage/src/write_batch.rs +++ b/src/storage/src/write_batch.rs @@ -52,6 +52,30 @@ impl<'a, S: StateStoreWrite> WriteBatch<'a, S> { } } + /// Puts a value. + pub fn put(&mut self, key: impl AsRef<[u8]>, value: StorageValue) { + self.do_push(key.as_ref(), value); + } + + /// Deletes a value. + pub fn delete(&mut self, key: impl AsRef<[u8]>) { + self.do_push(key.as_ref(), StorageValue::new_delete()); + } + + /// Delete all keys starting with `prefix`. + pub fn delete_prefix(&mut self, prefix: impl AsRef<[u8]>) { + let start_key = Bytes::from(prefix.as_ref().to_owned()); + let end_key = Bytes::from(next_key(&start_key)); + self.delete_ranges.push((start_key, end_key)); + } + + /// Delete all keys in this range. + pub fn delete_range(&mut self, start: impl AsRef<[u8]>, end: impl AsRef<[u8]>) { + let start_key = Bytes::from(start.as_ref().to_owned()); + let end_key = Bytes::from(end.as_ref().to_owned()); + self.delete_ranges.push((start_key, end_key)); + } + /// Reserves capacity for at least `additional` more key-value pairs to be inserted in the /// batch. pub fn reserve(&mut self, additional: usize) { @@ -94,86 +118,35 @@ impl<'a, S: StateStoreWrite> WriteBatch<'a, S> { Ok(()) } - /// Creates a [`KeySpaceWriteBatch`] with the given `prefix`, which automatically prepends the - /// prefix when writing. - pub fn prefixify(self) -> KeySpaceWriteBatch<'a, S> { - KeySpaceWriteBatch { global: self } - } -} - -/// [`KeySpaceWriteBatch`] attaches a [`Keyspace`] to a mutable reference of global [`WriteBatch`], -/// which automatically prepends the keyspace prefix when writing. -pub struct KeySpaceWriteBatch<'a, S: StateStoreWrite> { - global: WriteBatch<'a, S>, -} - -impl<'a, S: StateStoreWrite> KeySpaceWriteBatch<'a, S> { /// Pushes `key` and `value` into the `WriteBatch`. - /// If `key` is valid, it will be prefixed with `keyspace` key. - /// Otherwise, only `keyspace` key is pushed. fn do_push(&mut self, key: &[u8], value: StorageValue) { let key = Bytes::from(key.to_vec()); - self.global.batch.push((key, value)); - } - - /// Puts a value, with the key prepended by the prefix of `keyspace`, like `[prefix | given - /// key]`. - pub fn put(&mut self, key: impl AsRef<[u8]>, value: StorageValue) { - self.do_push(key.as_ref(), value); - } - - /// Delete all keys with the key prepended by the prefix of `keyspace`, like `[prefix | given - /// key]`. - pub fn delete_prefix(&mut self, prefix: impl AsRef<[u8]>) { - let start_key = Bytes::from(prefix.as_ref().to_owned()); - let end_key = Bytes::from(next_key(&start_key)); - self.global.delete_ranges.push((start_key, end_key)); - } - - /// Delete all keys in this range prepended by the prefix of `keyspace` which is [prefix|start, - /// prefix|end). - pub fn delete_range(&mut self, start: impl AsRef<[u8]>, end: impl AsRef<[u8]>) { - let start_key = Bytes::from(start.as_ref().to_owned()); - let end_key = Bytes::from(end.as_ref().to_owned()); - self.global.delete_ranges.push((start_key, end_key)); - } - - /// Deletes a value, with the key prepended by the prefix of `keyspace`, like `[prefix | given - /// key]`. 
- pub fn delete(&mut self, key: impl AsRef<[u8]>) { - self.do_push(key.as_ref(), StorageValue::new_delete()); - } - - pub async fn ingest(self) -> StorageResult<()> { - self.global.ingest().await + self.batch.push((key, value)); } } #[cfg(test)] mod tests { use bytes::Bytes; - use risingwave_common::catalog::TableId; use crate::memory::MemoryStateStore; use crate::storage_value::StorageValue; - use crate::store::WriteOptions; - use crate::Keyspace; + use crate::store::{StateStoreWrite, WriteOptions}; #[tokio::test] async fn test_invalid_write_batch() { let state_store = MemoryStateStore::new(); - let key_space = Keyspace::table_root(state_store, TableId::from(0x118)); - - let mut key_space_batch = key_space.start_write_batch(WriteOptions { + let mut batch = state_store.start_write_batch(WriteOptions { epoch: 1, table_id: Default::default(), }); - key_space_batch.put(Bytes::from("aa"), StorageValue::new_put("444")); - key_space_batch.put(Bytes::from("cc"), StorageValue::new_put("444")); - key_space_batch.put(Bytes::from("bb"), StorageValue::new_put("444")); - key_space_batch.delete(Bytes::from("aa")); - key_space_batch + batch.put(Bytes::from("aa"), StorageValue::new_put("444")); + batch.put(Bytes::from("cc"), StorageValue::new_put("444")); + batch.put(Bytes::from("bb"), StorageValue::new_put("444")); + batch.delete(Bytes::from("aa")); + + batch .ingest() .await .expect_err("Should panic here because of duplicate key."); diff --git a/src/stream/src/common/mod.rs b/src/stream/src/common/mod.rs index a1ab068aa42a7..2855f4d58db2b 100644 --- a/src/stream/src/common/mod.rs +++ b/src/stream/src/common/mod.rs @@ -16,14 +16,15 @@ pub use builder::*; pub use column_mapping::*; pub use infallible_expr::*; use risingwave_common::row::Row; -use risingwave_storage::table::streaming_table::state_table::{RowStream, StateTable}; use risingwave_storage::StateStore; +use self::table::state_table::{RowStream, StateTable}; use crate::executor::StreamExecutorResult; mod builder; mod column_mapping; mod infallible_expr; +pub mod table; pub async fn iter_state_table<'a, S: StateStore>( state_table: &'a StateTable, diff --git a/src/stream/src/common/table/mod.rs b/src/stream/src/common/table/mod.rs new file mode 100644 index 0000000000000..6ed3cad604052 --- /dev/null +++ b/src/stream/src/common/table/mod.rs @@ -0,0 +1,20 @@ +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
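`delete_prefix` converts a prefix into the half-open range `[prefix, next_key(prefix))`. A self-contained sketch of a `next_key` with the usual increment-and-carry semantics (an assumption about the helper in `risingwave_hummock_sdk::key`, not a copy of it):

```rust
// Sketch: the smallest key strictly greater than every key carrying `prefix`,
// computed by incrementing the last non-0xff byte.
fn next_key(prefix: &[u8]) -> Vec<u8> {
    let mut out = prefix.to_vec();
    while let Some(last) = out.last_mut() {
        if *last < 0xff {
            *last += 1;
            return out;
        }
        out.pop(); // carry past a 0xff byte
    }
    Vec::new() // no upper bound: every key matched the prefix
}

fn main() {
    assert_eq!(next_key(b"abc"), b"abd".to_vec());
    assert_eq!(next_key(&[0x61, 0xff]), vec![0x62]);
    // So delete_prefix(b"abc") deletes the half-open range [b"abc", b"abd").
    assert!(next_key(&[0xff, 0xff]).is_empty());
}
```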
+ +pub mod state_table; + +#[cfg(test)] +pub mod test_batch_table; +#[cfg(test)] +pub mod test_streaming_table; diff --git a/src/storage/src/table/streaming_table/state_table.rs b/src/stream/src/common/table/state_table.rs similarity index 92% rename from src/storage/src/table/streaming_table/state_table.rs rename to src/stream/src/common/table/state_table.rs index 34522796794db..23becedcbd158 100644 --- a/src/storage/src/table/streaming_table/state_table.rs +++ b/src/stream/src/common/table/state_table.rs @@ -36,29 +36,34 @@ use risingwave_hummock_sdk::key::{ end_bound_of_prefix, prefixed_range, range_of_prefix, start_bound_of_excluded_prefix, }; use risingwave_pb::catalog::Table; -use tracing::trace; - -use super::mem_table::{MemTable, MemTableIter, RowOp}; -use crate::error::{StorageError, StorageResult}; -use crate::keyspace::ExtractTableKeyIterator; -use crate::row_serde::row_serde_util::{ +use risingwave_storage::row_serde::row_serde_util::{ deserialize_pk_with_vnode, serialize_pk, serialize_pk_with_vnode, }; -use crate::storage_value::StorageValue; -use crate::store::{LocalStateStore, ReadOptions, WriteOptions}; -use crate::table::streaming_table::mem_table::MemTableError; -use crate::table::{compute_chunk_vnode, compute_vnode, Distribution}; -use crate::{Keyspace, StateStore, StateStoreIter}; +use risingwave_storage::storage_value::StorageValue; +use risingwave_storage::store::{ + LocalStateStore, ReadOptions, StateStoreRead, StateStoreWrite, WriteOptions, +}; +use risingwave_storage::table::streaming_table::mem_table::{ + MemTable, MemTableError, MemTableIter, RowOp, +}; +use risingwave_storage::table::{compute_chunk_vnode, compute_vnode, Distribution}; +use risingwave_storage::{StateStore, StateStoreIter}; +use tracing::trace; + +use crate::executor::{StreamExecutorError, StreamExecutorResult}; /// `StateTable` is the interface accessing relational data in KV(`StateStore`) with /// row-based encoding. #[derive(Clone)] pub struct StateTable { - /// buffer row operations. + /// Id for this table. + table_id: TableId, + + /// Buffer row operations. mem_table: MemTable, - /// write into state store. - keyspace: Keyspace, + /// State store backend. + local_store: S::Local, /// Used for serializing and deserializing the primary key. pk_serde: OrderedRowSerde, @@ -93,13 +98,13 @@ pub struct StateTable { /// If true, sanity check is disabled on this table. disable_sanity_check: bool, - /// an optional column index which is the vnode of each row computed by the table's consistent - /// hash distribution + /// An optional column index which is the vnode of each row computed by the table's consistent + /// hash distribution. vnode_col_idx_in_pk: Option, value_indices: Option>, - /// the epoch flush to the state store last time + /// The epoch flush to the state store last time. 
epoch: Option, } @@ -154,7 +159,6 @@ impl StateTable { .collect_vec(); let local_state_store = store.new_local(table_id).await; - let keyspace = Keyspace::table_root(local_state_store, table_id); let pk_data_types = pk_indices .iter() @@ -201,8 +205,9 @@ impl StateTable { false => Some(input_value_indices), }; Self { + table_id, mem_table: MemTable::new(), - keyspace, + local_store: local_state_store, pk_serde, row_deserializer: RowDeserializer::new(data_types), pk_indices: pk_indices.to_vec(), @@ -273,7 +278,6 @@ impl StateTable { value_indices: Option>, ) -> Self { let local_state_store = store.new_local(table_id).await; - let keyspace = Keyspace::table_root(local_state_store, table_id); let pk_data_types = pk_indices .iter() @@ -303,8 +307,9 @@ impl StateTable { }) .collect_vec(); Self { + table_id, mem_table: MemTable::new(), - keyspace, + local_store: local_state_store, pk_serde, row_deserializer: RowDeserializer::new(data_types), pk_indices, @@ -325,7 +330,7 @@ impl StateTable { } fn table_id(&self) -> TableId { - self.keyspace.table_id() + self.table_id } /// get the newest epoch of the state store and panic if the `init_epoch()` has never be called @@ -409,7 +414,7 @@ const ENABLE_SANITY_CHECK: bool = cfg!(debug_assertions); // point get impl StateTable { /// Get a single row from state table. - pub async fn get_row<'a>(&'a self, pk: &'a Row) -> StorageResult> { + pub async fn get_row<'a>(&'a self, pk: &'a Row) -> StreamExecutorResult> { let compacted_row: Option = self.get_compacted_row(pk).await?; match compacted_row { Some(compacted_row) => { @@ -426,7 +431,7 @@ impl StateTable { pub async fn get_compacted_row<'a>( &'a self, pk: &'a Row, - ) -> StorageResult> { + ) -> StreamExecutorResult> { let serialized_pk = serialize_pk_with_vnode(pk, &self.pk_serde, self.compute_prefix_vnode(pk)); let mem_table_res = self.mem_table.get_row_op(&serialized_pk); @@ -451,11 +456,11 @@ impl StateTable { prefix_hint: None, check_bloom_filter: self.dist_key_indices == key_indices, retention_seconds: self.table_option.retention_seconds, - table_id: self.keyspace.table_id(), + table_id: self.table_id, ignore_range_tombstone: false, }; if let Some(storage_row_bytes) = self - .keyspace + .local_store .get(&serialized_pk, self.epoch(), read_options) .await? { @@ -615,7 +620,7 @@ impl StateTable { self.epoch = Some(new_epoch); } - pub async fn commit(&mut self, new_epoch: EpochPair) -> StorageResult<()> { + pub async fn commit(&mut self, new_epoch: EpochPair) -> StreamExecutorResult<()> { assert_eq!(self.epoch(), new_epoch.prev); let mem_table = std::mem::take(&mut self.mem_table).into_parts(); self.batch_write_rows(mem_table, new_epoch.prev).await?; @@ -624,7 +629,7 @@ impl StateTable { } /// used for unit test, and do not need to assert epoch. 
- pub async fn commit_for_test(&mut self, new_epoch: EpochPair) -> StorageResult<()> { + pub async fn commit_for_test(&mut self, new_epoch: EpochPair) -> StreamExecutorResult<()> { let mem_table = std::mem::take(&mut self.mem_table).into_parts(); self.batch_write_rows(mem_table, new_epoch.prev).await?; self.update_epoch(new_epoch); @@ -645,8 +650,8 @@ impl StateTable { &mut self, buffer: BTreeMap, RowOp>, epoch: u64, - ) -> StorageResult<()> { - let mut write_batch = self.keyspace.start_write_batch(WriteOptions { + ) -> StreamExecutorResult<()> { + let mut write_batch = self.local_store.start_write_batch(WriteOptions { epoch, table_id: self.table_id(), }); @@ -686,15 +691,15 @@ impl StateTable { key: &[u8], value: &[u8], epoch: u64, - ) -> StorageResult<()> { + ) -> StreamExecutorResult<()> { let read_options = ReadOptions { prefix_hint: None, check_bloom_filter: false, retention_seconds: self.table_option.retention_seconds, - table_id: self.keyspace.table_id(), + table_id: self.table_id, ignore_range_tombstone: false, }; - let stored_value = self.keyspace.get(key, epoch, read_options).await?; + let stored_value = self.local_store.get(key, epoch, read_options).await?; if let Some(stored_value) = stored_value { let (vnode, key) = deserialize_pk_with_vnode(key, &self.pk_serde).unwrap(); @@ -718,15 +723,15 @@ impl StateTable { key: &[u8], old_row: &[u8], epoch: u64, - ) -> StorageResult<()> { + ) -> StreamExecutorResult<()> { let read_options = ReadOptions { prefix_hint: None, check_bloom_filter: false, retention_seconds: self.table_option.retention_seconds, - table_id: self.keyspace.table_id(), + table_id: self.table_id, ignore_range_tombstone: false, }; - let stored_value = self.keyspace.get(key, epoch, read_options).await?; + let stored_value = self.local_store.get(key, epoch, read_options).await?; if stored_value.is_none() || stored_value.as_ref().unwrap() != old_row { let (vnode, key) = deserialize_pk_with_vnode(key, &self.pk_serde).unwrap(); @@ -752,15 +757,15 @@ impl StateTable { old_row: &[u8], new_row: &[u8], epoch: u64, - ) -> StorageResult<()> { + ) -> StreamExecutorResult<()> { let read_options = ReadOptions { prefix_hint: None, ignore_range_tombstone: false, check_bloom_filter: false, retention_seconds: self.table_option.retention_seconds, - table_id: self.keyspace.table_id(), + table_id: self.table_id, }; - let stored_value = self.keyspace.get(key, epoch, read_options).await?; + let stored_value = self.local_store.get(key, epoch, read_options).await?; if stored_value.is_none() || stored_value.as_ref().unwrap() != old_row { let (vnode, key) = deserialize_pk_with_vnode(key, &self.pk_serde).unwrap(); @@ -783,14 +788,14 @@ impl StateTable { } } -fn get_second(arg: StorageResult<(T, U)>) -> StorageResult { +fn get_second(arg: StreamExecutorResult<(T, U)>) -> StreamExecutorResult { arg.map(|x| x.1) } // Iterator functions impl StateTable { /// This function scans rows from the relational table. - pub async fn iter(&self) -> StorageResult> { + pub async fn iter(&self) -> StreamExecutorResult> { self.iter_with_pk_prefix(Row::empty()).await } @@ -798,7 +803,7 @@ impl StateTable { pub async fn iter_with_pk_prefix<'a>( &'a self, pk_prefix: &'a Row, - ) -> StorageResult> { + ) -> StreamExecutorResult> { let (mem_table_iter, storage_iter_stream) = self .iter_with_pk_prefix_inner(pk_prefix, self.epoch()) .await?; @@ -819,7 +824,7 @@ impl StateTable { // For now, we require this parameter, and will panic. In the future, when `None`, we can // iterate over each vnode that the `StateTable` owns. 
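`commit` encodes a strict epoch hand-off: buffered writes must be flushed at `new_epoch.prev`, and only then does the table advance to `curr`. A simplified sketch of that invariant (types reduced to plain integers; the real `EpochPair` lives in `risingwave_common` and the flush goes through a state-store write batch):

```rust
// Simplified sketch of the StateTable commit invariant.
struct EpochPair {
    prev: u64,
    curr: u64,
}

struct Table {
    epoch: u64,
    buffer: Vec<(Vec<u8>, Vec<u8>)>,
}

impl Table {
    fn commit(&mut self, new_epoch: EpochPair) {
        // Writes buffered during `prev` must be flushed at `prev`, never `curr`.
        assert_eq!(self.epoch, new_epoch.prev);
        let flushed = std::mem::take(&mut self.buffer);
        println!("flushing {} rows at epoch {}", flushed.len(), new_epoch.prev);
        self.epoch = new_epoch.curr;
    }
}

fn main() {
    let mut t = Table {
        epoch: 100,
        buffer: vec![(b"k".to_vec(), b"v".to_vec())],
    };
    t.commit(EpochPair { prev: 100, curr: 101 });
    assert_eq!(t.epoch, 101);
}
```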
vnode: u8, - ) -> StorageResult<(MemTableIter<'_>, StorageIterInner)> { + ) -> StreamExecutorResult<(MemTableIter<'_>, StorageIterInner)> { let memcomparable_range = prefix_range_to_memcomparable(&self.pk_serde, pk_range); let memcomparable_range_with_vnode = prefixed_range(memcomparable_range, &[vnode]); @@ -840,7 +845,7 @@ impl StateTable { // For now, we require this parameter, and will panic. In the future, when `None`, we can // iterate over each vnode that the `StateTable` owns. vnode: u8, - ) -> StorageResult> { + ) -> StreamExecutorResult> { let (mem_table_iter, storage_iter_stream) = self.iter_with_pk_range_inner(pk_range, vnode).await?; let storage_iter = storage_iter_stream.into_stream(); @@ -858,7 +863,7 @@ impl StateTable { // For now, we require this parameter, and will panic. In the future, when `None`, we can // iterate over each vnode that the `StateTable` owns. vnode: u8, - ) -> StorageResult> { + ) -> StreamExecutorResult> { let (mem_table_iter, storage_iter_stream) = self.iter_with_pk_range_inner(pk_range, vnode).await?; let storage_iter = storage_iter_stream.into_stream(); @@ -872,7 +877,7 @@ impl StateTable { pub async fn iter_prev_epoch_with_pk_prefix<'a>( &'a self, pk_prefix: &'a Row, - ) -> StorageResult> { + ) -> StreamExecutorResult> { let (mem_table_iter, storage_iter_stream) = self .iter_with_pk_prefix_inner(pk_prefix, self.prev_epoch()) .await?; @@ -890,7 +895,7 @@ impl StateTable { pub async fn iter_key_and_val<'a>( &'a self, pk_prefix: &'a Row, - ) -> StorageResult> { + ) -> StreamExecutorResult> { let (mem_table_iter, storage_iter_stream) = self .iter_with_pk_prefix_inner(pk_prefix, self.epoch()) .await?; @@ -906,7 +911,7 @@ impl StateTable { &'a self, pk_prefix: &'a Row, epoch: u64, - ) -> StorageResult<(MemTableIter<'_>, StorageIterInner)> { + ) -> StreamExecutorResult<(MemTableIter<'_>, StorageIterInner)> { let prefix_serializer = self.pk_serde.prefix(pk_prefix.size()); let encoded_prefix = serialize_pk(pk_prefix, &prefix_serializer); let encoded_key_range = range_of_prefix(&encoded_prefix); @@ -943,7 +948,7 @@ impl StateTable { key_range: (Bound>, Bound>), prefix_hint: Option>, epoch: u64, - ) -> StorageResult<(MemTableIter<'_>, StorageIterInner)> { + ) -> StreamExecutorResult<(MemTableIter<'_>, StorageIterInner)> { // Mem table iterator. let mem_table_iter = self.mem_table.iter(key_range.clone()); @@ -953,12 +958,12 @@ impl StateTable { check_bloom_filter, ignore_range_tombstone: false, retention_seconds: self.table_option.retention_seconds, - table_id: self.keyspace.table_id(), + table_id: self.table_id, }; // Storage iterator. let storage_iter = StorageIterInner::::new( - &self.keyspace, + &self.local_store, epoch, key_range, read_options, @@ -974,9 +979,9 @@ impl StateTable { } } -pub type RowStream<'a, S: StateStore> = impl Stream>>; +pub type RowStream<'a, S: StateStore> = impl Stream>>; pub type RowStreamWithPk<'a, S: StateStore> = - impl Stream>, Cow<'a, Row>)>>; + impl Stream>, Cow<'a, Row>)>>; /// `StateTableRowIter` is able to read the just written data (uncommitted data). /// It will merge the result of `mem_table_iter` and `state_store_iter`. 
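`StateTableRowIter` merges two sorted streams, with the uncommitted mem-table entry shadowing the committed storage entry on a key collision. A simplified synchronous sketch (mem-table operations reduced to plain upserts; the real iterator also handles `RowOp::Delete`/`Update` and yields `Cow`s):

```rust
use std::cmp::Ordering;
use std::iter::Peekable;

// Sketch: merge committed (storage) and uncommitted (mem-table) sorted
// streams; on a key collision the mem-table entry wins.
fn merge<'a>(
    mut store: Peekable<impl Iterator<Item = (&'a str, &'a str)>>,
    mut mem: Peekable<impl Iterator<Item = (&'a str, &'a str)>>,
) -> Vec<(&'a str, &'a str)> {
    let mut out = Vec::new();
    loop {
        match (store.peek(), mem.peek()) {
            (Some(&(sk, _)), Some(&(mk, _))) => match sk.cmp(mk) {
                Ordering::Less => out.push(store.next().unwrap()),
                Ordering::Greater => out.push(mem.next().unwrap()),
                Ordering::Equal => {
                    store.next(); // shadowed by the uncommitted write
                    out.push(mem.next().unwrap());
                }
            },
            (Some(_), None) => out.push(store.next().unwrap()),
            (None, Some(_)) => out.push(mem.next().unwrap()),
            (None, None) => break,
        }
    }
    out
}

fn main() {
    let store = [("a", "old"), ("c", "keep")].into_iter().peekable();
    let mem = [("a", "new"), ("b", "ins")].into_iter().peekable();
    assert_eq!(merge(store, mem), vec![("a", "new"), ("b", "ins"), ("c", "keep")]);
}
```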
@@ -990,7 +995,7 @@ struct StateTableRowIter<'a, M, C> { impl<'a, M, C> StateTableRowIter<'a, M, C> where M: Iterator<Item = (&'a Vec<u8>, &'a RowOp)>, - C: Stream<Item = StorageResult<(Vec<u8>, Row)>>, + C: Stream<Item = StreamExecutorResult<(Vec<u8>, Row)>>, { fn new(mem_table_iter: M, storage_iter: C, deserializer: RowDeserializer) -> Self { Self { @@ -1004,7 +1009,7 @@ where /// This function scans kv pairs from the `shared_storage` and /// memory (`mem_table`) with optional pk_bounds. If a record exists in both `shared_storage` and /// `mem_table`, the result from `mem_table` is returned according to the operation (RowOp) on it. - #[try_stream(ok = (Cow<'a, Vec<u8>>, Cow<'a, Row>), error = StorageError)] + #[try_stream(ok = (Cow<'a, Vec<u8>>, Cow<'a, Row>), error = StreamExecutorError)] async fn into_stream(self) { let storage_iter = self.storage_iter.peekable(); pin_mut!(storage_iter); @@ -1092,37 +1097,38 @@ where struct StorageIterInner<S: StateStore> { /// An iterator that returns raw bytes from storage. - iter: ExtractTableKeyIterator, + iter: S::Iter, deserializer: RowDeserializer, } impl<S: StateStore> StorageIterInner<S> { async fn new( - keyspace: &Keyspace<S>, + store: &S, epoch: u64, raw_key_range: (Bound<Vec<u8>>, Bound<Vec<u8>>), read_options: ReadOptions, deserializer: RowDeserializer, - ) -> StorageResult<Self> { - let iter = keyspace - .iter_with_range(raw_key_range, epoch, read_options) - .await?; + ) -> StreamExecutorResult<Self> { + let iter = store.iter(raw_key_range, epoch, read_options).await?; let iter = Self { iter, deserializer }; Ok(iter) } /// Yield a row with its primary key. - #[try_stream(ok = (Vec<u8>, Row), error = StorageError)] - async fn into_stream(mut self) { - while let Some((key, value)) = self - .iter + #[try_stream(ok = (Vec<u8>, Row), error = StreamExecutorError)] + async fn into_stream(self) { + use risingwave_storage::store::StateStoreIterExt; + + // No need for table id and epoch. + let mut iter = self.iter.map(|(k, v)| (k.user_key.table_key.0, v)); + while let Some((key, value)) = iter .next() .verbose_stack_trace("storage_table_iter_next") .await? { let row = self.deserializer.deserialize(value.as_ref())?; - yield (key.to_vec(), row); + yield (key, row); } } } diff --git a/src/storage/src/table/batch_table/test_batch_table.rs b/src/stream/src/common/table/test_batch_table.rs similarity index 96% rename from src/storage/src/table/batch_table/test_batch_table.rs rename to src/stream/src/common/table/test_batch_table.rs index 11162c9e9dc79..395d106321480 100644 --- a/src/storage/src/table/batch_table/test_batch_table.rs +++ b/src/stream/src/common/table/test_batch_table.rs @@ -19,22 +19,19 @@ use risingwave_common::types::DataType; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::sort_util::OrderType; use risingwave_hummock_sdk::HummockReadEpoch; +use risingwave_storage::memory::MemoryStateStore; +use risingwave_storage::table::batch_table::storage_table::StorageTable; +use risingwave_storage::table::{Distribution, TableIter}; -use crate::error::StorageResult; -use crate::memory::MemoryStateStore; -use crate::table::batch_table::storage_table::StorageTable; -use crate::table::streaming_table::state_table::StateTable; -use crate::table::{Distribution, TableIter}; +use crate::common::table::state_table::StateTable; /// There are three structs in the relational layer: StateTable, MemTable and CellBasedTable. /// `StateTable` provides read/write interfaces to the upper layer streaming operator. /// `MemTable` is an in-memory buffer used to cache operator operations. -/// `CellBasedTable` provides the transform from the kv encoding (hummock) to cell_based row -/// encoding. 
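(Every `StorageResult` in these hunks becomes `StreamExecutorResult`, while the bodies keep applying `?` to storage calls; that only compiles if the executor error can be built `From` the storage error. Here is a minimal sketch of the pattern; the type definitions are illustrative stand-ins, not the crates' real ones.)

```rust
// Illustrative stand-ins; the real StreamExecutorError and StorageError live
// in the risingwave_stream and risingwave_storage crates respectively.
#[derive(Debug)]
struct StorageError(String);

#[derive(Debug)]
enum StreamExecutorError {
    Storage(StorageError),
}

impl From<StorageError> for StreamExecutorError {
    fn from(e: StorageError) -> Self {
        StreamExecutorError::Storage(e)
    }
}

type StorageResult<T> = Result<T, StorageError>;
type StreamExecutorResult<T> = Result<T, StreamExecutorError>;

// A storage-layer read that still returns the storage error type...
fn storage_get(key: &[u8]) -> StorageResult<Option<Vec<u8>>> {
    Ok(Some(key.to_vec())) // stand-in for a real state-store lookup
}

// ...called from executor-layer code: `?` applies the From impl, so the
// return type can change without rewriting the call sites in the body.
fn executor_get(key: &[u8]) -> StreamExecutorResult<Option<Vec<u8>>> {
    Ok(storage_get(key)?)
}

fn main() {
    assert!(executor_get(b"pk").unwrap().is_some());
}
```

The same conversion is what lets the `#[try_stream(error = StreamExecutorError)]` bodies keep `?`-ing storage iterators.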
// test storage table #[tokio::test] -async fn test_storage_table_get_row() -> StorageResult<()> { +async fn test_storage_table_get_row() { let state_store = MemoryStateStore::new(); let column_ids = vec![ColumnId::from(0), ColumnId::from(1), ColumnId::from(2)]; let column_descs = vec![ @@ -113,8 +110,6 @@ async fn test_storage_table_get_row() -> StorageResult<()> { .await .unwrap(); assert_eq!(get_no_exist_res, None); - - Ok(()) } #[tokio::test] diff --git a/src/storage/src/table/streaming_table/test_streaming_table.rs b/src/stream/src/common/table/test_streaming_table.rs similarity index 99% rename from src/storage/src/table/streaming_table/test_streaming_table.rs rename to src/stream/src/common/table/test_streaming_table.rs index a92bb7a75afbb..a0c20d612be92 100644 --- a/src/storage/src/table/streaming_table/test_streaming_table.rs +++ b/src/stream/src/common/table/test_streaming_table.rs @@ -20,15 +20,14 @@ use risingwave_common::row::Row; use risingwave_common::types::DataType; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::sort_util::OrderType; +use risingwave_storage::memory::MemoryStateStore; +use risingwave_storage::table::DEFAULT_VNODE; -use crate::error::StorageResult; -use crate::memory::MemoryStateStore; -use crate::table::streaming_table::state_table::StateTable; -use crate::table::DEFAULT_VNODE; +use crate::common::table::state_table::StateTable; // test state table #[tokio::test] -async fn test_state_table() -> StorageResult<()> { +async fn test_state_table() { let state_store = MemoryStateStore::new(); let column_descs = vec![ ColumnDesc::unnamed(ColumnId::from(0), DataType::Int32), @@ -148,12 +147,10 @@ async fn test_state_table() -> StorageResult<()> { .await .unwrap(); assert_eq!(row4_delete, None); - - Ok(()) } #[tokio::test] -async fn test_state_table_update_insert() -> StorageResult<()> { +async fn test_state_table_update_insert() { let state_store = MemoryStateStore::new(); let column_descs = vec![ ColumnDesc::unnamed(ColumnId::from(0), DataType::Int32), @@ -319,7 +316,6 @@ async fn test_state_table_update_insert() -> StorageResult<()> { .await .unwrap(); assert_eq!(row1_commit, None); - Ok(()) } #[tokio::test] diff --git a/src/stream/src/executor/aggregation/agg_group.rs b/src/stream/src/executor/aggregation/agg_group.rs index 62b376e0d148a..74e7d0eaf5680 100644 --- a/src/stream/src/executor/aggregation/agg_group.rs +++ b/src/stream/src/executor/aggregation/agg_group.rs @@ -22,11 +22,11 @@ use risingwave_common::catalog::Schema; use risingwave_common::must_match; use risingwave_common::row::Row; use risingwave_common::types::Datum; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::agg_state::{AggState, AggStateStorage}; use super::AggCall; +use crate::common::table::state_table::StateTable; use crate::executor::error::StreamExecutorResult; use crate::executor::PkIndices; diff --git a/src/stream/src/executor/aggregation/agg_impl/approx_distinct_append.rs b/src/stream/src/executor/aggregation/agg_impl/approx_distinct_append.rs index 50a2d2d502438..3920831cf8a2d 100644 --- a/src/stream/src/executor/aggregation/agg_impl/approx_distinct_append.rs +++ b/src/stream/src/executor/aggregation/agg_impl/approx_distinct_append.rs @@ -19,13 +19,13 @@ use risingwave_common::array::*; use risingwave_common::bail; use risingwave_common::buffer::Bitmap; use risingwave_common::types::{Datum, ScalarImpl}; -use risingwave_storage::table::streaming_table::state_table::StateTable; use 
risingwave_storage::StateStore; use super::approx_distinct_utils::{ deserialize_buckets_from_list, serialize_buckets, RegisterBucket, StreamingApproxCountDistinct, }; use crate::common::iter_state_table; +use crate::common::table::state_table::StateTable; use crate::executor::aggregation::table::TableStateImpl; use crate::executor::StreamExecutorResult; diff --git a/src/stream/src/executor/aggregation/agg_state.rs b/src/stream/src/executor/aggregation/agg_state.rs index 28e0ad3606fec..f9bde96674092 100644 --- a/src/stream/src/executor/aggregation/agg_state.rs +++ b/src/stream/src/executor/aggregation/agg_state.rs @@ -19,13 +19,13 @@ use risingwave_common::catalog::Schema; use risingwave_common::must_match; use risingwave_common::row::Row; use risingwave_common::types::Datum; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::minput::MaterializedInputState; use super::table::TableState; use super::value::ValueState; use super::AggCall; +use crate::common::table::state_table::StateTable; use crate::common::StateTableColumnMapping; use crate::executor::{PkIndices, StreamExecutorResult}; diff --git a/src/stream/src/executor/aggregation/minput.rs b/src/stream/src/executor/aggregation/minput.rs index 8574e434aa3ec..ad5d1c3da6653 100644 --- a/src/stream/src/executor/aggregation/minput.rs +++ b/src/stream/src/executor/aggregation/minput.rs @@ -26,7 +26,6 @@ use risingwave_common::types::{Datum, DatumRef, ScalarImpl}; use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderType; use risingwave_expr::expr::AggKind; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use smallvec::SmallVec; @@ -35,6 +34,7 @@ use super::state_cache::extreme::ExtremeAgg; use super::state_cache::string_agg::StringAgg; use super::state_cache::{CacheKey, GenericStateCache, StateCache}; use super::AggCall; +use crate::common::table::state_table::StateTable; use crate::common::{iter_state_table, StateTableColumnMapping}; use crate::executor::{PkIndices, StreamExecutorResult}; @@ -294,10 +294,10 @@ mod tests { use risingwave_common::util::sort_util::{OrderPair, OrderType}; use risingwave_expr::expr::AggKind; use risingwave_storage::memory::MemoryStateStore; - use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::MaterializedInputState; + use crate::common::table::state_table::StateTable; use crate::common::StateTableColumnMapping; use crate::executor::aggregation::{AggArgs, AggCall}; use crate::executor::StreamExecutorResult; diff --git a/src/stream/src/executor/aggregation/mod.rs b/src/stream/src/executor/aggregation/mod.rs index 0341dd52ad6a1..6b69905ee1b2f 100644 --- a/src/stream/src/executor/aggregation/mod.rs +++ b/src/stream/src/executor/aggregation/mod.rs @@ -22,10 +22,10 @@ use risingwave_common::bail; use risingwave_common::buffer::Bitmap; use risingwave_common::catalog::{Field, Schema}; use risingwave_expr::expr::AggKind; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::ActorContextRef; +use crate::common::table::state_table::StateTable; use crate::common::InfallibleExpression; use crate::executor::error::StreamExecutorResult; use crate::executor::Executor; diff --git a/src/stream/src/executor/aggregation/table.rs b/src/stream/src/executor/aggregation/table.rs index 08b0a847b7eb5..81a4399c33e1f 100644 --- 
a/src/stream/src/executor/aggregation/table.rs +++ b/src/stream/src/executor/aggregation/table.rs @@ -18,11 +18,11 @@ use risingwave_common::array::*; use risingwave_common::buffer::Bitmap; use risingwave_common::types::Datum; use risingwave_expr::expr::AggKind; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::agg_impl::AppendOnlyStreamingApproxCountDistinct; use super::AggCall; +use crate::common::table::state_table::StateTable; use crate::executor::StreamExecutorResult; #[async_trait::async_trait] diff --git a/src/stream/src/executor/batch_query.rs b/src/stream/src/executor/batch_query.rs index 41c2c2217232f..f9e246cc96b59 100644 --- a/src/stream/src/executor/batch_query.rs +++ b/src/stream/src/executor/batch_query.rs @@ -40,12 +40,10 @@ impl<S> BatchQueryExecutor<S> where S: StateStore, { - const DEFAULT_BATCH_SIZE: usize = 100; - - pub fn new(table: StorageTable<S>, batch_size: Option<usize>, info: ExecutorInfo) -> Self { + pub fn new(table: StorageTable<S>, batch_size: usize, info: ExecutorInfo) -> Self { Self { table, - batch_size: batch_size.unwrap_or(Self::DEFAULT_BATCH_SIZE), + batch_size, info, } } @@ -117,7 +115,7 @@ mod test { identity: "BatchQuery".to_owned(), }; - let executor = Box::new(BatchQueryExecutor::new(table, Some(test_batch_size), info)); + let executor = Box::new(BatchQueryExecutor::new(table, test_batch_size, info)); let stream = executor.execute_with_epoch(u64::MAX); let mut batch_cnt = 0; diff --git a/src/stream/src/executor/dynamic_filter.rs b/src/stream/src/executor/dynamic_filter.rs index 355f3f7beead6..38030d07ff804 100644 --- a/src/stream/src/executor/dynamic_filter.rs +++ b/src/stream/src/executor/dynamic_filter.rs @@ -28,7 +28,6 @@ use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; use risingwave_expr::expr::{BoxedExpression, InputRefExpression, LiteralExpression}; use risingwave_pb::expr::expr_node::Type as ExprNodeType; use risingwave_pb::expr::expr_node::Type::*; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::barrier_align::*; @@ -38,6 +37,7 @@ use super::monitor::StreamingMetrics; use super::{ ActorContextRef, BoxedExecutor, BoxedMessageStream, Executor, Message, PkIndices, PkIndicesRef, }; +use crate::common::table::state_table::StateTable; use crate::common::{InfallibleExpression, StreamChunkBuilder}; use crate::executor::expect_first_barrier_from_aligned_stream; diff --git a/src/stream/src/executor/global_simple_agg.rs b/src/stream/src/executor/global_simple_agg.rs index 2018e235a1e4a..a37c3e93a0f17 100644 --- a/src/stream/src/executor/global_simple_agg.rs +++ b/src/stream/src/executor/global_simple_agg.rs @@ -17,13 +17,13 @@ use futures_async_stream::try_stream; use risingwave_common::array::StreamChunk; use risingwave_common::catalog::Schema; use risingwave_common::row::Row; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::aggregation::{ agg_call_filter_res, iter_table_storage, AggChangesInfo, AggStateStorage, }; use super::*; +use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::aggregation::{generate_agg_schema, AggCall, AggGroup}; use crate::executor::error::StreamExecutorError; diff --git a/src/stream/src/executor/hash_agg.rs b/src/stream/src/executor/hash_agg.rs index 3bf444b9c925f..e29a42b81c096 100644 --- a/src/stream/src/executor/hash_agg.rs +++ 
b/src/stream/src/executor/hash_agg.rs @@ -28,12 +28,12 @@ use risingwave_common::hash::{HashCode, HashKey, PrecomputedBuildHasher}; use risingwave_common::row::Row; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::hash_util::Crc32FastBuilder; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::aggregation::{agg_call_filter_res, iter_table_storage, AggStateStorage}; use super::{expect_first_barrier, ActorContextRef, Executor, PkIndicesRef, StreamExecutorResult}; use crate::cache::{cache_may_stale, EvictableHashMap, ExecutorCache, LruManagerRef}; +use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::aggregation::{generate_agg_schema, AggCall, AggChangesInfo, AggGroup}; use crate::executor::error::StreamExecutorError; diff --git a/src/stream/src/executor/hash_join.rs b/src/stream/src/executor/hash_join.rs index b139698060ba8..fc1bbc3754753 100644 --- a/src/stream/src/executor/hash_join.rs +++ b/src/stream/src/executor/hash_join.rs @@ -28,7 +28,6 @@ use risingwave_common::row::Row; use risingwave_common::types::{DataType, ToOwnedDatum}; use risingwave_common::util::epoch::EpochPair; use risingwave_expr::expr::BoxedExpression; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use self::JoinType::{FullOuter, LeftOuter, LeftSemi, RightAnti, RightOuter, RightSemi}; @@ -40,6 +39,7 @@ use super::{ ActorContextRef, BoxedExecutor, BoxedMessageStream, Executor, Message, PkIndices, PkIndicesRef, }; use crate::cache::LruManagerRef; +use crate::common::table::state_table::StateTable; use crate::common::{InfallibleExpression, StreamChunkBuilder}; use crate::executor::expect_first_barrier_from_aligned_stream; use crate::executor::JoinType::LeftAnti; @@ -947,6 +947,7 @@ mod tests { use risingwave_storage::memory::MemoryStateStore; use super::*; + use crate::common::table::state_table::StateTable; use crate::executor::test_utils::{MessageSender, MockSource}; use crate::executor::{ActorContext, Barrier, EpochPair, Message}; diff --git a/src/stream/src/executor/lookup/impl_.rs b/src/stream/src/executor/lookup/impl_.rs index d54c21c6a879b..79385ddd3227b 100644 --- a/src/stream/src/executor/lookup/impl_.rs +++ b/src/stream/src/executor/lookup/impl_.rs @@ -20,11 +20,11 @@ use risingwave_common::catalog::{ColumnDesc, Schema}; use risingwave_common::row::Row; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::sides::{stream_lookup_arrange_prev_epoch, stream_lookup_arrange_this_epoch}; use crate::cache::LruManagerRef; +use crate::common::table::state_table::StateTable; use crate::common::StreamChunkBuilder; use crate::executor::error::{StreamExecutorError, StreamExecutorResult}; use crate::executor::lookup::cache::LookupCache; diff --git a/src/stream/src/executor/lookup/sides.rs b/src/stream/src/executor/lookup/sides.rs index 8cb6f1737a9f1..e86c7a1a37ccd 100644 --- a/src/stream/src/executor/lookup/sides.rs +++ b/src/stream/src/executor/lookup/sides.rs @@ -20,9 +20,9 @@ use risingwave_common::array::StreamChunk; use risingwave_common::catalog::ColumnDesc; use risingwave_common::types::DataType; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use 
risingwave_storage::StateStore; +use crate::common::table::state_table::StateTable; use crate::executor::error::StreamExecutorError; use crate::executor::{Barrier, Executor, Message, MessageStream}; diff --git a/src/stream/src/executor/lookup/tests.rs b/src/stream/src/executor/lookup/tests.rs index 84977487271b3..56a502d336f36 100644 --- a/src/stream/src/executor/lookup/tests.rs +++ b/src/stream/src/executor/lookup/tests.rs @@ -21,9 +21,9 @@ use risingwave_common::catalog::{ColumnDesc, ColumnId, Field, Schema, TableId}; use risingwave_common::types::DataType; use risingwave_common::util::sort_util::{OrderPair, OrderType}; use risingwave_storage::memory::MemoryStateStore; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; +use crate::common::table::state_table::StateTable; use crate::executor::lookup::impl_::LookupExecutorParams; use crate::executor::lookup::LookupExecutor; use crate::executor::test_utils::*; diff --git a/src/stream/src/executor/managed_state/dynamic_filter.rs b/src/stream/src/executor/managed_state/dynamic_filter.rs index 53a2110d82114..f638e08225454 100644 --- a/src/stream/src/executor/managed_state/dynamic_filter.rs +++ b/src/stream/src/executor/managed_state/dynamic_filter.rs @@ -25,11 +25,9 @@ use risingwave_common::buffer::Bitmap; use risingwave_common::row::{CompactedRow, Row}; use risingwave_common::types::{ScalarImpl, VIRTUAL_NODE_SIZE}; use risingwave_common::util::epoch::EpochPair; -use risingwave_storage::table::streaming_table::state_table::{ - prefix_range_to_memcomparable, StateTable, -}; use risingwave_storage::StateStore; +use crate::common::table::state_table::{prefix_range_to_memcomparable, StateTable}; use crate::executor::error::StreamExecutorError; use crate::executor::StreamExecutorResult; diff --git a/src/stream/src/executor/managed_state/join/mod.rs b/src/stream/src/executor/managed_state/join/mod.rs index 3d915680eed67..578612d3b0a7b 100644 --- a/src/stream/src/executor/managed_state/join/mod.rs +++ b/src/stream/src/executor/managed_state/join/mod.rs @@ -34,11 +34,11 @@ use risingwave_common::types::{DataType, Datum, ScalarImpl}; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderType; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use self::iter_utils::zip_by_order_key; use crate::cache::{cache_may_stale, EvictableHashMap, ExecutorCache, LruManagerRef}; +use crate::common::table::state_table::StateTable; use crate::executor::error::StreamExecutorResult; use crate::executor::monitor::StreamingMetrics; use crate::task::ActorId; diff --git a/src/stream/src/executor/managed_state/top_n/top_n_state.rs b/src/stream/src/executor/managed_state/top_n/top_n_state.rs index 4af80880941ed..74d78325d4156 100644 --- a/src/stream/src/executor/managed_state/top_n/top_n_state.rs +++ b/src/stream/src/executor/managed_state/top_n/top_n_state.rs @@ -18,10 +18,10 @@ use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderType; use risingwave_connector::source::DataType; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use crate::common::iter_state_table; +use crate::common::table::state_table::StateTable; use crate::executor::error::StreamExecutorResult; use 
crate::executor::top_n::{serialize_pk_to_cache_key, CacheKey, TopNCache}; diff --git a/src/stream/src/executor/mview/materialize.rs b/src/stream/src/executor/mview/materialize.rs index 6706420373992..95d6e4c98cf9c 100644 --- a/src/stream/src/executor/mview/materialize.rs +++ b/src/stream/src/executor/mview/materialize.rs @@ -29,10 +29,10 @@ use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderPair; use risingwave_pb::catalog::Table; use risingwave_storage::table::streaming_table::mem_table::RowOp; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use crate::cache::{EvictableHashMap, ExecutorCache, LruManagerRef}; +use crate::common::table::state_table::StateTable; use crate::executor::error::StreamExecutorError; use crate::executor::{ expect_first_barrier, ActorContext, ActorContextRef, BoxedExecutor, BoxedMessageStream, diff --git a/src/stream/src/executor/mview/test_utils.rs b/src/stream/src/executor/mview/test_utils.rs index 25711caf54218..6251c8625b2d4 100644 --- a/src/stream/src/executor/mview/test_utils.rs +++ b/src/stream/src/executor/mview/test_utils.rs @@ -19,7 +19,8 @@ use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::sort_util::OrderType; use risingwave_storage::memory::MemoryStateStore; use risingwave_storage::table::batch_table::storage_table::StorageTable; -use risingwave_storage::table::streaming_table::state_table::StateTable; + +use crate::common::table::state_table::StateTable; pub async fn gen_basic_table(row_count: usize) -> StorageTable<MemoryStateStore> { let state_store = MemoryStateStore::new(); diff --git a/src/stream/src/executor/sort.rs b/src/stream/src/executor/sort.rs index 41d24f76aadb2..ad7d8c9e192b7 100644 --- a/src/stream/src/executor/sort.rs +++ b/src/stream/src/executor/sort.rs @@ -25,7 +25,6 @@ use risingwave_common::row::Row; use risingwave_common::types::ScalarImpl; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; use risingwave_common::util::select_all; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::error::StreamExecutorError; @@ -33,6 +32,7 @@ use super::{ expect_first_barrier, ActorContextRef, BoxedExecutor, BoxedMessageStream, Executor, Message, PkIndices, StreamExecutorResult, Watermark, }; +use crate::common::table::state_table::StateTable; /// [`SortBufferKey`] contains a record's timestamp and pk. 
type SortBufferKey = (ScalarImpl, Row); @@ -330,7 +330,6 @@ mod tests { use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_common::util::sort_util::OrderType; use risingwave_storage::memory::MemoryStateStore; - use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; use crate::executor::test_utils::{MessageSender, MockSource}; diff --git a/src/stream/src/executor/source/source_executor.rs b/src/stream/src/executor/source/source_executor.rs index 2fb2ab1069f6e..806742380185a 100644 --- a/src/stream/src/executor/source/source_executor.rs +++ b/src/stream/src/executor/source/source_executor.rs @@ -776,6 +776,7 @@ mod tests { properties, ProstSourceInfo::StreamSource(stream_source_info), source_manager, + Default::default(), ) } diff --git a/src/stream/src/executor/source/state_table_handler.rs b/src/stream/src/executor/source/state_table_handler.rs index efdf34766a21b..003257b6bc1d1 100644 --- a/src/stream/src/executor/source/state_table_handler.rs +++ b/src/stream/src/executor/source/state_table_handler.rs @@ -25,9 +25,9 @@ use risingwave_pb::catalog::Table as ProstTable; use risingwave_pb::data::data_type::TypeName; use risingwave_pb::data::DataType; use risingwave_pb::plan_common::{ColumnCatalog, ColumnDesc, ColumnOrder}; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; +use crate::common::table::state_table::StateTable; use crate::executor::error::StreamExecutorError; use crate::executor::StreamExecutorResult; diff --git a/src/stream/src/executor/test_utils.rs b/src/stream/src/executor/test_utils.rs index 2df954cc084bc..e344dc553e6e9 100644 --- a/src/stream/src/executor/test_utils.rs +++ b/src/stream/src/executor/test_utils.rs @@ -155,9 +155,9 @@ pub mod agg_executor { use risingwave_common::types::DataType; use risingwave_common::util::sort_util::OrderType; use risingwave_expr::expr::AggKind; - use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; + use crate::common::table::state_table::StateTable; use crate::common::StateTableColumnMapping; use crate::executor::aggregation::{AggCall, AggStateStorage}; use crate::executor::{ @@ -326,8 +326,8 @@ pub mod top_n_executor { use risingwave_common::types::DataType; use risingwave_common::util::sort_util::OrderType; use risingwave_storage::memory::MemoryStateStore; - use risingwave_storage::table::streaming_table::state_table::StateTable; + use crate::common::table::state_table::StateTable; pub async fn create_in_memory_state_table( data_types: &[DataType], order_types: &[OrderType], diff --git a/src/stream/src/executor/top_n/group_top_n.rs b/src/stream/src/executor/top_n/group_top_n.rs index 82fe6122ef4cd..ba68578436a34 100644 --- a/src/stream/src/executor/top_n/group_top_n.rs +++ b/src/stream/src/executor/top_n/group_top_n.rs @@ -25,13 +25,13 @@ use risingwave_common::types::Datum; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::top_n_cache::TopNCacheTrait; use super::utils::*; use super::TopNCache; use crate::cache::cache_may_stale; +use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::error::StreamExecutorResult; use crate::executor::managed_state::top_n::ManagedTopNState; diff --git 
a/src/stream/src/executor/top_n/top_n_appendonly.rs b/src/stream/src/executor/top_n/top_n_appendonly.rs index 1134e5b8c31d8..d9b7748c3b32a 100644 --- a/src/stream/src/executor/top_n/top_n_appendonly.rs +++ b/src/stream/src/executor/top_n/top_n_appendonly.rs @@ -21,11 +21,11 @@ use risingwave_common::row::RowDeserializer; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::utils::*; use super::TopNCache; +use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::error::StreamExecutorResult; use crate::executor::managed_state::top_n::ManagedTopNState; diff --git a/src/stream/src/executor/top_n/top_n_plain.rs b/src/stream/src/executor/top_n/top_n_plain.rs index ae665b4db82d7..3a4db40270249 100644 --- a/src/stream/src/executor/top_n/top_n_plain.rs +++ b/src/stream/src/executor/top_n/top_n_plain.rs @@ -20,11 +20,11 @@ use risingwave_common::catalog::Schema; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::ordered::OrderedRowSerde; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::utils::*; use super::{TopNCache, TopNCacheTrait}; +use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::error::StreamExecutorResult; use crate::executor::managed_state::top_n::ManagedTopNState; diff --git a/src/stream/src/executor/watermark_filter.rs b/src/stream/src/executor/watermark_filter.rs index bab878c5b45f3..b9a3775566bc6 100644 --- a/src/stream/src/executor/watermark_filter.rs +++ b/src/stream/src/executor/watermark_filter.rs @@ -25,7 +25,6 @@ use risingwave_expr::expr::expr_binary_nonnull::new_binary_expr; use risingwave_expr::expr::{BoxedExpression, Expression, InputRefExpression, LiteralExpression}; use risingwave_expr::Result as ExprResult; use risingwave_pb::expr::expr_node::Type; -use risingwave_storage::table::streaming_table::state_table::StateTable; use risingwave_storage::StateStore; use super::error::StreamExecutorError; @@ -33,6 +32,7 @@ use super::filter::SimpleFilterExecutor; use super::{ ActorContextRef, BoxedExecutor, Executor, ExecutorInfo, Message, StreamExecutorResult, }; +use crate::common::table::state_table::StateTable; use crate::common::InfallibleExpression; use crate::executor::{expect_first_barrier, Watermark}; diff --git a/src/stream/src/from_proto/agg_common.rs b/src/stream/src/from_proto/agg_common.rs index e27eef878ed30..355f6e4197173 100644 --- a/src/stream/src/from_proto/agg_common.rs +++ b/src/stream/src/from_proto/agg_common.rs @@ -21,9 +21,9 @@ use risingwave_common::types::DataType; use risingwave_common::util::sort_util::{OrderPair, OrderType}; use risingwave_expr::expr::{build_from_prost, AggKind}; use risingwave_pb::plan_common::OrderType as ProstOrderType; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; +use crate::common::table::state_table::StateTable; use crate::common::StateTableColumnMapping; use crate::executor::aggregation::{AggArgs, AggCall, AggStateStorage}; diff --git a/src/stream/src/from_proto/batch_query.rs b/src/stream/src/from_proto/batch_query.rs index 69513bf628282..e183f30c91063 100644 --- a/src/stream/src/from_proto/batch_query.rs +++ 
b/src/stream/src/from_proto/batch_query.rs @@ -31,7 +31,7 @@ impl ExecutorBuilder for BatchQueryExecutorBuilder { params: ExecutorParams, node: &StreamNode, state_store: impl StateStore, - _stream: &mut LocalStreamManagerCore, + stream: &mut LocalStreamManagerCore, ) -> StreamResult<BoxedExecutor> { let node = try_match_expand!(node.get_node_body().unwrap(), NodeBody::BatchPlan)?; @@ -101,7 +101,7 @@ impl ExecutorBuilder for BatchQueryExecutorBuilder { let schema = table.schema().clone(); let executor = BatchQueryExecutor::new( table, - None, + stream.config.developer.stream_chunk_size, ExecutorInfo { schema, pk_indices: params.pk_indices, diff --git a/src/stream/src/from_proto/dynamic_filter.rs b/src/stream/src/from_proto/dynamic_filter.rs index 1e48341a2524d..8a533e88f8a28 100644 --- a/src/stream/src/from_proto/dynamic_filter.rs +++ b/src/stream/src/from_proto/dynamic_filter.rs @@ -15,9 +15,9 @@ use std::sync::Arc; use risingwave_common::bail; use risingwave_pb::expr::expr_node::Type::*; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; +use crate::common::table::state_table::StateTable; use crate::executor::DynamicFilterExecutor; pub struct DynamicFilterExecutorBuilder; diff --git a/src/stream/src/from_proto/global_simple_agg.rs b/src/stream/src/from_proto/global_simple_agg.rs index e3992489824bf..61cf0c120e092 100644 --- a/src/stream/src/from_proto/global_simple_agg.rs +++ b/src/stream/src/from_proto/global_simple_agg.rs @@ -14,10 +14,9 @@ //! Streaming Aggregators -use risingwave_storage::table::streaming_table::state_table::StateTable; - use super::agg_common::{build_agg_call_from_prost, build_agg_state_storages_from_proto}; use super::*; +use crate::common::table::state_table::StateTable; use crate::executor::aggregation::AggCall; use crate::executor::GlobalSimpleAggExecutor; diff --git a/src/stream/src/from_proto/group_top_n.rs b/src/stream/src/from_proto/group_top_n.rs index fb4ea1edeb815..1356de868a46e 100644 --- a/src/stream/src/from_proto/group_top_n.rs +++ b/src/stream/src/from_proto/group_top_n.rs @@ -15,9 +15,9 @@ use std::sync::Arc; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; +use crate::common::table::state_table::StateTable; use crate::executor::GroupTopNExecutor; pub struct GroupTopNExecutorBuilder; diff --git a/src/stream/src/from_proto/hash_agg.rs b/src/stream/src/from_proto/hash_agg.rs index 5a622d2259e6a..929eee2b83aee 100644 --- a/src/stream/src/from_proto/hash_agg.rs +++ b/src/stream/src/from_proto/hash_agg.rs @@ -18,11 +18,11 @@ use std::sync::Arc; use risingwave_common::hash::{HashKey, HashKeyDispatcher}; use risingwave_common::types::DataType; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::agg_common::{build_agg_call_from_prost, build_agg_state_storages_from_proto}; use super::*; use crate::cache::LruManagerRef; +use crate::common::table::state_table::StateTable; use crate::executor::aggregation::{AggCall, AggStateStorage}; use crate::executor::monitor::StreamingMetrics; use crate::executor::{ActorContextRef, HashAggExecutor, PkIndices}; diff --git a/src/stream/src/from_proto/hash_join.rs b/src/stream/src/from_proto/hash_join.rs index 230f8b803f21c..603815e5083f5 100644 --- a/src/stream/src/from_proto/hash_join.rs +++ b/src/stream/src/from_proto/hash_join.rs @@ -18,10 +18,10 @@ use risingwave_common::hash::{HashKey, HashKeyDispatcher}; use risingwave_common::types::DataType; use 
risingwave_expr::expr::{build_from_prost, BoxedExpression}; use risingwave_pb::plan_common::JoinType as JoinTypeProto; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; use crate::cache::LruManagerRef; +use crate::common::table::state_table::StateTable; use crate::executor::hash_join::*; use crate::executor::monitor::StreamingMetrics; use crate::executor::{ActorContextRef, PkIndices}; diff --git a/src/stream/src/from_proto/lookup.rs b/src/stream/src/from_proto/lookup.rs index 5bfefc629ad3a..890b077fd8e55 100644 --- a/src/stream/src/from_proto/lookup.rs +++ b/src/stream/src/from_proto/lookup.rs @@ -16,9 +16,9 @@ use std::sync::Arc; use risingwave_common::catalog::{ColumnDesc, Field, Schema}; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; +use crate::common::table::state_table::StateTable; use crate::executor::{LookupExecutor, LookupExecutorParams}; pub struct LookupExecutorBuilder; diff --git a/src/stream/src/from_proto/sort.rs b/src/stream/src/from_proto/sort.rs index cffdc8b3bc909..c090404f00c85 100644 --- a/src/stream/src/from_proto/sort.rs +++ b/src/stream/src/from_proto/sort.rs @@ -14,9 +14,8 @@ use std::sync::Arc; -use risingwave_storage::table::streaming_table::state_table::StateTable; - use super::*; +use crate::common::table::state_table::StateTable; use crate::executor::SortExecutor; pub struct SortExecutorBuilder; diff --git a/src/stream/src/from_proto/source.rs b/src/stream/src/from_proto/source.rs index c29ec3e809021..cc1742aaf6eeb 100644 --- a/src/stream/src/from_proto/source.rs +++ b/src/stream/src/from_proto/source.rs @@ -39,6 +39,7 @@ impl ExecutorBuilder for SourceExecutorBuilder { .register_sender(params.actor_context.id, sender); let source_id = TableId::new(node.source_id); + let source_builder = SourceDescBuilder::new( source_id, node.row_id_index.clone(), @@ -47,6 +48,7 @@ impl ExecutorBuilder for SourceExecutorBuilder { node.properties.clone(), node.get_info()?.clone(), params.env.source_manager_ref(), + params.env.connector_source_endpoint(), ); let columns = node.columns.clone(); diff --git a/src/stream/src/from_proto/top_n.rs b/src/stream/src/from_proto/top_n.rs index 2f6fa167716c5..59abf7467357b 100644 --- a/src/stream/src/from_proto/top_n.rs +++ b/src/stream/src/from_proto/top_n.rs @@ -15,9 +15,9 @@ use std::sync::Arc; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; +use crate::common::table::state_table::StateTable; use crate::executor::TopNExecutor; pub struct TopNExecutorNewBuilder; diff --git a/src/stream/src/from_proto/top_n_appendonly.rs b/src/stream/src/from_proto/top_n_appendonly.rs index 755e8720a4ba1..f1beb16e4742f 100644 --- a/src/stream/src/from_proto/top_n_appendonly.rs +++ b/src/stream/src/from_proto/top_n_appendonly.rs @@ -15,9 +15,9 @@ use std::sync::Arc; use risingwave_common::util::sort_util::OrderPair; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; +use crate::common::table::state_table::StateTable; use crate::executor::AppendOnlyTopNExecutor; pub struct AppendOnlyTopNExecutorBuilder; diff --git a/src/stream/src/from_proto/watermark_filter.rs b/src/stream/src/from_proto/watermark_filter.rs index 5503618edb9e8..5a8c69ae807f3 100644 --- a/src/stream/src/from_proto/watermark_filter.rs +++ b/src/stream/src/from_proto/watermark_filter.rs @@ -15,9 +15,9 @@ use std::sync::Arc; use 
risingwave_expr::expr::build_from_prost; -use risingwave_storage::table::streaming_table::state_table::StateTable; use super::*; +use crate::common::table::state_table::StateTable; use crate::executor::WatermarkFilterExecutor; pub struct WatermarkFilterBuilder; diff --git a/src/stream/src/lib.rs b/src/stream/src/lib.rs index 316fb6ad9f038..6c7da9f901065 100644 --- a/src/stream/src/lib.rs +++ b/src/stream/src/lib.rs @@ -42,7 +42,7 @@ extern crate tracing; pub mod cache; -mod common; +pub mod common; pub mod error; pub mod executor; mod from_proto; diff --git a/src/stream/src/task/env.rs b/src/stream/src/task/env.rs index ebcaac18998d2..c212f5bc2fbef 100644 --- a/src/stream/src/task/env.rs +++ b/src/stream/src/task/env.rs @@ -28,6 +28,9 @@ pub struct StreamEnvironment { /// Endpoint the stream manager listens on. server_addr: HostAddr, + /// Endpoint of the source connector node + connector_source_endpoint: String, + /// Reference to the source manager. source_manager: TableSourceManagerRef, @@ -45,12 +48,14 @@ impl StreamEnvironment { pub fn new( source_manager: TableSourceManagerRef, server_addr: HostAddr, + connector_source_endpoint: String, config: Arc<StreamingConfig>, worker_id: WorkerNodeId, state_store: StateStoreImpl, ) -> Self { StreamEnvironment { server_addr, + connector_source_endpoint, source_manager, config, worker_id, @@ -64,6 +69,7 @@ impl StreamEnvironment { use risingwave_storage::monitor::StateStoreMetrics; StreamEnvironment { server_addr: "127.0.0.1:5688".parse().unwrap(), + connector_source_endpoint: "127.0.0.1:60061".parse().unwrap(), source_manager: Arc::new(TableSourceManager::default()), config: Arc::new(StreamingConfig::default()), worker_id: WorkerNodeId::default(), @@ -96,4 +102,8 @@ impl StreamEnvironment { pub fn state_store(&self) -> StateStoreImpl { self.state_store.clone() } + + pub fn connector_source_endpoint(&self) -> String { + self.connector_source_endpoint.clone() + } } diff --git a/src/stream/src/task/mod.rs b/src/stream/src/task/mod.rs index 67fbe4a197e19..fc8a4e6c355a8 100644 --- a/src/stream/src/task/mod.rs +++ b/src/stream/src/task/mod.rs @@ -193,13 +193,13 @@ impl SharedContext { } } -/// Generate a globally unique executor id. Useful when constructing per-actor keyspace +/// Generate a globally unique executor id. pub fn unique_executor_id(actor_id: u32, operator_id: u64) -> u64 { assert!(operator_id <= u32::MAX as u64); ((actor_id as u64) << 32) + operator_id } -/// Generate a globally unique operator id. Useful when constructing per-fragment keyspace. +/// Generate a globally unique operator id. pub fn unique_operator_id(fragment_id: u32, operator_id: u64) -> u64 { assert!(operator_id <= u32::MAX as u64); ((fragment_id as u64) << 32) + operator_id diff --git a/src/tests/regress/data/schedule b/src/tests/regress/data/schedule index b5a0b2eb38b90..0479d9f4b408c 100644 --- a/src/tests/regress/data/schedule +++ b/src/tests/regress/data/schedule @@ -7,4 +7,4 @@ # interferes with crash-recovery testing. 
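(Stepping out of the diff for a moment: the `unique_executor_id` / `unique_operator_id` helpers in `src/stream/src/task/mod.rs` above pack two 32-bit components into a single `u64`. A quick self-contained check of that layout; the unpacking expressions are ours for illustration, not part of the crate.)

```rust
// Mirrors unique_executor_id above: actor_id lands in the high 32 bits,
// operator_id in the low 32 bits, so distinct pairs can never collide.
fn unique_executor_id(actor_id: u32, operator_id: u64) -> u64 {
    assert!(operator_id <= u32::MAX as u64);
    ((actor_id as u64) << 32) + operator_id
}

fn main() {
    let id = unique_executor_id(3, 7);
    assert_eq!(id, 0x0000_0003_0000_0007);
    // Recover the parts with a shift and a mask.
    assert_eq!((id >> 32) as u32, 3);
    assert_eq!((id & 0xffff_ffff) as u32, 7);
}
```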
# test: tablespace -test: boolean varchar int2 int4 +test: boolean varchar int2 int4 float4 diff --git a/src/tests/regress/data/sql/float4.sql b/src/tests/regress/data/sql/float4.sql index 612486ecbd2ee..5f5de3094dab9 100644 --- a/src/tests/regress/data/sql/float4.sql +++ b/src/tests/regress/data/sql/float4.sql @@ -11,20 +11,20 @@ INSERT INTO FLOAT4_TBL(f1) VALUES ('1.2345678901234e+20'); INSERT INTO FLOAT4_TBL(f1) VALUES ('1.2345678901234e-20'); -- test for over and under flow -INSERT INTO FLOAT4_TBL(f1) VALUES ('10e70'); -INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e70'); -INSERT INTO FLOAT4_TBL(f1) VALUES ('10e-70'); -INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e-70'); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('10e70'); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e70'); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('10e-70'); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e-70'); -INSERT INTO FLOAT4_TBL(f1) VALUES ('10e70'::float8); -INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e70'::float8); -INSERT INTO FLOAT4_TBL(f1) VALUES ('10e-70'::float8); -INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e-70'::float8); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('10e70'::float8); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e70'::float8); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('10e-70'::float8); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e-70'::float8); -INSERT INTO FLOAT4_TBL(f1) VALUES ('10e400'); -INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e400'); -INSERT INTO FLOAT4_TBL(f1) VALUES ('10e-400'); -INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e-400'); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('10e400'); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e400'); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('10e-400'); +--@ INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e-400'); -- bad input INSERT INTO FLOAT4_TBL(f1) VALUES (''); @@ -37,11 +37,11 @@ INSERT INTO FLOAT4_TBL(f1) VALUES (' - 3.0'); INSERT INTO FLOAT4_TBL(f1) VALUES ('123 5'); -- special inputs -SELECT 'NaN'::float4; -SELECT 'nan'::float4; -SELECT ' NAN '::float4; -SELECT 'infinity'::float4; -SELECT ' -INFINiTY '::float4; +--@ SELECT 'NaN'::float4; +--@ SELECT 'nan'::float4; +--@ SELECT ' NAN '::float4; +--@ SELECT 'infinity'::float4; +--@ SELECT ' -INFINiTY '::float4; -- bad special inputs SELECT 'N A N'::float4; SELECT 'NaN x'::float4; @@ -52,82 +52,82 @@ SELECT 'Infinity'::float4 / 'Infinity'::float4; SELECT '42'::float4 / 'Infinity'::float4; SELECT 'nan'::float4 / 'nan'::float4; SELECT 'nan'::float4 / '0'::float4; -SELECT 'nan'::numeric::float4; +--@ SELECT 'nan'::numeric::float4; -SELECT * FROM FLOAT4_TBL; +--@ SELECT * FROM FLOAT4_TBL; -SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 <> '1004.3'; +--@ SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 <> '1004.3'; SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 = '1004.3'; -SELECT f.* FROM FLOAT4_TBL f WHERE '1004.3' > f.f1; +--@ SELECT f.* FROM FLOAT4_TBL f WHERE '1004.3' > f.f1; -SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 < '1004.3'; +--@ SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 < '1004.3'; -SELECT f.* FROM FLOAT4_TBL f WHERE '1004.3' >= f.f1; +--@ SELECT f.* FROM FLOAT4_TBL f WHERE '1004.3' >= f.f1; -SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 <= '1004.3'; +--@ SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 <= '1004.3'; -SELECT f.f1, f.f1 * '-10' AS x FROM FLOAT4_TBL f - WHERE f.f1 > '0.0'; +--@ SELECT f.f1, f.f1 * '-10' AS x FROM FLOAT4_TBL f +--@ WHERE f.f1 > '0.0'; -SELECT f.f1, f.f1 + '-10' AS x FROM FLOAT4_TBL f - WHERE f.f1 > '0.0'; +--@ SELECT f.f1, f.f1 + '-10' AS x FROM FLOAT4_TBL f +--@ WHERE f.f1 > '0.0'; -SELECT f.f1, f.f1 / '-10' AS x FROM FLOAT4_TBL f - WHERE f.f1 > '0.0'; 
+--@ SELECT f.f1, f.f1 / '-10' AS x FROM FLOAT4_TBL f +--@ WHERE f.f1 > '0.0'; -SELECT f.f1, f.f1 - '-10' AS x FROM FLOAT4_TBL f - WHERE f.f1 > '0.0'; +--@ SELECT f.f1, f.f1 - '-10' AS x FROM FLOAT4_TBL f +--@ WHERE f.f1 > '0.0'; -- test divide by zero -SELECT f.f1 / '0.0' from FLOAT4_TBL f; +--@ SELECT f.f1 / '0.0' from FLOAT4_TBL f; -SELECT * FROM FLOAT4_TBL; +--@ SELECT * FROM FLOAT4_TBL; -- test the unary float4abs operator -SELECT f.f1, @f.f1 AS abs_f1 FROM FLOAT4_TBL f; +--@ SELECT f.f1, @f.f1 AS abs_f1 FROM FLOAT4_TBL f; -UPDATE FLOAT4_TBL - SET f1 = FLOAT4_TBL.f1 * '-1' - WHERE FLOAT4_TBL.f1 > '0.0'; +--@ UPDATE FLOAT4_TBL +--@ SET f1 = FLOAT4_TBL.f1 * '-1' +--@ WHERE FLOAT4_TBL.f1 > '0.0'; -SELECT * FROM FLOAT4_TBL; +--@ SELECT * FROM FLOAT4_TBL; -- test edge-case coercions to integer -SELECT '32767.4'::float4::int2; -SELECT '32767.6'::float4::int2; -SELECT '-32768.4'::float4::int2; -SELECT '-32768.6'::float4::int2; -SELECT '2147483520'::float4::int4; +--@ SELECT '32767.4'::float4::int2; +--@ SELECT '32767.6'::float4::int2; +--@ SELECT '-32768.4'::float4::int2; +--@ SELECT '-32768.6'::float4::int2; +--@ SELECT '2147483520'::float4::int4; SELECT '2147483647'::float4::int4; -SELECT '-2147483648.5'::float4::int4; +--@ SELECT '-2147483648.5'::float4::int4; SELECT '-2147483900'::float4::int4; -SELECT '9223369837831520256'::float4::int8; +--@ SELECT '9223369837831520256'::float4::int8; SELECT '9223372036854775807'::float4::int8; -SELECT '-9223372036854775808.5'::float4::int8; +--@ SELECT '-9223372036854775808.5'::float4::int8; SELECT '-9223380000000000000'::float4::int8; -- Test for correct input rounding in edge cases. -- These lists are from Paxson 1991, excluding subnormals and -- inputs of over 9 sig. digits. -SELECT float4send('5e-20'::float4); -SELECT float4send('67e14'::float4); -SELECT float4send('985e15'::float4); -SELECT float4send('55895e-16'::float4); -SELECT float4send('7038531e-32'::float4); -SELECT float4send('702990899e-20'::float4); - -SELECT float4send('3e-23'::float4); -SELECT float4send('57e18'::float4); -SELECT float4send('789e-35'::float4); -SELECT float4send('2539e-18'::float4); -SELECT float4send('76173e28'::float4); -SELECT float4send('887745e-11'::float4); -SELECT float4send('5382571e-37'::float4); -SELECT float4send('82381273e-35'::float4); -SELECT float4send('750486563e-38'::float4); +--@ SELECT float4send('5e-20'::float4); +--@ SELECT float4send('67e14'::float4); +--@ SELECT float4send('985e15'::float4); +--@ SELECT float4send('55895e-16'::float4); +--@ SELECT float4send('7038531e-32'::float4); +--@ SELECT float4send('702990899e-20'::float4); + +--@ SELECT float4send('3e-23'::float4); +--@ SELECT float4send('57e18'::float4); +--@ SELECT float4send('789e-35'::float4); +--@ SELECT float4send('2539e-18'::float4); +--@ SELECT float4send('76173e28'::float4); +--@ SELECT float4send('887745e-11'::float4); +--@ SELECT float4send('5382571e-37'::float4); +--@ SELECT float4send('82381273e-35'::float4); +--@ SELECT float4send('750486563e-38'::float4); -- Test that the smallest possible normalized input value inputs -- correctly, either in 9-significant-digit or shortest-decimal @@ -137,24 +137,24 @@ SELECT float4send('750486563e-38'::float4); -- shortest val is 1.1754944000 -- midpoint to next val is 1.1754944208... -SELECT float4send('1.17549435e-38'::float4); -SELECT float4send('1.1754944e-38'::float4); +--@ SELECT float4send('1.17549435e-38'::float4); +--@ SELECT float4send('1.1754944e-38'::float4); -- test output (and round-trip safety) of various values. 
-- To ensure we're testing what we think we're testing, start with -- float values specified by bit patterns (as a useful side effect, -- this means we'll fail on non-IEEE platforms). -create type xfloat4; -create function xfloat4in(cstring) returns xfloat4 immutable strict - language internal as 'int4in'; -create function xfloat4out(xfloat4) returns cstring immutable strict - language internal as 'int4out'; -create type xfloat4 (input = xfloat4in, output = xfloat4out, like = float4); -create cast (xfloat4 as float4) without function; -create cast (float4 as xfloat4) without function; -create cast (xfloat4 as integer) without function; -create cast (integer as xfloat4) without function; +--@ create type xfloat4; +--@ create function xfloat4in(cstring) returns xfloat4 immutable strict +--@ language internal as 'int4in'; +--@ create function xfloat4out(xfloat4) returns cstring immutable strict +--@ language internal as 'int4out'; +--@ create type xfloat4 (input = xfloat4in, output = xfloat4out, like = float4); +--@ create cast (xfloat4 as float4) without function; +--@ create cast (float4 as xfloat4) without function; +--@ create cast (xfloat4 as integer) without function; +--@ create cast (integer as xfloat4) without function; -- float4: seeeeeee emmmmmmm mmmmmmmm mmmmmmmm @@ -162,193 +162,193 @@ create cast (integer as xfloat4) without function; -- correctly; those are "use at your own risk". However we do test -- subnormal outputs, since those are under our control. -with testdata(bits) as (values - -- small subnormals - (x'00000001'), - (x'00000002'), (x'00000003'), - (x'00000010'), (x'00000011'), (x'00000100'), (x'00000101'), - (x'00004000'), (x'00004001'), (x'00080000'), (x'00080001'), - -- stress values - (x'0053c4f4'), -- 7693e-42 - (x'006c85c4'), -- 996622e-44 - (x'0041ca76'), -- 60419369e-46 - (x'004b7678'), -- 6930161142e-48 - -- taken from upstream testsuite - (x'00000007'), - (x'00424fe2'), - -- borderline between subnormal and normal - (x'007ffff0'), (x'007ffff1'), (x'007ffffe'), (x'007fffff')) -select float4send(flt) as ibits, - flt - from (select bits::integer::xfloat4::float4 as flt - from testdata - offset 0) s; - -with testdata(bits) as (values - (x'00000000'), - -- smallest normal values - (x'00800000'), (x'00800001'), (x'00800004'), (x'00800005'), - (x'00800006'), - -- small normal values chosen for short vs. long output - (x'008002f1'), (x'008002f2'), (x'008002f3'), - (x'00800e17'), (x'00800e18'), (x'00800e19'), - -- assorted values (random mantissae) - (x'01000001'), (x'01102843'), (x'01a52c98'), - (x'0219c229'), (x'02e4464d'), (x'037343c1'), (x'03a91b36'), - (x'047ada65'), (x'0496fe87'), (x'0550844f'), (x'05999da3'), - (x'060ea5e2'), (x'06e63c45'), (x'07f1e548'), (x'0fc5282b'), - (x'1f850283'), (x'2874a9d6'), - -- values around 5e-08 - (x'3356bf94'), (x'3356bf95'), (x'3356bf96'), - -- around 1e-07 - (x'33d6bf94'), (x'33d6bf95'), (x'33d6bf96'), - -- around 3e-07 .. 
1e-04 - (x'34a10faf'), (x'34a10fb0'), (x'34a10fb1'), - (x'350637bc'), (x'350637bd'), (x'350637be'), - (x'35719786'), (x'35719787'), (x'35719788'), - (x'358637bc'), (x'358637bd'), (x'358637be'), - (x'36a7c5ab'), (x'36a7c5ac'), (x'36a7c5ad'), - (x'3727c5ab'), (x'3727c5ac'), (x'3727c5ad'), - -- format crossover at 1e-04 - (x'38d1b714'), (x'38d1b715'), (x'38d1b716'), - (x'38d1b717'), (x'38d1b718'), (x'38d1b719'), - (x'38d1b71a'), (x'38d1b71b'), (x'38d1b71c'), - (x'38d1b71d'), - -- - (x'38dffffe'), (x'38dfffff'), (x'38e00000'), - (x'38efffff'), (x'38f00000'), (x'38f00001'), - (x'3a83126e'), (x'3a83126f'), (x'3a831270'), - (x'3c23d709'), (x'3c23d70a'), (x'3c23d70b'), - (x'3dcccccc'), (x'3dcccccd'), (x'3dccccce'), - -- chosen to need 9 digits for 3dcccd70 - (x'3dcccd6f'), (x'3dcccd70'), (x'3dcccd71'), - -- - (x'3effffff'), (x'3f000000'), (x'3f000001'), - (x'3f333332'), (x'3f333333'), (x'3f333334'), - -- approach 1.0 with increasing numbers of 9s - (x'3f666665'), (x'3f666666'), (x'3f666667'), - (x'3f7d70a3'), (x'3f7d70a4'), (x'3f7d70a5'), - (x'3f7fbe76'), (x'3f7fbe77'), (x'3f7fbe78'), - (x'3f7ff971'), (x'3f7ff972'), (x'3f7ff973'), - (x'3f7fff57'), (x'3f7fff58'), (x'3f7fff59'), - (x'3f7fffee'), (x'3f7fffef'), - -- values very close to 1 - (x'3f7ffff0'), (x'3f7ffff1'), (x'3f7ffff2'), - (x'3f7ffff3'), (x'3f7ffff4'), (x'3f7ffff5'), - (x'3f7ffff6'), (x'3f7ffff7'), (x'3f7ffff8'), - (x'3f7ffff9'), (x'3f7ffffa'), (x'3f7ffffb'), - (x'3f7ffffc'), (x'3f7ffffd'), (x'3f7ffffe'), - (x'3f7fffff'), - (x'3f800000'), - (x'3f800001'), (x'3f800002'), (x'3f800003'), - (x'3f800004'), (x'3f800005'), (x'3f800006'), - (x'3f800007'), (x'3f800008'), (x'3f800009'), - -- values 1 to 1.1 - (x'3f80000f'), (x'3f800010'), (x'3f800011'), - (x'3f800012'), (x'3f800013'), (x'3f800014'), - (x'3f800017'), (x'3f800018'), (x'3f800019'), - (x'3f80001a'), (x'3f80001b'), (x'3f80001c'), - (x'3f800029'), (x'3f80002a'), (x'3f80002b'), - (x'3f800053'), (x'3f800054'), (x'3f800055'), - (x'3f800346'), (x'3f800347'), (x'3f800348'), - (x'3f8020c4'), (x'3f8020c5'), (x'3f8020c6'), - (x'3f8147ad'), (x'3f8147ae'), (x'3f8147af'), - (x'3f8ccccc'), (x'3f8ccccd'), (x'3f8cccce'), - -- - (x'3fc90fdb'), -- pi/2 - (x'402df854'), -- e - (x'40490fdb'), -- pi - -- - (x'409fffff'), (x'40a00000'), (x'40a00001'), - (x'40afffff'), (x'40b00000'), (x'40b00001'), - (x'411fffff'), (x'41200000'), (x'41200001'), - (x'42c7ffff'), (x'42c80000'), (x'42c80001'), - (x'4479ffff'), (x'447a0000'), (x'447a0001'), - (x'461c3fff'), (x'461c4000'), (x'461c4001'), - (x'47c34fff'), (x'47c35000'), (x'47c35001'), - (x'497423ff'), (x'49742400'), (x'49742401'), - (x'4b18967f'), (x'4b189680'), (x'4b189681'), - (x'4cbebc1f'), (x'4cbebc20'), (x'4cbebc21'), - (x'4e6e6b27'), (x'4e6e6b28'), (x'4e6e6b29'), - (x'501502f8'), (x'501502f9'), (x'501502fa'), - (x'51ba43b6'), (x'51ba43b7'), (x'51ba43b8'), - -- stress values - (x'1f6c1e4a'), -- 5e-20 - (x'59be6cea'), -- 67e14 - (x'5d5ab6c4'), -- 985e15 - (x'2cc4a9bd'), -- 55895e-16 - (x'15ae43fd'), -- 7038531e-32 - (x'2cf757ca'), -- 702990899e-20 - (x'665ba998'), -- 25933168707e13 - (x'743c3324'), -- 596428896559e20 - -- exercise fixed-point memmoves - (x'47f1205a'), - (x'4640e6ae'), - (x'449a5225'), - (x'42f6e9d5'), - (x'414587dd'), - (x'3f9e064b'), - -- these cases come from the upstream's testsuite - -- BoundaryRoundEven - (x'4c000004'), - (x'50061c46'), - (x'510006a8'), - -- ExactValueRoundEven - (x'48951f84'), - (x'45fd1840'), - -- LotsOfTrailingZeros - (x'39800000'), - (x'3b200000'), - (x'3b900000'), - (x'3bd00000'), - -- Regression - (x'63800000'), - 
(x'4b000000'), - (x'4b800000'), - (x'4c000001'), - (x'4c800b0d'), - (x'00d24584'), - (x'00d90b88'), - (x'45803f34'), - (x'4f9f24f7'), - (x'3a8722c3'), - (x'5c800041'), - (x'15ae43fd'), - (x'5d4cccfb'), - (x'4c800001'), - (x'57800ed8'), - (x'5f000000'), - (x'700000f0'), - (x'5f23e9ac'), - (x'5e9502f9'), - (x'5e8012b1'), - (x'3c000028'), - (x'60cde861'), - (x'03aa2a50'), - (x'43480000'), - (x'4c000000'), - -- LooksLikePow5 - (x'5D1502F9'), - (x'5D9502F9'), - (x'5E1502F9'), - -- OutputLength - (x'3f99999a'), - (x'3f9d70a4'), - (x'3f9df3b6'), - (x'3f9e0419'), - (x'3f9e0610'), - (x'3f9e064b'), - (x'3f9e0651'), - (x'03d20cfe') -) -select float4send(flt) as ibits, - flt, - flt::text::float4 as r_flt, - float4send(flt::text::float4) as obits, - float4send(flt::text::float4) = float4send(flt) as correct - from (select bits::integer::xfloat4::float4 as flt - from testdata - offset 0) s; +--@ with testdata(bits) as (values +--@ -- small subnormals +--@ (x'00000001'), +--@ (x'00000002'), (x'00000003'), +--@ (x'00000010'), (x'00000011'), (x'00000100'), (x'00000101'), +--@ (x'00004000'), (x'00004001'), (x'00080000'), (x'00080001'), +--@ -- stress values +--@ (x'0053c4f4'), -- 7693e-42 +--@ (x'006c85c4'), -- 996622e-44 +--@ (x'0041ca76'), -- 60419369e-46 +--@ (x'004b7678'), -- 6930161142e-48 +--@ -- taken from upstream testsuite +--@ (x'00000007'), +--@ (x'00424fe2'), +--@ -- borderline between subnormal and normal +--@ (x'007ffff0'), (x'007ffff1'), (x'007ffffe'), (x'007fffff')) +--@ select float4send(flt) as ibits, +--@ flt +--@ from (select bits::integer::xfloat4::float4 as flt +--@ from testdata +--@ offset 0) s; + +--@ with testdata(bits) as (values +--@ (x'00000000'), +--@ -- smallest normal values +--@ (x'00800000'), (x'00800001'), (x'00800004'), (x'00800005'), +--@ (x'00800006'), +--@ -- small normal values chosen for short vs. long output +--@ (x'008002f1'), (x'008002f2'), (x'008002f3'), +--@ (x'00800e17'), (x'00800e18'), (x'00800e19'), +--@ -- assorted values (random mantissae) +--@ (x'01000001'), (x'01102843'), (x'01a52c98'), +--@ (x'0219c229'), (x'02e4464d'), (x'037343c1'), (x'03a91b36'), +--@ (x'047ada65'), (x'0496fe87'), (x'0550844f'), (x'05999da3'), +--@ (x'060ea5e2'), (x'06e63c45'), (x'07f1e548'), (x'0fc5282b'), +--@ (x'1f850283'), (x'2874a9d6'), +--@ -- values around 5e-08 +--@ (x'3356bf94'), (x'3356bf95'), (x'3356bf96'), +--@ -- around 1e-07 +--@ (x'33d6bf94'), (x'33d6bf95'), (x'33d6bf96'), +--@ -- around 3e-07 .. 
1e-04 +--@ (x'34a10faf'), (x'34a10fb0'), (x'34a10fb1'), +--@ (x'350637bc'), (x'350637bd'), (x'350637be'), +--@ (x'35719786'), (x'35719787'), (x'35719788'), +--@ (x'358637bc'), (x'358637bd'), (x'358637be'), +--@ (x'36a7c5ab'), (x'36a7c5ac'), (x'36a7c5ad'), +--@ (x'3727c5ab'), (x'3727c5ac'), (x'3727c5ad'), +--@ -- format crossover at 1e-04 +--@ (x'38d1b714'), (x'38d1b715'), (x'38d1b716'), +--@ (x'38d1b717'), (x'38d1b718'), (x'38d1b719'), +--@ (x'38d1b71a'), (x'38d1b71b'), (x'38d1b71c'), +--@ (x'38d1b71d'), +--@ -- +--@ (x'38dffffe'), (x'38dfffff'), (x'38e00000'), +--@ (x'38efffff'), (x'38f00000'), (x'38f00001'), +--@ (x'3a83126e'), (x'3a83126f'), (x'3a831270'), +--@ (x'3c23d709'), (x'3c23d70a'), (x'3c23d70b'), +--@ (x'3dcccccc'), (x'3dcccccd'), (x'3dccccce'), +--@ -- chosen to need 9 digits for 3dcccd70 +--@ (x'3dcccd6f'), (x'3dcccd70'), (x'3dcccd71'), +--@ -- +--@ (x'3effffff'), (x'3f000000'), (x'3f000001'), +--@ (x'3f333332'), (x'3f333333'), (x'3f333334'), +--@ -- approach 1.0 with increasing numbers of 9s +--@ (x'3f666665'), (x'3f666666'), (x'3f666667'), +--@ (x'3f7d70a3'), (x'3f7d70a4'), (x'3f7d70a5'), +--@ (x'3f7fbe76'), (x'3f7fbe77'), (x'3f7fbe78'), +--@ (x'3f7ff971'), (x'3f7ff972'), (x'3f7ff973'), +--@ (x'3f7fff57'), (x'3f7fff58'), (x'3f7fff59'), +--@ (x'3f7fffee'), (x'3f7fffef'), +--@ -- values very close to 1 +--@ (x'3f7ffff0'), (x'3f7ffff1'), (x'3f7ffff2'), +--@ (x'3f7ffff3'), (x'3f7ffff4'), (x'3f7ffff5'), +--@ (x'3f7ffff6'), (x'3f7ffff7'), (x'3f7ffff8'), +--@ (x'3f7ffff9'), (x'3f7ffffa'), (x'3f7ffffb'), +--@ (x'3f7ffffc'), (x'3f7ffffd'), (x'3f7ffffe'), +--@ (x'3f7fffff'), +--@ (x'3f800000'), +--@ (x'3f800001'), (x'3f800002'), (x'3f800003'), +--@ (x'3f800004'), (x'3f800005'), (x'3f800006'), +--@ (x'3f800007'), (x'3f800008'), (x'3f800009'), +--@ -- values 1 to 1.1 +--@ (x'3f80000f'), (x'3f800010'), (x'3f800011'), +--@ (x'3f800012'), (x'3f800013'), (x'3f800014'), +--@ (x'3f800017'), (x'3f800018'), (x'3f800019'), +--@ (x'3f80001a'), (x'3f80001b'), (x'3f80001c'), +--@ (x'3f800029'), (x'3f80002a'), (x'3f80002b'), +--@ (x'3f800053'), (x'3f800054'), (x'3f800055'), +--@ (x'3f800346'), (x'3f800347'), (x'3f800348'), +--@ (x'3f8020c4'), (x'3f8020c5'), (x'3f8020c6'), +--@ (x'3f8147ad'), (x'3f8147ae'), (x'3f8147af'), +--@ (x'3f8ccccc'), (x'3f8ccccd'), (x'3f8cccce'), +--@ -- +--@ (x'3fc90fdb'), -- pi/2 +--@ (x'402df854'), -- e +--@ (x'40490fdb'), -- pi +--@ -- +--@ (x'409fffff'), (x'40a00000'), (x'40a00001'), +--@ (x'40afffff'), (x'40b00000'), (x'40b00001'), +--@ (x'411fffff'), (x'41200000'), (x'41200001'), +--@ (x'42c7ffff'), (x'42c80000'), (x'42c80001'), +--@ (x'4479ffff'), (x'447a0000'), (x'447a0001'), +--@ (x'461c3fff'), (x'461c4000'), (x'461c4001'), +--@ (x'47c34fff'), (x'47c35000'), (x'47c35001'), +--@ (x'497423ff'), (x'49742400'), (x'49742401'), +--@ (x'4b18967f'), (x'4b189680'), (x'4b189681'), +--@ (x'4cbebc1f'), (x'4cbebc20'), (x'4cbebc21'), +--@ (x'4e6e6b27'), (x'4e6e6b28'), (x'4e6e6b29'), +--@ (x'501502f8'), (x'501502f9'), (x'501502fa'), +--@ (x'51ba43b6'), (x'51ba43b7'), (x'51ba43b8'), +--@ -- stress values +--@ (x'1f6c1e4a'), -- 5e-20 +--@ (x'59be6cea'), -- 67e14 +--@ (x'5d5ab6c4'), -- 985e15 +--@ (x'2cc4a9bd'), -- 55895e-16 +--@ (x'15ae43fd'), -- 7038531e-32 +--@ (x'2cf757ca'), -- 702990899e-20 +--@ (x'665ba998'), -- 25933168707e13 +--@ (x'743c3324'), -- 596428896559e20 +--@ -- exercise fixed-point memmoves +--@ (x'47f1205a'), +--@ (x'4640e6ae'), +--@ (x'449a5225'), +--@ (x'42f6e9d5'), +--@ (x'414587dd'), +--@ (x'3f9e064b'), +--@ -- these cases come from the upstream's testsuite 
+--@ -- BoundaryRoundEven
+--@ (x'4c000004'),
+--@ (x'50061c46'),
+--@ (x'510006a8'),
+--@ -- ExactValueRoundEven
+--@ (x'48951f84'),
+--@ (x'45fd1840'),
+--@ -- LotsOfTrailingZeros
+--@ (x'39800000'),
+--@ (x'3b200000'),
+--@ (x'3b900000'),
+--@ (x'3bd00000'),
+--@ -- Regression
+--@ (x'63800000'),
+--@ (x'4b000000'),
+--@ (x'4b800000'),
+--@ (x'4c000001'),
+--@ (x'4c800b0d'),
+--@ (x'00d24584'),
+--@ (x'00d90b88'),
+--@ (x'45803f34'),
+--@ (x'4f9f24f7'),
+--@ (x'3a8722c3'),
+--@ (x'5c800041'),
+--@ (x'15ae43fd'),
+--@ (x'5d4cccfb'),
+--@ (x'4c800001'),
+--@ (x'57800ed8'),
+--@ (x'5f000000'),
+--@ (x'700000f0'),
+--@ (x'5f23e9ac'),
+--@ (x'5e9502f9'),
+--@ (x'5e8012b1'),
+--@ (x'3c000028'),
+--@ (x'60cde861'),
+--@ (x'03aa2a50'),
+--@ (x'43480000'),
+--@ (x'4c000000'),
+--@ -- LooksLikePow5
+--@ (x'5D1502F9'),
+--@ (x'5D9502F9'),
+--@ (x'5E1502F9'),
+--@ -- OutputLength
+--@ (x'3f99999a'),
+--@ (x'3f9d70a4'),
+--@ (x'3f9df3b6'),
+--@ (x'3f9e0419'),
+--@ (x'3f9e0610'),
+--@ (x'3f9e064b'),
+--@ (x'3f9e0651'),
+--@ (x'03d20cfe')
+--@ )
+--@ select float4send(flt) as ibits,
+--@ flt,
+--@ flt::text::float4 as r_flt,
+--@ float4send(flt::text::float4) as obits,
+--@ float4send(flt::text::float4) = float4send(flt) as correct
+--@ from (select bits::integer::xfloat4::float4 as flt
+--@ from testdata
+--@ offset 0) s;
 
 -- clean up, lest opr_sanity complain
-drop type xfloat4 cascade;
+--@ drop type xfloat4 cascade;
\ No newline at end of file
diff --git a/src/tests/regress/src/schedule.rs b/src/tests/regress/src/schedule.rs
index 901f1b6dcac3f..9bfc2d759f324 100644
--- a/src/tests/regress/src/schedule.rs
+++ b/src/tests/regress/src/schedule.rs
@@ -53,6 +53,11 @@ pub(crate) struct Schedule {
     schedules: Vec<Vec<String>>,
 }
 
+// Test queries commented out with `--@ ` are ignored for comparison.
+// Unlike a normal `--` comment, this does not require modifying the expected output file.
+// We can simply toggle a case on/off by just updating the input SQL file.
+const PREFIX_IGNORE: &str = "--@ ";
+
 impl Schedule {
     pub(crate) fn new(opts: Opts) -> anyhow::Result<Self> {
         Ok(Self {
@@ -284,7 +289,9 @@ impl TestCase {
         let expected_output_path = self.file_manager.expected_output_of(&self.test_name)?;
 
-        let input_lines = input_file_content.lines().filter(|s| !s.is_empty());
+        let input_lines = input_file_content
+            .lines()
+            .filter(|s| !s.is_empty() && *s != PREFIX_IGNORE);
 
         let mut expected_lines = std::io::BufReader::new(File::open(expected_output_path)?)
             .lines()
             .map(|s| s.unwrap())
@@ -293,7 +300,7 @@ impl TestCase {
             .lines()
             .skip(extra_lines_added_to_input.len())
             .map(|s| s.unwrap())
-            .filter(|s| !s.is_empty());
+            .filter(|s| !s.is_empty() && s != PREFIX_IGNORE);
 
         // We split the output lines (either expected or actual) based on matching lines from input.
         // For example:
@@ -317,10 +324,6 @@ impl TestCase {
         //   * query 9..=9 and output 10..=12
         let mut is_diff = false;
         let mut pending_input = vec![];
-        // Test queries commented out with `--@ ` are ignored for comparison.
-        // Unlike normal comment `--`, this does not require modification to expected output file.
-        // We can simplify toggle the case on/off by just updating the input sql file.
-        const PREFIX_IGNORE: &str = "--@ ";
 
         for input_line in input_lines {
             let original_input_line = input_line.strip_prefix(PREFIX_IGNORE).unwrap_or(input_line);
@@ -344,9 +347,6 @@ impl TestCase {
                 let query_input = std::mem::replace(&mut pending_input, vec![input_line]);
 
-                if let Some(l) = query_input.last() && l.starts_with(PREFIX_IGNORE) {
-                    continue;
-                }
                 is_diff = !compare_output(&query_input, &expected_output, &actual_output) || is_diff;
             }
 
         // There may be more lines after the final matching lines.
@@ -371,6 +371,9 @@ fn compare_output(query: &[&str], expected: &[String], actual: &[String]) -> bool {
         eq
     };
 
+    if let Some(l) = query.last() && l.starts_with(PREFIX_IGNORE) {
+        return true;
+    }
     if !expected.is_empty()
        && !actual.is_empty()
        && expected[0].starts_with("ERROR: ")
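Editorial note: the rule this diff moves into `compare_output` can be illustrated by a minimal, self-contained sketch. The names below (`outputs_match`, the sample queries) are hypothetical and not the real `schedule.rs` API; the sketch only demonstrates the convention that a query whose last input line carries the `--@ ` prefix is treated as matching unconditionally, so a case can be toggled off without regenerating the expected output file.

```rust
// Hypothetical sketch of the `--@ ` convention; not the actual schedule.rs code.
const PREFIX_IGNORE: &str = "--@ ";

/// Returns true when the outputs match, or when the query is marked with the
/// ignore prefix (in which case its result is never compared at all).
fn outputs_match(query: &[&str], expected: &[String], actual: &[String]) -> bool {
    // A query whose last line starts with `--@ ` stays in the input file,
    // but a mismatching output never fails the run.
    if let Some(last) = query.last() {
        if last.starts_with(PREFIX_IGNORE) {
            return true;
        }
    }
    expected == actual
}

fn main() {
    let ignored_query = ["--@ select float4send(flt) from testdata;"];
    let live_query = ["select 1;"];
    let expected = vec!["1".to_string()];
    let actual = vec!["2".to_string()];

    // Ignored: passes even though expected != actual.
    assert!(outputs_match(&ignored_query, &expected, &actual));
    // Not ignored: the mismatch is reported.
    assert!(!outputs_match(&live_query, &expected, &actual));
    println!("ok");
}
```

Placing the check inside `compare_output`, as the diff does, keeps the skip decision next to the comparison itself instead of special-casing it in the line-splitting loop.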