fix: fix potential data loss for shared source #19443

Merged
1 commit merged on Nov 20, 2024
6 changes: 0 additions & 6 deletions e2e_test/source_inline/cdc/mysql/mysql_create_drop.slt.serial
@@ -49,12 +49,6 @@ create source s with (

sleep 2s

# At the beginning, the source is paused. It will resume after a downstream is created.
system ok
internal_table.mjs --name s --type '' --count
----
count: 0


statement ok
create table tt1_shared (v1 int,
69 changes: 50 additions & 19 deletions e2e_test/source_inline/kafka/shared_source.slt.serial
@@ -59,11 +59,17 @@ select count(*) from rw_internal_tables where name like '%s0%';

sleep 1s

# SourceExecutor's ingestion does not start (state table is empty), even after sleep
statement ok
flush;

# SourceExecutor starts from latest.
system ok
internal_table.mjs --name s0 --type source
----
(empty)
0,"{""split_info"": {""partition"": 0, ""start_offset"": 0, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}"
1,"{""split_info"": {""partition"": 1, ""start_offset"": 0, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}"
2,"{""split_info"": {""partition"": 2, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}"
3,"{""split_info"": {""partition"": 3, ""start_offset"": 2, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}"


statement ok
@@ -72,12 +78,6 @@ create materialized view mv_1 as select * from s0;
# Wait enough time to ensure SourceExecutor consumes all Kafka data.
sleep 2s

# SourceExecutor's ingestion started, but it only starts from latest (offset 1).
system ok
internal_table.mjs --name s0 --type source
----
(empty)


# SourceBackfill starts from offset 0, with backfill_info: HasDataToBackfill { latest_offset: "0" } (decided by kafka high watermark).
# (meaning upstream already consumed offset 0, so we only need to backfill to offset 0)
@@ -144,7 +144,7 @@ EOF

sleep 2s

# SourceExecutor's finally got new data now.
# SourceExecutor's got new data.
system ok
internal_table.mjs --name s0 --type source
----
@@ -185,16 +185,6 @@ select v1, v2 from mv_1;
4 dd


# start_offset changed to 1
system ok
internal_table.mjs --name s0 --type source
----
0,"{""split_info"": {""partition"": 0, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}"
1,"{""split_info"": {""partition"": 1, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}"
2,"{""split_info"": {""partition"": 2, ""start_offset"": 2, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}"
3,"{""split_info"": {""partition"": 3, ""start_offset"": 3, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}"


# Transition from SourceCachingUp to Finished after consuming one upstream message.
system ok
internal_table.mjs --name mv_1 --type sourcebackfill
@@ -334,6 +324,47 @@ internal_table.mjs --name s0 --type source
# # risedev psql -c "select name, flags, parallelism from rw_fragments JOIN rw_relations ON rw_fragments.table_id = rw_relations.id order by name;"


# Test: rate limit and resume won't lose data

statement ok
alter source s0 set source_rate_limit to 0;


system ok
cat <<EOF | rpk topic produce shared_source -f "%p %v\n" -p 0
0 {"v1": 1, "v2": "a"}
1 {"v1": 2, "v2": "b"}
2 {"v1": 3, "v2": "c"}
3 {"v1": 4, "v2": "d"}
EOF

sleep 2s

# no data goes in

query ?? rowsort
select v1, count(*) from mv_1 group by v1;
----
1 12
2 12
3 13
4 14

statement ok
alter source s0 set source_rate_limit to default;

sleep 3s

# data comes in
query ?? rowsort
select v1, count(*) from mv_1 group by v1;
----
1 13
2 13
3 14
4 15


statement ok
drop source s0 cascade;

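The `backfill_info: HasDataToBackfill { latest_offset: "0" }` comment earlier in this test file hints at how the Kafka high watermark decides whether any backfill is needed at all. The following is only a toy sketch of that decision: the variant names mirror the test comment, while the watermark comparison is an assumption for illustration, not the code changed in this PR.

```rust
// Toy model of the backfill decision referenced in the test comments above.
#[derive(Debug, PartialEq)]
enum BackfillInfo {
    // Data already exists up to `latest_offset`, so backfill must cover it.
    HasDataToBackfill { latest_offset: String },
    // Upstream starts at the head of the partition; nothing to backfill.
    NoDataToBackfill,
}

fn decide_backfill(low_watermark: i64, high_watermark: i64) -> BackfillInfo {
    if high_watermark > low_watermark {
        // The high watermark is the offset the *next* message will get, so
        // the newest existing message sits at `high_watermark - 1`.
        BackfillInfo::HasDataToBackfill {
            latest_offset: (high_watermark - 1).to_string(),
        }
    } else {
        BackfillInfo::NoDataToBackfill
    }
}

fn main() {
    // One message (offset 0) already in the partition, as in the test above:
    // backfill needs to cover exactly offset 0.
    assert_eq!(
        decide_backfill(0, 1),
        BackfillInfo::HasDataToBackfill { latest_offset: "0".to_string() }
    );
}
```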
4 changes: 2 additions & 2 deletions src/batch/src/executor/source.rs
@@ -157,9 +157,9 @@ impl SourceExecutor {
ConnectorProperties::default(),
None,
));
let stream = self
let (stream, _) = self
.source
.build_stream(Some(self.split_list), self.column_ids, source_ctx)
.build_stream(Some(self.split_list), self.column_ids, source_ctx, false)
.await?;

#[for_await]
8 changes: 6 additions & 2 deletions src/connector/src/source/base.rs
@@ -69,7 +69,7 @@ pub trait TryFromBTreeMap: Sized + UnknownFields {
/// Represents `WITH` options for sources.
///
/// Each instance should add a `#[derive(with_options::WithOptions)]` marker.
pub trait SourceProperties: TryFromBTreeMap + Clone + WithOptions {
pub trait SourceProperties: TryFromBTreeMap + Clone + WithOptions + std::fmt::Debug {
const SOURCE_NAME: &'static str;
type Split: SplitMetaData
+ TryFrom<SplitImpl, Error = crate::error::ConnectorError>
@@ -108,7 +108,7 @@ impl<P: DeserializeOwned + UnknownFields> TryFromBTreeMap for P {
}
}

pub async fn create_split_reader<P: SourceProperties + std::fmt::Debug>(
pub async fn create_split_reader<P: SourceProperties>(
prop: P,
splits: Vec<SplitImpl>,
parser_config: ParserConfig,
@@ -375,6 +375,10 @@ pub trait SplitReader: Sized + Send {
fn backfill_info(&self) -> HashMap<SplitId, BackfillInfo> {
HashMap::new()
}

async fn seek_to_latest(&mut self) -> Result<Vec<SplitImpl>> {
Err(anyhow!("seek_to_latest is not supported for this connector").into())
}
}

/// Information used to determine whether we should start and finish source backfill.
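The new `seek_to_latest` above is added as a defaulted trait method: it returns an error unless a connector overrides it, which leaves every non-Kafka `SplitReader` untouched. A self-contained sketch of that default-method pattern, using placeholder types rather than the real `SplitReader`/`SplitImpl` (and omitting `async` for brevity):

```rust
use anyhow::{anyhow, Result};

// Placeholder trait standing in for `SplitReader`: the default body errors,
// so only connectors that can actually seek need to override it.
trait Reader {
    fn seek_to_latest(&mut self) -> Result<Vec<String>> {
        Err(anyhow!("seek_to_latest is not supported for this connector"))
    }
}

struct NonSeekableReader;
impl Reader for NonSeekableReader {} // keeps the default error

struct SeekableReader {
    splits: Vec<String>,
}

impl Reader for SeekableReader {
    fn seek_to_latest(&mut self) -> Result<Vec<String>> {
        // A real implementation repositions its consumer and rewrites the
        // start offsets; here we just hand back the splits we would update.
        Ok(self.splits.clone())
    }
}

fn main() -> Result<()> {
    let mut plain = NonSeekableReader;
    assert!(plain.seek_to_latest().is_err());

    let mut kafka_like = SeekableReader { splits: vec!["partition-0".into()] };
    println!("latest splits: {:?}", kafka_like.seek_to_latest()?);
    Ok(())
}
```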
2 changes: 1 addition & 1 deletion src/connector/src/source/cdc/mod.rs
@@ -58,7 +58,7 @@ pub fn build_cdc_table_id(source_id: u32, external_table_name: &str) -> String {
format!("{}.{}", source_id, external_table_name)
}

pub trait CdcSourceTypeTrait: Send + Sync + Clone + 'static {
pub trait CdcSourceTypeTrait: Send + Sync + Clone + std::fmt::Debug + 'static {
const CDC_CONNECTOR_NAME: &'static str;
fn source_type() -> CdcSourceType;
}
2 changes: 0 additions & 2 deletions src/connector/src/source/kafka/enumerator/client.rs
@@ -191,7 +191,6 @@ impl SplitEnumerator for KafkaSplitEnumerator {
partition,
start_offset: start_offsets.remove(&partition).unwrap(),
stop_offset: stop_offsets.remove(&partition).unwrap(),
hack_seek_to_latest: false,
})
.collect();

@@ -299,7 +298,6 @@ impl KafkaSplitEnumerator {
partition: *partition,
start_offset: Some(start_offset),
stop_offset: Some(stop_offset),
hack_seek_to_latest:false
}
})
.collect::<Vec<KafkaSplit>>())
34 changes: 29 additions & 5 deletions src/connector/src/source/kafka/source/reader.rs
@@ -37,13 +37,15 @@ use crate::source::kafka::{
};
use crate::source::{
into_chunk_stream, BackfillInfo, BoxChunkSourceStream, Column, SourceContextRef, SplitId,
SplitMetaData, SplitReader,
SplitImpl, SplitMetaData, SplitReader,
};

pub struct KafkaSplitReader {
consumer: StreamConsumer<RwConsumerContext>,
offsets: HashMap<SplitId, (Option<i64>, Option<i64>)>,
backfill_info: HashMap<SplitId, BackfillInfo>,
splits: Vec<KafkaSplit>,
sync_call_timeout: Duration,
bytes_per_second: usize,
max_num_messages: usize,
parser_config: ParserConfig,
@@ -110,12 +112,10 @@ impl SplitReader for KafkaSplitReader {

let mut offsets = HashMap::new();
let mut backfill_info = HashMap::new();
for split in splits {
for split in splits.clone() {
offsets.insert(split.id(), (split.start_offset, split.stop_offset));

if split.hack_seek_to_latest {
tpl.add_partition_offset(split.topic.as_str(), split.partition, Offset::End)?;
} else if let Some(offset) = split.start_offset {
if let Some(offset) = split.start_offset {
tpl.add_partition_offset(
split.topic.as_str(),
split.partition,
@@ -168,8 +168,10 @@ impl SplitReader for KafkaSplitReader {
Ok(Self {
consumer,
offsets,
splits,
backfill_info,
bytes_per_second,
sync_call_timeout: properties.common.sync_call_timeout,
max_num_messages,
parser_config,
source_ctx,
@@ -185,6 +187,28 @@ impl SplitReader for KafkaSplitReader {
fn backfill_info(&self) -> HashMap<SplitId, BackfillInfo> {
self.backfill_info.clone()
}

async fn seek_to_latest(&mut self) -> Result<Vec<SplitImpl>> {
let mut latest_splits: Vec<SplitImpl> = Vec::new();
let mut tpl = TopicPartitionList::with_capacity(self.splits.len());
for mut split in self.splits.clone() {
// we can't get latest offset if we use Offset::End, so we just fetch watermark here.
let (_low, high) = self
.consumer
.fetch_watermarks(
split.topic.as_str(),
split.partition,
self.sync_call_timeout,
)
.await?;
tpl.add_partition_offset(split.topic.as_str(), split.partition, Offset::Offset(high))?;
split.start_offset = Some(high - 1);
latest_splits.push(split.into());
}
// replace the previous assignment
self.consumer.assign(&tpl)?;
Ok(latest_splits)
}
}

impl KafkaSplitReader {
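One subtle point in the `seek_to_latest` implementation above: after assigning the consumer to the high watermark, each split is persisted with `start_offset = Some(high - 1)`. That matches the `KafkaSplit` convention (see the next file) where `start_offset` records the last already-consumed offset, i.e. it is exclusive. A small sketch of that bookkeeping, assuming only this convention:

```rust
// `start_offset` stores the last offset already consumed (exclusive
// semantics), so persisting `high_watermark - 1` makes a recovered reader
// resume exactly at the high watermark.
fn start_offset_for_latest(high_watermark: i64) -> Option<i64> {
    Some(high_watermark - 1)
}

fn main() {
    // Partition holding offsets 0..=4: the high watermark is 5 (the offset
    // the next message will receive). Persisting 4 means consumption resumes
    // at 5 after recovery, instead of seeking to `Offset::End` again and
    // skipping anything produced while the job was down.
    assert_eq!(start_offset_for_latest(5), Some(4));
}
```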
12 changes: 0 additions & 12 deletions src/connector/src/source/kafka/split.rs
@@ -32,12 +32,6 @@ pub struct KafkaSplit {
/// A better approach would be to make it **inclusive**. <https://github.com/risingwavelabs/risingwave/pull/16257>
pub(crate) start_offset: Option<i64>,
pub(crate) stop_offset: Option<i64>,
#[serde(skip)]
/// Used by shared source to hackily seek to the latest offset without fetching start offset first.
/// XXX: But why do we fetch low watermark for latest start offset..?
///
/// When this is `true`, `start_offset` will be ignored.
pub(crate) hack_seek_to_latest: bool,
}

impl SplitMetaData for KafkaSplit {
@@ -72,16 +66,10 @@ impl KafkaSplit {
partition,
start_offset,
stop_offset,
hack_seek_to_latest: false,
}
}

pub fn get_topic_and_partition(&self) -> (String, i32) {
(self.topic.clone(), self.partition)
}

/// This should only be used for a fresh split, not persisted in state table yet.
pub fn seek_to_latest_offset(&mut self) {
self.hack_seek_to_latest = true;
}
}
28 changes: 22 additions & 6 deletions src/connector/src/source/reader/reader.rs
@@ -37,8 +37,8 @@ use crate::source::filesystem::opendal_source::{
use crate::source::filesystem::{FsPageItem, OpendalFsSplit};
use crate::source::{
create_split_reader, BackfillInfo, BoxChunkSourceStream, BoxTryStream, Column,
ConnectorProperties, ConnectorState, SourceColumnDesc, SourceContext, SplitId, SplitReader,
WaitCheckpointTask,
ConnectorProperties, ConnectorState, SourceColumnDesc, SourceContext, SplitId, SplitImpl,
SplitReader, WaitCheckpointTask,
};
use crate::{dispatch_source_prop, WithOptionsSecResolved};

@@ -211,14 +211,17 @@ impl SourceReader {
}

/// Build `SplitReader`s and then `BoxChunkSourceStream` from the given `ConnectorState` (`SplitImpl`s).
///
/// If `seek_to_latest` is true, will also return the latest splits after seek.
[Inline review comment (Member)]: What if we always return the splits?

[Reply (Member Author)]: Sounds OK to me. But ConnectorState is also Option<Vec<SplitImpl>> (which is also a little unnecessary to me), so perhaps we should refactor that together. NTFS

pub async fn build_stream(
&self,
state: ConnectorState,
column_ids: Vec<ColumnId>,
source_ctx: Arc<SourceContext>,
) -> ConnectorResult<BoxChunkSourceStream> {
seek_to_latest: bool,
) -> ConnectorResult<(BoxChunkSourceStream, Option<Vec<SplitImpl>>)> {
let Some(splits) = state else {
return Ok(pending().boxed());
return Ok((pending().boxed(), None));
};
let config = self.config.clone();
let columns = self.get_target_columns(column_ids)?;
Expand All @@ -243,7 +246,7 @@ impl SourceReader {

let support_multiple_splits = config.support_multiple_splits();
dispatch_source_prop!(config, prop, {
let readers = if support_multiple_splits {
let mut readers = if support_multiple_splits {
tracing::debug!(
"spawning connector split reader for multiple splits {:?}",
splits
Expand All @@ -268,7 +271,20 @@ impl SourceReader {
.await?
};

Ok(select_all(readers.into_iter().map(|r| r.into_stream())).boxed())
let latest_splits = if seek_to_latest {
let mut latest_splits = Vec::new();
for reader in &mut readers {
latest_splits.extend(reader.seek_to_latest().await?);
}
Some(latest_splits)
} else {
None
};

Ok((
select_all(readers.into_iter().map(|r| r.into_stream())).boxed(),
latest_splits,
))
})
}
}
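As the doc comment in the diff above says, `build_stream` now returns the stream together with `Option<Vec<SplitImpl>>`, populated only when `seek_to_latest` is set, so the caller can persist concrete offsets rather than relying on `Offset::End` after recovery. A minimal model of that return shape, with placeholder types standing in for the real RisingWave ones:

```rust
// Placeholder model of the new `build_stream` contract: a stream plus the
// optional splits produced by seeking. Types here are stand-ins, not the
// real `BoxChunkSourceStream`/`SplitImpl`.
type Chunk = String;
type Split = (i32 /* partition */, i64 /* start_offset */);

fn build_stream(seek_to_latest: bool) -> (Vec<Chunk>, Option<Vec<Split>>) {
    let stream = vec![Chunk::from("chunk-1")];
    // Only a seek produces updated splits; otherwise the caller gets `None`,
    // mirroring the `Option<Vec<SplitImpl>>` in the real signature.
    let latest_splits = seek_to_latest.then(|| vec![(0, 4), (1, 7)]);
    (stream, latest_splits)
}

fn main() {
    let (_stream, latest_splits) = build_stream(true);
    if let Some(splits) = latest_splits {
        // The caller persists these offsets to its state table so a recovery
        // resumes from them, which is the data-loss scenario this PR fixes.
        println!("persist splits: {:?}", splits);
    }
}
```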
4 changes: 2 additions & 2 deletions src/stream/src/executor/source/fetch_executor.rs
@@ -160,9 +160,9 @@ impl<S: StateStore, Src: OpendalSource> FsFetchExecutor<S, Src> {
batch: SplitBatch,
rate_limit_rps: Option<u32>,
) -> StreamExecutorResult<BoxChunkSourceStream> {
let stream = source_desc
let (stream, _) = source_desc
.source
.build_stream(batch, column_ids, Arc::new(source_ctx))
.build_stream(batch, column_ids, Arc::new(source_ctx), false)
.await
.map_err(StreamExecutorError::connector_error)?;
Ok(apply_rate_limit(stream, rate_limit_rps).boxed())
1 change: 0 additions & 1 deletion src/stream/src/executor/source/source_backfill_executor.rs
@@ -609,7 +609,6 @@ impl<S: StateStore> SourceBackfillExecutorInner<S> {
.await?;

if self.should_report_finished(&backfill_stage.states) {
tracing::debug!("progress finish");
self.progress.finish(
barrier.epoch,
backfill_stage.total_backfilled_rows(),