diff --git a/crates/core/src/inverted_index/indexing.rs b/crates/core/src/inverted_index/indexing.rs index fa7c7fe5..9a4a62c2 100644 --- a/crates/core/src/inverted_index/indexing.rs +++ b/crates/core/src/inverted_index/indexing.rs @@ -153,7 +153,7 @@ impl InvertedIndex { Ok(()) } - #[allow(clippy::missing_panics_doc)] // cannot panic as writer is prepared + #[allow(clippy::missing_panics_doc)] // should not panic as writer is prepared pub fn merge_segments_by_id(&mut self, segments: &[SegmentId]) -> Result> { self.prepare_writer()?; diff --git a/crates/core/src/live_index/index.rs b/crates/core/src/live_index/index.rs index f812ff96..5058afd5 100644 --- a/crates/core/src/live_index/index.rs +++ b/crates/core/src/live_index/index.rs @@ -142,19 +142,13 @@ impl InnerIndex { .unwrap(); self.sync_meta_with_index(); + self.re_open(); } pub fn compact_segments_by_date(&mut self) { - let mut segments_by_date: HashMap> = HashMap::new(); - - for segment in self.meta.segments.clone() { - segments_by_date - .entry(segment.created.date_naive()) - .or_default() - .push(segment); - } + let segments_to_compact = self.prepare_segments_for_compaction(); - for (_, segments) in segments_by_date { + for (_, segments) in segments_to_compact { if segments.len() <= 1 { continue; } @@ -162,15 +156,14 @@ impl InnerIndex { let segment_ids: Vec = segments.iter().map(|s| s.id).collect(); let newest_creation_date = segments.iter().map(|s| s.created).max().unwrap(); - if let Ok(Some(new_segment_id)) = - self.index.inverted_index.merge_segments_by_id(&segment_ids) - { - // Update meta with the new segment, using the newest creation date - self.meta.segments.retain(|s| !segment_ids.contains(&s.id)); - self.meta.segments.push(Segment { - id: new_segment_id, - created: newest_creation_date, - }); + let merge_result = self.index.inverted_index.merge_segments_by_id(&segment_ids); + + if let Ok(Some(new_segment_id)) = merge_result { + self.update_meta_after_compaction( + segment_ids, + new_segment_id, + newest_creation_date, + ); } } @@ -178,6 +171,34 @@ impl InnerIndex { self.re_open(); } + fn prepare_segments_for_compaction(&self) -> HashMap> { + let mut segments_by_date: HashMap> = HashMap::new(); + + for segment in self.meta.segments.clone() { + segments_by_date + .entry(segment.created.date_naive()) + .or_default() + .push(segment); + } + + segments_by_date + } + + fn update_meta_after_compaction( + &mut self, + old_segment_ids: Vec, + new_segment_id: SegmentId, + newest_creation_date: DateTime, + ) { + self.meta + .segments + .retain(|s| !old_segment_ids.contains(&s.id)); + self.meta.segments.push(Segment { + id: new_segment_id, + created: newest_creation_date, + }); + } + fn re_open(&mut self) { self.index.inverted_index.re_open().unwrap(); self.index.prepare_writer().unwrap(); diff --git a/crates/core/src/live_index/mod.rs b/crates/core/src/live_index/mod.rs index afc5b732..60448f7d 100644 --- a/crates/core/src/live_index/mod.rs +++ b/crates/core/src/live_index/mod.rs @@ -25,8 +25,8 @@ mod index_manager; pub use self::crawler::Crawler; const TTL: Duration = Duration::from_secs(60 * 60 * 24 * 60); // 60 days -const PRUNE_INTERVAL: Duration = Duration::from_secs(60 * 60); // 1 hour -const COMPACT_INTERVAL: Duration = Duration::from_secs(60 * 60); // 1 hour -const AUTO_COMMIT_INTERVAL: Duration = Duration::from_secs(60 * 5); // 5 minutes +const PRUNE_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60); // 6 hours +const COMPACT_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60); // 6 hours +const AUTO_COMMIT_INTERVAL: Duration = Duration::from_secs(10 * 60); // 10 minutes const EVENT_LOOP_INTERVAL: Duration = Duration::from_secs(5); const BATCH_SIZE: usize = 512;