Skip to content

Commit

Permalink
increase compaction interval
Browse files Browse the repository at this point in the history
Merging the segments currently takes a write lock for the entirety of the operation. This causes all searches to time out whenever the live index compacts its segments. We should actually be able to split up the merge operation: create the merged segment under a read lock, and only take a write lock when swapping in the new segment and cleaning up the old ones. Increasing the compaction interval is only a temporary fix.
  • Loading branch information
mikkeldenker committed Oct 4, 2024
1 parent 87fbd3d commit db2c6c3
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 22 deletions.
2 changes: 1 addition & 1 deletion crates/core/src/inverted_index/indexing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ impl InvertedIndex {
Ok(())
}

#[allow(clippy::missing_panics_doc)] // cannot panic as writer is prepared
#[allow(clippy::missing_panics_doc)] // should not panic as writer is prepared
pub fn merge_segments_by_id(&mut self, segments: &[SegmentId]) -> Result<Option<SegmentId>> {
self.prepare_writer()?;

Expand Down
57 changes: 39 additions & 18 deletions crates/core/src/live_index/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,42 +142,63 @@ impl InnerIndex {
.unwrap();

self.sync_meta_with_index();
self.re_open();
}

pub fn compact_segments_by_date(&mut self) {
let mut segments_by_date: HashMap<NaiveDate, Vec<Segment>> = HashMap::new();

for segment in self.meta.segments.clone() {
segments_by_date
.entry(segment.created.date_naive())
.or_default()
.push(segment);
}
let segments_to_compact = self.prepare_segments_for_compaction();

for (_, segments) in segments_by_date {
for (_, segments) in segments_to_compact {
if segments.len() <= 1 {
continue;
}

let segment_ids: Vec<SegmentId> = segments.iter().map(|s| s.id).collect();
let newest_creation_date = segments.iter().map(|s| s.created).max().unwrap();

if let Ok(Some(new_segment_id)) =
self.index.inverted_index.merge_segments_by_id(&segment_ids)
{
// Update meta with the new segment, using the newest creation date
self.meta.segments.retain(|s| !segment_ids.contains(&s.id));
self.meta.segments.push(Segment {
id: new_segment_id,
created: newest_creation_date,
});
let merge_result = self.index.inverted_index.merge_segments_by_id(&segment_ids);

if let Ok(Some(new_segment_id)) = merge_result {
self.update_meta_after_compaction(
segment_ids,
new_segment_id,
newest_creation_date,
);
}
}

self.save_meta();
self.re_open();
}

/// Group the live index's segments by the calendar day they were created on.
///
/// Segments sharing a creation date are candidates for being merged into a
/// single segment by the compaction pass.
fn prepare_segments_for_compaction(&self) -> HashMap<NaiveDate, Vec<Segment>> {
    let mut grouped: HashMap<NaiveDate, Vec<Segment>> = HashMap::new();

    for segment in self.meta.segments.iter().cloned() {
        let day = segment.created.date_naive();
        grouped.entry(day).or_default().push(segment);
    }

    grouped
}

/// Replace the merged-away segments in the index metadata with the single
/// segment produced by compaction.
///
/// `old_segment_ids` are dropped from the metadata; the new segment is
/// recorded with `newest_creation_date` (the creation time of the youngest
/// source segment) so it keeps participating in date-based grouping.
fn update_meta_after_compaction(
    &mut self,
    old_segment_ids: Vec<SegmentId>,
    new_segment_id: SegmentId,
    newest_creation_date: DateTime<Utc>,
) {
    // Remove every segment that went into the merge...
    let merged = &old_segment_ids;
    self.meta.segments.retain(|seg| !merged.contains(&seg.id));

    // ...and register the freshly merged segment in their place.
    let replacement = Segment {
        id: new_segment_id,
        created: newest_creation_date,
    };
    self.meta.segments.push(replacement);
}

fn re_open(&mut self) {
self.index.inverted_index.re_open().unwrap();
self.index.prepare_writer().unwrap();
Expand Down
6 changes: 3 additions & 3 deletions crates/core/src/live_index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ mod index_manager;
pub use self::crawler::Crawler;

const TTL: Duration = Duration::from_secs(60 * 60 * 24 * 60); // 60 days
const PRUNE_INTERVAL: Duration = Duration::from_secs(60 * 60); // 1 hour
const COMPACT_INTERVAL: Duration = Duration::from_secs(60 * 60); // 1 hour
const AUTO_COMMIT_INTERVAL: Duration = Duration::from_secs(60 * 5); // 5 minutes
const PRUNE_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60); // 6 hours
const COMPACT_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60); // 6 hours
const AUTO_COMMIT_INTERVAL: Duration = Duration::from_secs(10 * 60); // 10 minutes
const EVENT_LOOP_INTERVAL: Duration = Duration::from_secs(5);
const BATCH_SIZE: usize = 512;

0 comments on commit db2c6c3

Please sign in to comment.