diff --git a/src/core/index.rs b/src/core/index.rs index ed254c393e..a683fd6aaa 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -13,7 +13,7 @@ use crate::directory::INDEX_WRITER_LOCK; use crate::directory::{Directory, RamDirectory}; use crate::error::DataCorruption; use crate::error::TantivyError; -use crate::indexer::index_writer::HEAP_SIZE_MIN; +use crate::indexer::index_writer::{HEAP_SIZE_MIN, MAX_NUM_THREAD}; use crate::indexer::segment_updater::save_new_metas; use crate::reader::IndexReader; use crate::reader::IndexReaderBuilder; @@ -432,7 +432,8 @@ impl Index { /// Creates a multithreaded writer /// - /// Tantivy will automatically define the number of threads to use. + /// Tantivy will automatically define the number of threads to use, but + /// no more than [`MAX_NUM_THREAD`] threads. /// `overall_heap_size_in_bytes` is the total target memory usage that will be split /// between a given number of threads. /// @@ -441,7 +442,7 @@ impl Index { /// # Panics /// If the heap size per thread is too small, panics. pub fn writer(&self, overall_heap_size_in_bytes: usize) -> crate::Result { - let mut num_threads = num_cpus::get(); + let mut num_threads = std::cmp::min(num_cpus::get(), MAX_NUM_THREAD); let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads; if heap_size_in_bytes_per_thread < HEAP_SIZE_MIN { num_threads = (overall_heap_size_in_bytes / HEAP_SIZE_MIN).max(1); diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 11b273016d..eac379ca4c 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -43,6 +43,9 @@ pub const MARGIN_IN_BYTES: usize = 1_000_000; pub const HEAP_SIZE_MIN: usize = ((MARGIN_IN_BYTES as u32) * 3u32) as usize; pub const HEAP_SIZE_MAX: usize = u32::max_value() as usize - MARGIN_IN_BYTES; +// We impose the number of index writer threads to be at most this. 
+pub const MAX_NUM_THREAD: usize = 8; + // Add document will block if the number of docs waiting in the queue to be indexed // reaches `PIPELINE_MAX_SIZE_IN_DOCS` const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;