diff --git a/Cargo.toml b/Cargo.toml index 5d9414beb8..8bdb09c53c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,8 @@ maplit = "1.0.2" matches = "0.1.8" proptest = "1.0" criterion = "0.3.5" +test-env-log = "0.2.7" +env_logger = "0.9.0" [dev-dependencies.fail] version = "0.4" diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index d03313ddcc..6c07340aab 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -8,14 +8,22 @@ pub use self::writer::MultiValuedFastFieldWriter; mod tests { use crate::collector::TopDocs; + use crate::indexer::NoMergePolicy; use crate::query::QueryParser; use crate::schema::Cardinality; use crate::schema::Facet; use crate::schema::IntOptions; use crate::schema::Schema; use crate::schema::INDEXED; + use crate::Document; use crate::Index; + use crate::Term; use chrono::Duration; + use futures::executor::block_on; + use proptest::prop_oneof; + use proptest::proptest; + use proptest::strategy::Strategy; + use test_env_log::test; #[test] fn test_multivalued_u64() { @@ -225,6 +233,111 @@ mod tests { multi_value_reader.get_vals(3, &mut vals); assert_eq!(&vals, &[-5i64, -20i64, 1i64]); } + + fn test_multivalued_no_panic(ops: &[IndexingOp]) { + let mut schema_builder = Schema::builder(); + let field = schema_builder.add_u64_field( + "multifield", + IntOptions::default() + .set_fast(Cardinality::MultiValues) + .set_indexed(), + ); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests().unwrap(); + index_writer.set_merge_policy(Box::new(NoMergePolicy)); + + for &op in ops { + match op { + IndexingOp::AddDoc { id } => { + match id % 3 { + 0 => { + index_writer.add_document(doc!()); + } + 1 => { + let mut doc = Document::new(); + for _ in 0..5001 { + doc.add_u64(field, id as u64); + } + index_writer.add_document(doc); + } + _ => { + let mut doc = Document::new(); + doc.add_u64(field, id as u64); + index_writer.add_document(doc); + } + }; + } + IndexingOp::DeleteDoc { id } => { + index_writer.delete_term(Term::from_field_u64(field, id as u64)); + } + IndexingOp::Commit => { + index_writer.commit().unwrap(); + } + IndexingOp::Merge => { + let segment_ids = index + .searchable_segment_ids() + .expect("Searchable segments failed."); + if segment_ids.len() >= 2 { + block_on(index_writer.merge(&segment_ids)).unwrap(); + assert!(index_writer.segment_updater().wait_merging_thread().is_ok()); + } + } + } + } + + assert!(index_writer.commit().is_ok()); + + // Merging the segments + { + let segment_ids = index + .searchable_segment_ids() + .expect("Searchable segments failed."); + if !segment_ids.is_empty() { + block_on(index_writer.merge(&segment_ids)).unwrap(); + assert!(index_writer.wait_merging_threads().is_ok()); + } + } + } + + #[derive(Debug, Clone, Copy)] + enum IndexingOp { + AddDoc { id: u32 }, + DeleteDoc { id: u32 }, + Commit, + Merge, + } + + fn operation_strategy() -> impl Strategy { + prop_oneof![ + (0u32..10u32).prop_map(|id| IndexingOp::DeleteDoc { id }), + (0u32..10u32).prop_map(|id| IndexingOp::AddDoc { id }), + (0u32..2u32).prop_map(|_| IndexingOp::Commit), + (0u32..1u32).prop_map(|_| IndexingOp::Merge), + ] + } + + proptest! { + #[test] + fn test_multivalued_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) { + test_multivalued_no_panic(&ops[..]); + } + } + + #[test] + fn test_multivalued_proptest_off_by_one_bug_1151() { + use IndexingOp::*; + let ops = [ + AddDoc { id: 3 }, + AddDoc { id: 1 }, + AddDoc { id: 3 }, + Commit, + Merge, + ]; + + test_multivalued_no_panic(&ops[..]); + } + #[test] #[ignore] fn test_many_facets() { diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index f498099b9f..30dd7f4f15 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1361,6 +1361,7 @@ mod tests { AddDoc { id: u64 }, DeleteDoc { id: u64 }, Commit, + Merge, } fn operation_strategy() -> impl Strategy { @@ -1368,6 +1369,7 @@ mod tests { (0u64..10u64).prop_map(|id| IndexingOp::DeleteDoc { id }), (0u64..10u64).prop_map(|id| IndexingOp::AddDoc { id }), (0u64..2u64).prop_map(|_| IndexingOp::Commit), + (0u64..1u64).prop_map(|_| IndexingOp::Merge), ] } @@ -1393,7 +1395,7 @@ mod tests { fn test_operation_strategy( ops: &[IndexingOp], sort_index: bool, - force_merge: bool, + force_end_merge: bool, ) -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED); @@ -1435,6 +1437,8 @@ mod tests { .settings(settings) .create_in_ram()?; let mut index_writer = index.writer_for_tests()?; + index_writer.set_merge_policy(Box::new(NoMergePolicy)); + for &op in ops { match op { IndexingOp::AddDoc { id } => { @@ -1448,12 +1452,21 @@ mod tests { IndexingOp::Commit => { index_writer.commit()?; } + IndexingOp::Merge => { + let segment_ids = index + .searchable_segment_ids() + .expect("Searchable segments failed."); + if segment_ids.len() >= 2 { + block_on(index_writer.merge(&segment_ids)).unwrap(); + assert!(index_writer.segment_updater().wait_merging_thread().is_ok()); + } + } } } index_writer.commit()?; let searcher = index.reader()?.searcher(); - if force_merge { + if force_end_merge { index_writer.wait_merging_threads()?; let mut index_writer = index.writer_for_tests()?; let segment_ids = index diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 295a5fde66..ef3874256f 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -501,10 +501,10 @@ impl IndexMerger { // // This is required by the bitpacker, as it needs to know // what should be the bit length use for bitpacking. - let mut idx_num_vals = 0; + let mut num_docs = 0; for (reader, u64s_reader) in reader_and_field_accessors.iter() { if let Some(delete_bitset) = reader.delete_bitset() { - idx_num_vals += reader.max_doc() as u64 - delete_bitset.len() as u64; + num_docs += reader.max_doc() as u64 - delete_bitset.len() as u64; for doc in 0u32..reader.max_doc() { if delete_bitset.is_alive(doc) { let num_vals = u64s_reader.get_len(doc) as u64; @@ -512,14 +512,15 @@ impl IndexMerger { } } } else { - idx_num_vals += reader.max_doc() as u64; + num_docs += reader.max_doc() as u64; total_num_vals += u64s_reader.get_total_len(); } } let stats = FastFieldStats { max_value: total_num_vals, - num_vals: idx_num_vals, + // The fastfield offset index contains (num_docs + 1) values. + num_vals: num_docs + 1, min_value: 0, }; // We can now create our `idx` serializer, and in a second pass,