Skip to content

Commit

Permalink
fix #1151
Browse files Browse the repository at this point in the history
Fixes a off by one error in the stats for the index fast field in the multi value fast field.
When retrieving the data range for a docid, `get(doc)..get(docid+1)` is requested. On creation
the num_vals statistic was set to doc instead of docid + 1. In the multivaluelinearinterpol fast
field the last value was therefore not serialized (and would return 0 instead in most cases).
So the last document get(lastdoc)..get(lastdoc + 1) would return the invalid range `value..0`.

This PR adds a proptest to cover this scenario. A combination of a large number values, since multilinear
interpolation is only active for more than 5_000 values, and a merge is required.
  • Loading branch information
PSeitz committed Sep 8, 2021
1 parent 319609e commit 1383039
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ maplit = "1.0.2"
matches = "0.1.8"
proptest = "1.0"
criterion = "0.3.5"
test-env-log = "0.2.7"
env_logger = "0.9.0"

[dev-dependencies.fail]
version = "0.4"
Expand Down
113 changes: 113 additions & 0 deletions src/fastfield/multivalued/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,22 @@ pub use self::writer::MultiValuedFastFieldWriter;
mod tests {

use crate::collector::TopDocs;
use crate::indexer::NoMergePolicy;
use crate::query::QueryParser;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Document;
use crate::Index;
use crate::Term;
use chrono::Duration;
use futures::executor::block_on;
use proptest::prop_oneof;
use proptest::proptest;
use proptest::strategy::Strategy;
use test_env_log::test;

#[test]
fn test_multivalued_u64() {
Expand Down Expand Up @@ -225,6 +233,111 @@ mod tests {
multi_value_reader.get_vals(3, &mut vals);
assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
}

fn test_multivalued_no_panic(ops: &[IndexingOp]) {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field(
"multifield",
IntOptions::default()
.set_fast(Cardinality::MultiValues)
.set_indexed(),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));

for &op in ops {
match op {
IndexingOp::AddDoc { id } => {
match id % 3 {
0 => {
index_writer.add_document(doc!());
}
1 => {
let mut doc = Document::new();
for _ in 0..5001 {
doc.add_u64(field, id as u64);
}
index_writer.add_document(doc);
}
_ => {
let mut doc = Document::new();
doc.add_u64(field, id as u64);
index_writer.add_document(doc);
}
};
}
IndexingOp::DeleteDoc { id } => {
index_writer.delete_term(Term::from_field_u64(field, id as u64));
}
IndexingOp::Commit => {
index_writer.commit().unwrap();
}
IndexingOp::Merge => {
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if !segment_ids.is_empty() {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.segment_updater().wait_merging_thread().is_ok());
}
}
}
}

assert!(index_writer.commit().is_ok());

// Merging the segments
{
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if !segment_ids.is_empty() {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.wait_merging_threads().is_ok());
}
}
}

#[derive(Debug, Clone, Copy)]
enum IndexingOp {
AddDoc { id: u32 },
DeleteDoc { id: u32 },
Commit,
Merge,
}

fn operation_strategy() -> impl Strategy<Value = IndexingOp> {
prop_oneof![
(0u32..10u32).prop_map(|id| IndexingOp::DeleteDoc { id }),
(0u32..10u32).prop_map(|id| IndexingOp::AddDoc { id }),
(0u32..2u32).prop_map(|_| IndexingOp::Commit),
(0u32..1u32).prop_map(|_| IndexingOp::Merge),
]
}

proptest! {
#[test]
fn test_multivalued_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
test_multivalued_no_panic(&ops[..]);
}
}

#[test]
fn test_multivalued_proptest_off_by_one_bug_1151() {
use IndexingOp::*;
let ops = [
AddDoc { id: 3 },
AddDoc { id: 1 },
AddDoc { id: 3 },
Commit,
Merge,
];

test_multivalued_no_panic(&ops[..]);
}

#[test]
#[ignore]
fn test_many_facets() {
Expand Down
2 changes: 2 additions & 0 deletions src/indexer/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,8 @@ impl IndexMerger {
total_num_vals += u64s_reader.get_total_len();
}
}
// analougus to offset.push below, for the (last docid .. last docid + 1) range
idx_num_vals += 1;

let stats = FastFieldStats {
max_value: total_num_vals,
Expand Down

0 comments on commit 1383039

Please sign in to comment.