From 3edf0a272409729d5892ed5d2a85de42879b171f Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 9 Nov 2022 19:20:41 +0900 Subject: [PATCH] Using the manual reload policy in IndexWriter. (#1667) --- src/core/searcher.rs | 9 ++++-- src/fastfield/bytes/mod.rs | 7 +++-- src/fieldnorm/mod.rs | 6 ++-- src/indexer/index_writer.rs | 13 +++----- src/indexer/merger.rs | 8 ++--- src/lib.rs | 2 ++ src/query/all_query.rs | 12 ++++---- src/query/boolean_query/boolean_query.rs | 11 +++---- src/query/boolean_query/mod.rs | 17 ++++++----- src/query/boost_query.rs | 10 +++--- src/query/const_score_query.rs | 10 +++--- src/query/disjunction_max_query.rs | 10 +++--- src/query/empty_query.rs | 8 ++--- src/query/fuzzy_query.rs | 9 ++---- src/query/mod.rs | 4 +-- src/query/more_like_this/mod.rs | 2 ++ src/query/more_like_this/query.rs | 17 ++++++++--- src/query/phrase_query/mod.rs | 9 ++++-- src/query/phrase_query/phrase_query.rs | 21 +++++++------ src/query/phrase_query/phrase_scorer.rs | 19 ++++++------ src/query/phrase_query/phrase_weight.rs | 29 ++++++++++-------- src/query/query.rs | 39 +++++++++++++++++++++--- src/query/range_query.rs | 12 +++----- src/query/regex_query.rs | 9 ++---- src/query/set_query.rs | 18 +++++------ src/query/term_query/mod.rs | 8 ++--- src/query/term_query/term_query.rs | 28 +++++++++-------- src/query/term_query/term_scorer.rs | 4 +-- 28 files changed, 189 insertions(+), 162 deletions(-) diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 9a28ebce00..6d8d143920 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -4,7 +4,7 @@ use std::{fmt, io}; use crate::collector::Collector; use crate::core::{Executor, SegmentReader}; -use crate::query::Query; +use crate::query::{EnableScoring, Query}; use crate::schema::{Document, Schema, Term}; use crate::space_usage::SearcherSpaceUsage; use crate::store::{CacheStats, StoreReader}; @@ -199,7 +199,12 @@ impl Searcher { executor: &Executor, ) -> crate::Result { let scoring_enabled = collector.requires_scoring(); - let weight = query.weight(self, scoring_enabled)?; + let enabled_scoring = if scoring_enabled { + EnableScoring::Enabled(self) + } else { + EnableScoring::Disabled(self.schema()) + }; + let weight = query.weight(enabled_scoring)?; let segment_readers = self.segment_readers(); let fruits = executor.map( |(segment_ord, segment_reader)| { diff --git a/src/fastfield/bytes/mod.rs b/src/fastfield/bytes/mod.rs index 37bda14f9f..d120431e16 100644 --- a/src/fastfield/bytes/mod.rs +++ b/src/fastfield/bytes/mod.rs @@ -6,7 +6,7 @@ pub use self::writer::BytesFastFieldWriter; #[cfg(test)] mod tests { - use crate::query::TermQuery; + use crate::query::{EnableScoring, TermQuery}; use crate::schema::{BytesOptions, IndexRecordOption, Schema, Value, FAST, INDEXED, STORED}; use crate::{DocAddress, DocSet, Index, Searcher, Term}; @@ -82,7 +82,7 @@ mod tests { let field = searcher.schema().get_field("string_bytes").unwrap(); let term = Term::from_field_bytes(field, b"lucene".as_ref()); let term_query = TermQuery::new(term, IndexRecordOption::Basic); - let term_weight = term_query.specialized_weight(&searcher, true)?; + let term_weight = term_query.specialized_weight(EnableScoring::Enabled(&searcher))?; let term_scorer = term_weight.specialized_scorer(searcher.segment_reader(0), 1.0)?; assert_eq!(term_scorer.doc(), 0u32); Ok(()) @@ -95,7 +95,8 @@ mod tests { let field = searcher.schema().get_field("string_bytes").unwrap(); let term = Term::from_field_bytes(field, b"lucene".as_ref()); let term_query = TermQuery::new(term, IndexRecordOption::Basic); - let term_weight_err = term_query.specialized_weight(&searcher, false); + let term_weight_err = + term_query.specialized_weight(EnableScoring::Disabled(searcher.schema())); assert!(matches!( term_weight_err, Err(crate::TantivyError::SchemaError(_)) diff --git a/src/fieldnorm/mod.rs b/src/fieldnorm/mod.rs index 1876c04a3b..e362a9af68 100644 --- a/src/fieldnorm/mod.rs +++ b/src/fieldnorm/mod.rs @@ -34,7 +34,7 @@ mod tests { use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr}; use crate::fieldnorm::{FieldNormReader, FieldNormsSerializer, FieldNormsWriter}; - use crate::query::{Query, TermQuery}; + use crate::query::{EnableScoring, Query, TermQuery}; use crate::schema::{ Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, STORED, TEXT, }; @@ -112,7 +112,7 @@ mod tests { Term::from_field_text(text, "hello"), IndexRecordOption::WithFreqs, ); - let weight = query.weight(&searcher, true)?; + let weight = query.weight(EnableScoring::Enabled(&searcher))?; let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?; assert_eq!(scorer.doc(), 0); assert!((scorer.score() - 0.22920431).abs() < 0.001f32); @@ -141,7 +141,7 @@ mod tests { Term::from_field_text(text, "hello"), IndexRecordOption::WithFreqs, ); - let weight = query.weight(&searcher, true)?; + let weight = query.weight(EnableScoring::Enabled(&searcher))?; let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?; assert_eq!(scorer.doc(), 0); assert!((scorer.score() - 0.22920431).abs() < 0.001f32); diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 9869cd08a5..7dac500a41 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -19,9 +19,9 @@ use crate::indexer::index_writer_status::IndexWriterStatus; use crate::indexer::operation::DeleteOperation; use crate::indexer::stamper::Stamper; use crate::indexer::{MergePolicy, SegmentEntry, SegmentWriter}; -use crate::query::{Query, TermQuery}; +use crate::query::{EnableScoring, Query, TermQuery}; use crate::schema::{Document, IndexRecordOption, Term}; -use crate::{FutureResult, IndexReader, Opstamp}; +use crate::{FutureResult, Opstamp}; // Size of the margin for the `memory_arena`. A segment is closed when the remaining memory // in the `memory_arena` goes below MARGIN_IN_BYTES. @@ -57,7 +57,6 @@ pub struct IndexWriter { _directory_lock: Option, index: Index, - index_reader: IndexReader, memory_arena_in_bytes_per_thread: usize, @@ -298,8 +297,6 @@ impl IndexWriter { memory_arena_in_bytes_per_thread, index: index.clone(), - index_reader: index.reader()?, - index_writer_status: IndexWriterStatus::from(document_receiver), operation_sender: document_sender, @@ -681,8 +678,7 @@ impl IndexWriter { /// only after calling `commit()`. #[doc(hidden)] pub fn delete_query(&self, query: Box) -> crate::Result { - let weight = query.weight(&self.index_reader.searcher(), false)?; - + let weight = query.weight(EnableScoring::Disabled(&self.index.schema()))?; let opstamp = self.stamper.stamp(); let delete_operation = DeleteOperation { opstamp, @@ -763,8 +759,7 @@ impl IndexWriter { match user_op { UserOperation::Delete(term) => { let query = TermQuery::new(term, IndexRecordOption::Basic); - let weight = query.weight(&self.index_reader.searcher(), false)?; - + let weight = query.weight(EnableScoring::Disabled(&self.index.schema()))?; let delete_operation = DeleteOperation { opstamp, target: weight, diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 69734b41cc..aa9d4df210 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1064,7 +1064,7 @@ mod tests { }; use crate::collector::{Count, FacetCollector}; use crate::core::Index; - use crate::query::{AllQuery, BooleanQuery, Scorer, TermQuery}; + use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery}; use crate::schema::{ Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term, TextFieldIndexing, INDEXED, TEXT, @@ -1977,7 +1977,7 @@ mod tests { let reader = index.reader()?; let searcher = reader.searcher(); let mut term_scorer = term_query - .specialized_weight(&searcher, true)? + .specialized_weight(EnableScoring::Enabled(&searcher))? .specialized_scorer(searcher.segment_reader(0u32), 1.0)?; assert_eq!(term_scorer.doc(), 0); assert_nearly_equals!(term_scorer.block_max_score(), 0.0079681855); @@ -1992,7 +1992,7 @@ mod tests { assert_eq!(searcher.segment_readers().len(), 2); for segment_reader in searcher.segment_readers() { let mut term_scorer = term_query - .specialized_weight(&searcher, true)? + .specialized_weight(EnableScoring::Enabled(&searcher))? .specialized_scorer(segment_reader, 1.0)?; // the difference compared to before is intrinsic to the bm25 formula. no worries // there. @@ -2017,7 +2017,7 @@ mod tests { let segment_reader = searcher.segment_reader(0u32); let mut term_scorer = term_query - .specialized_weight(&searcher, true)? + .specialized_weight(EnableScoring::Enabled(&searcher))? .specialized_scorer(segment_reader, 1.0)?; // the difference compared to before is intrinsic to the bm25 formula. no worries there. for doc in segment_reader.doc_ids_alive() { diff --git a/src/lib.rs b/src/lib.rs index f3c9c0e337..f57b99d1e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -277,6 +277,8 @@ pub mod fastfield; pub mod fieldnorm; pub mod positions; pub mod postings; + +/// Module containing the different query implementations. pub mod query; pub mod schema; pub mod space_usage; diff --git a/src/query/all_query.rs b/src/query/all_query.rs index f5793a15c6..6696494547 100644 --- a/src/query/all_query.rs +++ b/src/query/all_query.rs @@ -1,8 +1,8 @@ -use crate::core::{Searcher, SegmentReader}; +use crate::core::SegmentReader; use crate::docset::{DocSet, TERMINATED}; use crate::query::boost_query::BoostScorer; use crate::query::explanation::does_not_match; -use crate::query::{Explanation, Query, Scorer, Weight}; +use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight}; use crate::{DocId, Score}; /// Query that matches all of the documents. @@ -12,7 +12,7 @@ use crate::{DocId, Score}; pub struct AllQuery; impl Query for AllQuery { - fn weight(&self, _: &Searcher, _: bool) -> crate::Result> { + fn weight(&self, _: EnableScoring<'_>) -> crate::Result> { Ok(Box::new(AllWeight)) } } @@ -72,7 +72,7 @@ impl Scorer for AllScorer { mod tests { use super::AllQuery; use crate::docset::TERMINATED; - use crate::query::Query; + use crate::query::{EnableScoring, Query}; use crate::schema::{Schema, TEXT}; use crate::Index; @@ -95,7 +95,7 @@ mod tests { let index = create_test_index()?; let reader = index.reader()?; let searcher = reader.searcher(); - let weight = AllQuery.weight(&searcher, false)?; + let weight = AllQuery.weight(EnableScoring::Disabled(&index.schema()))?; { let reader = searcher.segment_reader(0); let mut scorer = weight.scorer(reader, 1.0)?; @@ -118,7 +118,7 @@ mod tests { let index = create_test_index()?; let reader = index.reader()?; let searcher = reader.searcher(); - let weight = AllQuery.weight(&searcher, false)?; + let weight = AllQuery.weight(EnableScoring::Disabled(searcher.schema()))?; let reader = searcher.segment_reader(0); { let mut scorer = weight.scorer(reader, 2.0)?; diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs index c06f52e6a9..c3fdaa754a 100644 --- a/src/query/boolean_query/boolean_query.rs +++ b/src/query/boolean_query/boolean_query.rs @@ -1,7 +1,6 @@ use super::boolean_weight::BooleanWeight; -use crate::query::{Occur, Query, SumWithCoordsCombiner, TermQuery, Weight}; +use crate::query::{EnableScoring, Occur, Query, SumWithCoordsCombiner, TermQuery, Weight}; use crate::schema::{IndexRecordOption, Term}; -use crate::Searcher; /// The boolean query returns a set of documents /// that matches the Boolean combination of constituent subqueries. @@ -143,17 +142,15 @@ impl From)>> for BooleanQuery { } impl Query for BooleanQuery { - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result> { + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { let sub_weights = self .subqueries .iter() - .map(|&(ref occur, ref subquery)| { - Ok((*occur, subquery.weight(searcher, scoring_enabled)?)) - }) + .map(|&(ref occur, ref subquery)| Ok((*occur, subquery.weight(enable_scoring)?))) .collect::>()?; Ok(Box::new(BooleanWeight::new( sub_weights, - scoring_enabled, + enable_scoring.is_scoring_enabled(), Box::new(SumWithCoordsCombiner::default), ))) } diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index f596a4b66a..404c8d77d3 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -15,7 +15,8 @@ mod tests { use crate::query::score_combiner::SumWithCoordsCombiner; use crate::query::term_query::TermScorer; use crate::query::{ - Intersection, Occur, Query, QueryParser, RequiredOptionalScorer, Scorer, TermQuery, + EnableScoring, Intersection, Occur, Query, QueryParser, RequiredOptionalScorer, Scorer, + TermQuery, }; use crate::schema::*; use crate::{assert_nearly_equals, DocAddress, DocId, Index, Score}; @@ -54,7 +55,7 @@ mod tests { let query_parser = QueryParser::for_index(&index, vec![text_field]); let query = query_parser.parse_query("+a")?; let searcher = index.reader()?.searcher(); - let weight = query.weight(&searcher, true)?; + let weight = query.weight(EnableScoring::Enabled(&searcher))?; let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?; assert!(scorer.is::()); Ok(()) @@ -67,13 +68,13 @@ mod tests { let searcher = index.reader()?.searcher(); { let query = query_parser.parse_query("+a +b +c")?; - let weight = query.weight(&searcher, true)?; + let weight = query.weight(EnableScoring::Enabled(&searcher))?; let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?; assert!(scorer.is::>()); } { let query = query_parser.parse_query("+a +(b c)")?; - let weight = query.weight(&searcher, true)?; + let weight = query.weight(EnableScoring::Enabled(&searcher))?; let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?; assert!(scorer.is::>>()); } @@ -87,7 +88,7 @@ mod tests { let searcher = index.reader()?.searcher(); { let query = query_parser.parse_query("+a b")?; - let weight = query.weight(&searcher, true)?; + let weight = query.weight(EnableScoring::Enabled(&searcher))?; let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?; assert!(scorer.is::, @@ -97,7 +98,7 @@ mod tests { } { let query = query_parser.parse_query("+a b")?; - let weight = query.weight(&searcher, false)?; + let weight = query.weight(EnableScoring::Disabled(searcher.schema()))?; let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?; assert!(scorer.is::()); } @@ -241,7 +242,9 @@ mod tests { let searcher = reader.searcher(); let boolean_query = BooleanQuery::new(vec![(Occur::Should, term_a), (Occur::Should, term_b)]); - let boolean_weight = boolean_query.weight(&searcher, true).unwrap(); + let boolean_weight = boolean_query + .weight(EnableScoring::Enabled(&searcher)) + .unwrap(); { let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 1.0)?; assert_eq!(boolean_scorer.doc(), 0u32); diff --git a/src/query/boost_query.rs b/src/query/boost_query.rs index c03489ca06..b3c76a0a57 100644 --- a/src/query/boost_query.rs +++ b/src/query/boost_query.rs @@ -2,8 +2,8 @@ use std::fmt; use crate::fastfield::AliveBitSet; use crate::query::explanation::does_not_match; -use crate::query::{Explanation, Query, Scorer, Weight}; -use crate::{DocId, DocSet, Score, Searcher, SegmentReader, Term}; +use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight}; +use crate::{DocId, DocSet, Score, SegmentReader, Term}; /// `BoostQuery` is a wrapper over a query used to boost its score. /// @@ -38,9 +38,9 @@ impl fmt::Debug for BoostQuery { } impl Query for BoostQuery { - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result> { - let weight_without_boost = self.query.weight(searcher, scoring_enabled)?; - let boosted_weight = if scoring_enabled { + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { + let weight_without_boost = self.query.weight(enable_scoring)?; + let boosted_weight = if enable_scoring.is_scoring_enabled() { Box::new(BoostWeight::new(weight_without_boost, self.boost)) } else { weight_without_boost diff --git a/src/query/const_score_query.rs b/src/query/const_score_query.rs index 8864bc32c3..7a812e0990 100644 --- a/src/query/const_score_query.rs +++ b/src/query/const_score_query.rs @@ -1,7 +1,7 @@ use std::fmt; -use crate::query::{Explanation, Query, Scorer, Weight}; -use crate::{DocId, DocSet, Score, Searcher, SegmentReader, TantivyError, Term}; +use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight}; +use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term}; /// `ConstScoreQuery` is a wrapper over a query to provide a constant score. /// It can avoid unnecessary score computation on the wrapped query. @@ -36,9 +36,9 @@ impl fmt::Debug for ConstScoreQuery { } impl Query for ConstScoreQuery { - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result> { - let inner_weight = self.query.weight(searcher, scoring_enabled)?; - Ok(if scoring_enabled { + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { + let inner_weight = self.query.weight(enable_scoring)?; + Ok(if enable_scoring.is_scoring_enabled() { Box::new(ConstWeight::new(inner_weight, self.score)) } else { inner_weight diff --git a/src/query/disjunction_max_query.rs b/src/query/disjunction_max_query.rs index eec9dfe0d4..552950d50c 100644 --- a/src/query/disjunction_max_query.rs +++ b/src/query/disjunction_max_query.rs @@ -1,7 +1,7 @@ use tantivy_query_grammar::Occur; -use crate::query::{BooleanWeight, DisjunctionMaxCombiner, Query, Weight}; -use crate::{Score, Searcher, Term}; +use crate::query::{BooleanWeight, DisjunctionMaxCombiner, EnableScoring, Query, Weight}; +use crate::{Score, Term}; /// The disjunction max query кeturns documents matching one or more wrapped queries, /// called query clauses or clauses. @@ -91,16 +91,16 @@ impl Clone for DisjunctionMaxQuery { } impl Query for DisjunctionMaxQuery { - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result> { + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { let disjuncts = self .disjuncts .iter() - .map(|disjunct| Ok((Occur::Should, disjunct.weight(searcher, scoring_enabled)?))) + .map(|disjunct| Ok((Occur::Should, disjunct.weight(enable_scoring)?))) .collect::>()?; let tie_breaker = self.tie_breaker; Ok(Box::new(BooleanWeight::new( disjuncts, - scoring_enabled, + enable_scoring.is_scoring_enabled(), Box::new(move || DisjunctionMaxCombiner::with_tie_breaker(tie_breaker)), ))) } diff --git a/src/query/empty_query.rs b/src/query/empty_query.rs index 7a16e69508..76eadddb4d 100644 --- a/src/query/empty_query.rs +++ b/src/query/empty_query.rs @@ -1,7 +1,7 @@ use super::Scorer; use crate::docset::TERMINATED; use crate::query::explanation::does_not_match; -use crate::query::{Explanation, Query, Weight}; +use crate::query::{EnableScoring, Explanation, Query, Weight}; use crate::{DocId, DocSet, Score, Searcher, SegmentReader}; /// `EmptyQuery` is a dummy `Query` in which no document matches. @@ -11,11 +11,7 @@ use crate::{DocId, DocSet, Score, Searcher, SegmentReader}; pub struct EmptyQuery; impl Query for EmptyQuery { - fn weight( - &self, - _searcher: &Searcher, - _scoring_enabled: bool, - ) -> crate::Result> { + fn weight(&self, _enable_scoring: EnableScoring<'_>) -> crate::Result> { Ok(Box::new(EmptyWeight)) } diff --git a/src/query/fuzzy_query.rs b/src/query/fuzzy_query.rs index 4f3492da9e..70424dee47 100644 --- a/src/query/fuzzy_query.rs +++ b/src/query/fuzzy_query.rs @@ -5,9 +5,8 @@ use levenshtein_automata::{Distance, LevenshteinAutomatonBuilder, DFA}; use once_cell::sync::Lazy; use tantivy_fst::Automaton; -use crate::query::{AutomatonWeight, Query, Weight}; +use crate::query::{AutomatonWeight, EnableScoring, Query, Weight}; use crate::schema::Term; -use crate::Searcher; use crate::TantivyError::InvalidArgument; pub(crate) struct DfaWrapper(pub DFA); @@ -158,11 +157,7 @@ impl FuzzyTermQuery { } impl Query for FuzzyTermQuery { - fn weight( - &self, - _searcher: &Searcher, - _scoring_enabled: bool, - ) -> crate::Result> { + fn weight(&self, _enable_scoring: EnableScoring<'_>) -> crate::Result> { Ok(Box::new(self.specialized_weight()?)) } } diff --git a/src/query/mod.rs b/src/query/mod.rs index c46aed50ff..9bf897ed6d 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -1,5 +1,3 @@ -//! Query Module - mod all_query; mod automaton_weight; mod bitset; @@ -51,7 +49,7 @@ pub use self::fuzzy_query::FuzzyTermQuery; pub use self::intersection::{intersect_scorers, Intersection}; pub use self::more_like_this::{MoreLikeThisQuery, MoreLikeThisQueryBuilder}; pub use self::phrase_query::PhraseQuery; -pub use self::query::{Query, QueryClone}; +pub use self::query::{EnableScoring, Query, QueryClone}; pub use self::query_parser::{QueryParser, QueryParserError}; pub use self::range_query::RangeQuery; pub use self::regex_query::RegexQuery; diff --git a/src/query/more_like_this/mod.rs b/src/query/more_like_this/mod.rs index 6b8dec1251..277d2b9bbd 100644 --- a/src/query/more_like_this/mod.rs +++ b/src/query/more_like_this/mod.rs @@ -1,4 +1,6 @@ mod more_like_this; + +/// Module containing the different query implementations. mod query; pub use self::more_like_this::MoreLikeThis; diff --git a/src/query/more_like_this/query.rs b/src/query/more_like_this/query.rs index 6d12c3272e..125a73075e 100644 --- a/src/query/more_like_this/query.rs +++ b/src/query/more_like_this/query.rs @@ -1,7 +1,7 @@ use super::MoreLikeThis; -use crate::query::{Query, Weight}; +use crate::query::{EnableScoring, Query, Weight}; use crate::schema::{Field, Value}; -use crate::{DocAddress, Result, Searcher}; +use crate::DocAddress; /// A query that matches all of the documents similar to a document /// or a set of field values provided. @@ -42,16 +42,23 @@ impl MoreLikeThisQuery { } impl Query for MoreLikeThisQuery { - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result> { + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { + let searcher = match enable_scoring { + EnableScoring::Enabled(searcher) => searcher, + EnableScoring::Disabled(_) => { + let err = "MoreLikeThisQuery requires to enable scoring.".to_string(); + return Err(crate::TantivyError::InvalidArgument(err)); + } + }; match &self.target { TargetDocument::DocumentAdress(doc_address) => self .mlt .query_with_document(searcher, *doc_address)? - .weight(searcher, scoring_enabled), + .weight(enable_scoring), TargetDocument::DocumentFields(doc_fields) => self .mlt .query_with_document_fields(searcher, doc_fields)? - .weight(searcher, scoring_enabled), + .weight(enable_scoring), } } } diff --git a/src/query/phrase_query/mod.rs b/src/query/phrase_query/mod.rs index 95a8a94631..3ebee84751 100644 --- a/src/query/phrase_query/mod.rs +++ b/src/query/phrase_query/mod.rs @@ -14,7 +14,7 @@ pub mod tests { use super::*; use crate::collector::tests::{TEST_COLLECTOR_WITHOUT_SCORE, TEST_COLLECTOR_WITH_SCORE}; use crate::core::Index; - use crate::query::{QueryParser, Weight}; + use crate::query::{EnableScoring, QueryParser, Weight}; use crate::schema::{Schema, Term, TEXT}; use crate::{assert_nearly_equals, DocAddress, DocId, TERMINATED}; @@ -79,7 +79,8 @@ pub mod tests { .map(|text| Term::from_field_text(text_field, text)) .collect(); let phrase_query = PhraseQuery::new(terms); - let phrase_weight = phrase_query.phrase_weight(&searcher, false)?; + let phrase_weight = + phrase_query.phrase_weight(EnableScoring::Disabled(searcher.schema()))?; let mut phrase_scorer = phrase_weight.scorer(searcher.segment_reader(0), 1.0)?; assert_eq!(phrase_scorer.doc(), 1); assert_eq!(phrase_scorer.advance(), TERMINATED); @@ -359,7 +360,9 @@ pub mod tests { let matching_docs = |query: &str| { let query_parser = QueryParser::for_index(&index, vec![json_field]); let phrase_query = query_parser.parse_query(query).unwrap(); - let phrase_weight = phrase_query.weight(&searcher, false).unwrap(); + let phrase_weight = phrase_query + .weight(EnableScoring::Disabled(searcher.schema())) + .unwrap(); let mut phrase_scorer = phrase_weight .scorer(searcher.segment_reader(0), 1.0f32) .unwrap(); diff --git a/src/query/phrase_query/phrase_query.rs b/src/query/phrase_query/phrase_query.rs index d5fcbd62b3..2846639978 100644 --- a/src/query/phrase_query/phrase_query.rs +++ b/src/query/phrase_query/phrase_query.rs @@ -1,7 +1,6 @@ use super::PhraseWeight; -use crate::core::searcher::Searcher; use crate::query::bm25::Bm25Weight; -use crate::query::{Query, Weight}; +use crate::query::{EnableScoring, Query, Weight}; use crate::schema::{Field, IndexRecordOption, Term}; /// `PhraseQuery` matches a specific sequence of words. @@ -67,7 +66,7 @@ impl PhraseQuery { /// Slop allowed for the phrase. /// /// The query will match if its terms are separated by `slop` terms at most. - /// By default the slop is 0 meaning query terms need to be adjacent. + /// By default the slop is 0 meaning query terms need to be adjacent. pub fn set_slop(&mut self, value: u32) { self.slop = value; } @@ -91,10 +90,9 @@ impl PhraseQuery { /// a specialized type [`PhraseWeight`] instead of a Boxed trait. pub(crate) fn phrase_weight( &self, - searcher: &Searcher, - scoring_enabled: bool, + enable_scoring: EnableScoring<'_>, ) -> crate::Result { - let schema = searcher.schema(); + let schema = enable_scoring.schema(); let field_entry = schema.get_field_entry(self.field); let has_positions = field_entry .field_type() @@ -109,8 +107,11 @@ impl PhraseQuery { ))); } let terms = self.phrase_terms(); - let bm25_weight = Bm25Weight::for_terms(searcher, &terms)?; - let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight, scoring_enabled); + let bm25_weight_opt = match enable_scoring { + EnableScoring::Enabled(searcher) => Some(Bm25Weight::for_terms(searcher, &terms)?), + EnableScoring::Disabled(_) => None, + }; + let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight_opt); if self.slop > 0 { weight.slop(self.slop); } @@ -122,8 +123,8 @@ impl Query for PhraseQuery { /// Create the weight associated with a query. /// /// See [`Weight`]. - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result> { - let phrase_weight = self.phrase_weight(searcher, scoring_enabled)?; + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { + let phrase_weight = self.phrase_weight(enable_scoring)?; Ok(Box::new(phrase_weight)) } diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs index cea2600695..fde78d15e1 100644 --- a/src/query/phrase_query/phrase_scorer.rs +++ b/src/query/phrase_query/phrase_scorer.rs @@ -50,8 +50,7 @@ pub struct PhraseScorer { right: Vec, phrase_count: u32, fieldnorm_reader: FieldNormReader, - similarity_weight: Bm25Weight, - scoring_enabled: bool, + similarity_weight_opt: Option, slop: u32, } @@ -245,11 +244,11 @@ fn intersection_exists_with_slop(left: &[u32], right: &[u32], slop: u32) -> bool } impl PhraseScorer { + // If similarity_weight is None, then scoring is disabled. pub fn new( term_postings: Vec<(usize, TPostings)>, - similarity_weight: Bm25Weight, + similarity_weight_opt: Option, fieldnorm_reader: FieldNormReader, - scoring_enabled: bool, slop: u32, ) -> PhraseScorer { let max_offset = term_postings @@ -270,9 +269,8 @@ impl PhraseScorer { left: Vec::with_capacity(100), right: Vec::with_capacity(100), phrase_count: 0u32, - similarity_weight, + similarity_weight_opt, fieldnorm_reader, - scoring_enabled, slop, }; if scorer.doc() != TERMINATED && !scorer.phrase_match() { @@ -286,7 +284,7 @@ impl PhraseScorer { } fn phrase_match(&mut self) -> bool { - if self.scoring_enabled { + if self.similarity_weight_opt.is_some() { let count = self.compute_phrase_count(); self.phrase_count = count; count > 0u32 @@ -388,8 +386,11 @@ impl Scorer for PhraseScorer { fn score(&mut self) -> Score { let doc = self.doc(); let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc); - self.similarity_weight - .score(fieldnorm_id, self.phrase_count) + if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() { + similarity_weight.score(fieldnorm_id, self.phrase_count) + } else { + 1.0f32 + } } } diff --git a/src/query/phrase_query/phrase_weight.rs b/src/query/phrase_query/phrase_weight.rs index ff39c2e8ec..cb7e7ba5d4 100644 --- a/src/query/phrase_query/phrase_weight.rs +++ b/src/query/phrase_query/phrase_weight.rs @@ -10,30 +10,28 @@ use crate::{DocId, DocSet, Score}; pub struct PhraseWeight { phrase_terms: Vec<(usize, Term)>, - similarity_weight: Bm25Weight, - scoring_enabled: bool, + similarity_weight_opt: Option, slop: u32, } impl PhraseWeight { /// Creates a new phrase weight. + /// If `similarity_weight_opt` is None, then scoring is disabled pub fn new( phrase_terms: Vec<(usize, Term)>, - similarity_weight: Bm25Weight, - scoring_enabled: bool, + similarity_weight_opt: Option, ) -> PhraseWeight { let slop = 0; PhraseWeight { phrase_terms, - similarity_weight, - scoring_enabled, + similarity_weight_opt, slop, } } fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result { let field = self.phrase_terms[0].1.field(); - if self.scoring_enabled { + if self.similarity_weight_opt.is_some() { if let Some(fieldnorm_reader) = reader.fieldnorms_readers().get_field(field)? { return Ok(fieldnorm_reader); } @@ -46,7 +44,10 @@ impl PhraseWeight { reader: &SegmentReader, boost: Score, ) -> crate::Result>> { - let similarity_weight = self.similarity_weight.boost_by(boost); + let similarity_weight_opt = self + .similarity_weight_opt + .as_ref() + .map(|similarity_weight| similarity_weight.boost_by(boost)); let fieldnorm_reader = self.fieldnorm_reader(reader)?; let mut term_postings_list = Vec::new(); if reader.has_deletes() { @@ -74,9 +75,8 @@ impl PhraseWeight { } Ok(Some(PhraseScorer::new( term_postings_list, - similarity_weight, + similarity_weight_opt, fieldnorm_reader, - self.scoring_enabled, self.slop, ))) } @@ -108,7 +108,9 @@ impl Weight for PhraseWeight { let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc); let phrase_count = scorer.phrase_count(); let mut explanation = Explanation::new("Phrase Scorer", scorer.score()); - explanation.add_detail(self.similarity_weight.explain(fieldnorm_id, phrase_count)); + if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() { + explanation.add_detail(similarity_weight.explain(fieldnorm_id, phrase_count)); + } Ok(explanation) } } @@ -117,7 +119,7 @@ impl Weight for PhraseWeight { mod tests { use super::super::tests::create_index; use crate::docset::TERMINATED; - use crate::query::PhraseQuery; + use crate::query::{EnableScoring, PhraseQuery}; use crate::{DocSet, Term}; #[test] @@ -130,7 +132,8 @@ mod tests { Term::from_field_text(text_field, "a"), Term::from_field_text(text_field, "b"), ]); - let phrase_weight = phrase_query.phrase_weight(&searcher, true).unwrap(); + let enable_scoring = EnableScoring::Enabled(&searcher); + let phrase_weight = phrase_query.phrase_weight(enable_scoring).unwrap(); let mut phrase_scorer = phrase_weight .phrase_scorer(searcher.segment_reader(0u32), 1.0)? .unwrap(); diff --git a/src/query/query.rs b/src/query/query.rs index 245569077e..df518d2bcb 100644 --- a/src/query/query.rs +++ b/src/query/query.rs @@ -5,8 +5,37 @@ use downcast_rs::impl_downcast; use super::Weight; use crate::core::searcher::Searcher; use crate::query::Explanation; +use crate::schema::Schema; use crate::{DocAddress, Term}; +/// Argument used in `Query::weight(..)` +#[derive(Copy, Clone)] +pub enum EnableScoring<'a> { + /// Pass this to enable scoring. + Enabled(&'a Searcher), + /// Pass this to disable scoring. + /// This can improve performance. + Disabled(&'a Schema), +} + +impl<'a> EnableScoring<'a> { + /// Returns the schema. + pub fn schema(&self) -> &Schema { + match self { + EnableScoring::Enabled(searcher) => searcher.schema(), + EnableScoring::Disabled(schema) => schema, + } + } + + /// Returns true if the scoring is enabled. + pub fn is_scoring_enabled(&self) -> bool { + match self { + EnableScoring::Enabled(_) => true, + EnableScoring::Disabled(_) => false, + } + } +} + /// The `Query` trait defines a set of documents and a scoring method /// for those documents. /// @@ -48,18 +77,18 @@ pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug { /// can increase performances. /// /// See [`Weight`]. - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result>; + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result>; /// Returns an `Explanation` for the score of the document. fn explain(&self, searcher: &Searcher, doc_address: DocAddress) -> crate::Result { + let weight = self.weight(EnableScoring::Enabled(searcher))?; let reader = searcher.segment_reader(doc_address.segment_ord); - let weight = self.weight(searcher, true)?; weight.explain(reader, doc_address.doc_id) } /// Returns the number of documents matching the query. fn count(&self, searcher: &Searcher) -> crate::Result { - let weight = self.weight(searcher, false)?; + let weight = self.weight(EnableScoring::Disabled(searcher.schema()))?; let mut result = 0; for reader in searcher.segment_readers() { result += weight.count(reader)? as usize; @@ -93,8 +122,8 @@ where T: 'static + Query + Clone } impl Query for Box { - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result> { - self.as_ref().weight(searcher, scoring_enabled) + fn weight(&self, enabled_scoring: EnableScoring) -> crate::Result> { + self.as_ref().weight(enabled_scoring) } fn count(&self, searcher: &Searcher) -> crate::Result { diff --git a/src/query/range_query.rs b/src/query/range_query.rs index 390a452d3a..91332b0088 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -3,11 +3,11 @@ use std::ops::{Bound, Range}; use common::BitSet; -use crate::core::{Searcher, SegmentReader}; +use crate::core::SegmentReader; use crate::error::TantivyError; use crate::query::explanation::does_not_match; use crate::query::range_query_ip_fastfield::IPFastFieldRangeWeight; -use crate::query::{BitSetDocSet, ConstScorer, Explanation, Query, Scorer, Weight}; +use crate::query::{BitSetDocSet, ConstScorer, EnableScoring, Explanation, Query, Scorer, Weight}; use crate::schema::{Field, IndexRecordOption, Term, Type}; use crate::termdict::{TermDictionary, TermStreamer}; use crate::{DocId, Score}; @@ -253,12 +253,8 @@ impl RangeQuery { } impl Query for RangeQuery { - fn weight( - &self, - searcher: &Searcher, - _scoring_enabled: bool, - ) -> crate::Result> { - let schema = searcher.schema(); + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { + let schema = enable_scoring.schema(); let field_type = schema.get_field_entry(self.field).field_type(); let value_type = field_type.value_type(); if value_type != self.value_type { diff --git a/src/query/regex_query.rs b/src/query/regex_query.rs index 24dbcdab7e..6fa89b5134 100644 --- a/src/query/regex_query.rs +++ b/src/query/regex_query.rs @@ -4,9 +4,8 @@ use std::sync::Arc; use tantivy_fst::Regex; use crate::error::TantivyError; -use crate::query::{AutomatonWeight, Query, Weight}; +use crate::query::{AutomatonWeight, EnableScoring, Query, Weight}; use crate::schema::Field; -use crate::Searcher; /// A Regex Query matches all of the documents /// containing a specific term that matches @@ -82,11 +81,7 @@ impl RegexQuery { } impl Query for RegexQuery { - fn weight( - &self, - _searcher: &Searcher, - _scoring_enabled: bool, - ) -> crate::Result> { + fn weight(&self, _enabled_scoring: EnableScoring<'_>) -> crate::Result> { Ok(Box::new(self.specialized_weight())) } } diff --git a/src/query/set_query.rs b/src/query/set_query.rs index 7029945ddd..26df4c8287 100644 --- a/src/query/set_query.rs +++ b/src/query/set_query.rs @@ -4,9 +4,9 @@ use tantivy_fst::raw::CompiledAddr; use tantivy_fst::{Automaton, Map}; use crate::query::score_combiner::DoNothingCombiner; -use crate::query::{AutomatonWeight, BooleanWeight, Occur, Query, Weight}; -use crate::schema::Field; -use crate::{Searcher, Term}; +use crate::query::{AutomatonWeight, BooleanWeight, EnableScoring, Occur, Query, Weight}; +use crate::schema::{Field, Schema}; +use crate::Term; /// A Term Set Query matches all of the documents containing any of the Term provided #[derive(Debug, Clone)] @@ -32,12 +32,12 @@ impl TermSetQuery { fn specialized_weight( &self, - searcher: &Searcher, + schema: &Schema, ) -> crate::Result> { let mut sub_queries: Vec<(_, Box)> = Vec::with_capacity(self.terms_map.len()); for (&field, sorted_terms) in self.terms_map.iter() { - let field_entry = searcher.schema().get_field_entry(field); + let field_entry = schema.get_field_entry(field); let field_type = field_entry.field_type(); if !field_type.is_indexed() { let error_msg = format!("Field {:?} is not indexed.", field_entry.name()); @@ -65,12 +65,8 @@ impl TermSetQuery { } impl Query for TermSetQuery { - fn weight( - &self, - searcher: &Searcher, - _scoring_enabled: bool, - ) -> crate::Result> { - Ok(Box::new(self.specialized_weight(searcher)?)) + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { + Ok(Box::new(self.specialized_weight(enable_scoring.schema())?)) } } diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index 2be4bcbbea..53cf9750c6 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -12,7 +12,7 @@ mod tests { use crate::collector::TopDocs; use crate::docset::DocSet; use crate::postings::compression::COMPRESSION_BLOCK_SIZE; - use crate::query::{Query, QueryParser, Scorer, TermQuery}; + use crate::query::{EnableScoring, Query, QueryParser, Scorer, TermQuery}; use crate::schema::{Field, IndexRecordOption, Schema, STRING, TEXT}; use crate::{assert_nearly_equals, DocAddress, Index, Term, TERMINATED}; @@ -34,7 +34,7 @@ mod tests { Term::from_field_text(text_field, "a"), IndexRecordOption::Basic, ); - let term_weight = term_query.weight(&searcher, true)?; + let term_weight = term_query.weight(EnableScoring::Enabled(&searcher))?; let segment_reader = searcher.segment_reader(0); let mut term_scorer = term_weight.scorer(segment_reader, 1.0)?; assert_eq!(term_scorer.doc(), 0); @@ -62,7 +62,7 @@ mod tests { Term::from_field_text(text_field, "a"), IndexRecordOption::Basic, ); - let term_weight = term_query.weight(&searcher, true)?; + let term_weight = term_query.weight(EnableScoring::Enabled(&searcher))?; let segment_reader = searcher.segment_reader(0); let mut term_scorer = term_weight.scorer(segment_reader, 1.0)?; for i in 0u32..COMPRESSION_BLOCK_SIZE as u32 { @@ -158,7 +158,7 @@ mod tests { let term_a = Term::from_field_text(text_field, "a"); let term_query = TermQuery::new(term_a, IndexRecordOption::Basic); let searcher = index.reader()?.searcher(); - let term_weight = term_query.weight(&searcher, false)?; + let term_weight = term_query.weight(EnableScoring::Disabled(searcher.schema()))?; let mut term_scorer = term_weight.scorer(searcher.segment_reader(0u32), 1.0)?; assert_eq!(term_scorer.doc(), 0u32); term_scorer.seek(1u32); diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs index 5d848d6c3f..0b1cfb82d1 100644 --- a/src/query/term_query/term_query.rs +++ b/src/query/term_query/term_query.rs @@ -2,9 +2,9 @@ use std::fmt; use super::term_weight::TermWeight; use crate::query::bm25::Bm25Weight; -use crate::query::{Explanation, Query, Weight}; +use crate::query::{EnableScoring, Explanation, Query, Weight}; use crate::schema::IndexRecordOption; -use crate::{Searcher, Term}; +use crate::Term; /// A Term query matches all of the documents /// containing a specific term. @@ -87,19 +87,23 @@ impl TermQuery { /// This is useful for optimization purpose. pub fn specialized_weight( &self, - searcher: &Searcher, - scoring_enabled: bool, + enable_scoring: EnableScoring<'_>, ) -> crate::Result { - let field_entry = searcher.schema().get_field_entry(self.term.field()); + let schema = enable_scoring.schema(); + let field_entry = schema.get_field_entry(self.term.field()); if !field_entry.is_indexed() { let error_msg = format!("Field {:?} is not indexed.", field_entry.name()); return Err(crate::TantivyError::SchemaError(error_msg)); } - let bm25_weight = if scoring_enabled { - Bm25Weight::for_terms(searcher, &[self.term.clone()])? - } else { - Bm25Weight::new(Explanation::new("".to_string(), 1.0f32), 1.0f32) + let bm25_weight = match enable_scoring { + EnableScoring::Enabled(searcher) => { + Bm25Weight::for_terms(searcher, &[self.term.clone()])? + } + EnableScoring::Disabled(_schema) => { + Bm25Weight::new(Explanation::new("".to_string(), 1.0f32), 1.0f32) + } }; + let scoring_enabled = enable_scoring.is_scoring_enabled(); let index_record_option = if scoring_enabled { self.index_record_option } else { @@ -115,10 +119,8 @@ impl TermQuery { } impl Query for TermQuery { - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result> { - Ok(Box::new( - self.specialized_weight(searcher, scoring_enabled)?, - )) + fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result> { + Ok(Box::new(self.specialized_weight(enable_scoring)?)) } fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) { visitor(&self.term, false); diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index bb952e222b..2e7aeeaa4e 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs @@ -130,7 +130,7 @@ mod tests { use crate::merge_policy::NoMergePolicy; use crate::postings::compression::COMPRESSION_BLOCK_SIZE; use crate::query::term_query::TermScorer; - use crate::query::{Bm25Weight, Scorer, TermQuery}; + use crate::query::{Bm25Weight, EnableScoring, Scorer, TermQuery}; use crate::schema::{IndexRecordOption, Schema, TEXT}; use crate::{ assert_nearly_equals, DocId, DocSet, Index, Score, Searcher, SegmentId, Term, TERMINATED, @@ -250,7 +250,7 @@ mod tests { } fn test_block_wand_aux(term_query: &TermQuery, searcher: &Searcher) -> crate::Result<()> { - let term_weight = term_query.specialized_weight(searcher, true)?; + let term_weight = term_query.specialized_weight(EnableScoring::Enabled(searcher))?; for reader in searcher.segment_readers() { let mut block_max_scores = vec![]; let mut block_max_scores_b = vec![];