Skip to content

Commit

Permalink
Make facet indexation and storage optional
Browse files Browse the repository at this point in the history
Added a `FacetOptions` for `HierarchicalFacet`, which adds indexed and stored flags to it.
Propagate change and update tests accordingly
Added a test to ensure that a facet field without the indexed flag is handled correctly.
Added a `path()` function to the `Value` implementation to return the stored facet.
  • Loading branch information
Laurent Pouget committed Mar 23, 2021
1 parent 5de9961 commit 5df8166
Show file tree
Hide file tree
Showing 20 changed files with 223 additions and 61 deletions.
2 changes: 1 addition & 1 deletion examples/faceted_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> {

let name = schema_builder.add_text_field("felin_name", TEXT | STORED);
// this is our faceted field: its scientific classification
let classification = schema_builder.add_facet_field("classification");
let classification = schema_builder.add_facet_field("classification", INDEXED);

let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
Expand Down
2 changes: 1 addition & 1 deletion examples/faceted_search_with_tweaked_score.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();

let title = schema_builder.add_text_field("title", STORED);
let ingredient = schema_builder.add_facet_field("ingredient");
let ingredient = schema_builder.add_facet_field("ingredient", INDEXED);

let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
Expand Down
18 changes: 9 additions & 9 deletions src/collector/facet_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// ```rust
/// use tantivy::collector::FacetCollector;
/// use tantivy::query::AllQuery;
/// use tantivy::schema::{Facet, Schema, TEXT};
/// use tantivy::schema::{Facet, Schema, INDEXED, TEXT};
/// use tantivy::{doc, Index};
///
/// fn example() -> tantivy::Result<()> {
Expand All @@ -89,7 +89,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// // Facet have their own specific type.
/// // It is not a bad practise to put all of your
/// // facet information in the same field.
/// let facet = schema_builder.add_facet_field("facet");
/// let facet = schema_builder.add_facet_field("facet", INDEXED);
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
Expand Down Expand Up @@ -461,7 +461,7 @@ mod tests {
use crate::collector::Count;
use crate::core::Index;
use crate::query::{AllQuery, QueryParser, TermQuery};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, INDEXED};
use crate::Term;
use rand::distributions::Uniform;
use rand::prelude::SliceRandom;
Expand All @@ -471,7 +471,7 @@ mod tests {
#[test]
fn test_facet_collector_drilldown() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down Expand Up @@ -531,7 +531,7 @@ mod tests {
#[test]
fn test_doc_unsorted_multifacet() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets");
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
Expand All @@ -555,7 +555,7 @@ mod tests {
#[test]
fn test_doc_search_by_facet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down Expand Up @@ -612,7 +612,7 @@ mod tests {
#[test]
fn test_facet_collector_topk() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down Expand Up @@ -664,7 +664,7 @@ mod bench {

use crate::collector::FacetCollector;
use crate::query::AllQuery;
use crate::schema::{Facet, Schema};
use crate::schema::{Facet, Schema, INDEXED};
use crate::Index;
use rand::seq::SliceRandom;
use rand::thread_rng;
Expand All @@ -673,7 +673,7 @@ mod bench {
#[bench]
fn bench_facet_collector(b: &mut Bencher) {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down
23 changes: 13 additions & 10 deletions src/core/segment_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,22 @@ impl SegmentReader {
/// Accessor to the `FacetReader` associated to a given `Field`.
pub fn facet_reader(&self, field: Field) -> crate::Result<FacetReader> {
let field_entry = self.schema.get_field_entry(field);
if field_entry.field_type() != &FieldType::HierarchicalFacet {
return Err(crate::TantivyError::InvalidArgument(format!(

match field_entry.field_type() {
FieldType::HierarchicalFacet(_) => {
let term_ords_reader = self.fast_fields().u64s(field)?;
let termdict = self
.termdict_composite
.open_read(field)
.map(TermDictionary::open)
.unwrap_or_else(|| Ok(TermDictionary::empty()))?;
Ok(FacetReader::new(term_ords_reader, termdict))
}
_ => Err(crate::TantivyError::InvalidArgument(format!(
"Field {:?} is not a facet field.",
field_entry.name()
)));
))),
}
let term_ords_reader = self.fast_fields().u64s(field)?;
let termdict = self
.termdict_composite
.open_read(field)
.map(TermDictionary::open)
.unwrap_or_else(|| Ok(TermDictionary::empty()))?;
Ok(FacetReader::new(term_ords_reader, termdict))
}

/// Accessor to the segment's `Field norms`'s reader.
Expand Down
34 changes: 30 additions & 4 deletions src/fastfield/facet_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,40 @@ impl FacetReader {
mod tests {
use crate::Index;
use crate::{
schema::{Facet, SchemaBuilder},
Document,
schema::{Facet, SchemaBuilder, Value, INDEXED, STORED},
DocAddress, Document,
};

// Checks that a facet field declared with only the `STORED` flag exposes no
// facet ordinals through the `FacetReader`, while the facet path can still be
// read back from the stored document.
#[test]
fn test_facet_not_indexed_for_all_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
// The field is stored but deliberately not flagged as INDEXED.
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
// One document carrying a facet value, and one empty document.
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b")));
index_writer.add_document(Document::default());
index_writer.commit()?;
let searcher = index.reader()?.searcher();
// Opening a facet reader on the field is expected to succeed here.
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
// No ordinals are expected for doc 0 even though it was given a facet.
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
// The facet is still retrievable from the stored document via `Value::path`.
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path).unwrap();
assert_eq!(value, "/a/b".to_string());
// The empty document (doc 1) has no ordinals either.
facet_reader.facet_ords(1u32, &mut facet_ords);
assert!(facet_ords.is_empty());
Ok(())
}

#[test]
fn test_facet_not_populated_for_all_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -113,7 +139,7 @@ mod tests {
#[test]
fn test_facet_not_populated_for_any_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down
2 changes: 1 addition & 1 deletion src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ impl FastValue for u64 {
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
match *field_type {
FieldType::U64(ref integer_options) => integer_options.get_fastfield_cardinality(),
FieldType::HierarchicalFacet => Some(Cardinality::MultiValues),
FieldType::HierarchicalFacet(_) => Some(Cardinality::MultiValues),
_ => None,
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/fastfield/multivalued/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod tests {
use crate::schema::Facet;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Index;
use chrono::Duration;

Expand Down Expand Up @@ -212,7 +213,7 @@ mod tests {
#[ignore]
fn test_many_facets() {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield");
let field = schema_builder.add_facet_field("facetfield", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
mod tests {

use crate::core::Index;
use crate::schema::{Facet, Schema};
use crate::schema::{Facet, Schema, INDEXED};

#[test]
fn test_multifastfield_reader() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets");
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index
Expand Down
2 changes: 1 addition & 1 deletion src/fastfield/readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality
FieldType::Date(options) => options
.get_fastfield_cardinality()
.map(|cardinality| (FastType::Date, cardinality)),
FieldType::HierarchicalFacet => Some((FastType::U64, Cardinality::MultiValues)),
FieldType::HierarchicalFacet(_) => Some((FastType::U64, Cardinality::MultiValues)),
_ => None,
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/fastfield/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ impl FastFieldsWriter {
None => {}
}
}
FieldType::HierarchicalFacet => {
FieldType::HierarchicalFacet(_) => {
let fast_field_writer = MultiValuedFastFieldWriter::new(field, true);
multi_values_writers.push(fast_field_writer);
}
Expand Down
11 changes: 5 additions & 6 deletions src/indexer/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ impl IndexMerger {
for (field, field_entry) in self.schema.fields() {
let field_type = field_entry.field_type();
match field_type {
FieldType::HierarchicalFacet => {
FieldType::HierarchicalFacet(_) => {
let term_ordinal_mapping = term_ord_mappings
.remove(&field)
.expect("Logic Error in Tantivy (Please report). HierarchicalFact field should have required a\
Expand Down Expand Up @@ -515,10 +515,9 @@ impl IndexMerger {
max_term_ords.push(terms.num_terms() as u64);
}

let mut term_ord_mapping_opt = if *field_type == FieldType::HierarchicalFacet {
Some(TermOrdinalMapping::new(max_term_ords))
} else {
None
let mut term_ord_mapping_opt = match field_type {
FieldType::HierarchicalFacet(_) => Some(TermOrdinalMapping::new(max_term_ords)),
_ => None,
};

let mut merged_terms = TermMerger::new(field_term_streams);
Expand Down Expand Up @@ -1179,7 +1178,7 @@ mod tests {
#[test]
fn test_merge_facets() {
let mut schema_builder = schema::Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let index = Index::create_in_ram(schema_builder.build());
let reader = index.reader().unwrap();
{
Expand Down
2 changes: 1 addition & 1 deletion src/indexer/segment_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ impl SegmentWriter {
let (term_buffer, multifield_postings) =
(&mut self.term_buffer, &mut self.multifield_postings);
match *field_entry.field_type() {
FieldType::HierarchicalFacet => {
FieldType::HierarchicalFacet(_) => {
term_buffer.set_field(field);
let facets =
field_values
Expand Down
6 changes: 4 additions & 2 deletions src/postings/postings_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter>
| FieldType::F64(_)
| FieldType::Date(_)
| FieldType::Bytes(_)
| FieldType::HierarchicalFacet => SpecializedPostingsWriter::<NothingRecorder>::new_boxed(),
| FieldType::HierarchicalFacet(_) => {
SpecializedPostingsWriter::<NothingRecorder>::new_boxed()
}
}
}

Expand Down Expand Up @@ -142,7 +144,7 @@ impl MultiFieldPostingsWriter {
let field_entry = self.schema.get_field_entry(field);

match *field_entry.field_type() {
FieldType::Str(_) | FieldType::HierarchicalFacet => {
FieldType::Str(_) | FieldType::HierarchicalFacet(_) => {
// populating the (unordered term ord) -> (ordered term ord) mapping
// for the field.
let unordered_term_ids = term_offsets[byte_offsets.clone()]
Expand Down
16 changes: 12 additions & 4 deletions src/query/query_parser/query_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ impl QueryParser {
))
}
}
FieldType::HierarchicalFacet => {
FieldType::HierarchicalFacet(_) => {
let facet = Facet::from_text(phrase);
Ok(vec![(0, Term::from_field_text(field, facet.encoded_str()))])
}
Expand Down Expand Up @@ -605,7 +605,8 @@ mod test {
schema_builder.add_text_field("with_stop_words", text_options);
schema_builder.add_date_field("date", INDEXED);
schema_builder.add_f64_field("float", INDEXED);
schema_builder.add_facet_field("facet");
schema_builder.add_facet_field("facet", INDEXED);
schema_builder.add_facet_field("facet_not_indexed", STORED);
schema_builder.add_bytes_field("bytes", INDEXED);
schema_builder.add_bytes_field("bytes_not_indexed", STORED);
schema_builder.build()
Expand Down Expand Up @@ -658,6 +659,13 @@ mod test {
);
}

// Checks that parsing a query against the `facet_not_indexed` field yields a
// `QueryParserError::FieldNotIndexed` error.
// NOTE(review): presumably `facet_not_indexed` is the STORED-only facet field
// registered in this module's test schema -- confirm against the schema helper.
#[test]
fn test_parse_query_facet_not_indexed() {
let error =
parse_query_to_logical_ast("facet_not_indexed:/root/branch/leaf", false).unwrap_err();
assert!(matches!(error, QueryParserError::FieldNotIndexed(_)));
}

#[test]
pub fn test_parse_query_with_boost() {
let mut query_parser = make_query_parser();
Expand Down Expand Up @@ -799,7 +807,7 @@ mod test {
fn test_parse_bytes() {
test_parse_query_to_logical_ast_helper(
"bytes:YnVidQ==",
"Term(field=12,bytes=[98, 117, 98, 117])",
"Term(field=13,bytes=[98, 117, 98, 117])",
false,
);
}
Expand All @@ -814,7 +822,7 @@ mod test {
fn test_parse_bytes_phrase() {
test_parse_query_to_logical_ast_helper(
"bytes:\"YnVidQ==\"",
"Term(field=12,bytes=[98, 117, 98, 117])",
"Term(field=13,bytes=[98, 117, 98, 117])",
false,
);
}
Expand Down
Loading

0 comments on commit 5df8166

Please sign in to comment.