Skip to content

Commit

Permalink
Merge pull request #996 from lpouget/facet-optional-storage-and-index
Browse files Browse the repository at this point in the history
Make facet indexation and storage optional
  • Loading branch information
fulmicoton authored Mar 25, 2021
2 parents 8e7fe06 + 4b34231 commit 1557290
Show file tree
Hide file tree
Showing 20 changed files with 290 additions and 62 deletions.
2 changes: 1 addition & 1 deletion examples/faceted_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> {

let name = schema_builder.add_text_field("felin_name", TEXT | STORED);
// this is our faceted field: its scientific classification
let classification = schema_builder.add_facet_field("classification");
let classification = schema_builder.add_facet_field("classification", INDEXED);

let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
Expand Down
2 changes: 1 addition & 1 deletion examples/faceted_search_with_tweaked_score.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();

let title = schema_builder.add_text_field("title", STORED);
let ingredient = schema_builder.add_facet_field("ingredient");
let ingredient = schema_builder.add_facet_field("ingredient", INDEXED);

let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
Expand Down
18 changes: 9 additions & 9 deletions src/collector/facet_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// ```rust
/// use tantivy::collector::FacetCollector;
/// use tantivy::query::AllQuery;
/// use tantivy::schema::{Facet, Schema, TEXT};
/// use tantivy::schema::{Facet, Schema, INDEXED, TEXT};
/// use tantivy::{doc, Index};
///
/// fn example() -> tantivy::Result<()> {
Expand All @@ -89,7 +89,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// // Facet have their own specific type.
/// // It is not a bad practise to put all of your
/// // facet information in the same field.
/// let facet = schema_builder.add_facet_field("facet");
/// let facet = schema_builder.add_facet_field("facet", INDEXED);
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
Expand Down Expand Up @@ -461,7 +461,7 @@ mod tests {
use crate::collector::Count;
use crate::core::Index;
use crate::query::{AllQuery, QueryParser, TermQuery};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, INDEXED};
use crate::Term;
use rand::distributions::Uniform;
use rand::prelude::SliceRandom;
Expand All @@ -471,7 +471,7 @@ mod tests {
#[test]
fn test_facet_collector_drilldown() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down Expand Up @@ -531,7 +531,7 @@ mod tests {
#[test]
fn test_doc_unsorted_multifacet() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets");
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
Expand All @@ -555,7 +555,7 @@ mod tests {
#[test]
fn test_doc_search_by_facet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down Expand Up @@ -612,7 +612,7 @@ mod tests {
#[test]
fn test_facet_collector_topk() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down Expand Up @@ -664,7 +664,7 @@ mod bench {

use crate::collector::FacetCollector;
use crate::query::AllQuery;
use crate::schema::{Facet, Schema};
use crate::schema::{Facet, Schema, INDEXED};
use crate::Index;
use rand::seq::SliceRandom;
use rand::thread_rng;
Expand All @@ -673,7 +673,7 @@ mod bench {
#[bench]
fn bench_facet_collector(b: &mut Bencher) {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down
23 changes: 13 additions & 10 deletions src/core/segment_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,22 @@ impl SegmentReader {
/// Accessor to the `FacetReader` associated to a given `Field`.
pub fn facet_reader(&self, field: Field) -> crate::Result<FacetReader> {
let field_entry = self.schema.get_field_entry(field);
if field_entry.field_type() != &FieldType::HierarchicalFacet {
return Err(crate::TantivyError::InvalidArgument(format!(

match field_entry.field_type() {
FieldType::HierarchicalFacet(_) => {
let term_ords_reader = self.fast_fields().u64s(field)?;
let termdict = self
.termdict_composite
.open_read(field)
.map(TermDictionary::open)
.unwrap_or_else(|| Ok(TermDictionary::empty()))?;
Ok(FacetReader::new(term_ords_reader, termdict))
}
_ => Err(crate::TantivyError::InvalidArgument(format!(
"Field {:?} is not a facet field.",
field_entry.name()
)));
))),
}
let term_ords_reader = self.fast_fields().u64s(field)?;
let termdict = self
.termdict_composite
.open_read(field)
.map(TermDictionary::open)
.unwrap_or_else(|| Ok(TermDictionary::empty()))?;
Ok(FacetReader::new(term_ords_reader, termdict))
}

/// Accessor to the segment's `Field norms`'s reader.
Expand Down
101 changes: 97 additions & 4 deletions src/fastfield/facet_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,106 @@ impl FacetReader {
mod tests {
use crate::Index;
use crate::{
schema::{Facet, SchemaBuilder},
Document,
schema::{Facet, FacetOptions, SchemaBuilder, Value, INDEXED, STORED},
DocAddress, Document,
};

#[test]
fn test_facet_only_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b")));
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, None);
Ok(())
}

#[test]
fn test_facet_only_stored() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b")));
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}

#[test]
fn test_facet_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED | INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b")));
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}

#[test]
fn test_facet_neither_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b")));
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, None);
Ok(())
}

#[test]
fn test_facet_not_populated_for_all_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -110,10 +202,11 @@ mod tests {
assert!(facet_ords.is_empty());
Ok(())
}

#[test]
fn test_facet_not_populated_for_any_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down
2 changes: 1 addition & 1 deletion src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ impl FastValue for u64 {
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
match *field_type {
FieldType::U64(ref integer_options) => integer_options.get_fastfield_cardinality(),
FieldType::HierarchicalFacet => Some(Cardinality::MultiValues),
FieldType::HierarchicalFacet(_) => Some(Cardinality::MultiValues),
_ => None,
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/fastfield/multivalued/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod tests {
use crate::schema::Facet;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Index;
use chrono::Duration;

Expand Down Expand Up @@ -212,7 +213,7 @@ mod tests {
#[ignore]
fn test_many_facets() {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield");
let field = schema_builder.add_facet_field("facetfield", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
mod tests {

use crate::core::Index;
use crate::schema::{Facet, Schema};
use crate::schema::{Facet, Schema, INDEXED};

#[test]
fn test_multifastfield_reader() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets");
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index
Expand Down
2 changes: 1 addition & 1 deletion src/fastfield/readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality
FieldType::Date(options) => options
.get_fastfield_cardinality()
.map(|cardinality| (FastType::Date, cardinality)),
FieldType::HierarchicalFacet => Some((FastType::U64, Cardinality::MultiValues)),
FieldType::HierarchicalFacet(_) => Some((FastType::U64, Cardinality::MultiValues)),
_ => None,
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/fastfield/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ impl FastFieldsWriter {
None => {}
}
}
FieldType::HierarchicalFacet => {
FieldType::HierarchicalFacet(_) => {
let fast_field_writer = MultiValuedFastFieldWriter::new(field, true);
multi_values_writers.push(fast_field_writer);
}
Expand Down
11 changes: 5 additions & 6 deletions src/indexer/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ impl IndexMerger {
for (field, field_entry) in self.schema.fields() {
let field_type = field_entry.field_type();
match field_type {
FieldType::HierarchicalFacet => {
FieldType::HierarchicalFacet(_) => {
let term_ordinal_mapping = term_ord_mappings
.remove(&field)
.expect("Logic Error in Tantivy (Please report). HierarchicalFact field should have required a\
Expand Down Expand Up @@ -515,10 +515,9 @@ impl IndexMerger {
max_term_ords.push(terms.num_terms() as u64);
}

let mut term_ord_mapping_opt = if *field_type == FieldType::HierarchicalFacet {
Some(TermOrdinalMapping::new(max_term_ords))
} else {
None
let mut term_ord_mapping_opt = match field_type {
FieldType::HierarchicalFacet(_) => Some(TermOrdinalMapping::new(max_term_ords)),
_ => None,
};

let mut merged_terms = TermMerger::new(field_term_streams);
Expand Down Expand Up @@ -1179,7 +1178,7 @@ mod tests {
#[test]
fn test_merge_facets() {
let mut schema_builder = schema::Schema::builder();
let facet_field = schema_builder.add_facet_field("facet");
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let index = Index::create_in_ram(schema_builder.build());
let reader = index.reader().unwrap();
{
Expand Down
2 changes: 1 addition & 1 deletion src/indexer/segment_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ impl SegmentWriter {
let (term_buffer, multifield_postings) =
(&mut self.term_buffer, &mut self.multifield_postings);
match *field_entry.field_type() {
FieldType::HierarchicalFacet => {
FieldType::HierarchicalFacet(_) => {
term_buffer.set_field(field);
let facets =
field_values
Expand Down
6 changes: 4 additions & 2 deletions src/postings/postings_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter>
| FieldType::F64(_)
| FieldType::Date(_)
| FieldType::Bytes(_)
| FieldType::HierarchicalFacet => SpecializedPostingsWriter::<NothingRecorder>::new_boxed(),
| FieldType::HierarchicalFacet(_) => {
SpecializedPostingsWriter::<NothingRecorder>::new_boxed()
}
}
}

Expand Down Expand Up @@ -142,7 +144,7 @@ impl MultiFieldPostingsWriter {
let field_entry = self.schema.get_field_entry(field);

match *field_entry.field_type() {
FieldType::Str(_) | FieldType::HierarchicalFacet => {
FieldType::Str(_) | FieldType::HierarchicalFacet(_) => {
// populating the (unordered term ord) -> (ordered term ord) mapping
// for the field.
let unordered_term_ids = term_offsets[byte_offsets.clone()]
Expand Down
Loading

0 comments on commit 1557290

Please sign in to comment.