From 4b34231f28efe30327a3bbca38593a00968113d0 Mon Sep 17 00:00:00 2001 From: Laurent Pouget Date: Mon, 22 Mar 2021 21:01:39 +0100 Subject: [PATCH] Make facet indexation and storage optional Added a FacetOptions for HierarchicalFacet which adds indexed and stored flags to it. Propagated the change and updated tests accordingly. Added a test to ensure that a non-indexed facet is taken care of. Added the `path()` function to the Value implementation to return the stored facet. --- examples/faceted_search.rs | 2 +- examples/faceted_search_with_tweaked_score.rs | 2 +- src/collector/facet_collector.rs | 18 ++-- src/core/segment_reader.rs | 23 ++-- src/fastfield/facet_reader.rs | 101 +++++++++++++++++- src/fastfield/mod.rs | 2 +- src/fastfield/multivalued/mod.rs | 3 +- src/fastfield/multivalued/reader.rs | 4 +- src/fastfield/readers.rs | 2 +- src/fastfield/writer.rs | 2 +- src/indexer/merger.rs | 11 +- src/indexer/segment_writer.rs | 2 +- src/postings/postings_writer.rs | 6 +- src/query/query_parser/query_parser.rs | 16 ++- src/schema/facet_options.rs | 96 +++++++++++++++++ src/schema/field_entry.rs | 23 ++-- src/schema/field_type.rs | 19 ++-- src/schema/mod.rs | 2 + src/schema/schema.rs | 8 +- src/schema/value.rs | 10 ++ 20 files changed, 290 insertions(+), 62 deletions(-) create mode 100644 src/schema/facet_options.rs diff --git a/examples/faceted_search.rs b/examples/faceted_search.rs index 2769af5c91..f6ed20ed76 100644 --- a/examples/faceted_search.rs +++ b/examples/faceted_search.rs @@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> { let name = schema_builder.add_text_field("felin_name", TEXT | STORED); // this is our faceted field: its scientific classification - let classification = schema_builder.add_facet_field("classification"); + let classification = schema_builder.add_facet_field("classification", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); diff --git a/examples/faceted_search_with_tweaked_score.rs 
b/examples/faceted_search_with_tweaked_score.rs index bb9ad002bf..0d35337cfd 100644 --- a/examples/faceted_search_with_tweaked_score.rs +++ b/examples/faceted_search_with_tweaked_score.rs @@ -9,7 +9,7 @@ fn main() -> tantivy::Result<()> { let mut schema_builder = Schema::builder(); let title = schema_builder.add_text_field("title", STORED); - let ingredient = schema_builder.add_facet_field("ingredient"); + let ingredient = schema_builder.add_facet_field("ingredient", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema.clone()); diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 5952d48769..66f6bef3fc 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -80,7 +80,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize { /// ```rust /// use tantivy::collector::FacetCollector; /// use tantivy::query::AllQuery; -/// use tantivy::schema::{Facet, Schema, TEXT}; +/// use tantivy::schema::{Facet, Schema, INDEXED, TEXT}; /// use tantivy::{doc, Index}; /// /// fn example() -> tantivy::Result<()> { @@ -89,7 +89,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize { /// // Facet have their own specific type. /// // It is not a bad practise to put all of your /// // facet information in the same field. 
-/// let facet = schema_builder.add_facet_field("facet"); +/// let facet = schema_builder.add_facet_field("facet", INDEXED); /// let title = schema_builder.add_text_field("title", TEXT); /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); @@ -461,7 +461,7 @@ mod tests { use crate::collector::Count; use crate::core::Index; use crate::query::{AllQuery, QueryParser, TermQuery}; - use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema}; + use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, INDEXED}; use crate::Term; use rand::distributions::Uniform; use rand::prelude::SliceRandom; @@ -471,7 +471,7 @@ mod tests { #[test] fn test_facet_collector_drilldown() { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet"); + let facet_field = schema_builder.add_facet_field("facet", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -531,7 +531,7 @@ mod tests { #[test] fn test_doc_unsorted_multifacet() { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facets"); + let facet_field = schema_builder.add_facet_field("facets", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests().unwrap(); @@ -555,7 +555,7 @@ mod tests { #[test] fn test_doc_search_by_facet() -> crate::Result<()> { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet"); + let facet_field = schema_builder.add_facet_field("facet", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; @@ -612,7 +612,7 @@ mod tests { #[test] fn test_facet_collector_topk() { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet"); + let facet_field = 
schema_builder.add_facet_field("facet", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -664,7 +664,7 @@ mod bench { use crate::collector::FacetCollector; use crate::query::AllQuery; - use crate::schema::{Facet, Schema}; + use crate::schema::{Facet, Schema, INDEXED}; use crate::Index; use rand::seq::SliceRandom; use rand::thread_rng; @@ -673,7 +673,7 @@ mod bench { #[bench] fn bench_facet_collector(b: &mut Bencher) { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet"); + let facet_field = schema_builder.add_facet_field("facet", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 5eea064732..02c093d9e0 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -108,19 +108,22 @@ impl SegmentReader { /// Accessor to the `FacetReader` associated to a given `Field`. pub fn facet_reader(&self, field: Field) -> crate::Result { let field_entry = self.schema.get_field_entry(field); - if field_entry.field_type() != &FieldType::HierarchicalFacet { - return Err(crate::TantivyError::InvalidArgument(format!( + + match field_entry.field_type() { + FieldType::HierarchicalFacet(_) => { + let term_ords_reader = self.fast_fields().u64s(field)?; + let termdict = self + .termdict_composite + .open_read(field) + .map(TermDictionary::open) + .unwrap_or_else(|| Ok(TermDictionary::empty()))?; + Ok(FacetReader::new(term_ords_reader, termdict)) + } + _ => Err(crate::TantivyError::InvalidArgument(format!( "Field {:?} is not a facet field.", field_entry.name() - ))); + ))), } - let term_ords_reader = self.fast_fields().u64s(field)?; - let termdict = self - .termdict_composite - .open_read(field) - .map(TermDictionary::open) - .unwrap_or_else(|| Ok(TermDictionary::empty()))?; - Ok(FacetReader::new(term_ords_reader, termdict)) } /// Accessor to the segment's `Field norms`'s 
reader. diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index 6f802c153a..9b5b7f364c 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -84,14 +84,106 @@ impl FacetReader { mod tests { use crate::Index; use crate::{ - schema::{Facet, SchemaBuilder}, - Document, + schema::{Facet, FacetOptions, SchemaBuilder, Value, INDEXED, STORED}, + DocAddress, Document, }; + #[test] + fn test_facet_only_indexed() -> crate::Result<()> { + let mut schema_builder = SchemaBuilder::default(); + let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b"))); + index_writer.commit()?; + let searcher = index.reader()?.searcher(); + let facet_reader = searcher + .segment_reader(0u32) + .facet_reader(facet_field) + .unwrap(); + let mut facet_ords = Vec::new(); + facet_reader.facet_ords(0u32, &mut facet_ords); + assert_eq!(&facet_ords, &[2u64]); + let doc = searcher.doc(DocAddress(0u32, 0u32))?; + let value = doc.get_first(facet_field).and_then(Value::path); + assert_eq!(value, None); + Ok(()) + } + + #[test] + fn test_facet_only_stored() -> crate::Result<()> { + let mut schema_builder = SchemaBuilder::default(); + let facet_field = schema_builder.add_facet_field("facet", STORED); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b"))); + index_writer.commit()?; + let searcher = index.reader()?.searcher(); + let facet_reader = searcher + .segment_reader(0u32) + .facet_reader(facet_field) + .unwrap(); + let mut facet_ords = Vec::new(); + facet_reader.facet_ords(0u32, &mut facet_ords); + assert!(facet_ords.is_empty()); + let doc = searcher.doc(DocAddress(0u32, 0u32))?; + let 
value = doc.get_first(facet_field).and_then(Value::path); + assert_eq!(value, Some("/a/b".to_string())); + Ok(()) + } + + #[test] + fn test_facet_stored_and_indexed() -> crate::Result<()> { + let mut schema_builder = SchemaBuilder::default(); + let facet_field = schema_builder.add_facet_field("facet", STORED | INDEXED); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b"))); + index_writer.commit()?; + let searcher = index.reader()?.searcher(); + let facet_reader = searcher + .segment_reader(0u32) + .facet_reader(facet_field) + .unwrap(); + let mut facet_ords = Vec::new(); + facet_reader.facet_ords(0u32, &mut facet_ords); + assert_eq!(&facet_ords, &[2u64]); + let doc = searcher.doc(DocAddress(0u32, 0u32))?; + let value = doc.get_first(facet_field).and_then(Value::path); + assert_eq!(value, Some("/a/b".to_string())); + Ok(()) + } + + #[test] + fn test_facet_neither_stored_and_indexed() -> crate::Result<()> { + let mut schema_builder = SchemaBuilder::default(); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b"))); + index_writer.commit()?; + let searcher = index.reader()?.searcher(); + let facet_reader = searcher + .segment_reader(0u32) + .facet_reader(facet_field) + .unwrap(); + let mut facet_ords = Vec::new(); + facet_reader.facet_ords(0u32, &mut facet_ords); + assert!(facet_ords.is_empty()); + let doc = searcher.doc(DocAddress(0u32, 0u32))?; + let value = doc.get_first(facet_field).and_then(Value::path); + assert_eq!(value, None); + Ok(()) + } + #[test] fn test_facet_not_populated_for_all_docs() -> crate::Result<()> { let mut schema_builder = SchemaBuilder::default(); - let 
facet_field = schema_builder.add_facet_field("facet"); + let facet_field = schema_builder.add_facet_field("facet", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; @@ -110,10 +202,11 @@ mod tests { assert!(facet_ords.is_empty()); Ok(()) } + #[test] fn test_facet_not_populated_for_any_docs() -> crate::Result<()> { let mut schema_builder = SchemaBuilder::default(); - let facet_field = schema_builder.add_facet_field("facet"); + let facet_field = schema_builder.add_facet_field("facet", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index b24495ad49..798dcb3416 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -96,7 +96,7 @@ impl FastValue for u64 { fn fast_field_cardinality(field_type: &FieldType) -> Option { match *field_type { FieldType::U64(ref integer_options) => integer_options.get_fastfield_cardinality(), - FieldType::HierarchicalFacet => Some(Cardinality::MultiValues), + FieldType::HierarchicalFacet(_) => Some(Cardinality::MultiValues), _ => None, } } diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index 81cf249c81..235255b689 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -13,6 +13,7 @@ mod tests { use crate::schema::Facet; use crate::schema::IntOptions; use crate::schema::Schema; + use crate::schema::INDEXED; use crate::Index; use chrono::Duration; @@ -212,7 +213,7 @@ mod tests { #[ignore] fn test_many_facets() { let mut schema_builder = Schema::builder(); - let field = schema_builder.add_facet_field("facetfield"); + let field = schema_builder.add_facet_field("facetfield", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests().unwrap(); diff --git 
a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index fd4d95ae82..6ead853e7d 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -60,12 +60,12 @@ impl MultiValuedFastFieldReader { mod tests { use crate::core::Index; - use crate::schema::{Facet, Schema}; + use crate::schema::{Facet, Schema, INDEXED}; #[test] fn test_multifastfield_reader() { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facets"); + let facet_field = schema_builder.add_facet_field("facets", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index 880e95aa24..731b36e497 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -38,7 +38,7 @@ fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality FieldType::Date(options) => options .get_fastfield_cardinality() .map(|cardinality| (FastType::Date, cardinality)), - FieldType::HierarchicalFacet => Some((FastType::U64, Cardinality::MultiValues)), + FieldType::HierarchicalFacet(_) => Some((FastType::U64, Cardinality::MultiValues)), _ => None, } } diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 79f9965ce2..3eb3b42e27 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -52,7 +52,7 @@ impl FastFieldsWriter { None => {} } } - FieldType::HierarchicalFacet => { + FieldType::HierarchicalFacet(_) => { let fast_field_writer = MultiValuedFastFieldWriter::new(field, true); multi_values_writers.push(fast_field_writer); } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 271c3d505f..43c7cd783c 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -195,7 +195,7 @@ impl IndexMerger { for (field, field_entry) in self.schema.fields() { let field_type = field_entry.field_type(); match field_type { - 
FieldType::HierarchicalFacet => { + FieldType::HierarchicalFacet(_) => { let term_ordinal_mapping = term_ord_mappings .remove(&field) .expect("Logic Error in Tantivy (Please report). HierarchicalFact field should have required a\ @@ -515,10 +515,9 @@ impl IndexMerger { max_term_ords.push(terms.num_terms() as u64); } - let mut term_ord_mapping_opt = if *field_type == FieldType::HierarchicalFacet { - Some(TermOrdinalMapping::new(max_term_ords)) - } else { - None + let mut term_ord_mapping_opt = match field_type { + FieldType::HierarchicalFacet(_) => Some(TermOrdinalMapping::new(max_term_ords)), + _ => None, }; let mut merged_terms = TermMerger::new(field_term_streams); @@ -1179,7 +1178,7 @@ mod tests { #[test] fn test_merge_facets() { let mut schema_builder = schema::Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet"); + let facet_field = schema_builder.add_facet_field("facet", INDEXED); let index = Index::create_in_ram(schema_builder.build()); let reader = index.reader().unwrap(); { diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 77be6aff02..b60b486a2e 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -142,7 +142,7 @@ impl SegmentWriter { let (term_buffer, multifield_postings) = (&mut self.term_buffer, &mut self.multifield_postings); match *field_entry.field_type() { - FieldType::HierarchicalFacet => { + FieldType::HierarchicalFacet(_) => { term_buffer.set_field(field); let facets = field_values diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 681d9cb744..3d097dea67 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -39,7 +39,9 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box | FieldType::F64(_) | FieldType::Date(_) | FieldType::Bytes(_) - | FieldType::HierarchicalFacet => SpecializedPostingsWriter::::new_boxed(), + | FieldType::HierarchicalFacet(_) => { + 
SpecializedPostingsWriter::::new_boxed() + } } } @@ -142,7 +144,7 @@ impl MultiFieldPostingsWriter { let field_entry = self.schema.get_field_entry(field); match *field_entry.field_type() { - FieldType::Str(_) | FieldType::HierarchicalFacet => { + FieldType::Str(_) | FieldType::HierarchicalFacet(_) => { // populating the (unordered term ord) -> (ordered term ord) mapping // for the field. let unordered_term_ids = term_offsets[byte_offsets.clone()] diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index d68389aa2b..aaba3dedae 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -357,7 +357,7 @@ impl QueryParser { )) } } - FieldType::HierarchicalFacet => { + FieldType::HierarchicalFacet(_) => { let facet = Facet::from_text(phrase); Ok(vec![(0, Term::from_field_text(field, facet.encoded_str()))]) } @@ -605,7 +605,8 @@ mod test { schema_builder.add_text_field("with_stop_words", text_options); schema_builder.add_date_field("date", INDEXED); schema_builder.add_f64_field("float", INDEXED); - schema_builder.add_facet_field("facet"); + schema_builder.add_facet_field("facet", INDEXED); + schema_builder.add_facet_field("facet_not_indexed", STORED); schema_builder.add_bytes_field("bytes", INDEXED); schema_builder.add_bytes_field("bytes_not_indexed", STORED); schema_builder.build() @@ -658,6 +659,13 @@ mod test { ); } + #[test] + fn test_parse_query_facet_not_indexed() { + let error = + parse_query_to_logical_ast("facet_not_indexed:/root/branch/leaf", false).unwrap_err(); + assert!(matches!(error, QueryParserError::FieldNotIndexed(_))); + } + #[test] pub fn test_parse_query_with_boost() { let mut query_parser = make_query_parser(); @@ -799,7 +807,7 @@ mod test { fn test_parse_bytes() { test_parse_query_to_logical_ast_helper( "bytes:YnVidQ==", - "Term(field=12,bytes=[98, 117, 98, 117])", + "Term(field=13,bytes=[98, 117, 98, 117])", false, ); } @@ -814,7 +822,7 @@ mod test { fn 
test_parse_bytes_phrase() { test_parse_query_to_logical_ast_helper( "bytes:\"YnVidQ==\"", - "Term(field=12,bytes=[98, 117, 98, 117])", + "Term(field=13,bytes=[98, 117, 98, 117])", false, ); } diff --git a/src/schema/facet_options.rs b/src/schema/facet_options.rs new file mode 100644 index 0000000000..7427c1336e --- /dev/null +++ b/src/schema/facet_options.rs @@ -0,0 +1,96 @@ +use crate::schema::flags::{IndexedFlag, SchemaFlagList, StoredFlag}; +use serde::{Deserialize, Serialize}; +use std::ops::BitOr; + +/// Define how a facet field should be handled by tantivy. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct FacetOptions { + indexed: bool, + stored: bool, +} + +impl FacetOptions { + /// Returns true iff the value is stored. + pub fn is_stored(&self) -> bool { + self.stored + } + + /// Returns true iff the value is indexed. + pub fn is_indexed(&self) -> bool { + self.indexed + } + + /// Set the field as stored. + /// + /// Only the fields that are set as *stored* are + /// persisted into the Tantivy's store. + pub fn set_stored(mut self) -> FacetOptions { + self.stored = true; + self + } + + /// Set the field as indexed. + /// + /// Setting a facet as indexed will generate + /// a walkable path. 
+ pub fn set_indexed(mut self) -> FacetOptions { + self.indexed = true; + self + } +} + +impl Default for FacetOptions { + fn default() -> FacetOptions { + FacetOptions { + indexed: false, + stored: false, + } + } +} + +impl From<()> for FacetOptions { + fn from(_: ()) -> FacetOptions { + FacetOptions::default() + } +} + +impl From for FacetOptions { + fn from(_: StoredFlag) -> Self { + FacetOptions { + indexed: false, + stored: true, + } + } +} + +impl From for FacetOptions { + fn from(_: IndexedFlag) -> Self { + FacetOptions { + indexed: true, + stored: false, + } + } +} + +impl> BitOr for FacetOptions { + type Output = FacetOptions; + + fn bitor(self, other: T) -> FacetOptions { + let other = other.into(); + FacetOptions { + indexed: self.indexed | other.indexed, + stored: self.stored | other.stored, + } + } +} + +impl From> for FacetOptions +where + Head: Clone, + Tail: Clone, + Self: BitOr + From + From, +{ + fn from(head_tail: SchemaFlagList) -> Self { + Self::from(head_tail.head) | Self::from(head_tail.tail) + } +} diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 76c048afa4..67256bfbd3 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -1,3 +1,4 @@ +use crate::schema::FacetOptions; use crate::schema::TextOptions; use crate::schema::{is_valid_field_name, IntOptions}; @@ -73,11 +74,11 @@ impl FieldEntry { } /// Creates a field entry for a facet. 
- pub fn new_facet(field_name: String) -> FieldEntry { + pub fn new_facet(field_name: String, field_type: FacetOptions) -> FieldEntry { assert!(is_valid_field_name(&field_name)); FieldEntry { name: field_name, - field_type: FieldType::HierarchicalFacet, + field_type: FieldType::HierarchicalFacet(field_type), } } @@ -107,7 +108,7 @@ impl FieldEntry { | FieldType::I64(ref options) | FieldType::F64(ref options) | FieldType::Date(ref options) => options.is_indexed(), - FieldType::HierarchicalFacet => true, + FieldType::HierarchicalFacet(ref options) => options.is_indexed(), FieldType::Bytes(ref options) => options.is_indexed(), } } @@ -131,8 +132,7 @@ impl FieldEntry { | FieldType::F64(ref options) | FieldType::Date(ref options) => options.is_stored(), FieldType::Str(ref options) => options.is_stored(), - // TODO make stored hierarchical facet optional - FieldType::HierarchicalFacet => true, + FieldType::HierarchicalFacet(ref options) => options.is_stored(), FieldType::Bytes(ref options) => options.is_stored(), } } @@ -167,8 +167,9 @@ impl Serialize for FieldEntry { s.serialize_field("type", "date")?; s.serialize_field("options", options)?; } - FieldType::HierarchicalFacet => { + FieldType::HierarchicalFacet(ref options) => { s.serialize_field("type", "hierarchical_facet")?; + s.serialize_field("options", options)?; } FieldType::Bytes(ref options) => { s.serialize_field("type", "bytes")?; @@ -225,10 +226,8 @@ impl<'de> Deserialize<'de> for FieldEntry { } let type_string = map.next_value::()?; match type_string.as_str() { - "hierarchical_facet" => { - field_type = Some(FieldType::HierarchicalFacet); - } - "text" | "u64" | "i64" | "f64" | "date" | "bytes" => { + "text" | "u64" | "i64" | "f64" | "date" | "bytes" + | "hierarchical_facet" => { // These types require additional options to create a field_type } _ => panic!("unhandled type"), @@ -248,6 +247,10 @@ impl<'de> Deserialize<'de> for FieldEntry { "f64" => field_type = Some(FieldType::F64(map.next_value()?)), "date" 
=> field_type = Some(FieldType::Date(map.next_value()?)), "bytes" => field_type = Some(FieldType::Bytes(map.next_value()?)), + "hierarchical_facet" => { + field_type = + Some(FieldType::HierarchicalFacet(map.next_value()?)) + } _ => { let msg = format!("Unrecognised type {}", ty); return Err(de::Error::custom(msg)); diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 9843a5df30..b3b9227047 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -1,4 +1,5 @@ use crate::schema::bytes_options::BytesOptions; +use crate::schema::facet_options::FacetOptions; use crate::schema::Facet; use crate::schema::IndexRecordOption; use crate::schema::TextFieldIndexing; @@ -60,7 +61,7 @@ pub enum FieldType { /// Signed 64-bits Date 64 field type configuration, Date(IntOptions), /// Hierachical Facet - HierarchicalFacet, + HierarchicalFacet(FacetOptions), /// Bytes (one per document) Bytes(BytesOptions), } @@ -74,7 +75,7 @@ impl FieldType { FieldType::I64(_) => Type::I64, FieldType::F64(_) => Type::F64, FieldType::Date(_) => Type::Date, - FieldType::HierarchicalFacet => Type::HierarchicalFacet, + FieldType::HierarchicalFacet(_) => Type::HierarchicalFacet, FieldType::Bytes(_) => Type::Bytes, } } @@ -87,7 +88,7 @@ impl FieldType { | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) => int_options.is_indexed(), FieldType::Date(ref date_options) => date_options.is_indexed(), - FieldType::HierarchicalFacet => true, + FieldType::HierarchicalFacet(ref facet_options) => facet_options.is_indexed(), FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(), } } @@ -111,7 +112,13 @@ impl FieldType { None } } - FieldType::HierarchicalFacet => Some(IndexRecordOption::Basic), + FieldType::HierarchicalFacet(ref facet_options) => { + if facet_options.is_indexed() { + Some(IndexRecordOption::Basic) + } else { + None + } + } FieldType::Bytes(ref bytes_options) => { if bytes_options.is_indexed() { Some(IndexRecordOption::Basic) @@ -144,7 +151,7 
@@ impl FieldType { FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => Err( ValueParsingError::TypeError(format!("Expected an integer, got {:?}", json)), ), - FieldType::HierarchicalFacet => Ok(Value::Facet(Facet::from(field_text))), + FieldType::HierarchicalFacet(_) => Ok(Value::Facet(Facet::from(field_text))), FieldType::Bytes(_) => base64::decode(field_text).map(Value::Bytes).map_err(|_| { ValueParsingError::InvalidBase64(format!( "Expected base64 string, got {:?}", @@ -177,7 +184,7 @@ impl FieldType { Err(ValueParsingError::OverflowError(msg)) } } - FieldType::Str(_) | FieldType::HierarchicalFacet | FieldType::Bytes(_) => { + FieldType::Str(_) | FieldType::HierarchicalFacet(_) | FieldType::Bytes(_) => { let msg = format!("Expected a string, got {:?}", json); Err(ValueParsingError::TypeError(msg)) } diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 169fd96b30..1c1a62171b 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -104,6 +104,7 @@ let schema = schema_builder.build(); mod document; mod facet; +mod facet_options; mod schema; mod term; @@ -128,6 +129,7 @@ pub use self::value::Value; pub use self::facet::Facet; pub(crate) use self::facet::FACET_SEP_BYTE; +pub use self::facet_options::FacetOptions; pub use self::document::Document; pub use self::field::Field; diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 3a53781d19..70263a8f57 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -146,8 +146,12 @@ impl SchemaBuilder { } /// Adds a facet field to the schema. 
- pub fn add_facet_field(&mut self, field_name: &str) -> Field { - let field_entry = FieldEntry::new_facet(field_name.to_string()); + pub fn add_facet_field>( + &mut self, + field_name: &str, + facet_options: T, + ) -> Field { + let field_entry = FieldEntry::new_facet(field_name.to_string(), facet_options.into()); self.add_field(field_entry) } diff --git a/src/schema/value.rs b/src/schema/value.rs index fa8f43fb05..f34b1fb829 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -137,6 +137,16 @@ impl Value { } } + /// Returns the path value, provided the value is of the `Facet` type. + /// (Returns None if the value is not of the `Facet` type). + pub fn path(&self) -> Option { + if let Value::Facet(facet) = self { + Some(facet.to_path_string()) + } else { + None + } + } + /// Returns the tokenized text, provided the value is of the `PreTokStr` type. /// /// Returns None if the value is not of the `PreTokStr` type.