Skip to content

Commit

Permalink
Add option for fieldnorm (#1215)
Browse files Browse the repository at this point in the history
* Add option for fieldnorm
* Updated backward compatibility tests.

Closes #1134
  • Loading branch information
infiniteregrets authored Apr 8, 2022
1 parent fa826b6 commit d1f281e
Show file tree
Hide file tree
Showing 14 changed files with 488 additions and 0 deletions.
42 changes: 42 additions & 0 deletions quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,8 @@ struct FieldMappingEntryForSerialization {
#[serde(skip_serializing_if = "Option::is_none")]
indexed: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
fieldnorms: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
tokenizer: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
record: Option<IndexRecordOption>,
Expand Down Expand Up @@ -551,6 +553,7 @@ impl From<FieldMappingEntry> for FieldMappingEntryForSerialization {
let type_with_cardinality = value.mapping_type.type_with_cardinality();
let mut fast = false;
let mut indexed = None;
let mut fieldnorms = None;
let mut record = None;
let mut stored = false;
let mut tokenizer: Option<String> = None;
Expand All @@ -560,8 +563,11 @@ impl From<FieldMappingEntry> for FieldMappingEntryForSerialization {
if let Some(indexing_options) = text_options.get_indexing_options() {
tokenizer = Some(indexing_options.tokenizer().to_owned());
record = Some(indexing_options.index_option());
indexed = Some(true);
fieldnorms = Some(indexing_options.fieldnorms());
} else {
indexed = Some(false);
fieldnorms = Some(false);
}
}
FieldMappingType::I64(options, _)
Expand All @@ -585,6 +591,7 @@ impl From<FieldMappingEntry> for FieldMappingEntryForSerialization {
type_with_cardinality,
fast,
indexed,
fieldnorms,
record,
stored,
tokenizer,
Expand Down Expand Up @@ -623,8 +630,10 @@ impl FieldMappingEntryForSerialization {
)
}
let mut options = TextOptions::default();

if self.indexed.unwrap_or(true) {
let mut indexing_options = TextFieldIndexing::default();
indexing_options = indexing_options.set_fieldnorms(self.fieldnorms.unwrap_or(false));
if let Some(index_option) = self.record {
indexing_options = indexing_options.set_index_option(index_option);
}
Expand Down Expand Up @@ -673,6 +682,9 @@ impl FieldMappingEntryForSerialization {
}
if self.indexed.unwrap_or(true) {
options = options.set_indexed();
if self.fieldnorms.unwrap_or(false) {
options = options.set_fieldnorms();
}
}
if self.fast {
options = options.set_fast();
Expand Down Expand Up @@ -720,6 +732,9 @@ impl FieldMappingEntryForSerialization {
}
if self.indexed.unwrap_or(true) {
options = options.set_indexed();
if self.fieldnorms.unwrap_or(false) {
options = options.set_fieldnorm();
}
}
Ok(options)
}
Expand Down Expand Up @@ -821,6 +836,32 @@ mod tests {
Ok(())
}

/// Deserializes a text field mapping that sets `"fieldnorms": true` and checks
/// that the resulting text options are stored and that their indexing options
/// report fieldnorms as enabled.
#[test]
fn test_deserialize_valid_fieldnorms() -> anyhow::Result<()> {
    // JSON mapping under test; it explicitly enables both `indexed` and `fieldnorms`.
    let mapping_json = r#"
{
"name": "my_field_name",
"type": "text",
"stored": true,
"indexed": true,
"fieldnorms": true,
"record": "basic",
"tokenizer": "english"
}"#;
    let entry = serde_json::from_str::<FieldMappingEntry>(mapping_json).unwrap();
    if let FieldMappingType::Text(text_options, _) = entry.mapping_type {
        assert_eq!(text_options.is_stored(), true);
        // The mapping is indexed, so indexing options are expected to be present.
        let indexing = text_options.get_indexing_options().unwrap();
        assert_eq!(indexing.fieldnorms(), true);
    } else {
        panic!("wrong property type");
    }

    Ok(())
}

#[test]
fn test_error_on_text_with_invalid_options() -> anyhow::Result<()> {
let result = serde_json::from_str::<FieldMappingEntry>(
Expand Down Expand Up @@ -963,6 +1004,7 @@ mod tests {
match result.mapping_type {
FieldMappingType::I64(options, cardinality) => {
assert_eq!(options.is_indexed(), true); // default
assert_eq!(options.fieldnorms(), false); // default
assert_eq!(options.is_fast(), false); // default
assert_eq!(options.is_stored(), true); // default
assert_eq!(cardinality, Cardinality::MultiValues);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "log_level",
"record": "basic",
"stored": true,
Expand All @@ -32,6 +34,8 @@
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "message",
"record": "position",
"stored": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "log_level",
"record": "basic",
"stored": true,
Expand All @@ -32,6 +34,8 @@
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "message",
"record": "position",
"stored": true,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"index": {
"checkpoint": {
"kafka-source": {
"00000000000000000000": "00000000000000000042"
}
},
"create_timestamp": 1789,
"doc_mapping": {
"field_mappings": [
{
"fast": true,
"indexed": true,
"name": "tenant_id",
"stored": true,
"type": "u64"
},
{
"fast": true,
"indexed": true,
"name": "timestamp",
"stored": true,
"type": "i64"
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "log_level",
"record": "basic",
"stored": true,
"tokenizer": "raw",
"type": "text"
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "message",
"record": "position",
"stored": true,
"tokenizer": "default",
"type": "text"
}
],
"store_source": true,
"tag_fields": [
"log_level",
"tenant_id"
]
},
"index_id": "my-index",
"index_uri": "s3://quickwit-indexes/my-index",
"indexing_settings": {
"commit_timeout_secs": 301,
"demux_enabled": true,
"demux_field": "tenant_id",
"merge_enabled": true,
"merge_policy": {
"demux_factor": 7,
"max_merge_factor": 11,
"merge_factor": 9
},
"resources": {
"heap_size": 3,
"num_threads": 3
},
"sort_field": "timestamp",
"sort_order": "asc",
"split_num_docs_target": 10000001,
"timestamp_field": "timestamp"
},
"search_settings": {
"default_search_fields": [
"message"
]
},
"sources": [
{
"params": {
"client_params": {},
"topic": "kafka-topic"
},
"source_id": "kafka-source",
"source_type": "kafka"
}
],
"update_timestamp": 1789,
"version": "1"
},
"splits": [
{
"create_timestamp": 3,
"demux_num_ops": 1,
"footer_offsets": {
"end": 2000,
"start": 1000
},
"num_docs": 12303,
"size_in_bytes": 234234,
"split_id": "split",
"split_state": "Published",
"tags": [
"234",
"aaa"
],
"time_range": {
"end": 130198,
"start": 121000
},
"update_timestamp": 1789,
"version": "1"
}
],
"version": "0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"index": {
"checkpoint": {
"kafka-source": {
"00000000000000000000": "00000000000000000042"
}
},
"create_timestamp": 1789,
"doc_mapping": {
"field_mappings": [
{
"fast": true,
"indexed": true,
"name": "tenant_id",
"stored": true,
"type": "u64"
},
{
"fast": true,
"indexed": true,
"name": "timestamp",
"stored": true,
"type": "i64"
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "log_level",
"record": "basic",
"stored": true,
"tokenizer": "raw",
"type": "text"
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "message",
"record": "position",
"stored": true,
"tokenizer": "default",
"type": "text"
}
],
"store_source": true,
"tag_fields": [
"log_level",
"tenant_id"
]
},
"index_id": "my-index",
"index_uri": "s3://quickwit-indexes/my-index",
"indexing_settings": {
"commit_timeout_secs": 301,
"demux_enabled": true,
"demux_field": "tenant_id",
"merge_enabled": true,
"merge_policy": {
"demux_factor": 7,
"max_merge_factor": 11,
"merge_factor": 9
},
"resources": {
"heap_size": 3,
"num_threads": 3
},
"sort_field": "timestamp",
"sort_order": "asc",
"split_num_docs_target": 10000001,
"timestamp_field": "timestamp"
},
"search_settings": {
"default_search_fields": [
"message"
]
},
"sources": [
{
"params": {
"client_params": {},
"topic": "kafka-topic"
},
"source_id": "kafka-source",
"source_type": "kafka"
}
],
"update_timestamp": 1789,
"version": "1"
},
"splits": [
{
"create_timestamp": 3,
"demux_num_ops": 1,
"footer_offsets": {
"end": 2000,
"start": 1000
},
"num_docs": 12303,
"size_in_bytes": 234234,
"split_id": "split",
"split_state": "Published",
"tags": [
"234",
"aaa"
],
"time_range": {
"end": 130198,
"start": 121000
},
"update_timestamp": 1789,
"version": "1"
}
],
"version": "0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "log_level",
"record": "basic",
"stored": true,
Expand All @@ -32,6 +34,8 @@
},
{
"fast": false,
"fieldnorms": false,
"indexed": true,
"name": "message",
"record": "position",
"stored": true,
Expand Down
Loading

0 comments on commit d1f281e

Please sign in to comment.