Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add indices field to _matchesPosition to specify where in an array a match comes from #5005

Merged
merged 2 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 48 additions & 78 deletions crates/meilisearch/src/search/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1733,46 +1733,51 @@ fn format_fields(
// select the attributes to retrieve
let displayable_names =
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| {
// To get the formatting options of each key, we need to look at all the rules that apply
// to the value and merge them together. E.g., if a user asked to highlight `doggo`
// and crop `doggo.name`, then `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
// highlighted.
// Warning: the time to compute the format list scales with the number of fields to format;
// combined with `map_leaf_values`, which iterates over all the nested fields, this gives a quadratic complexity:
// d*f, where d is the total number of fields to display and f is the total number of fields to format.
let format = formatting_fields_options
.iter()
.filter(|(name, _option)| {
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
})
.map(|(_, option)| **option)
.reduce(|acc, option| acc.merge(option));
let mut infos = Vec::new();

// If no locales have been provided, we try to find them in the localized_attributes.
let locales = locales.or_else(|| {
localized_attributes
permissive_json_pointer::map_leaf_values(
&mut document,
displayable_names,
|key, array_indices, value| {
// To get the formatting options of each key, we need to look at all the rules that apply
// to the value and merge them together. E.g., if a user asked to highlight `doggo`
// and crop `doggo.name`, then `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
// highlighted.
// Warning: the time to compute the format list scales with the number of fields to format;
// combined with `map_leaf_values`, which iterates over all the nested fields, this gives a quadratic complexity:
// d*f, where d is the total number of fields to display and f is the total number of fields to format.
let format = formatting_fields_options
.iter()
.find(|rule| rule.match_str(key))
.map(LocalizedAttributesRule::locales)
});

*value = format_value(
std::mem::take(value),
builder,
format,
&mut infos,
compute_matches,
locales,
);
.filter(|(name, _option)| {
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
})
.map(|(_, option)| **option)
.reduce(|acc, option| acc.merge(option));
let mut infos = Vec::new();

if let Some(matches) = matches_position.as_mut() {
if !infos.is_empty() {
matches.insert(key.to_owned(), infos);
// If no locales have been provided, we try to find them in the localized_attributes.
let locales = locales.or_else(|| {
localized_attributes
.iter()
.find(|rule| rule.match_str(key))
.map(LocalizedAttributesRule::locales)
});

*value = format_value(
std::mem::take(value),
builder,
format,
&mut infos,
compute_matches,
array_indices,
locales,
);

if let Some(matches) = matches_position.as_mut() {
if !infos.is_empty() {
matches.insert(key.to_owned(), infos);
}
}
}
});
},
);

let selectors = formatted_options
.keys()
Expand All @@ -1790,13 +1795,14 @@ fn format_value(
format_options: Option<FormatOptions>,
infos: &mut Vec<MatchBounds>,
compute_matches: bool,
array_indices: &[usize],
locales: Option<&[Language]>,
) -> Value {
match value {
Value::String(old_string) => {
let mut matcher = builder.build(&old_string, locales);
if compute_matches {
let matches = matcher.matches();
let matches = matcher.matches(array_indices);
infos.extend_from_slice(&matches[..]);
}

Expand All @@ -1808,51 +1814,15 @@ fn format_value(
None => Value::String(old_string),
}
}
Value::Array(values) => Value::Array(
values
.into_iter()
.map(|v| {
format_value(
v,
builder,
format_options.map(|format_options| FormatOptions {
highlight: format_options.highlight,
crop: None,
}),
infos,
compute_matches,
locales,
)
})
.collect(),
),
Value::Object(object) => Value::Object(
object
.into_iter()
.map(|(k, v)| {
(
k,
format_value(
v,
builder,
format_options.map(|format_options| FormatOptions {
highlight: format_options.highlight,
crop: None,
}),
infos,
compute_matches,
locales,
),
)
})
.collect(),
),
// `map_leaf_values` makes sure this is only called for leaf fields
Value::Array(_) => unreachable!(),
Value::Object(_) => unreachable!(),
Value::Number(number) => {
let s = number.to_string();

let mut matcher = builder.build(&s, locales);
if compute_matches {
let matches = matcher.matches();
let matches = matcher.matches(array_indices);
infos.extend_from_slice(&matches[..]);
}

Expand Down
5 changes: 4 additions & 1 deletion crates/meilisearch/tests/search/formatted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ async fn format_nested() {
"doggos.name": [
{
"start": 0,
"length": 5
"length": 5,
"indices": [
0
]
}
]
}
Expand Down
11 changes: 9 additions & 2 deletions crates/milli/src/search/new/matches/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ impl FormatOptions {
/// Location of a single query match inside a formatted field value.
///
/// One value is produced per match by `Matcher::matches`.
pub struct MatchBounds {
/// Offset at which the match begins. It is filled from the `byte_start` of the
/// match's first token, so it is expressed in bytes.
pub start: usize,
/// Length of the match. It is filled from the match's `char_count`, so it is
/// counted in characters, not in bytes (unlike `start`).
pub length: usize,
/// Chain of array indices (outermost array first) leading to the matched value
/// when that value is nested inside one or more arrays.
///
/// `None` when the value is not inside any array, in which case the field is
/// left out of the serialized output altogether.
#[serde(skip_serializing_if = "Option::is_none")]
pub indices: Option<Vec<usize>>,
}

/// Structure used to analyze a string, compute words that match,
Expand Down Expand Up @@ -220,15 +222,20 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
}

/// Returns boundaries of the words that match the query.
pub fn matches(&mut self) -> Vec<MatchBounds> {
pub fn matches(&mut self, array_indices: &[usize]) -> Vec<MatchBounds> {
match &self.matches {
None => self.compute_matches().matches(),
None => self.compute_matches().matches(array_indices),
Some((tokens, matches)) => matches
.iter()
.map(|m| MatchBounds {
start: tokens[m.get_first_token_pos()].byte_start,
// TODO: Why is this in chars, while start is in bytes?
length: m.char_count,
indices: if array_indices.is_empty() {
None
} else {
Some(array_indices.to_owned())
},
})
.collect(),
}
Expand Down
96 changes: 79 additions & 17 deletions crates/permissive-json-pointer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ fn contained_in(selector: &str, key: &str) -> bool {
/// map_leaf_values(
/// value.as_object_mut().unwrap(),
/// ["jean.race.name"],
/// |key, value| match (value, key) {
/// |key, _array_indices, value| match (value, key) {
/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(),
/// _ => unreachable!(),
/// },
Expand All @@ -66,17 +66,18 @@ fn contained_in(selector: &str, key: &str) -> bool {
pub fn map_leaf_values<'a>(
value: &mut Map<String, Value>,
selectors: impl IntoIterator<Item = &'a str>,
mut mapper: impl FnMut(&str, &mut Value),
mut mapper: impl FnMut(&str, &[usize], &mut Value),
) {
let selectors: Vec<_> = selectors.into_iter().collect();
map_leaf_values_in_object(value, &selectors, "", &mut mapper);
map_leaf_values_in_object(value, &selectors, "", &[], &mut mapper);
}

pub fn map_leaf_values_in_object(
value: &mut Map<String, Value>,
selectors: &[&str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
array_indices: &[usize],
mapper: &mut impl FnMut(&str, &[usize], &mut Value),
) {
for (key, value) in value.iter_mut() {
let base_key = if base_key.is_empty() {
Expand All @@ -94,12 +95,12 @@ pub fn map_leaf_values_in_object(
if should_continue {
match value {
Value::Object(object) => {
map_leaf_values_in_object(object, selectors, &base_key, mapper)
map_leaf_values_in_object(object, selectors, &base_key, array_indices, mapper)
}
Value::Array(array) => {
map_leaf_values_in_array(array, selectors, &base_key, mapper)
map_leaf_values_in_array(array, selectors, &base_key, array_indices, mapper)
}
value => mapper(&base_key, value),
value => mapper(&base_key, array_indices, value),
}
}
}
Expand All @@ -109,13 +110,24 @@ pub fn map_leaf_values_in_array(
values: &mut [Value],
selectors: &[&str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
base_array_indices: &[usize],
mapper: &mut impl FnMut(&str, &[usize], &mut Value),
) {
for value in values.iter_mut() {
// This avoids allocating twice
let mut array_indices = Vec::with_capacity(base_array_indices.len() + 1);
array_indices.extend_from_slice(base_array_indices);
array_indices.push(0);

for (i, value) in values.iter_mut().enumerate() {
*array_indices.last_mut().unwrap() = i;
match value {
Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper),
Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper),
value => mapper(base_key, value),
Value::Object(object) => {
map_leaf_values_in_object(object, selectors, base_key, &array_indices, mapper)
}
Value::Array(array) => {
map_leaf_values_in_array(array, selectors, base_key, &array_indices, mapper)
}
value => mapper(base_key, &array_indices, value),
}
}
}
Expand Down Expand Up @@ -743,12 +755,14 @@ mod tests {
}
});

map_leaf_values(value.as_object_mut().unwrap(), ["jean.race.name"], |key, value| {
match (value, key) {
map_leaf_values(
value.as_object_mut().unwrap(),
["jean.race.name"],
|key, _, value| match (value, key) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
_ => unreachable!(),
}
});
},
);

assert_eq!(
value,
Expand All @@ -775,7 +789,7 @@ mod tests {
});

let mut calls = 0;
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| {
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, _, value| {
calls += 1;
match (value, key) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
Expand All @@ -798,4 +812,52 @@ mod tests {
})
);
}

/// Checks the array indices `map_leaf_values` reports for each leaf.
///
/// Every selected leaf is overwritten with the debug rendering of the index
/// path it was called with, which makes the expected document self-describing:
/// a leaf outside any array gets `[]`, a leaf in a flat array gets its position,
/// and leaves below nested arrays/objects get the full chain, outermost first.
#[test]
fn map_array() {
    let mut document: Value = json!({
        "no_array": "peter",
        "simple": ["foo", "bar"],
        "nested": [
            {
                "a": [
                    ["cat", "dog"],
                    ["fox", "bear"],
                ],
                "b": "hi",
            },
            {
                "a": ["green", "blue"],
            },
        ],
    });

    // Same shape as the input, each leaf replaced by its expected index path.
    let expected: Value = json!({
        "no_array": "[]",
        "simple": ["[0]", "[1]"],
        "nested": [
            {
                "a": [
                    ["[0, 0, 0]", "[0, 0, 1]"],
                    ["[0, 1, 0]", "[0, 1, 1]"],
                ],
                "b": "[0]",
            },
            {
                "a": ["[1, 0]", "[1, 1]"],
            },
        ],
    });

    let root = document.as_object_mut().unwrap();
    map_leaf_values(root, ["no_array", "simple", "nested"], |_key, indices, leaf| {
        // The key is irrelevant here: only the index path is recorded.
        *leaf = Value::String(format!("{:?}", indices));
    });

    assert_eq!(document, expected);
}
}
Loading