Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Merge #309
Browse files Browse the repository at this point in the history
309: Sort at query time r=Kerollmops a=Kerollmops

This PR:
 - Makes the `Asc/Desc` criteria work with strings too, it first returns documents ordered by numbers then by strings, and finally the documents that can't be ordered. Note that it is lexicographically ordered and not ordered by character, which means that it doesn't know about wide and short characters i.e. `a`, `丹`, `▲`.
 - Changes the syntax for the `Asc/Desc` criterion by now using a colon to separate the name and the order i.e. `title:asc`, `price:desc`.
 - Add the `Sort` criterion at the third position in the ranking rules by default.
 - Add the `sort_criteria` method to the `Search` builder struct to let the users define the `Asc/Desc` sortable attributes they want to use at query time. Note that we need to check that the fields are registered in the sortable attributes before performing the search.
 - Introduce a new `InvalidSortableAttribute` user error that is raised when the sort criteria declared at query time are not part of the sortable attributes.
 - `@ManyTheFish` introduced integration tests for the dynamic Sort criterion.

Fixes #305.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: many <maxime@meilisearch.com>
  • Loading branch information
3 people authored Aug 18, 2021
2 parents 198c416 + d1df0d2 commit 41fc0dc
Show file tree
Hide file tree
Showing 17 changed files with 701 additions and 148 deletions.
8 changes: 4 additions & 4 deletions benchmarks/benches/search_songs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ fn bench_songs(c: &mut criterion::Criterion) {
milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
let default_criterion = default_criterion.iter().map(|s| s.as_str());
let asc_default: Vec<&str> =
std::iter::once("asc(released-timestamp)").chain(default_criterion.clone()).collect();
std::iter::once("released-timestamp:asc").chain(default_criterion.clone()).collect();
let desc_default: Vec<&str> =
std::iter::once("desc(released-timestamp)").chain(default_criterion.clone()).collect();
std::iter::once("released-timestamp:desc").chain(default_criterion.clone()).collect();

let basic_with_quote: Vec<String> = BASE_CONF
.queries
Expand Down Expand Up @@ -118,12 +118,12 @@ fn bench_songs(c: &mut criterion::Criterion) {
},
utils::Conf {
group_name: "asc",
criterion: Some(&["asc(released-timestamp)"]),
criterion: Some(&["released-timestamp:desc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc",
criterion: Some(&["desc(released-timestamp)"]),
criterion: Some(&["released-timestamp:desc"]),
..BASE_CONF
},

Expand Down
4 changes: 2 additions & 2 deletions http-ui/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1030,7 +1030,7 @@ mod tests {
displayed_attributes: Setting::Set(vec!["name".to_string()]),
searchable_attributes: Setting::Set(vec!["age".to_string()]),
filterable_attributes: Setting::Set(hashset! { "age".to_string() }),
criteria: Setting::Set(vec!["asc(age)".to_string()]),
criteria: Setting::Set(vec!["age:asc".to_string()]),
stop_words: Setting::Set(btreeset! { "and".to_string() }),
synonyms: Setting::Set(hashmap! { "alex".to_string() => vec!["alexey".to_string()] }),
};
Expand Down Expand Up @@ -1058,7 +1058,7 @@ mod tests {
Token::Str("criteria"),
Token::Some,
Token::Seq { len: Some(1) },
Token::Str("asc(age)"),
Token::Str("age:asc"),
Token::SeqEnd,
Token::Str("stopWords"),
Token::Some,
Expand Down
1 change: 0 additions & 1 deletion milli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ obkv = "0.2.0"
once_cell = "1.5.2"
ordered-float = "2.1.1"
rayon = "1.5.0"
regex = "1.4.3"
roaring = "0.6.6"
serde = { version = "1.0.123", features = ["derive"] }
serde_json = { version = "1.0.62", features = ["preserve_order"] }
Expand Down
69 changes: 43 additions & 26 deletions milli/src/criterion.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
use std::fmt;
use std::str::FromStr;

use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};

use crate::error::{Error, UserError};

static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Criterion {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words,
/// Sorted by increasing number of typos.
Typo,
/// Dynamically sort at query time the documents. None, one or multiple Asc/Desc sortable
/// attributes can be used in place of this criterion at query time.
Sort,
/// Sorted by increasing distance between matched query terms.
Proximity,
/// Documents with quey words contained in more important
/// attributes are considred better.
/// attributes are considered better.
Attribute,
/// Sorted by the similarity of the matched words with the query words.
Exactness,
Expand All @@ -43,29 +41,46 @@ impl Criterion {
impl FromStr for Criterion {
type Err = Error;

fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
match txt {
fn from_str(text: &str) -> Result<Criterion, Self::Err> {
match text {
"words" => Ok(Criterion::Words),
"typo" => Ok(Criterion::Typo),
"sort" => Ok(Criterion::Sort),
"proximity" => Ok(Criterion::Proximity),
"attribute" => Ok(Criterion::Attribute),
"exactness" => Ok(Criterion::Exactness),
text => {
let caps = ASC_DESC_REGEX
.captures(text)
.ok_or_else(|| UserError::InvalidCriterionName { name: text.to_string() })?;
let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str();
match order {
"asc" => Ok(Criterion::Asc(field_name.to_string())),
"desc" => Ok(Criterion::Desc(field_name.to_string())),
text => {
return Err(
UserError::InvalidCriterionName { name: text.to_string() }.into()
)
}
}
}
text => match AscDesc::from_str(text) {
Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
Err(error) => Err(error.into()),
},
}
}
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum AscDesc {
Asc(String),
Desc(String),
}

impl AscDesc {
pub fn field(&self) -> &str {
match self {
AscDesc::Asc(field) => field,
AscDesc::Desc(field) => field,
}
}
}

impl FromStr for AscDesc {
type Err = UserError;

fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
match text.rsplit_once(':') {
Some((field_name, "asc")) => Ok(AscDesc::Asc(field_name.to_string())),
Some((field_name, "desc")) => Ok(AscDesc::Desc(field_name.to_string())),
_ => Err(UserError::InvalidCriterionName { name: text.to_string() }),
}
}
}
Expand All @@ -74,6 +89,7 @@ pub fn default_criteria() -> Vec<Criterion> {
vec![
Criterion::Words,
Criterion::Typo,
Criterion::Sort,
Criterion::Proximity,
Criterion::Attribute,
Criterion::Exactness,
Expand All @@ -87,11 +103,12 @@ impl fmt::Display for Criterion {
match self {
Words => f.write_str("words"),
Typo => f.write_str("typo"),
Sort => f.write_str("sort"),
Proximity => f.write_str("proximity"),
Attribute => f.write_str("attribute"),
Exactness => f.write_str("exactness"),
Asc(attr) => write!(f, "asc({})", attr),
Desc(attr) => write!(f, "desc({})", attr),
Asc(attr) => write!(f, "{}:asc", attr),
Desc(attr) => write!(f, "{}:desc", attr),
}
}
}
10 changes: 10 additions & 0 deletions milli/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pub enum UserError {
InvalidFacetsDistribution { invalid_facets_name: HashSet<String> },
InvalidFilter(pest::error::Error<ParserRule>),
InvalidFilterAttribute(pest::error::Error<ParserRule>),
InvalidSortableAttribute { field: String, valid_fields: HashSet<String> },
InvalidStoreFile,
MaxDatabaseSizeReached,
MissingDocumentId { document: Object },
Expand Down Expand Up @@ -226,6 +227,15 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
)
}
Self::InvalidFilterAttribute(error) => error.fmt(f),
Self::InvalidSortableAttribute { field, valid_fields } => {
let valid_names =
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ");
write!(
f,
"Attribute {} is not sortable, available sortable attributes are: {}",
field, valid_names
)
}
Self::MissingDocumentId { document } => {
let json = serde_json::to_string(document).unwrap();
write!(f, "document doesn't have an identifier {}", json)
Expand Down
36 changes: 35 additions & 1 deletion milli/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub mod main_key {
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
Expand Down Expand Up @@ -446,13 +447,45 @@ impl Index {
Ok(fields_ids)
}

/* sortable fields */

/// Writes the sortable fields names in the database.
pub(crate) fn put_sortable_fields(
&self,
wtxn: &mut RwTxn,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields)
}

/// Deletes the sortable fields ids in the database.
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY)
}

/// Returns the sortable fields names.
pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self
.main
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)?
.unwrap_or_default())
}

/// Identical to `sortable_fields`, but returns ids instead.
pub fn sortable_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
let fields = self.sortable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
}

/* faceted documents ids */

/// Returns the faceted fields names.
///
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
/// Faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
pub fn faceted_fields(&self, rtxn: &RoTxn) -> Result<HashSet<String>> {
let filterable_fields = self.filterable_fields(rtxn)?;
let sortable_fields = self.sortable_fields(rtxn)?;
let distinct_field = self.distinct_field(rtxn)?;
let asc_desc_fields =
self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
Expand All @@ -461,6 +494,7 @@ impl Index {
});

let mut faceted_fields = filterable_fields;
faceted_fields.extend(sortable_fields);
faceted_fields.extend(asc_desc_fields);
if let Some(field) = distinct_field {
faceted_fields.insert(field.to_owned());
Expand Down
2 changes: 1 addition & 1 deletion milli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use std::result::Result as StdResult;
use fxhash::{FxHasher32, FxHasher64};
use serde_json::{Map, Value};

pub use self::criterion::{default_criteria, Criterion};
pub use self::criterion::{default_criteria, AscDesc, Criterion};
pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
Expand Down
Loading

0 comments on commit 41fc0dc

Please sign in to comment.