From adbb0ff3186ccbd598f92b02b2c76235319c0b36 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 16 Jun 2022 10:17:58 +0200 Subject: [PATCH 1/2] Add deletion benchmarks --- benchmarks/Cargo.toml | 3 + benchmarks/benches/indexing.rs | 759 ++++++++++++++++++++++----------- 2 files changed, 513 insertions(+), 249 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 2c6c93bd8..c64a83c51 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -16,6 +16,9 @@ jemallocator = "0.3.2" [dev-dependencies] heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1" } criterion = { version = "0.3.5", features = ["html_reports"] } +rand = "0.8.5" +rand_chacha = "0.3.1" +roaring = "0.9.0" [build-dependencies] anyhow = "1.0.56" diff --git a/benchmarks/benches/indexing.rs b/benchmarks/benches/indexing.rs index 091c081b2..b773eca65 100644 --- a/benchmarks/benches/indexing.rs +++ b/benchmarks/benches/indexing.rs @@ -5,14 +5,21 @@ use std::fs::{create_dir_all, remove_dir_all}; use std::path::Path; use criterion::{criterion_group, criterion_main, Criterion}; -use heed::EnvOpenOptions; -use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; +use heed::{EnvOpenOptions, RwTxn}; +use milli::update::{ + DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings, +}; use milli::Index; +use rand::seq::SliceRandom; +use rand_chacha::rand_core::SeedableRng; +use roaring::RoaringBitmap; #[cfg(target_os = "linux")] #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +const BENCHMARK_ITERATION: usize = 10; + fn setup_dir(path: impl AsRef<Path>) { match remove_dir_all(path.as_ref()) { Ok(_) => (), @@ -31,39 +38,95 @@ fn setup_index() -> Index { Index::new(options, path).unwrap() } +fn setup_settings<'t>( + wtxn: &mut RwTxn<'t, '_>, + index: &'t Index, + primary_key: &str, + searchable_fields: &[&str], + filterable_fields: &[&str], + sortable_fields: &[&str], +) { + let config =
IndexerConfig::default(); + let mut builder = Settings::new(wtxn, index, &config); + + builder.set_primary_key(primary_key.to_owned()); + + let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect(); + builder.set_searchable_fields(searchable_fields); + + let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect(); + builder.set_filterable_fields(filterable_fields); + + let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); + builder.set_sortable_fields(sortable_fields); + + builder.execute(|_| ()).unwrap(); +} + +fn setup_index_with_settings<'t>( + primary_key: &str, + searchable_fields: &[&str], + filterable_fields: &[&str], + sortable_fields: &[&str], +) -> milli::Index { + let index = setup_index(); + let mut wtxn = index.write_txn().unwrap(); + setup_settings( + &mut wtxn, + &index, + primary_key, + searchable_fields, + filterable_fields, + sortable_fields, + ); + wtxn.commit().unwrap(); + + index +} + +fn choose_document_ids_from_index_batched( + index: &Index, + count: usize, + batch_size: usize, +) -> Vec<RoaringBitmap> { + let rtxn = index.read_txn().unwrap(); + // create batch of document ids to delete + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(7700); + let document_ids: Vec<_> = index.documents_ids(&rtxn).unwrap().into_iter().collect(); + let document_ids_to_delete: Vec<_> = + document_ids.choose_multiple(&mut rng, count).map(Clone::clone).collect(); + + document_ids_to_delete + .chunks(batch_size) + .map(|c| { + let mut batch = RoaringBitmap::new(); + for id in c { + batch.insert(*id); + } + + batch + }) + .collect() +} + fn indexing_songs_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); + group.sample_size(BENCHMARK_ITERATION); group.bench_function("Indexing songs with default settings", |b| { b.iter_with_setup( move || { - let index = setup_index(); - - let config = IndexerConfig::default(); - let mut wtxn = index.write_txn().unwrap();
- let mut builder = Settings::new(&mut wtxn, &index, &config); - - builder.set_primary_key("id".to_owned()); - let displayed_fields = - ["title", "album", "artist", "genre", "country", "released", "duration"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_displayed_fields(displayed_fields); - - let searchable_fields = - ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); - - let faceted_fields = - ["released-timestamp", "duration-float", "genre", "country", "artist"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_filterable_fields(faceted_fields); - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - index + let primary_key = "id"; + let searchable_fields = ["title", "album", "artist"]; + let filterable_fields = + ["released-timestamp", "duration-float", "genre", "country", "artist"]; + let sortable_fields = []; + + setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ) }, move |index| { let config = IndexerConfig::default(); @@ -84,41 +147,85 @@ fn indexing_songs_default(c: &mut Criterion) { }); } -fn indexing_songs_in_three_batches_default(c: &mut Criterion) { +fn deleting_songs_in_batches_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); - group.bench_function("Indexing songs in three batches with default settings", |b| { + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Deleting songs in batches with default settings", |b| { b.iter_with_setup( move || { - let index = setup_index(); + let primary_key = "id"; + let searchable_fields = ["title", "album", "artist"]; + let filterable_fields = + ["released-timestamp", "duration-float", "genre", "country", "artist"]; + let sortable_fields = []; + let index = setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ); + + // We index 
the dataset in the setup part + // as we don't care about the time it takes. let config = IndexerConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); - - builder.set_primary_key("id".to_owned()); - let displayed_fields = - ["title", "album", "artist", "genre", "country", "released", "duration"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_displayed_fields(displayed_fields); - - let searchable_fields = - ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); - - let faceted_fields = - ["released-timestamp", "duration-float", "genre", "country", "artist"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_filterable_fields(faceted_fields); - builder.execute(|_| ()).unwrap(); + let indexing_config = IndexDocumentsConfig::default(); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) + .unwrap(); + let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); + builder.add_documents(documents).unwrap(); + builder.execute().unwrap(); + wtxn.commit().unwrap(); + + let count = 1250; + let batch_size = 250; + let document_ids_to_delete = + choose_document_ids_from_index_batched(&index, count, batch_size); + + (index, document_ids_to_delete) + }, + move |(index, document_ids_to_delete)| { + let mut wtxn = index.write_txn().unwrap(); + + for ids in document_ids_to_delete { + let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); + builder.delete_documents(&ids); + builder.execute().unwrap(); + } + + wtxn.commit().unwrap(); + + index.prepare_for_closing().wait(); + }, + ) + }); +} + +fn indexing_songs_in_three_batches_default(c: &mut Criterion) { + let mut group = c.benchmark_group("indexing"); + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Indexing songs in three batches with default settings", |b| { +
b.iter_with_setup( + move || { + let primary_key = "id"; + let searchable_fields = ["title", "album", "artist"]; + let filterable_fields = + ["released-timestamp", "duration-float", "genre", "country", "artist"]; + let sortable_fields = []; + + let index = setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ); // We index only one half of the dataset in the setup part // as we don't care about the time it takes. let config = IndexerConfig::default(); + let mut wtxn = index.write_txn().unwrap(); let indexing_config = IndexDocumentsConfig::default(); let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) @@ -160,34 +267,21 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); + group.sample_size(BENCHMARK_ITERATION); group.bench_function("Indexing songs without faceted numbers", |b| { b.iter_with_setup( move || { - let index = setup_index(); - - let config = IndexerConfig::default(); - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); - - builder.set_primary_key("id".to_owned()); - let displayed_fields = - ["title", "album", "artist", "genre", "country", "released", "duration"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_displayed_fields(displayed_fields); - - let searchable_fields = - ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); - - let faceted_fields = - ["genre", "country", "artist"].iter().map(|s| s.to_string()).collect(); - builder.set_filterable_fields(faceted_fields); - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - index + let primary_key = "id"; + let searchable_fields = ["title", "album", "artist"]; + let filterable_fields = ["genre", "country", "artist"]; 
+ let sortable_fields = []; + + setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ) }, move |index| { let config = IndexerConfig::default(); @@ -211,30 +305,21 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { fn indexing_songs_without_faceted_fields(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); + group.sample_size(BENCHMARK_ITERATION); group.bench_function("Indexing songs without any facets", |b| { b.iter_with_setup( move || { - let index = setup_index(); - - let config = IndexerConfig::default(); - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); - - builder.set_primary_key("id".to_owned()); - let displayed_fields = - ["title", "album", "artist", "genre", "country", "released", "duration"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_displayed_fields(displayed_fields); - - let searchable_fields = - ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - index + let primary_key = "id"; + let searchable_fields = ["title", "album", "artist"]; + let filterable_fields = []; + let sortable_fields = []; + + setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ) }, move |index| { let config = IndexerConfig::default(); @@ -257,29 +342,21 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { fn indexing_wiki(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); + group.sample_size(BENCHMARK_ITERATION); group.bench_function("Indexing wiki", |b| { b.iter_with_setup( move || { - let index = setup_index(); - - let config = IndexerConfig::default(); - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 
&config); - - builder.set_primary_key("id".to_owned()); - let displayed_fields = - ["title", "body", "url"].iter().map(|s| s.to_string()).collect(); - builder.set_displayed_fields(displayed_fields); - - let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); - - // there is NO faceted fields at all - - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - index + let primary_key = "id"; + let searchable_fields = ["title", "body"]; + let filterable_fields = []; + let sortable_fields = []; + + setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ) }, move |index| { let config = IndexerConfig::default(); @@ -301,28 +378,81 @@ fn indexing_wiki(c: &mut Criterion) { }); } -fn indexing_wiki_in_three_batches(c: &mut Criterion) { +fn deleting_wiki_in_batches_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); - group.bench_function("Indexing wiki in three batches", |b| { + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Deleting wiki in batches with default settings", |b| { b.iter_with_setup( move || { - let index = setup_index(); + let primary_key = "id"; + let searchable_fields = ["title", "body"]; + let filterable_fields = []; + let sortable_fields = []; + + let index = setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ); + // We index the dataset in the setup part + // as we don't care about the time it takes.
let config = IndexerConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); + let indexing_config = + IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) + .unwrap(); + let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); + builder.add_documents(documents).unwrap(); + builder.execute().unwrap(); + wtxn.commit().unwrap(); + + let count = 1250; + let batch_size = 250; + let document_ids_to_delete = + choose_document_ids_from_index_batched(&index, count, batch_size); + + (index, document_ids_to_delete) + }, + move |(index, document_ids_to_delete)| { + let mut wtxn = index.write_txn().unwrap(); - builder.set_primary_key("id".to_owned()); - let displayed_fields = - ["title", "body", "url"].iter().map(|s| s.to_string()).collect(); - builder.set_displayed_fields(displayed_fields); + for ids in document_ids_to_delete { + let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); + builder.delete_documents(&ids); + builder.execute().unwrap(); + } - let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); + wtxn.commit().unwrap(); - // there is NO faceted fields at all - builder.execute(|_| ()).unwrap(); + index.prepare_for_closing().wait(); + }, + ) + }); +} + +fn indexing_wiki_in_three_batches(c: &mut Criterion) { + let mut group = c.benchmark_group("indexing"); + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Indexing wiki in three batches", |b| { + b.iter_with_setup( + move || { + let primary_key = "id"; + let searchable_fields = ["title", "body"]; + let filterable_fields = []; + let sortable_fields = []; + + let index = setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ); + + let mut wtxn = 
index.write_txn().unwrap(); // We index only one half of the dataset in the setup part // as we don't care about the time it takes. @@ -376,34 +506,21 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { fn indexing_movies_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); + group.sample_size(BENCHMARK_ITERATION); group.bench_function("Indexing movies with default settings", |b| { b.iter_with_setup( move || { - let index = setup_index(); - - let config = IndexerConfig::default(); - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); - - builder.set_primary_key("id".to_owned()); - let displayed_fields = ["title", "poster", "overview", "release_date", "genres"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_displayed_fields(displayed_fields); - - let searchable_fields = - ["title", "overview"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); - - let faceted_fields = - ["released_date", "genres"].iter().map(|s| s.to_string()).collect(); - builder.set_filterable_fields(faceted_fields); - - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - index + let primary_key = "id"; + let searchable_fields = ["title", "overview"]; + let filterable_fields = ["released_date", "genres"]; + let sortable_fields = []; + + setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ) }, move |index| { let config = IndexerConfig::default(); @@ -424,35 +541,80 @@ fn indexing_movies_default(c: &mut Criterion) { }); } -fn indexing_movies_in_three_batches(c: &mut Criterion) { +fn deleting_movies_in_batches_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); - group.bench_function("Indexing movies in three batches", |b| { + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Deleting movies in batches with 
default settings", |b| { b.iter_with_setup( move || { - let index = setup_index(); + let primary_key = "id"; + let searchable_fields = ["title", "overview"]; + let filterable_fields = ["released_date", "genres"]; + let sortable_fields = []; + + let index = setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ); + // We index the dataset in the setup part + // as we don't care about the time it takes. let config = IndexerConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); + let indexing_config = IndexDocumentsConfig::default(); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) + .unwrap(); + let documents = utils::documents_from(datasets_paths::MOVIES, "json"); + builder.add_documents(documents).unwrap(); + builder.execute().unwrap(); + wtxn.commit().unwrap(); + + let count = 1250; + let batch_size = 250; + let document_ids_to_delete = + choose_document_ids_from_index_batched(&index, count, batch_size); + + (index, document_ids_to_delete) + }, + move |(index, document_ids_to_delete)| { + let mut wtxn = index.write_txn().unwrap(); - builder.set_primary_key("id".to_owned()); - let displayed_fields = ["title", "poster", "overview", "release_date", "genres"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_displayed_fields(displayed_fields); + for ids in document_ids_to_delete { + let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); + builder.delete_documents(&ids); + builder.execute().unwrap(); + } - let searchable_fields = - ["title", "overview"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); + wtxn.commit().unwrap(); - let faceted_fields = - ["released_date", "genres"].iter().map(|s| s.to_string()).collect(); - builder.set_filterable_fields(faceted_fields); + index.prepare_for_closing().wait(); + }, + ) +
}); +} - builder.execute(|_| ()).unwrap(); +fn indexing_movies_in_three_batches(c: &mut Criterion) { + let mut group = c.benchmark_group("indexing"); + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Indexing movies in three batches", |b| { + b.iter_with_setup( + move || { + let primary_key = "id"; + let searchable_fields = ["title", "overview"]; + let filterable_fields = ["released_date", "genres"]; + let sortable_fields = []; + + let index = setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ); + let mut wtxn = index.write_txn().unwrap(); // We index only one half of the dataset in the setup part // as we don't care about the time it takes. let config = IndexerConfig::default(); @@ -500,17 +662,11 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { fn indexing_nested_movies_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); + group.sample_size(BENCHMARK_ITERATION); group.bench_function("Indexing nested movies with default settings", |b| { b.iter_with_setup( move || { - let index = setup_index(); - - let config = IndexerConfig::default(); - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); - - builder.set_primary_key("id".to_owned()); + let primary_key = "id"; let searchable_fields = [ "title", "overview", @@ -519,12 +675,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) { "crew.name", "cast.character", "cast.name", - ] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_searchable_fields(searchable_fields); - + ]; let filterable_fields = [ "popularity", "release_date", @@ -540,21 +691,15 @@ fn indexing_nested_movies_default(c: &mut Criterion) { "crew.name", "cast.character", "cast.name", - ] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_filterable_fields(filterable_fields); - - let sortable_fields = ["popularity", "runtime", "vote_average",
"release_date"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_sortable_fields(sortable_fields); - - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - index + ]; + let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"]; + + setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ) }, move |index| { let config = IndexerConfig::default(); @@ -575,19 +720,91 @@ fn indexing_nested_movies_default(c: &mut Criterion) { }); } -fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { +fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); - group.bench_function("Indexing nested movies without any facets", |b| { + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Deleting nested movies in batches with default settings", |b| { b.iter_with_setup( move || { - let index = setup_index(); + let primary_key = "id"; + let searchable_fields = [ + "title", + "overview", + "provider_names", + "genres", + "crew.name", + "cast.character", + "cast.name", + ]; + let filterable_fields = [ + "popularity", + "release_date", + "runtime", + "vote_average", + "external_ids", + "keywords", + "providers.buy.name", + "providers.rent.name", + "providers.flatrate.name", + "provider_names", + "genres", + "crew.name", + "cast.character", + "cast.name", + ]; + let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"]; + let index = setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ); + + // We index the dataset in the setup part + // as we don't care about the time it takes.
let config = IndexerConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); + let indexing_config = IndexDocumentsConfig::default(); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) + .unwrap(); + let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); + builder.add_documents(documents).unwrap(); + builder.execute().unwrap(); + wtxn.commit().unwrap(); + + let count = 1250; + let batch_size = 250; + let document_ids_to_delete = + choose_document_ids_from_index_batched(&index, count, batch_size); + + (index, document_ids_to_delete) + }, + move |(index, document_ids_to_delete)| { + let mut wtxn = index.write_txn().unwrap(); + + for ids in document_ids_to_delete { + let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); + builder.delete_documents(&ids); + builder.execute().unwrap(); + } + + wtxn.commit().unwrap(); + + index.prepare_for_closing().wait(); + }, + ) + }); +} - builder.set_primary_key("id".to_owned()); +fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { + let mut group = c.benchmark_group("indexing"); + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Indexing nested movies without any facets", |b| { + b.iter_with_setup( + move || { + let primary_key = "id"; let searchable_fields = [ "title", "overview", @@ -596,14 +813,16 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { "crew.name", "cast.character", "cast.name", - ] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_searchable_fields(searchable_fields); - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - index + ]; + let filterable_fields = []; + let sortable_fields = []; + + setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ) }, move |index| { let config = IndexerConfig::default(); @@ -626,51 +845,88 @@ fn 
indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { fn indexing_geo(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); - group.sample_size(10); + group.sample_size(BENCHMARK_ITERATION); group.bench_function("Indexing geo_point", |b| { b.iter_with_setup( move || { - let index = setup_index(); - + let primary_key = "geonameid"; + let searchable_fields = ["name", "alternatenames", "elevation"]; + let filterable_fields = ["_geo", "population", "elevation"]; + let sortable_fields = ["_geo", "population", "elevation"]; + + setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ) + }, + move |index| { let config = IndexerConfig::default(); + let indexing_config = IndexDocumentsConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, &config); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) + .unwrap(); - builder.set_primary_key("geonameid".to_owned()); - let displayed_fields = - ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_displayed_fields(displayed_fields); + let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); + builder.add_documents(documents).unwrap(); + builder.execute().unwrap(); - let searchable_fields = - ["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect(); - builder.set_searchable_fields(searchable_fields); + wtxn.commit().unwrap(); - let filterable_fields = - ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect(); - builder.set_filterable_fields(filterable_fields); + index.prepare_for_closing().wait(); + }, + ) + }); +} - let sortable_fields = - ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect(); - builder.set_sortable_fields(sortable_fields); +fn 
deleting_geo_in_batches_default(c: &mut Criterion) { + let mut group = c.benchmark_group("indexing"); + group.sample_size(BENCHMARK_ITERATION); + group.bench_function("Deleting geo_point in batches with default settings", |b| { + b.iter_with_setup( + move || { + let primary_key = "geonameid"; + let searchable_fields = ["name", "alternatenames", "elevation"]; + let filterable_fields = ["_geo", "population", "elevation"]; + let sortable_fields = ["_geo", "population", "elevation"]; + + let index = setup_index_with_settings( + &primary_key, + &searchable_fields, + &filterable_fields, + &sortable_fields, + ); - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - index - }, - move |index| { + // We index the dataset in the setup part + // as we don't care about the time it takes. let config = IndexerConfig::default(); - let indexing_config = IndexDocumentsConfig::default(); let mut wtxn = index.write_txn().unwrap(); + let indexing_config = IndexDocumentsConfig::default(); let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); - - let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); + let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); builder.add_documents(documents).unwrap(); builder.execute().unwrap(); + wtxn.commit().unwrap(); + + let count = 1250; + let batch_size = 250; + let document_ids_to_delete = + choose_document_ids_from_index_batched(&index, count, batch_size); + + (index, document_ids_to_delete) + }, + move |(index, document_ids_to_delete)| { + let mut wtxn = index.write_txn().unwrap(); + + for ids in document_ids_to_delete { + let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); + builder.delete_documents(&ids); + builder.execute().unwrap(); + } wtxn.commit().unwrap(); @@ -683,15 +939,20 @@ fn indexing_geo(c: &mut Criterion) { criterion_group!( benches, indexing_songs_default, +
deleting_songs_in_batches_default, indexing_songs_without_faceted_numbers, indexing_songs_without_faceted_fields, indexing_songs_in_three_batches_default, indexing_wiki, + deleting_wiki_in_batches_default, indexing_wiki_in_three_batches, indexing_movies_default, + deleting_movies_in_batches_default, indexing_movies_in_three_batches, indexing_nested_movies_default, + deleting_nested_movies_in_batches_default, indexing_nested_movies_without_faceted_fields, - indexing_geo + indexing_geo, + deleting_geo_in_batches_default ); criterion_main!(benches); From 2652310f2abd9ac850ece4869a30ffe7ad757fd5 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 16 Jun 2022 10:32:58 +0200 Subject: [PATCH 2/2] Change delete benchmark names --- benchmarks/benches/indexing.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmarks/benches/indexing.rs b/benchmarks/benches/indexing.rs index b773eca65..9af7f6429 100644 --- a/benchmarks/benches/indexing.rs +++ b/benchmarks/benches/indexing.rs @@ -150,7 +150,7 @@ fn indexing_songs_default(c: &mut Criterion) { fn deleting_songs_in_batches_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); group.sample_size(BENCHMARK_ITERATION); - group.bench_function("Deleting songs in batches with default settings", |b| { + group.bench_function("-songs-delete-facetedString-facetedNumber-searchable-", |b| { b.iter_with_setup( move || { let primary_key = "id"; @@ -381,7 +381,7 @@ fn indexing_wiki(c: &mut Criterion) { fn deleting_wiki_in_batches_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); group.sample_size(BENCHMARK_ITERATION); - group.bench_function("Deleting wiki in batches with default settings", |b| { + group.bench_function("-wiki-delete-searchable-", |b| { b.iter_with_setup( move || { let primary_key = "id"; @@ -544,7 +544,7 @@ fn indexing_movies_default(c: &mut Criterion) { fn deleting_movies_in_batches_default(c: &mut Criterion) { let mut group = 
c.benchmark_group("indexing"); group.sample_size(BENCHMARK_ITERATION); - group.bench_function("Deleting movies in batches with default settings", |b| { + group.bench_function("-movies-delete-facetedString-facetedNumber-searchable-", |b| { b.iter_with_setup( move || { let primary_key = "id"; @@ -723,7 +723,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) { fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); group.sample_size(BENCHMARK_ITERATION); - group.bench_function("Deleting nested movies in batches with default settings", |b| { + group.bench_function("-movies-delete-facetedString-facetedNumber-searchable-nested-", |b| { b.iter_with_setup( move || { let primary_key = "id"; @@ -884,7 +884,7 @@ fn indexing_geo(c: &mut Criterion) { fn deleting_geo_in_batches_default(c: &mut Criterion) { let mut group = c.benchmark_group("indexing"); group.sample_size(BENCHMARK_ITERATION); - group.bench_function("Deleting geo_point in batches with default settings", |b| { + group.bench_function("-geo-delete-facetedNumber-facetedGeo-searchable-", |b| { b.iter_with_setup( move || { let primary_key = "geonameid";