Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Merge #583
Browse files Browse the repository at this point in the history
583: Use BufReader to read datasets in benchmarks r=ManyTheFish a=loiclec

## What does this PR do?
Ensure that the datasets used by the benchmarks are read efficiently by using a `BufReader`.

## Why?
Using a `BufReader` is more representative of how `meilisearch` works. It also makes performance comparisons between different branches of `milli` more accurate.




Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
  • Loading branch information
bors[bot] and Loïc Lecrenier authored Jul 7, 2022
2 parents ebddfdb + aae0335 commit ce90fc6
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions benchmarks/benches/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,10 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
}
}

pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl Read + Seek> {
pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl BufRead + Seek> {
let reader =
File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
let reader = BufReader::new(reader);
let documents = match filetype {
"csv" => documents_from_csv(reader).unwrap(),
"json" => documents_from_json(reader).unwrap(),
Expand All @@ -152,12 +153,11 @@ pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<imp
DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap()
}

fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
fn documents_from_jsonl(mut reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new());
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;

let mut buf = String::new();
let mut reader = BufReader::new(reader);

while reader.read_line(&mut buf)? > 0 {
documents.extend_from_json(&mut buf.as_bytes())?;
Expand All @@ -168,7 +168,7 @@ fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
Ok(writer.into_inner())
}

fn documents_from_json(reader: impl Read) -> anyhow::Result<Vec<u8>> {
fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new());
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;

Expand All @@ -178,7 +178,7 @@ fn documents_from_json(reader: impl Read) -> anyhow::Result<Vec<u8>> {
Ok(writer.into_inner())
}

fn documents_from_csv(reader: impl Read) -> anyhow::Result<Vec<u8>> {
fn documents_from_csv(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new());
milli::documents::DocumentBatchBuilder::from_csv(reader, &mut writer)?.finish()?;

Expand Down

0 comments on commit ce90fc6

Please sign in to comment.