Skip to content

Commit

Permalink
Merge pull request #39 from datdenkikniet/more-buffers
Browse files Browse the repository at this point in the history
Adds miscellaneous optimizations to the Rust version.
  • Loading branch information
bertie2 authored Aug 8, 2023
2 parents 153ca9e + a37c83c commit 7e05d7a
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 128 deletions.
121 changes: 60 additions & 61 deletions rust/src/cli/cli.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use std::{
collections::{BTreeMap, HashSet},
path::PathBuf,
time::{Duration, Instant},
time::Duration,
};

use clap::{Args, Parser, Subcommand, ValueEnum};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use opencubes::{naive_polycube::NaivePolyCube, pcube::PCubeFile};
use rayon::prelude::{IntoParallelIterator, ParallelIterator};

mod enumerate;
use enumerate::enumerate;
use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};

fn finish_bar(bar: &ProgressBar, duration: Duration, expansions: usize, n: usize) {
let time = duration.as_micros();
Expand All @@ -37,7 +37,17 @@ fn finish_bar(bar: &ProgressBar, duration: Duration, expansions: usize, n: usize
}

fn unknown_bar() -> ProgressBar {
let style = ProgressStyle::with_template("[{elapsed_precise}] [{spinner:10.cyan/blue}] {msg}")
unknown_bar_with_pos(false)
}

fn unknown_bar_with_pos(with_pos: bool) -> ProgressBar {
let template = if with_pos {
"[{elapsed_precise}] [{spinner:10.cyan/blue}] {pos} {msg}"
} else {
"[{elapsed_precise}] [{spinner:10.cyan/blue}] {msg}"
};

let style = ProgressStyle::with_template(template)
.unwrap()
.tick_strings(&[
">---------",
Expand Down Expand Up @@ -204,55 +214,55 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> {
let in_memory = !opts.no_in_memory;
let n = opts.n;

println!("Validating {}", path);
let file = PCubeFile::new_file(path)?;
let canonical = file.canonical();
let len = file.len();

let bar = if let Some(len) = len {
make_bar(len as u64)
} else {
unknown_bar_with_pos(true)
};

bar.set_message("cubes validated");

bar.println(format!("Validating {}", path));

let mut uniqueness = match (in_memory, uniqueness) {
(true, true) => {
eprintln!("Verifying uniqueness.");
bar.println("Verifying uniqueness.");
Some(HashSet::new())
}
(false, true) => {
bar.abandon();
println!("Cannot verify uniqueness without placing all entries in memory. Re-run with `--no-uniqueness` enabled to run.");
std::process::exit(1);
}
(_, false) => {
eprintln!("Not verifying uniqueness");
bar.println("Not verifying uniqueness");
None
}
};

let file = PCubeFile::new_file(path)?;
let canonical = file.canonical();
let len = file.len();

let bar = if let Some(len) = len {
make_bar(len as u64)
} else {
unknown_bar()
};

let exit = |msg: &str| {
bar.abandon();
println!("{msg}");
std::process::exit(1);
};

match (canonical, validate_canonical) {
(true, true) => eprintln!("Verifying entry canonicality. File indicates that entries are canonical."),
(false, true) => eprintln!("Not verifying entry canonicality. File header does not indicate that entries are canonical"),
(true, false) => eprintln!("Not verifying entry canonicality. File header indicates that they are, but check is disabled."),
(false, false) => eprintln!("Not verifying canonicality. File header does not indicate that entries are canonical, and check is disabled.")
(true, true) => bar.println("Verifying entry canonicality. File indicates that entries are canonical."),
(false, true) => bar.println("Not verifying entry canonicality. File header does not indicate that entries are canonical"),
(true, false) => bar.println("Not verifying entry canonicality. File header indicates that they are, but check is disabled."),
(false, false) => bar.println("Not verifying canonicality. File header does not indicate that entries are canonical, and check is disabled.")
}

if let Some(n) = n {
eprintln!("Verifying that all entries are N = {n}");
bar.println(format!("Verifying that all entries are N = {n}"));
}

let mut total_read = 0;

let mut last_tick = Instant::now();
bar.tick();

for cube in file {
let cube = match cube {
Ok(c) => NaivePolyCube::from(c),
Expand All @@ -264,14 +274,7 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> {

total_read += 1;

if len.is_some() {
bar.inc(1);
} else if last_tick.elapsed() >= Duration::from_millis(66) {
last_tick = Instant::now();
bar.set_message(format!("{total_read}"));
bar.inc(1);
bar.tick();
}
bar.inc(1);

let mut form: Option<NaivePolyCube> = None;
let canonical_form = || cube.pcube_canonical_form();
Expand All @@ -297,10 +300,10 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> {
exit("Found non-unique polycubes.");
}
}

bar.finish();
}

bar.finish();

println!("Success: {path}, containing {total_read} cubes, is valid");

Ok(())
Expand All @@ -318,7 +321,7 @@ pub fn convert(opts: &ConvertArgs) {
// that the longest files are yielded last.
let files: BTreeMap<_, _> = opts
.path
.iter()
.par_iter()
.map(|path| {
let input_file = match PCubeFile::new_file(&path) {
Ok(f) => f,
Expand All @@ -327,43 +330,51 @@ pub fn convert(opts: &ConvertArgs) {
std::process::exit(1);
}
};

(input_file.len(), (input_file, path.to_string()))
})
.collect();

// Iterate over the files and do some printing, in-order
let files: Vec<_> = files
.into_iter()
.map(|(_, (input_file, path))| {
.map(|(len, (input_file, path))| {
let output_path = opts.output_path.clone().unwrap_or(path.clone());

println!("Converting file {}", path);
println!("Final output path: {output_path}");
multi_bar
.println(format!("Converting file {}", path))
.unwrap();
multi_bar
.println(format!("Final output path: {output_path}"))
.unwrap();

if opts.canonicalize {
println!("Canonicalizing output");
multi_bar.println("Canonicalizing output").unwrap();
}
println!("Input compression: {:?}", input_file.compression());
println!("Output compression: {:?}", opts.compression);

let len = input_file.len();
multi_bar
.println(format!("Input compression: {:?}", input_file.compression()))
.unwrap();
multi_bar
.println(format!("Output compression: {:?}", opts.compression))
.unwrap();

let bar = if let Some(len) = len {
make_bar(len as u64)
} else {
unknown_bar()
unknown_bar_with_pos(true)
};

let bar = multi_bar.add(bar);

(input_file, path, output_path, len, bar)
(input_file, path, output_path, bar)
})
.collect();

// Convert, in parallel
files
.into_par_iter()
.for_each(|(input_file, path, output_path, len, bar)| {
bar.set_message(path.to_string());
.for_each(|(input_file, path, output_path, bar)| {
bar.set_message(format!("cubes converted for {path}"));

let canonical = input_file.canonical();
let mut output_path_temp = PathBuf::from(&output_path);
Expand All @@ -373,12 +384,7 @@ pub fn convert(opts: &ConvertArgs) {
output_path_temp.pop();
output_path_temp.push(filename);

let mut total_read = 0;
let mut last_tick = Instant::now();

let input = input_file.filter_map(|v| {
total_read += 1;

let cube = match v {
Ok(v) => Some(v),
Err(e) => {
Expand All @@ -388,14 +394,7 @@ pub fn convert(opts: &ConvertArgs) {
}
}?;

if len.is_some() {
bar.inc(1);
} else if last_tick.elapsed() >= Duration::from_millis(66) {
last_tick = Instant::now();
bar.set_message(format!("{total_read}"));
bar.inc(1);
bar.tick();
}
bar.inc(1);

if opts.canonicalize {
Some(NaivePolyCube::from(cube).canonical_form().into())
Expand All @@ -421,7 +420,7 @@ pub fn convert(opts: &ConvertArgs) {

if !bar.is_finished() {
match std::fs::rename(output_path_temp, output_path) {
Ok(_) => bar.finish_with_message(format!("{path} Done!")),
Ok(_) => bar.finish(),
Err(e) => {
bar.abandon_with_message(format!("{path} Failed to write final file: {e}"));
return;
Expand Down
10 changes: 6 additions & 4 deletions rust/src/pcube/compression.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::io::{BufReader, Read, Write};
use std::io::{BufReader, BufWriter, Read, Write};

use flate2::{read::GzDecoder, write::GzEncoder};

const BUF_SIZE: usize = 1024 * 16384;

/// Compression types supported for `.pcube` files.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compression {
Expand Down Expand Up @@ -45,7 +47,7 @@ where
{
pub fn new(compression: Compression, reader: T) -> Self {
match compression {
Compression::None => Self::Uncompressed(BufReader::new(reader)),
Compression::None => Self::Uncompressed(BufReader::with_capacity(BUF_SIZE, reader)),
Compression::Gzip => Self::Gzip(GzDecoder::new(reader)),
}
}
Expand Down Expand Up @@ -74,7 +76,7 @@ pub enum Writer<T>
where
T: Write,
{
Uncompressed(T),
Uncompressed(BufWriter<T>),
Gzip(GzEncoder<T>),
}

Expand All @@ -84,7 +86,7 @@ where
{
pub fn new(compression: Compression, writer: T) -> Self {
match compression {
Compression::None => Self::Uncompressed(writer),
Compression::None => Self::Uncompressed(BufWriter::with_capacity(BUF_SIZE, writer)),
Compression::Gzip => Self::Gzip(GzEncoder::new(writer, flate2::Compression::default())),
}
}
Expand Down
Loading

0 comments on commit 7e05d7a

Please sign in to comment.