Skip to content

Commit

Permalink
Merge pull request #34 from sharkLoc/master
Browse files Browse the repository at this point in the history
add gzip/xz/bzip2 support for input
  • Loading branch information
wdecoster authored May 21, 2024
2 parents 98e0dc9 + 92e1cb8 commit 151fe32
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 42 deletions.
106 changes: 106 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ rayon = "1.7.0"
approx = "0.5.1"
minimap2 = "0.1.17+minimap2.2.27"
flate2 = { version = "1.0.17", features = ["zlib-ng"], default-features = false }
xz2 = "0.1.7"
bzip2 = "0.4.4"
atty = "0.2.14"
62 changes: 20 additions & 42 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ use bio::io::fastq;
use clap::Parser;
use minimap2::*;
use rayon::prelude::*;
use std::io::{self, Read, BufReader};
use std::path::{PathBuf, Path};
use std::error::Error;
use std::io::Read;
use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::fs::File;
use flate2::read::GzDecoder;

mod utils;
use utils::file_reader;

// The arguments end up in the Cli struct
#[derive(Parser, Debug)]
Expand Down Expand Up @@ -64,7 +66,6 @@ struct Cli {
mingc: f64,
}


fn is_file(pathname: &str) -> Result<(), String> {
let path = PathBuf::from(pathname);
if path.is_file() {
Expand All @@ -74,43 +75,17 @@ fn is_file(pathname: &str) -> Result<(), String> {
}
}


fn main() {
fn main() -> Result<(), Box<dyn Error>> {
let args = Cli::parse();
rayon::ThreadPoolBuilder::new()
.num_threads(args.threads)
.build_global()
.expect("Error: Unable to build threadpool");

match args.input {
// Process file if --input exist
Some(ref infile) => {
let path = Path::new(infile);
// make sure the input file is valid, if not shut down gracefully with an error message
if !path.exists() {
eprintln!("ERROR: Input file {} does not exist", infile);
std::process::exit(1);
}
if path.extension().and_then(|s| s.to_str()) == Some("gz") {
// deal with gz compressed file
let gzfile = File::open(&path).expect("Error: Unable to open gzipped file");
let buf_reader = BufReader::with_capacity(512*1024, gzfile);
let mut decoder = GzDecoder::new(buf_reader);
filter(&mut decoder, args);

}
else {
// deal with uncompressed fastq file
let mut input_file = File::open(infile).expect("Error: Unable to open fastq file");
filter(&mut input_file, args);
}
}

None => {
filter(&mut io::stdin(), args);
}

}
let mut reader = file_reader(args.input.as_ref())?;
filter(&mut reader, args);

Ok(())
}

/// This function filters fastq records from the supplied reader (a file or stdin) based on quality, maxlength and minlength
Expand All @@ -137,14 +112,14 @@ where
if !record.is_empty() {
let read_len = record.seq().len();
// If a read is shorter than what is to be cropped the read is dropped entirely (filtered out)

// Check whether a GC content filter is set; if none is set, use 0.5 so the read passes all the following filters
let read_gc = if args.mingc != 0.0 || args.maxgc != 1.0 {
cal_gc(record.seq())
} else {
0.5
};

if args.headcrop + args.tailcrop < read_len {
let average_quality = ave_qual(
&record.qual().iter().map(|i| i - 33).collect::<Vec<u8>>(),
Expand Down Expand Up @@ -196,7 +171,7 @@ where
} else {
0.5
};

if args.headcrop + args.tailcrop < read_len {
let average_quality = ave_qual(
&record.qual().iter().map(|i| i - 33).collect::<Vec<u8>>(),
Expand Down Expand Up @@ -283,7 +258,10 @@ fn is_contamination(readseq: &&[u8], contam: &Aligner) -> bool {
}

/// Calculates the GC fraction of a read sequence.
///
/// Counts the bases `G`, `g`, `C` and `c` in `readseq` and divides that count
/// by the total sequence length, yielding a value in the range `0.0..=1.0`.
///
/// An empty sequence returns `0.0`; without this guard the division would be
/// `0.0 / 0.0`, which is NaN and compares false against every threshold.
fn cal_gc(readseq: &[u8]) -> f64 {
    if readseq.is_empty() {
        return 0.0;
    }
    // Single pass over the bases; both upper- and lower-case G/C count.
    let gc_count = readseq
        .iter()
        .filter(|&&base| matches!(base, b'G' | b'g' | b'C' | b'c'))
        .count();
    (gc_count as f64) / (readseq.len() as f64)
}

Expand Down Expand Up @@ -325,7 +303,7 @@ fn test_filter() {
input: None,
mingc: 0.0,
maxgc: 1.0,
},
},
);
}

Expand Down Expand Up @@ -367,7 +345,7 @@ fn test_filter_with_contam() {
threads: 1,
contam: Some("test-data/random_contam.fa".to_owned()),
inverse: false,
input: None,
input: None,
mingc: 0.0,
maxgc: 1.0,
},
Expand Down
Loading

0 comments on commit 151fe32

Please sign in to comment.