diff --git a/Cargo.toml b/Cargo.toml
index f35463d..ed07deb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,19 +17,20 @@ bench = false
 
 [features]
 default = ["compression"]
-compression = ["bzip2", "flate2", "xz2"]
+compression = ["bzip2", "flate2", "xz2", "zstd"]
 python = ["pyo3/extension-module"]
 python_test = ["pyo3"]
 xz2 = ["liblzma"]
 
 [dependencies]
-buffer-redux = { version = "1", default_features = false }
+buffer-redux = { version = "1", default-features = false }
 bytecount = { version = "0.6", features = ["runtime-dispatch-simd"] }
 bzip2 = { version = "0.4", optional = true }
 flate2 = { version = "1.0.30", optional = true }
 memchr = "2.7.2"
 pyo3 = { version = "0.21.2", optional = true }
 liblzma = { version = "0.3.1", optional = true }
+zstd = { version = "0.13.2", optional = true }
 
 [dev-dependencies]
 criterion = "0.5"
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 2394df3..9c70bf7 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -9,6 +9,8 @@ use bzip2::read::BzDecoder;
 use flate2::read::MultiGzDecoder;
 #[cfg(feature = "xz2")]
 use liblzma::read::XzDecoder;
+#[cfg(feature = "zstd")]
+use zstd::stream::read::Decoder as ZstdDecoder;
 
 use crate::errors::ParseError;
 pub use crate::parser::fasta::Reader as FastaReader;
@@ -29,6 +31,8 @@ const GZ_MAGIC: [u8; 2] = [0x1F, 0x8B];
 const BZ_MAGIC: [u8; 2] = [0x42, 0x5A];
 #[cfg(feature = "xz2")]
 const XZ_MAGIC: [u8; 2] = [0xFD, 0x37];
+#[cfg(feature = "zstd")]
+const ZST_MAGIC: [u8; 2] = [0x28, 0xB5];
 
 fn get_fastx_reader<'a, R: 'a + io::Read + Send>(
     reader: R,
@@ -43,7 +47,7 @@ fn get_fastx_reader<'a, R: 'a + io::Read + Send>(
 
 /// The main entry point of needletail if you're reading from something that implements [`std::io::Read`].
 /// This automatically detects whether the file is:
-/// 1. compressed: [`gzip`][gzip], [`bz`][bz] and [`xz`][xz] are supported and will use the appropriate decoder
+/// 1. compressed: [`gzip`][gzip], [`bz`][bz], [`xz`][xz], and [`zstd`][zstd] are supported and will use the appropriate decoder
 /// 2. FASTA or FASTQ: the right parser will be automatically instantiated
 ///
 /// Option 1 is only available if the `compression` feature is enabled.
@@ -76,6 +80,7 @@ fn get_fastx_reader<'a, R: 'a + io::Read + Send>(
 /// [gzip]: https://www.gnu.org/software/gzip/
 /// [bz]: https://sourceware.org/bzip2/
 /// [xz]: https://tukaani.org/xz/format.html
+/// [zstd]: https://facebook.github.io/zstd/
 ///
 pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>(
     mut reader: R,
@@ -112,6 +117,14 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>(
             let r = Cursor::new(first).chain(xz_reader);
             get_fastx_reader(r, first[0])
         }
+        #[cfg(feature = "zstd")]
+        ZST_MAGIC => {
+            let mut zst_reader = ZstdDecoder::new(new_reader)?;
+            let mut first = [0; 1];
+            zst_reader.read_exact(&mut first)?;
+            let r = Cursor::new(first).chain(zst_reader);
+            get_fastx_reader(r, first[0])
+        }
         _ => get_fastx_reader(new_reader, first_two_bytes[0]),
     }
 }
diff --git a/tests/data/test.fa.zst b/tests/data/test.fa.zst
new file mode 100644
index 0000000..3f58502
Binary files /dev/null and b/tests/data/test.fa.zst differ
diff --git a/tests/test_compressed.rs b/tests/test_compressed.rs
index c1141b2..0c6ec85 100644
--- a/tests/test_compressed.rs
+++ b/tests/test_compressed.rs
@@ -1,9 +1,10 @@
 use needletail::parse_fastx_file;
 
-const TEST_FILES: [&str; 3] = [
+const TEST_FILES: [&str; 4] = [
     "./tests/data/test.fa.gz",
     "./tests/data/test.fa.bz2",
     "./tests/data/test.fa.xz",
+    "./tests/data/test.fa.zst",
 ];
 
 #[cfg(feature = "compression")]
diff --git a/tests/test_stdin.rs b/tests/test_stdin.rs
index 9401f41..9319a4e 100644
--- a/tests/test_stdin.rs
+++ b/tests/test_stdin.rs
@@ -90,6 +90,33 @@ fn test_stdin_bzip() {
         .stdout(contains("There are 0 AAAAs in your file"));
 }
 
+#[cfg(feature = "compression")]
+#[test]
+fn test_stdin_zstd() {
+    // Generated with `echo ">id1\nAGTCGTCA" | zstd -c | xxd -i`
+    let input: &[u8] = &[
+        0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x58, 0x71, 0x00, 0x00, 0x3e, 0x69, 0x64, 0x31, 0x0a, 0x41,
+        0x47, 0x54, 0x43, 0x47, 0x54, 0x43, 0x41, 0x0a, 0x52, 0x9d, 0x37, 0x8d,
+    ];
+    let mut file = tempfile::NamedTempFile::new().unwrap();
+    file.write_all(input).unwrap();
+    file.flush().unwrap();
+    file.seek(SeekFrom::Start(0)).unwrap();
+
+    escargot::CargoBuild::new()
+        .example("stdin_pipe")
+        .current_release()
+        .current_target()
+        .run()
+        .unwrap()
+        .command()
+        .stdin(file.into_file())
+        .assert()
+        .success()
+        .stdout(contains("There are 8 bases in your file"))
+        .stdout(contains("There are 0 AAAAs in your file"));
+}
+
 #[test]
 fn test_stdin_no_compression() {
     let input: &[u8] = b">id1\nAGTCGTCA";