Skip to content

Commit

Permalink
feat: add zstd support (#86)
Browse files Browse the repository at this point in the history
  • Loading branch information
mbhall88 authored Nov 25, 2024
1 parent 42369ea commit a4c203e
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 4 deletions.
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,20 @@ bench = false

[features]
default = ["compression"]
compression = ["bzip2", "flate2", "xz2"]
compression = ["bzip2", "flate2", "xz2", "zstd"]
python = ["pyo3/extension-module"]
python_test = ["pyo3"]
xz2 = ["liblzma"]

[dependencies]
buffer-redux = { version = "1", default_features = false }
buffer-redux = { version = "1", default-features = false }
bytecount = { version = "0.6", features = ["runtime-dispatch-simd"] }
bzip2 = { version = "0.4", optional = true }
flate2 = { version = "1.0.30", optional = true }
memchr = "2.7.2"
pyo3 = { version = "0.21.2", optional = true }
liblzma = { version = "0.3.1", optional = true }
zstd = { version = "0.13.2", optional = true }

[dev-dependencies]
criterion = "0.5"
Expand Down
15 changes: 14 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use bzip2::read::BzDecoder;
use flate2::read::MultiGzDecoder;
#[cfg(feature = "xz2")]
use liblzma::read::XzDecoder;
#[cfg(feature = "zstd")]
use zstd::stream::read::Decoder as ZstdDecoder;

use crate::errors::ParseError;
pub use crate::parser::fasta::Reader as FastaReader;
Expand All @@ -29,6 +31,8 @@ const GZ_MAGIC: [u8; 2] = [0x1F, 0x8B];
const BZ_MAGIC: [u8; 2] = [0x42, 0x5A];
#[cfg(feature = "xz2")]
const XZ_MAGIC: [u8; 2] = [0xFD, 0x37];
#[cfg(feature = "zstd")]
const ZST_MAGIC: [u8; 2] = [0x28, 0xB5];

fn get_fastx_reader<'a, R: 'a + io::Read + Send>(
reader: R,
Expand All @@ -43,7 +47,7 @@ fn get_fastx_reader<'a, R: 'a + io::Read + Send>(

/// The main entry point of needletail if you're reading from something that implements [`std::io::Read`].
/// This automatically detects whether the file is:
/// 1. compressed: [`gzip`][gzip], [`bz`][bz] and [`xz`][xz] are supported and will use the appropriate decoder
/// 1. compressed: [`gzip`][gzip], [`bz`][bz], [`xz`][xz], and [`zstd`][zstd] are supported and will use the appropriate decoder
/// 2. FASTA or FASTQ: the right parser will be automatically instantiated
///
/// Option 1 is only available if the `compression` feature is enabled.
Expand Down Expand Up @@ -76,6 +80,7 @@ fn get_fastx_reader<'a, R: 'a + io::Read + Send>(
/// [gzip]: https://www.gnu.org/software/gzip/
/// [bz]: https://sourceware.org/bzip2/
/// [xz]: https://tukaani.org/xz/format.html
/// [zstd]: https://facebook.github.io/zstd/
///
pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>(
mut reader: R,
Expand Down Expand Up @@ -112,6 +117,14 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>(
let r = Cursor::new(first).chain(xz_reader);
get_fastx_reader(r, first[0])
}
#[cfg(feature = "zstd")]
ZST_MAGIC => {
let mut zst_reader = ZstdDecoder::new(new_reader)?;
let mut first = [0; 1];
zst_reader.read_exact(&mut first)?;
let r = Cursor::new(first).chain(zst_reader);
get_fastx_reader(r, first[0])
}
_ => get_fastx_reader(new_reader, first_two_bytes[0]),
}
}
Expand Down
Binary file added tests/data/test.fa.zst
Binary file not shown.
3 changes: 2 additions & 1 deletion tests/test_compressed.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use needletail::parse_fastx_file;

const TEST_FILES: [&str; 3] = [
const TEST_FILES: [&str; 4] = [
"./tests/data/test.fa.gz",
"./tests/data/test.fa.bz2",
"./tests/data/test.fa.xz",
"./tests/data/test.fa.zst",
];

#[cfg(feature = "compression")]
Expand Down
27 changes: 27 additions & 0 deletions tests/test_stdin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,33 @@ fn test_stdin_bzip() {
.stdout(contains("There are 0 AAAAs in your file"));
}

#[cfg(feature = "compression")]
#[test]
fn test_stdin_zstd() {
    // End-to-end check: pipe a zstd-compressed FASTA record into the
    // `stdin_pipe` example through stdin and verify the counts it reports.
    //
    // Generated with `echo ">id1\nAGTCGTCA" | zstd -c | xxd -i`
    // The bytes starting at 0x3e are the ASCII text ">id1\nAGTCGTCA\n".
    let compressed: &[u8] = &[
        0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x58, 0x71, 0x00, 0x00, 0x3e, 0x69, 0x64, 0x31, 0x0a, 0x41,
        0x47, 0x54, 0x43, 0x47, 0x54, 0x43, 0x41, 0x0a, 0x52, 0x9d, 0x37, 0x8d,
    ];

    // Stage the payload in a temporary file, then move the cursor back to the
    // start so the child process reads the whole stream from its stdin.
    let mut tmp = tempfile::NamedTempFile::new().unwrap();
    tmp.write_all(compressed).unwrap();
    tmp.flush().unwrap();
    tmp.seek(SeekFrom::Start(0)).unwrap();

    // Build (or reuse) the example binary with the current profile and target.
    let example = escargot::CargoBuild::new()
        .example("stdin_pipe")
        .current_release()
        .current_target()
        .run()
        .unwrap();

    // Run it with the temp file as stdin; it must exit successfully.
    let outcome = example
        .command()
        .stdin(tmp.into_file())
        .assert()
        .success();

    // The sequence AGTCGTCA has 8 bases and contains no "AAAA" run.
    outcome
        .stdout(contains("There are 8 bases in your file"))
        .stdout(contains("There are 0 AAAAs in your file"));
}

#[test]
fn test_stdin_no_compression() {
let input: &[u8] = b">id1\nAGTCGTCA";
Expand Down

0 comments on commit a4c203e

Please sign in to comment.