Skip to content

Commit

Permalink
Add test against Specimens.jl, improve buffer perf & error msging
Browse files Browse the repository at this point in the history
  • Loading branch information
Roderick Bovee committed Sep 7, 2019
1 parent c4ae3d9 commit 1f5ac3a
Show file tree
Hide file tree
Showing 14 changed files with 454 additions and 229 deletions.
124 changes: 114 additions & 10 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,35 +1,139 @@
version: 2
jobs:
test:
docker:
- image: circleci/rust:1.37-stretch
version: 2.1

executors:
needletail:
machine:
image: ubuntu-1604:201903-01

commands:
checkout_and_setup:
description: "Checkout code and set up rust"
steps:
- checkout
- restore_cache:
name: restore rust install
keys:
- rust-stable
- run:
name: set up rust stable
command: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable
echo 'export PATH=$HOME/.cargo/bin:$PATH' >> $BASH_ENV
source $HOME/.cargo/env
rustup show
- save_cache:
name: cache rust install
paths:
- ~/.rustup/
- ~/.cargo/
key: rust-stable
- run:
name: Version information
command: rustc --version; cargo --version; rustup --version
- run:
name: Calculate dependencies
command: cargo generate-lockfile
- restore_cache:
name: restore project build artifacts
keys:
- v4-cargo-cache-{{ arch }}-{{ checksum "Cargo.lock" }}
- run:
name: Build all targets
command: cargo build --all --all-targets
command: cargo build --all --all-targets --all-features
- save_cache:
name: save project build artifacts
paths:
- /usr/local/cargo/registry
- ~/.cargo/registry
- target/debug/.fingerprint
- target/debug/build
- target/debug/deps
key: v4-cargo-cache-{{ arch }}-{{ checksum "Cargo.lock" }}

jobs:
build:
executor: needletail
steps:
- checkout_and_setup
test:
executor: needletail
steps:
- checkout_and_setup
- run:
name: Run all tests
command: cargo test --all
command: cargo test --all --all-features
- run:
name: Run slow tests
command: cargo test -- --ignored
lint:
executor: needletail
steps:
- checkout_and_setup
- run:
name: Format
command: |
rustup component add rustfmt
cargo fmt --all -- --check
- run:
name: Clippy
command: |
rustup component add clippy
cargo clippy --all-features -- -D warnings
coverage:
executor: needletail
steps:
- checkout_and_setup
- restore_cache:
keys:
- cargo-tarpaulin-0.8.6
- run:
name: install cargo tarpaulin
command: cargo install cargo-tarpaulin --version 0.8.6 || echo "cargo-tarpaulin already installed"
environment:
RUSTFLAGS: --cfg procmacro2_semver_exempt
- save_cache:
paths:
- ~/.cargo/bin/cargo-tarpaulin
key: cargo-tarpaulin-0.8.6
- run:
name: Generate coverage report
command: cargo tarpaulin --out Xml --all --all-features -t 600
environment:
LZMA_API_STATIC: 1
- run:
name: Export coverage to codecov
command: bash <(curl -s https://codecov.io/bash) || echo "Failed to upload coverage"
bench:
# TODO: probably need to do something useful here (use critcmp?) before turning this on
executor: needletail
steps:
- checkout_and_setup
- run:
name: Run benchmarks
command: |
cargo bench
fuzz:
# TODO: need to figure out how to install nightly here and probably cache the cargo-fuzz binary
executor: needletail
steps:
- checkout_and_setup
- run:
name: Run fuzz for 3 minutes each
command: |
cargo +nightly install cargo-fuzz
cargo +nightly fuzz run parse_fasta -- -max_total_time=180
cargo +nightly fuzz run parse_fastq -- -max_total_time=180
workflows:
version: 2
tests:
ci-checks:
jobs:
- test
- build
- coverage:
requires:
- build
- test:
requires:
- build
- lint:
requires:
- build
9 changes: 9 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,21 @@ flate2 = { version="1.0.6", optional=true }
bzip2 = { version="0.3.3", optional=true }
xz2 = { version="0.1.6", optional=true }
memchr = "2.2.1"
safemem = "0.3.2"

[dev-dependencies]
criterion = "0.3"

# for benchmark comparisons
bio = "0.28"
seq_io = "0.3"

# for testing with the FormatSpecimens.jl repo
reqwest = "0.9"
toml = "0.4"
serde = "1.0"
serde_derive = "1.0"

[[bench]]
name = "benchmark"
harness = false
72 changes: 16 additions & 56 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ fn bench_kmer_speed(c: &mut Criterion) {
},
)
.unwrap();
assert_eq!(718007, n_total);
assert_eq!(350983, n_canonical);
assert_eq!(718_007, n_total);
assert_eq!(350_983, n_canonical);
});
});

Expand All @@ -61,8 +61,8 @@ fn bench_kmer_speed(c: &mut Criterion) {
},
)
.unwrap();
assert_eq!(718007, n_total);
assert_eq!(350983, n_canonical);
assert_eq!(718_007, n_total);
assert_eq!(350_983, n_canonical);
});
});
}
Expand All @@ -89,7 +89,7 @@ fn bench_fastq_file(c: &mut Criterion) {
let record = record.unwrap();
n_bases += record.seq().len()
}
assert_eq!(250000, n_bases);
assert_eq!(250_000, n_bases);
});
});

Expand All @@ -103,7 +103,7 @@ fn bench_fastq_file(c: &mut Criterion) {
let seqlen = record.seq().len();
n_bases += seqlen;
}
assert_eq!(250000, n_bases);
assert_eq!(250_000, n_bases);
});
});

Expand All @@ -119,38 +119,19 @@ fn bench_fastq_file(c: &mut Criterion) {
},
)
.unwrap();
assert_eq!(250000, n_bases);
});
});

group.bench_function("Needletail (Macro)", |bench| {
use needletail::formats::FastqReader;
use needletail::{parse_stream, ParseError};
#[inline]
fn get_n_bases(mut fastq_data: &mut dyn Read) -> Result<usize, ParseError> {
let mut n_bases = 0;
parse_stream!(&mut fastq_data, &b""[..], FastqReader, rec, {
n_bases += rec.seq.len();
});
Ok(n_bases)
}

bench.iter(|| {
let mut fastq_data = Cursor::new(data.clone());
let n_bases = get_n_bases(&mut fastq_data).unwrap();
assert_eq!(250000, n_bases);
assert_eq!(250_000, n_bases);
});
});

group.bench_function("Needletail (No Buffer)", |bench| {
use needletail::formats::{FastqReader, RecReader};
use needletail::formats::{FastqParser, RecParser};
bench.iter(|| {
let mut reader = FastqReader::from_buffer(&data, true);
let mut reader = FastqParser::from_buffer(&data, true);
let mut n_bases = 0;
for seq in reader.by_ref() {
n_bases += seq.unwrap().seq.len();
}
assert_eq!(250000, n_bases);
assert_eq!(250_000, n_bases);
});
});
}
Expand All @@ -174,7 +155,7 @@ fn bench_fasta_file(c: &mut Criterion) {
let record = record.unwrap();
n_bases += record.seq().len()
}
assert_eq!(738580, n_bases);
assert_eq!(738_580, n_bases);
});
});

Expand All @@ -189,7 +170,7 @@ fn bench_fasta_file(c: &mut Criterion) {
n_bases += s.len();
}
}
assert_eq!(738580, n_bases);
assert_eq!(738_580, n_bases);
});
});

Expand All @@ -205,42 +186,21 @@ fn bench_fasta_file(c: &mut Criterion) {
},
)
.unwrap();
assert_eq!(738580, n_bases);
});
});

group.bench_function("Needletail (Macro)", |bench| {
use needletail::formats::FastaReader;
use needletail::seq::Sequence;
use needletail::{parse_stream, ParseError};
#[inline]
fn get_n_bases(mut fasta_data: &mut dyn Read) -> Result<usize, ParseError> {
let mut n_bases = 0;
parse_stream!(&mut fasta_data, &b""[..], FastaReader, rec, {
let seq = Sequence::from(rec);
n_bases += seq.seq.len();
});
Ok(n_bases)
}

bench.iter(|| {
let mut fasta_data = Cursor::new(data.clone());
let n_bases = get_n_bases(&mut fasta_data).unwrap();
assert_eq!(738580, n_bases);
assert_eq!(738_580, n_bases);
});
});

group.bench_function("Needletail (No Buffer)", |bench| {
use needletail::formats::{FastaReader, RecReader};
use needletail::formats::{FastaParser, RecParser};
use needletail::seq::Sequence;
bench.iter(|| {
let mut reader = FastaReader::from_buffer(&data, true);
let mut reader = FastaParser::from_buffer(&data, true);
let mut n_bases = 0;
for rec in reader.by_ref() {
let seq = Sequence::from(rec.unwrap());
n_bases += seq.seq.len();
}
assert_eq!(738580, n_bases);
assert_eq!(738_580, n_bases);
});
});
}
Expand Down
3 changes: 0 additions & 3 deletions rustfmt.toml

This file was deleted.

26 changes: 13 additions & 13 deletions src/bitkmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,14 @@ fn can_kmerize() {

#[test]
fn test_iterator() {
let seq = "ACGTA".as_bytes();
let seq = b"ACGTA";
let mut kmer_iter = BitNuclKmer::new(seq, 3, false);
assert_eq!(kmer_iter.next(), Some((0, (6, 3), false)));
assert_eq!(kmer_iter.next(), Some((1, (27, 3), false)));
assert_eq!(kmer_iter.next(), Some((2, (44, 3), false)));
assert_eq!(kmer_iter.next(), None);

let seq = "TA".as_bytes();
let seq = b"TA";
let mut kmer_iter = BitNuclKmer::new(seq, 3, false);
assert_eq!(kmer_iter.next(), None);
}
Expand All @@ -172,10 +172,10 @@ pub fn reverse_complement(kmer: BitKmer) -> BitKmer {

#[test]
fn test_reverse_complement() {
assert_eq!(reverse_complement((0b000000, 3)).0, 0b111111);
assert_eq!(reverse_complement((0b111111, 3)).0, 0b000000);
assert_eq!(reverse_complement((0b00000000, 4)).0, 0b11111111);
assert_eq!(reverse_complement((0b00011011, 4)).0, 0b00011011);
assert_eq!(reverse_complement((0b00_0000, 3)).0, 0b11_1111);
assert_eq!(reverse_complement((0b11_1111, 3)).0, 0b00_0000);
assert_eq!(reverse_complement((0b0000_0000, 4)).0, 0b1111_1111);
assert_eq!(reverse_complement((0b0001_1011, 4)).0, 0b0001_1011);
}

/// Return the lexicographically lowest of the BitKmer and its reverse complement and
Expand Down Expand Up @@ -210,10 +210,10 @@ pub fn minimizer(kmer: BitKmer, minmer_size: u8) -> BitKmer {

#[test]
fn test_minimizer() {
assert_eq!(minimizer((0b001011, 3), 2).0, 0b0010);
assert_eq!(minimizer((0b001011, 3), 1).0, 0b00);
assert_eq!(minimizer((0b11000011, 4), 2).0, 0b0000);
assert_eq!(minimizer((0b110001, 3), 2).0, 0b0001);
assert_eq!(minimizer((0b00_1011, 3), 2).0, 0b0010);
assert_eq!(minimizer((0b00_1011, 3), 1).0, 0b00);
assert_eq!(minimizer((0b1100_0011, 4), 2).0, 0b0000);
assert_eq!(minimizer((0b11_0001, 3), 2).0, 0b0001);
}

pub fn bitmer_to_bytes(kmer: BitKmer) -> Vec<u8> {
Expand Down Expand Up @@ -259,12 +259,12 @@ pub fn bytes_to_bitmer(kmer: &[u8]) -> BitKmer {

#[test]
fn test_bytes_to_bitkmer() {
let mut ikmer: BitKmer = bytes_to_bitmer("C".as_bytes());
let mut ikmer: BitKmer = bytes_to_bitmer(b"C");
assert_eq!(ikmer.0, 1 as BitKmerSeq);

ikmer = bytes_to_bitmer("TTA".as_bytes());
ikmer = bytes_to_bitmer(b"TTA");
assert_eq!(ikmer.0, 60 as BitKmerSeq);

ikmer = bytes_to_bitmer("AAA".as_bytes());
ikmer = bytes_to_bitmer(b"AAA");
assert_eq!(ikmer.0, 0 as BitKmerSeq);
}
Loading

0 comments on commit 1f5ac3a

Please sign in to comment.