From 368da9e640988b0eb2aa16acb7332d4f49db0335 Mon Sep 17 00:00:00 2001 From: Roderick Bovee Date: Wed, 11 Sep 2019 08:24:59 -0700 Subject: [PATCH] Improve error messaging --- src/formats/fasta.rs | 19 ++++++++++--------- src/formats/fastq.rs | 12 +++++++----- src/formats/mod.rs | 2 +- src/util.rs | 2 -- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/formats/fasta.rs b/src/formats/fasta.rs index c186243..a07c03a 100644 --- a/src/formats/fasta.rs +++ b/src/formats/fasta.rs @@ -36,7 +36,7 @@ pub struct FastaParser<'a> { impl<'a> FastaParser<'a> { pub fn new(buf: &'a [u8], last: bool) -> Result { if buf[0] != b'>' { - let context = String::from_utf8_lossy(&buf[..min(32, buf.len())]); + let context = String::from_utf8_lossy(&buf[..min(64, buf.len())]); return Err(ParseError::new( "FASTA record must start with '>'", ParseErrorType::InvalidHeader, @@ -78,7 +78,7 @@ impl<'a> Iterator for FastaParser<'a> { let context = String::from_utf8_lossy(id); return Some(Err(ParseError::new( "Sequence completely empty", - ParseErrorType::PrematureEOF, + ParseErrorType::InvalidRecord, ) .context(context))); } @@ -119,19 +119,18 @@ pub fn check_end(buf: &[u8], last: bool) -> Result<(), ParseError> { // check if there's anything left stuff in the buffer (besides returns) if !last { return Err( - ParseError::new("File ended abruptly", ParseErrorType::PrematureEOF), + ParseError::new("File ended abruptly", ParseErrorType::Invalid), // .record(count + 1), ); } for c in &buf[..] { if c != &b'\r' && c != &b'\n' { - let end = min(32, buf.len()); + let end = min(64, buf.len()); let context = String::from_utf8_lossy(&buf[..end]); return Err(ParseError::new( - "File had extra data past end of records", - ParseErrorType::PrematureEOF, + "Unexpected data encountered in middle of file", + ParseErrorType::Invalid, ) - // .record(count + 1) .context(context)); } } @@ -331,7 +330,7 @@ mod test { ); assert_eq!(i, 1); let e = res.unwrap_err(); - assert_eq!(e.error_type, ParseErrorType::PrematureEOF); + assert_eq!(e.error_type, ParseErrorType::Invalid); assert_eq!(e.record, 2); // test that an abrupt stop in a FASTA triggers an error @@ -352,7 +351,9 @@ mod test { ); assert_eq!(i, 1); let e = res.unwrap_err(); - assert_eq!(e.error_type, ParseErrorType::PrematureEOF); + // technically the sequence is empty here so we're failing + // with an InvalidRecord error rather than Invalid + assert_eq!(e.error_type, ParseErrorType::InvalidRecord); assert_eq!(e.record, 2); } diff --git a/src/formats/fastq.rs b/src/formats/fastq.rs index fa7bd22..ecf2d97 100644 --- a/src/formats/fastq.rs +++ b/src/formats/fastq.rs @@ -39,7 +39,7 @@ impl<'a> FastqParser<'a> { if buf[0] != b'@' { // sometimes there are extra returns at the end of a file so we shouldn't blow up if !(last && (buf[0] == b'\r' && buf[0] == b'\n')) { - let context = String::from_utf8_lossy(&buf[..min(32, buf.len())]); + let context = String::from_utf8_lossy(&buf[..min(64, buf.len())]); let e = ParseError::new( "FASTQ record must start with '@'", ParseErrorType::InvalidHeader, @@ -138,7 +138,7 @@ impl<'a> Iterator for FastqParser<'a> { let context = String::from_utf8_lossy(id); return Some(Err(ParseError::new( "Quality length was shorter than expected", - ParseErrorType::PrematureEOF, + ParseErrorType::InvalidRecord, ) .context(context))); } @@ -248,7 +248,9 @@ mod test { let result = fp.next().unwrap(); assert!(result.is_err()); let e = result.unwrap_err(); - assert!(e.error_type == ParseErrorType::PrematureEOF); + // technically the terminal newline could be part of the record + // so this is an InvalidRecord and not Invalid + assert!(e.error_type == ParseErrorType::InvalidRecord); let mut i = 0; let res = parse_sequence_reader( @@ -268,7 +270,7 @@ mod test { ); assert_eq!(i, 1); let e = res.unwrap_err(); - assert_eq!(e.error_type, ParseErrorType::PrematureEOF); + assert_eq!(e.error_type, ParseErrorType::Invalid); assert_eq!(e.record, 2); // we allow a few extra newlines at the ends of FASTQs @@ -313,7 +315,7 @@ mod test { ); assert_eq!(i, 1); let e = res.unwrap_err(); - assert_eq!(e.error_type, ParseErrorType::PrematureEOF); + assert_eq!(e.error_type, ParseErrorType::Invalid); assert_eq!(e.record, 2); } diff --git a/src/formats/mod.rs b/src/formats/mod.rs index 3210b0f..4ac4a71 100644 --- a/src/formats/mod.rs +++ b/src/formats/mod.rs @@ -142,7 +142,7 @@ where if amt_read < 2 { return Err(ParseError::new( "File was too short", - ParseErrorType::PrematureEOF, + ParseErrorType::Invalid, )); } unsafe { diff --git a/src/util.rs b/src/util.rs index 10bab8f..4232685 100644 --- a/src/util.rs +++ b/src/util.rs @@ -8,7 +8,6 @@ use memchr::memchr_iter; #[derive(Clone, Debug, PartialEq)] pub enum ParseErrorType { BadCompression, - PrematureEOF, InvalidHeader, InvalidRecord, IOError, @@ -54,7 +53,6 @@ impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let msg = match self.error_type { ParseErrorType::BadCompression => "Error in decompression", - ParseErrorType::PrematureEOF => "File ended prematurely", ParseErrorType::InvalidHeader => "Invalid record header", ParseErrorType::InvalidRecord => "Invalid record content", ParseErrorType::IOError => "I/O Error",