Skip to content

Commit

Permalink
Add better error messages for invalid globs.
Browse files Browse the repository at this point in the history
This threads the original glob given by end users through all of the
glob parsing errors. This was slightly trickier than it might appear
because the gitignore implementation actually modifies the glob before
compiling it. So in order to get better glob error messages everywhere,
we need to track the original glob both in the glob parser and in the
higher-level abstractions in the `ignore` crate.

Fixes #444
  • Loading branch information
BurntSushi committed Apr 12, 2017
1 parent 7ad23e5 commit c50b8b4
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 58 deletions.
83 changes: 50 additions & 33 deletions globset/src/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::str;
use regex;
use regex::bytes::Regex;

use {Candidate, Error, new_regex};
use {Candidate, Error, ErrorKind, new_regex};

/// Describes a matching strategy for a particular pattern.
///
Expand Down Expand Up @@ -544,16 +544,23 @@ impl<'a> GlobBuilder<'a> {
/// Parses and builds the pattern.
pub fn build(&self) -> Result<Glob, Error> {
let mut p = Parser {
glob: &self.glob,
stack: vec![Tokens::default()],
chars: self.glob.chars().peekable(),
prev: None,
cur: None,
};
try!(p.parse());
if p.stack.is_empty() {
Err(Error::UnopenedAlternates)
Err(Error {
glob: Some(self.glob.to_string()),
kind: ErrorKind::UnopenedAlternates,
})
} else if p.stack.len() > 1 {
Err(Error::UnclosedAlternates)
Err(Error {
glob: Some(self.glob.to_string()),
kind: ErrorKind::UnclosedAlternates,
})
} else {
let tokens = p.stack.pop().unwrap();
Ok(Glob {
Expand Down Expand Up @@ -698,13 +705,18 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
}

struct Parser<'a> {
glob: &'a str,
stack: Vec<Tokens>,
chars: iter::Peekable<str::Chars<'a>>,
prev: Option<char>,
cur: Option<char>,
}

impl<'a> Parser<'a> {
fn error(&self, kind: ErrorKind) -> Error {
Error { glob: Some(self.glob.to_string()), kind: kind }
}

fn parse(&mut self) -> Result<(), Error> {
while let Some(c) = self.bump() {
match c {
Expand All @@ -729,7 +741,7 @@ impl<'a> Parser<'a> {

fn push_alternate(&mut self) -> Result<(), Error> {
if self.stack.len() > 1 {
return Err(Error::NestedAlternates);
return Err(self.error(ErrorKind::NestedAlternates));
}
Ok(self.stack.push(Tokens::default()))
}
Expand All @@ -743,22 +755,22 @@ impl<'a> Parser<'a> {
}

fn push_token(&mut self, tok: Token) -> Result<(), Error> {
match self.stack.last_mut() {
None => Err(Error::UnopenedAlternates),
Some(ref mut pat) => Ok(pat.push(tok)),
if let Some(ref mut pat) = self.stack.last_mut() {
return Ok(pat.push(tok));
}
Err(self.error(ErrorKind::UnopenedAlternates))
}

fn pop_token(&mut self) -> Result<Token, Error> {
match self.stack.last_mut() {
None => Err(Error::UnopenedAlternates),
Some(ref mut pat) => Ok(pat.pop().unwrap()),
if let Some(ref mut pat) = self.stack.last_mut() {
return Ok(pat.pop().unwrap());
}
Err(self.error(ErrorKind::UnopenedAlternates))
}

fn have_tokens(&self) -> Result<bool, Error> {
match self.stack.last() {
None => Err(Error::UnopenedAlternates),
None => Err(self.error(ErrorKind::UnopenedAlternates)),
Some(ref pat) => Ok(!pat.is_empty()),
}
}
Expand All @@ -785,15 +797,15 @@ impl<'a> Parser<'a> {
try!(self.push_token(Token::RecursivePrefix));
let next = self.bump();
if !next.map(is_separator).unwrap_or(true) {
return Err(Error::InvalidRecursive);
return Err(self.error(ErrorKind::InvalidRecursive));
}
return Ok(());
}
try!(self.pop_token());
if !prev.map(is_separator).unwrap_or(false) {
if self.stack.len() <= 1
|| (prev != Some(',') && prev != Some('{')) {
return Err(Error::InvalidRecursive);
return Err(self.error(ErrorKind::InvalidRecursive));
}
}
match self.chars.peek() {
Expand All @@ -808,18 +820,22 @@ impl<'a> Parser<'a> {
assert!(self.bump().map(is_separator).unwrap_or(false));
self.push_token(Token::RecursiveZeroOrMore)
}
_ => Err(Error::InvalidRecursive),
_ => Err(self.error(ErrorKind::InvalidRecursive)),
}
}

fn parse_class(&mut self) -> Result<(), Error> {
fn add_to_last_range(
glob: &str,
r: &mut (char, char),
add: char,
) -> Result<(), Error> {
r.1 = add;
if r.1 < r.0 {
Err(Error::InvalidRange(r.0, r.1))
Err(Error {
glob: Some(glob.to_string()),
kind: ErrorKind::InvalidRange(r.0, r.1),
})
} else {
Ok(())
}
Expand All @@ -837,7 +853,7 @@ impl<'a> Parser<'a> {
Some(c) => c,
// The only way to successfully break this loop is to observe
// a ']'.
None => return Err(Error::UnclosedClass),
None => return Err(self.error(ErrorKind::UnclosedClass)),
};
match c {
']' => {
Expand All @@ -854,7 +870,7 @@ impl<'a> Parser<'a> {
// invariant: in_range is only set when there is
// already at least one character seen.
let r = ranges.last_mut().unwrap();
try!(add_to_last_range(r, '-'));
try!(add_to_last_range(&self.glob, r, '-'));
in_range = false;
} else {
assert!(!ranges.is_empty());
Expand All @@ -865,7 +881,8 @@ impl<'a> Parser<'a> {
if in_range {
// invariant: in_range is only set when there is
// already at least one character seen.
try!(add_to_last_range(ranges.last_mut().unwrap(), c));
try!(add_to_last_range(
&self.glob, ranges.last_mut().unwrap(), c));
} else {
ranges.push((c, c));
}
Expand Down Expand Up @@ -909,7 +926,7 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
mod tests {
use std::ffi::{OsStr, OsString};

use {GlobSetBuilder, Error};
use {GlobSetBuilder, ErrorKind};
use super::{Glob, GlobBuilder, Token};
use super::Token::*;

Expand All @@ -934,7 +951,7 @@ mod tests {
#[test]
fn $name() {
let err = Glob::new($pat).unwrap_err();
assert_eq!($err, err);
assert_eq!(&$err, err.kind());
}
}
}
Expand Down Expand Up @@ -1057,19 +1074,19 @@ mod tests {
syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);

syntaxerr!(err_rseq1, "a**", Error::InvalidRecursive);
syntaxerr!(err_rseq2, "**a", Error::InvalidRecursive);
syntaxerr!(err_rseq3, "a**b", Error::InvalidRecursive);
syntaxerr!(err_rseq4, "***", Error::InvalidRecursive);
syntaxerr!(err_rseq5, "/a**", Error::InvalidRecursive);
syntaxerr!(err_rseq6, "/**a", Error::InvalidRecursive);
syntaxerr!(err_rseq7, "/a**b", Error::InvalidRecursive);
syntaxerr!(err_unclosed1, "[", Error::UnclosedClass);
syntaxerr!(err_unclosed2, "[]", Error::UnclosedClass);
syntaxerr!(err_unclosed3, "[!", Error::UnclosedClass);
syntaxerr!(err_unclosed4, "[!]", Error::UnclosedClass);
syntaxerr!(err_range1, "[z-a]", Error::InvalidRange('z', 'a'));
syntaxerr!(err_range2, "[z--]", Error::InvalidRange('z', '-'));
syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));

const CASEI: Options = Options {
casei: true,
Expand Down
82 changes: 65 additions & 17 deletions globset/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,16 @@ mod pathutil;

/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
pub struct Error {
/// The original glob provided by the caller.
glob: Option<String>,
/// The kind of error.
kind: ErrorKind,
}

/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear
/// adjacent to a path separator, or the beginning/end of a glob.
InvalidRecursive,
Expand All @@ -150,45 +159,74 @@ pub enum Error {
}

impl StdError for Error {
fn description(&self) -> &str {
self.kind.description()
}
}

impl Error {
/// Return the glob that caused this error, if one exists.
pub fn glob(&self) -> Option<&str> {
self.glob.as_ref().map(|s| &**s)
}

/// Return the kind of this error.
pub fn kind(&self) -> &ErrorKind {
&self.kind
}
}

impl ErrorKind {
fn description(&self) -> &str {
match *self {
Error::InvalidRecursive => {
ErrorKind::InvalidRecursive => {
"invalid use of **; must be one path component"
}
Error::UnclosedClass => {
ErrorKind::UnclosedClass => {
"unclosed character class; missing ']'"
}
Error::InvalidRange(_, _) => {
ErrorKind::InvalidRange(_, _) => {
"invalid character range"
}
Error::UnopenedAlternates => {
ErrorKind::UnopenedAlternates => {
"unopened alternate group; missing '{' \
(maybe escape '}' with '[}]'?)"
}
Error::UnclosedAlternates => {
ErrorKind::UnclosedAlternates => {
"unclosed alternate group; missing '}' \
(maybe escape '{' with '[{]'?)"
}
Error::NestedAlternates => {
ErrorKind::NestedAlternates => {
"nested alternate groups are not allowed"
}
Error::Regex(ref err) => err,
ErrorKind::Regex(ref err) => err,
}
}
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.glob {
None => self.kind.fmt(f),
Some(ref glob) => {
write!(f, "error parsing glob '{}': {}", glob, self.kind)
}
}
}
}

impl fmt::Display for ErrorKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::InvalidRecursive
| Error::UnclosedClass
| Error::UnopenedAlternates
| Error::UnclosedAlternates
| Error::NestedAlternates
| Error::Regex(_) => {
ErrorKind::InvalidRecursive
| ErrorKind::UnclosedClass
| ErrorKind::UnopenedAlternates
| ErrorKind::UnclosedAlternates
| ErrorKind::NestedAlternates
| ErrorKind::Regex(_) => {
write!(f, "{}", self.description())
}
Error::InvalidRange(s, e) => {
ErrorKind::InvalidRange(s, e) => {
write!(f, "invalid range; '{}' > '{}'", s, e)
}
}
Expand All @@ -201,12 +239,22 @@ fn new_regex(pat: &str) -> Result<Regex, Error> {
.size_limit(10 * (1 << 20))
.dfa_size_limit(10 * (1 << 20))
.build()
.map_err(|err| Error::Regex(err.to_string()))
.map_err(|err| {
Error {
glob: Some(pat.to_string()),
kind: ErrorKind::Regex(err.to_string()),
}
})
}

fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
where S: AsRef<str>, I: IntoIterator<Item=S> {
RegexSet::new(pats).map_err(|err| Error::Regex(err.to_string()))
RegexSet::new(pats).map_err(|err| {
Error {
glob: None,
kind: ErrorKind::Regex(err.to_string()),
}
})
}

type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
Expand Down
14 changes: 12 additions & 2 deletions ignore/src/gitignore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,12 @@ impl GitignoreBuilder {
let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count();
let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count();
let set = try!(
self.builder.build().map_err(|err| Error::Glob(err.to_string())));
self.builder.build().map_err(|err| {
Error::Glob {
glob: None,
err: err.to_string(),
}
}));
Ok(Gitignore {
set: set,
root: self.root.clone(),
Expand Down Expand Up @@ -420,7 +425,12 @@ impl GitignoreBuilder {
GlobBuilder::new(&glob.actual)
.literal_separator(literal_separator)
.build()
.map_err(|err| Error::Glob(err.to_string())));
.map_err(|err| {
Error::Glob {
glob: Some(glob.original.clone()),
err: err.kind().to_string(),
}
}));
self.builder.add(parsed);
self.globs.push(glob);
Ok(self)
Expand Down
Loading

0 comments on commit c50b8b4

Please sign in to comment.