diff --git a/README.md b/README.md index ac6768639..502cfa5c0 100644 --- a/README.md +++ b/README.md @@ -362,6 +362,50 @@ extensions. The syntax supported is [documented as part of Rust's regex library](https://doc.rust-lang.org/regex/regex/index.html#syntax). +### Configuration files + +ripgrep supports reading configuration files that change ripgrep's default +behavior. The format of the configuration file is an "rc" style and is very +simple. It is defined by two rules: + +1. Every line is a shell argument, after trimming ASCII whitespace. +2. Lines starting with '#' (optionally preceded by any amount of + ASCII whitespace) are ignored, as are blank lines. + +ripgrep will look for a single configuration file if and only if the +`RIPGREP_CONFIG_PATH` environment variable is set and is non-empty. ripgrep +will parse shell arguments from this file on startup and will behave as if +the arguments in this file were prepended to any explicit arguments given to +ripgrep on the command line. + +For example, if your ripgreprc file contained a single line: + + --smart-case + +then the following command + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo + +would behave identically to the following command + + rg --smart-case foo + +ripgrep also provides a flag, --no-config, that when present will suppress +any and all support for configuration. This includes any future support for +auto-loading configuration files from pre-determined paths. + +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. That is, this +command: + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo --case-sensitive + +is exactly equivalent to + + rg --smart-case foo --case-sensitive + +in which case, the --case-sensitive flag would override the --smart-case flag. 
+ ### Shell completions Shell completion files are included in the release tarball for Bash, Fish, Zsh diff --git a/complete/_rg b/complete/_rg index 6b62c1691..1074597dc 100644 --- a/complete/_rg +++ b/complete/_rg @@ -54,6 +54,7 @@ _rg() { '(--mmap --no-mmap)--mmap[search using memory maps when possible]' '(-H --with-filename --no-filename)--no-filename[suppress all file names]' "(-p --heading --pretty --vimgrep)--no-heading[don't group matches by file name]" + "--no-config[don't load configuration files]" "(--no-ignore-parent)--no-ignore[don't respect ignore files]" "--no-ignore-parent[don't respect ignore files in parent directories]" "--no-ignore-vcs[don't respect version control ignore files]" diff --git a/doc/rg.1 b/doc/rg.1 index fd562e812..b81124632 100644 --- a/doc/rg.1 +++ b/doc/rg.1 @@ -403,6 +403,17 @@ context related options.) .RS .RE .TP +.B \-\-no\-config +Never read configuration files. +When this flag is present, ripgrep will not respect the +RIPGREP_CONFIG_PATH environment variable. +.RS +.PP +If ripgrep ever grows a feature to automatically read configuration +files in pre\-defined locations, then this flag will also disable that +behavior as well. +.RE +.TP .B \-\-no\-messages Suppress all error messages. .RS @@ -597,6 +608,77 @@ ripgrep. Note that this must be passed to every invocation of rg. .RS .RE +.SH CONFIGURATION FILES +.PP +ripgrep supports reading configuration files that change ripgrep\[aq]s +default behavior. +The format of the configuration file is an "rc" style and is very +simple. +It is defined by two rules: +.IP +.nf +\f[C] +1.\ Every\ line\ is\ a\ shell\ argument,\ after\ trimming\ ASCII\ whitespace. +2.\ Lines\ starting\ with\ \[aq]#\[aq]\ (optionally\ preceded\ by\ any\ amount\ of +\ \ \ ASCII\ whitespace)\ are\ ignored,\ as\ are\ blank\ lines. +\f[] +.fi +.PP +ripgrep will look for a single configuration file if and only if the +RIPGREP_CONFIG_PATH environment variable is set and is non\-empty. 
+ripgrep will parse shell arguments from this file on startup and will +behave as if the arguments in this file were prepended to any explicit +arguments given to ripgrep on the command line. +.PP +For example, if your ripgreprc file contained a single line: +.IP +.nf +\f[C] +\-\-smart\-case +\f[] +.fi +.PP +then the following command +.IP +.nf +\f[C] +RIPGREP_CONFIG_PATH=wherever/.ripgreprc\ rg\ foo +\f[] +.fi +.PP +would behave identically to the following command +.IP +.nf +\f[C] +rg\ \-\-smart\-case\ foo +\f[] +.fi +.PP +ripgrep also provides a flag, \-\-no\-config, that when present will +suppress any and all support for configuration. +This includes any future support for auto\-loading configuration files +from pre\-determined paths. +.PP +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. +That is, this command: +.IP +.nf +\f[C] +RIPGREP_CONFIG_PATH=wherever/.ripgreprc\ rg\ foo\ \-\-case\-sensitive +\f[] +.fi +.PP +is exactly equivalent to +.IP +.nf +\f[C] +rg\ \-\-smart\-case\ foo\ \-\-case\-sensitive +\f[] +.fi +.PP +in which case, the \-\-case\-sensitive flag would override the +\-\-smart\-case flag. .SH SHELL COMPLETION .PP Shell completion files are included in the release tarball for Bash, diff --git a/doc/rg.1.md b/doc/rg.1.md index 6b0542867..c92c6aa3a 100644 --- a/doc/rg.1.md +++ b/doc/rg.1.md @@ -268,6 +268,14 @@ Project home page: https://github.com/BurntSushi/ripgrep when ripgrep thinks it will be faster. (Note that mmap searching doesn't currently support the various context related options.) +--no-config +: Never read configuration files. When this flag is present, ripgrep will not + respect the RIPGREP_CONFIG_PATH environment variable. + + If ripgrep ever grows a feature to automatically read configuration files + in pre-defined locations, then this flag will also disable that behavior as + well. + --no-messages : Suppress all error messages. 
@@ -392,6 +400,51 @@ Project home page: https://github.com/BurntSushi/ripgrep the default type definitions that are found inside of ripgrep. Note that this must be passed to every invocation of rg. +# CONFIGURATION FILES + +ripgrep supports reading configuration files that change +ripgrep's default behavior. The format of the configuration file is an +"rc" style and is very simple. It is defined by two rules: + + 1. Every line is a shell argument, after trimming ASCII whitespace. + 2. Lines starting with '#' (optionally preceded by any amount of + ASCII whitespace) are ignored, as are blank lines. + +ripgrep will look for a single configuration file if and only if the +RIPGREP_CONFIG_PATH environment variable is set and is non-empty. +ripgrep will parse shell arguments from this file on startup and will +behave as if the arguments in this file were prepended to any explicit +arguments given to ripgrep on the command line. + +For example, if your ripgreprc file contained a single line: + + --smart-case + +then the following command + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo + +would behave identically to the following command + + rg --smart-case foo + +ripgrep also provides a flag, --no-config, that when present will suppress +any and all support for configuration. This includes any future support +for auto-loading configuration files from pre-determined paths. + +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. That is, +this command: + + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo --case-sensitive + +is exactly equivalent to + + rg --smart-case foo --case-sensitive + +in which case, the --case-sensitive flag would override the --smart-case +flag. 
+ # SHELL COMPLETION Shell completion files are included in the release tarball for Bash, Fish, Zsh diff --git a/src/app.rs b/src/app.rs index 5fd8065f4..16cd3d826 100644 --- a/src/app.rs +++ b/src/app.rs @@ -22,6 +22,11 @@ Note that ripgrep may abort unexpectedly when using default settings if it searches a file that is simultaneously truncated. This behavior can be avoided by passing the --no-mmap flag. +ripgrep supports configuration files. Set RIPGREP_CONFIG_PATH to a +configuration file. The file can specify one shell argument per line. Lines +starting with '#' are ignored. For more details, see the man page or the +README. + Project home page: https://github.com/BurntSushi/ripgrep Use -h for short descriptions and --help for more details."; @@ -513,6 +518,7 @@ fn all_args_and_flags() -> Vec { flag_max_filesize(&mut args); flag_maxdepth(&mut args); flag_mmap(&mut args); + flag_no_config(&mut args); flag_no_ignore(&mut args); flag_no_ignore_parent(&mut args); flag_no_ignore_vcs(&mut args); @@ -1113,6 +1119,20 @@ This flag overrides --mmap. args.push(arg); } +fn flag_no_config(args: &mut Vec) { + const SHORT: &str = "Never read configuration files."; + const LONG: &str = long!("\ +Never read configuration files. When this flag is present, ripgrep will not +respect the RIPGREP_CONFIG_PATH environment variable. + +If ripgrep ever grows a feature to automatically read configuration files in +pre-defined locations, then this flag will also disable that behavior as well. +"); + let arg = RGArg::switch("no-config") + .help(SHORT).long_help(LONG); + args.push(arg); +} + fn flag_no_ignore(args: &mut Vec) { const SHORT: &str = "Don't respect ignore files."; const LONG: &str = long!("\ @@ -1182,8 +1202,7 @@ part on a separate output line. 
} fn flag_path_separator(args: &mut Vec) { - const SHORT: &str = - "Set the path separator to use when printing file paths."; + const SHORT: &str = "Set the path separator."; const LONG: &str = long!("\ Set the path separator to use when printing file paths. This defaults to your platform's path separator, which is / on Unix and \\ on Windows. This flag is diff --git a/src/args.rs b/src/args.rs index df7eeb8f9..d0990fdc0 100644 --- a/src/args.rs +++ b/src/args.rs @@ -25,6 +25,7 @@ use printer::{ColorSpecs, Printer}; use unescape::unescape; use worker::{Worker, WorkerBuilder}; +use config; use logger::Logger; use Result; @@ -88,17 +89,59 @@ impl Args { /// /// Also, initialize a global logger. pub fn parse() -> Result { - let matches = app::app().get_matches(); + // We parse the args given on CLI. This does not include args from + // the config. We use the CLI args as an initial configuration while + // trying to parse config files. If a config file exists and has + // arguments, then we re-parse argv, otherwise we just use the matches + // we have here. + let early_matches = ArgMatches(app::app().get_matches()); if let Err(err) = Logger::init() { errored!("failed to initialize logger: {}", err); } + if early_matches.is_present("debug") { + log::set_max_level(log::LevelFilter::Debug); + } else { + log::set_max_level(log::LevelFilter::Warn); + } + + let matches = Args::matches(early_matches); + // The logging level may have changed if we brought in additional + // arguments from a configuration file, so recheck it and set the log + // level as appropriate. if matches.is_present("debug") { log::set_max_level(log::LevelFilter::Debug); } else { log::set_max_level(log::LevelFilter::Warn); } - ArgMatches(matches).to_args() + matches.to_args() + } + + /// Run clap and return the matches. If clap determines a problem with the + /// user provided arguments (or if --help or --version are given), then an + /// error/usage/version will be printed and the process will exit. 
+ /// + /// If there are no additional arguments from the environment (e.g., a + /// config file), then the given matches are returned as is. + fn matches(early_matches: ArgMatches<'static>) -> ArgMatches<'static> { + // If the end user says no config, then respect it. + if early_matches.is_present("no-config") { + debug!("not reading config files because --no-config is present"); + return early_matches; + } + // If the user wants ripgrep to use a config file, then parse args + // from that first. + let mut args = config::args(early_matches.is_present("no-messages")); + if args.is_empty() { + return early_matches; + } + let mut cliargs = env::args_os(); + if let Some(bin) = cliargs.next() { + args.insert(0, bin); + } + args.extend(cliargs); + debug!("final argv: {:?}", args); + ArgMatches(app::app().get_matches_from(args)) } /// Returns true if ripgrep should print the files it will search and exit diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 000000000..c47e6a504 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,195 @@ +// This module provides routines for reading ripgrep config "rc" files. The +// primary output of these routines is a sequence of arguments, where each +// argument corresponds precisely to one shell argument. + +use std::env; +use std::error::Error; +use std::fs::File; +use std::io::{self, BufRead}; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; + +use Result; + +/// Return a sequence of arguments derived from ripgrep rc configuration files. +/// +/// If no_messages is false and there was a problem reading a config file, +/// then errors are printed to stderr. 
+pub fn args(no_messages: bool) -> Vec { + let config_path = match env::var_os("RIPGREP_CONFIG_PATH") { + None => return vec![], + Some(config_path) => { + if config_path.is_empty() { + return vec![]; + } + PathBuf::from(config_path) + } + }; + let (args, errs) = match parse(&config_path) { + Ok((args, errs)) => (args, errs), + Err(err) => { + if !no_messages { + eprintln!("{}", err); + } + return vec![]; + } + }; + if !no_messages && !errs.is_empty() { + for err in errs { + eprintln!("{}:{}", config_path.display(), err); + } + } + debug!( + "{}: arguments loaded from config file: {:?}", + config_path.display(), args); + args +} + +/// Parse a single ripgrep rc file from the given path. +/// +/// On success, this returns a set of shell arguments, in order, that should +/// be pre-pended to the arguments given to ripgrep at the command line. +/// +/// If the file could not be read, then an error is returned. If there was +/// a problem parsing one or more lines in the file, then errors are returned +/// for each line in addition to successfully parsed arguments. +fn parse>( + path: P, +) -> Result<(Vec, Vec>)> { + let path = path.as_ref(); + match File::open(&path) { + Ok(file) => parse_reader(file), + Err(err) => errored!("{}: {}", path.display(), err), + } +} + +/// Parse a single ripgrep rc file from the given reader. +/// +/// Callers should not provide a buffered reader, as this routine will use its +/// own buffer internally. +/// +/// On success, this returns a set of shell arguments, in order, that should +/// be pre-pended to the arguments given to ripgrep at the command line. +/// +/// If the reader could not be read, then an error is returned. If there was a +/// problem parsing one or more lines, then errors are returned for each line +/// in addition to successfully parsed arguments. 
+fn parse_reader( + rdr: R, +) -> Result<(Vec, Vec>)> { + let mut bufrdr = io::BufReader::new(rdr); + let (mut args, mut errs) = (vec![], vec![]); + let mut line = vec![]; + let mut line_number = 0; + while { + line.clear(); + line_number += 1; + bufrdr.read_until(b'\n', &mut line)? > 0 + } { + trim(&mut line); + if line.is_empty() || line[0] == b'#' { + continue; + } + match bytes_to_os_string(&line) { + Ok(osstr) => { + args.push(osstr); + } + Err(err) => { + errs.push(format!("{}: {}", line_number, err).into()); + } + } + } + Ok((args, errs)) +} + +/// Trim the given bytes of whitespace according to the ASCII definition. +fn trim(x: &mut Vec) { + let upto = x.iter().take_while(|b| is_space(**b)).count(); + x.drain(..upto); + let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count(); + x.drain(revto..); +} + +/// Returns true if and only if the given byte is an ASCII whitespace character. +fn is_space(b: u8) -> bool { + b == b'\t' + || b == b'\n' + || b == b'\x0B' + || b == b'\x0C' + || b == b'\r' + || b == b' ' +} + +/// On Unix, get an OsString from raw bytes. +#[cfg(unix)] +fn bytes_to_os_string(bytes: &[u8]) -> Result { + use std::os::unix::ffi::OsStringExt; + Ok(OsString::from_vec(bytes.to_vec())) +} + +/// On non-Unix (like Windows), require UTF-8. +#[cfg(not(unix))] +fn bytes_to_os_string(bytes: &[u8]) -> Result { + String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from) +} + +#[cfg(test)] +mod tests { + use std::ffi::OsString; + use super::parse_reader; + + #[test] + fn basic() { + let (args, errs) = parse_reader(&b"\ +# Test +--context=0 + --smart-case +-u + + + # --bar +--foo +"[..]).unwrap(); + assert!(errs.is_empty()); + let args: Vec = + args.into_iter().map(|s| s.into_string().unwrap()).collect(); + assert_eq!(args, vec![ + "--context=0", "--smart-case", "-u", "--foo", + ]); + } + + // We test that we can handle invalid UTF-8 on Unix-like systems. 
+ #[test] + #[cfg(unix)] + fn error() { + use std::os::unix::ffi::OsStringExt; + + let (args, errs) = parse_reader(&b"\ +quux +foo\xFFbar +baz +"[..]).unwrap(); + assert!(errs.is_empty()); + assert_eq!(args, vec![ + OsString::from("quux"), + OsString::from_vec(b"foo\xFFbar".to_vec()), + OsString::from("baz"), + ]); + } + + // ... but test that invalid UTF-8 fails on Windows. + #[test] + #[cfg(not(unix))] + fn error() { + let (args, errs) = parse_reader(&b"\ +quux +foo\xFFbar +baz +"[..]).unwrap(); + assert_eq!(errs.len(), 1); + assert_eq!(args, vec![ + OsString::from("quux"), + OsString::from("baz"), + ]); + } +} diff --git a/src/main.rs b/src/main.rs index d63735050..b3b192c1a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -39,6 +39,7 @@ macro_rules! errored { mod app; mod args; +mod config; mod decoder; mod decompressor; mod logger; @@ -49,7 +50,7 @@ mod search_stream; mod unescape; mod worker; -pub type Result = result::Result>; +pub type Result = result::Result>; fn main() { reset_sigpipe(); diff --git a/tests/tests.rs b/tests/tests.rs index dc19350c0..ecc840e79 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1711,6 +1711,22 @@ fn compressed_failing_gzip() { assert_eq!(err.contains("not in gzip format"), true); } +sherlock!(feature_196_persistent_config, "sherlock", +|wd: WorkDir, mut cmd: Command| { + // Make sure we get no matches by default. + wd.assert_err(&mut cmd); + + // Now add our config file, and make sure it impacts ripgrep. + wd.create(".ripgreprc", "--ignore-case"); + cmd.env("RIPGREP_CONFIG_PATH", ".ripgreprc"); + let lines: String = wd.stdout(&mut cmd); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + assert_eq!(lines, expected); +}); + #[test] fn feature_740_passthru() { let wd = WorkDir::new("feature_740"); diff --git a/tests/workdir.rs b/tests/workdir.rs index ea5408a40..3c47e9483 100644 --- a/tests/workdir.rs +++ b/tests/workdir.rs @@ -93,6 +93,7 @@ impl WorkDir { /// this working directory. pub fn command(&self) -> process::Command { let mut cmd = process::Command::new(&self.bin()); + cmd.env_remove("RIPGREP_CONFIG_PATH"); cmd.current_dir(&self.dir); cmd }