diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f75882d314..3254e05474 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,7 +53,7 @@ jobs: include: - build: pinned os: ubuntu-latest - rust: 1.72.1 + rust: 1.74.0 - build: stable os: ubuntu-latest rust: stable @@ -94,7 +94,7 @@ jobs: rust: nightly-x86_64-gnu steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install packages (Ubuntu) if: matrix.os == 'ubuntu-latest' @@ -131,6 +131,7 @@ jobs: run: | echo "cargo command is: ${{ env.CARGO }}" echo "target flag is: ${{ env.TARGET_FLAGS }}" + echo "target dir is: ${{ env.TARGET_DIR }}" - name: Build ripgrep and all crates run: ${{ env.CARGO }} build --verbose --workspace ${{ env.TARGET_FLAGS }} @@ -164,26 +165,6 @@ jobs: if: matrix.target != '' run: ${{ env.CARGO }} test --verbose --workspace ${{ env.TARGET_FLAGS }} - - name: Test for existence of build artifacts (Windows) - if: matrix.os == 'windows-2022' - shell: bash - run: | - outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")" - ls "$outdir/_rg.ps1" && file "$outdir/_rg.ps1" - - - name: Test for existence of build artifacts (Unix) - if: matrix.os != 'windows-2022' - shell: bash - run: | - outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")" - # TODO: Check for the man page generation here. For whatever reason, - # it seems to be intermittently failing in CI. No idea why. - # for f in rg.bash rg.fish rg.1; do - for f in rg.bash rg.fish; do - # We could use file -E here, but it isn't supported on macOS. - ls "$outdir/$f" && file "$outdir/$f" - done - - name: Test zsh shell completions (Unix, sans cross) # We could test this when using Cross, but we'd have to execute the # 'rg' binary (done in test-complete) with qemu, which is a pain and @@ -197,11 +178,15 @@ jobs: shell: bash run: ${{ env.CARGO }} test --manifest-path crates/cli/Cargo.toml ${{ env.TARGET_FLAGS }} --lib print_hostname -- --nocapture + - name: Print available short flags + shell: bash + run: ${{ env.CARGO }} test --bin rg ${{ env.TARGET_FLAGS }} flags::defs::tests::available_shorts -- --nocapture + rustfmt: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Rust uses: dtolnay/rust-toolchain@master with: @@ -214,7 +199,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Rust uses: dtolnay/rust-toolchain@master with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b307ce5e19..dc732771fd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,47 +1,39 @@ -# The way this works is the following: -# -# The create-release job runs purely to initialize the GitHub release itself -# and to output upload_url for the following job. -# -# The build-release job runs only once create-release is finished. It gets the -# release upload URL from create-release job outputs, then builds the release -# executables for each supported platform and attaches them as release assets -# to the previously created release. -# -# The key here is that we create the release only once. -# -# Reference: -# https://eugene-babichenko.github.io/blog/2020/05/09/github-actions-cross-platform-auto-releases/ - name: release + +# Only do the release on x.y.z tags. on: push: - # Enable when testing release infrastructure on a branch. - # branches: - # - ag/work tags: - "[0-9]+.[0-9]+.[0-9]+" + +# We need this to be able to create releases. +permissions: + contents: write + jobs: + # The create-release job runs purely to initialize the GitHub release itself, + # and names the release after the `x.y.z` tag that was pushed. It's separate + # from building the release so that we only create the release once. create-release: name: create-release runs-on: ubuntu-latest # env: # Set to force version number, e.g., when no tag exists. - # RG_VERSION: TEST-0.0.0 - outputs: - rg_version: ${{ env.RG_VERSION }} + # VERSION: TEST-0.0.0 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get the release version from the tag - shell: bash - if: env.RG_VERSION == '' + if: env.VERSION == '' + run: echo "VERSION=${{ github.ref_name }}" >> $GITHUB_ENV + - name: Show the version run: | - echo "RG_VERSION=$GITHUB_REF_NAME" >> $GITHUB_ENV - echo "version is: ${{ env.RG_VERSION }}" + echo "version is: $VERSION" - name: Create GitHub release env: - GH_TOKEN: ${{ github.token }} - run: gh release create ${{ env.RG_VERSION }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh release create $VERSION --draft --verify-tag --title $VERSION + outputs: + version: ${{ env.VERSION }} build-release: name: build-release @@ -52,25 +44,50 @@ jobs: # systems. CARGO: cargo # When CARGO is set to CROSS, this is set to `--target matrix.target`. - TARGET_FLAGS: "" + TARGET_FLAGS: # When CARGO is set to CROSS, TARGET_DIR includes matrix.target. TARGET_DIR: ./target + # Bump this as appropriate. We pin to a version to make sure CI + # continues to work as cross releases in the past have broken things + # in subtle ways. + CROSS_VERSION: v0.2.5 # Emit backtraces on panics. RUST_BACKTRACE: 1 # Build static releases with PCRE2. PCRE2_SYS_STATIC: 1 strategy: + fail-fast: false matrix: - build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc] include: - build: linux os: ubuntu-latest rust: nightly target: x86_64-unknown-linux-musl - - build: linux-arm + strip: x86_64-linux-musl-strip + - build: stable-x86 os: ubuntu-latest - rust: nightly - target: arm-unknown-linux-gnueabihf + rust: stable + target: i686-unknown-linux-gnu + strip: x86_64-linux-gnu-strip + qemu: i386 + - build: stable-aarch64 + os: ubuntu-latest + rust: stable + target: aarch64-unknown-linux-gnu + strip: aarch64-linux-gnu-strip + qemu: qemu-aarch64 + - build: stable-powerpc64 + os: ubuntu-latest + rust: stable + target: powerpc64-unknown-linux-gnu + strip: powerpc64-linux-gnu-strip + qemu: qemu-ppc64 + - build: stable-s390x + os: ubuntu-latest + rust: stable + target: s390x-unknown-linux-gnu + strip: s390x-linux-gnu-strip + qemu: qemu-s390x - build: macos os: macos-latest rust: nightly @@ -90,15 +107,17 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install packages (Ubuntu) if: matrix.os == 'ubuntu-latest' + shell: bash run: | ci/ubuntu-install-packages - name: Install packages (macOS) if: matrix.os == 'macos-latest' + shell: bash run: | ci/macos-install-packages @@ -109,64 +128,140 @@ jobs: target: ${{ matrix.target }} - name: Use Cross + if: matrix.os == 'ubuntu-latest' && matrix.target != '' shell: bash run: | - cargo install cross + # In the past, new releases of 'cross' have broken CI. So for now, we + # pin it. We also use their pre-compiled binary releases because cross + # has over 100 dependencies and takes a bit to compile. + dir="$RUNNER_TEMP/cross-download" + mkdir "$dir" + echo "$dir" >> $GITHUB_PATH + cd "$dir" + curl -LO "https://github.com/cross-rs/cross/releases/download/$CROSS_VERSION/cross-x86_64-unknown-linux-musl.tar.gz" + tar xf cross-x86_64-unknown-linux-musl.tar.gz echo "CARGO=cross" >> $GITHUB_ENV + + - name: Set target variables + shell: bash + run: | echo "TARGET_FLAGS=--target ${{ matrix.target }}" >> $GITHUB_ENV echo "TARGET_DIR=./target/${{ matrix.target }}" >> $GITHUB_ENV - name: Show command used for Cargo + shell: bash run: | echo "cargo command is: ${{ env.CARGO }}" echo "target flag is: ${{ env.TARGET_FLAGS }}" echo "target dir is: ${{ env.TARGET_DIR }}" - name: Build release binary - run: ${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }} + shell: bash + run: | + ${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }} + if [ "${{ matrix.os }}" = "windows-latest" ]; then + bin="target/${{ matrix.target }}/release/rg.exe" + else + bin="target/${{ matrix.target }}/release/rg" + fi + echo "BIN=$bin" >> $GITHUB_ENV - - name: Strip release binary (linux, macos and macos-arm) - if: matrix.build == 'linux' || matrix.os == 'macos' - run: strip "target/${{ matrix.target }}/release/rg" + - name: Strip release binary (macos) + if: matrix.os == 'macos' + shell: bash + run: strip "$BIN" - - name: Strip release binary (arm) - if: matrix.build == 'linux-arm' + - name: Strip release binary (cross) + if: env.CARGO == 'cross' + shell: bash run: | docker run --rm -v \ "$PWD/target:/target:Z" \ - rustembedded/cross:arm-unknown-linux-gnueabihf \ - arm-linux-gnueabihf-strip \ - /target/arm-unknown-linux-gnueabihf/release/rg + "rustembedded/cross:${{ matrix.target }}" \ + "${{ matrix.strip }}" \ + "/target/${{ matrix.target }}/release/rg" - - name: Build archive + - name: Determine archive name shell: bash run: | - outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")" - staging="ripgrep-${{ needs.create-release.outputs.rg_version }}-${{ matrix.target }}" - mkdir -p "$staging"/{complete,doc} + version="${{ needs.create-release.outputs.version }}" + echo "ARCHIVE=ripgrep-$version-${{ matrix.target }}" >> $GITHUB_ENV - cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$staging/" - cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$staging/doc/" - cp "$outdir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/" - cp complete/_rg "$staging/complete/" + - name: Creating directory for archive + shell: bash + run: | + mkdir -p "$ARCHIVE"/{complete,doc} + cp "$BIN" "$ARCHIVE"/ + cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$ARCHIVE"/ + cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$ARCHIVE"/doc/ - if [ "${{ matrix.os }}" = "windows-latest" ]; then - cp "target/${{ matrix.target }}/release/rg.exe" "$staging/" - 7z a "$staging.zip" "$staging" - certutil -hashfile "$staging.zip" SHA256 > "$staging.zip.sha256" - echo "ASSET=$staging.zip" >> $GITHUB_ENV - echo "ASSET_SUM=$staging.zip.sha256" >> $GITHUB_ENV - else - # The man page is only generated on Unix systems. ¯\_(ツ)_/¯ - cp "$outdir"/rg.1 "$staging/doc/" - cp "target/${{ matrix.target }}/release/rg" "$staging/" - tar czf "$staging.tar.gz" "$staging" - shasum -a 256 "$staging.tar.gz" > "$staging.tar.gz.sha256" - echo "ASSET=$staging.tar.gz" >> $GITHUB_ENV - echo "ASSET_SUM=$staging.tar.gz.sha256" >> $GITHUB_ENV - fi + - name: Generate man page and completions (no emulation) + if: matrix.qemu == '' + shell: bash + run: | + "$BIN" --version + "$BIN" --generate complete-bash > "$ARCHIVE/complete/rg.bash" + "$BIN" --generate complete-fish > "$ARCHIVE/complete/rg.fish" + "$BIN" --generate complete-powershell > "$ARCHIVE/complete/_rg.ps1" + "$BIN" --generate complete-zsh > "$ARCHIVE/complete/_rg" + "$BIN" --generate man > "$ARCHIVE/doc/rg.1" + + - name: Generate man page and completions (emulation) + if: matrix.qemu != '' + shell: bash + run: | + docker run --rm -v \ + "$PWD/target:/target:Z" \ + "rustembedded/cross:${{ matrix.target }}" \ + "${{ matrix.qemu }}" "/$BIN" --version + docker run --rm -v \ + "$PWD/target:/target:Z" \ + "rustembedded/cross:${{ matrix.target }}" \ + "${{ matrix.qemu }}" "/$BIN" \ + --generate complete-bash > "$ARCHIVE/complete/rg.bash" + docker run --rm -v \ + "$PWD/target:/target:Z" \ + "rustembedded/cross:${{ matrix.target }}" \ + "${{ matrix.qemu }}" "/$BIN" \ + --generate complete-fish > "$ARCHIVE/complete/rg.fish" + docker run --rm -v \ + "$PWD/target:/target:Z" \ + "rustembedded/cross:${{ matrix.target }}" \ + "${{ matrix.qemu }}" "/$BIN" \ + --generate complete-powershell > "$ARCHIVE/complete/_rg.ps1" + docker run --rm -v \ + "$PWD/target:/target:Z" \ + "rustembedded/cross:${{ matrix.target }}" \ + "${{ matrix.qemu }}" "/$BIN" \ + --generate complete-zsh > "$ARCHIVE/complete/_rg" + docker run --rm -v \ + "$PWD/target:/target:Z" \ + "rustembedded/cross:${{ matrix.target }}" \ + "${{ matrix.qemu }}" "/$BIN" \ + --generate man > "$ARCHIVE/doc/rg.1" + + - name: Build archive (Windows) + shell: bash + if: matrix.os == 'windows-latest' + run: | + 7z a "$ARCHIVE.zip" "$ARCHIVE" + certutil -hashfile "$ARCHIVE.zip" SHA256 > "$ARCHIVE.zip.sha256" + echo "ASSET=$ARCHIVE.zip" >> $GITHUB_ENV + echo "ASSET_SUM=$ARCHIVE.zip.sha256" >> $GITHUB_ENV + + - name: Build archive (Unix) + shell: bash + if: matrix.os != 'windows-latest' + run: | + tar czf "$ARCHIVE.tar.gz" "$ARCHIVE" + shasum -a 256 "$ARCHIVE.tar.gz" > "$ARCHIVE.tar.gz.sha256" + echo "ASSET=$ARCHIVE.tar.gz" >> $GITHUB_ENV + echo "ASSET_SUM=$ARCHIVE.tar.gz.sha256" >> $GITHUB_ENV - name: Upload release archive env: - GH_TOKEN: ${{ github.token }} - run: gh release upload ${{ needs.create-release.outputs.rg_version }} ${{ env.ASSET }} ${{ env.ASSET_SUM }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + shell: bash + run: | + version="${{ needs.create-release.outputs.version }}" + gh release upload "$version" ${{ env.ASSET }} ${{ env.ASSET_SUM }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 5de53e23e0..d390ae3a37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ Feature enhancements: Gradle, GraphQL, Markdown, Prolog, Raku, TypeScript, USD, V * [FEATURE #1790](https://github.com/BurntSushi/ripgrep/issues/1790): Add new `--stop-on-nonmatch` flag. +* [FEATURE #1814](https://github.com/BurntSushi/ripgrep/issues/1814): + Flags are now categorized in `-h/--help` output and ripgrep's man page. * [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195): When `extra-verbose` mode is enabled in zsh, show extra file type info. * [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409): @@ -28,14 +30,22 @@ Feature enhancements: Bug fixes: +* [BUG #884](https://github.com/BurntSushi/ripgrep/issues/884): + Don't error when `-v/--invert-match` is used multiple times. * [BUG #1275](https://github.com/BurntSushi/ripgrep/issues/1275): Fix bug with `\b` assertion in the regex engine. +* [BUG #1648](https://github.com/BurntSushi/ripgrep/issues/1648): + Fix bug where sometimes short flags with values, e.g., `-M 900`, would fail. +* [BUG #1701](https://github.com/BurntSushi/ripgrep/issues/1701): + Fix bug where some flags could not be repeated. * [BUG #1757](https://github.com/BurntSushi/ripgrep/issues/1757): Fix bug when searching a sub-directory didn't have ignores applied correctly. * [BUG #1891](https://github.com/BurntSushi/ripgrep/issues/1891): Fix bug when using `-w` with a regex that can match the empty string. * [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911): Disable mmap searching in all non-64-bit environments. +* [BUG #1966](https://github.com/BurntSushi/ripgrep/issues/1966): + Fix bug where ripgrep can panic when printing to stderr. * [BUG #2108](https://github.com/BurntSushi/ripgrep/issues/2108): Improve docs for `-r/--replace` syntax. * [BUG #2198](https://github.com/BurntSushi/ripgrep/issues/2198): diff --git a/Cargo.lock b/Cargo.lock index 3becb925b3..439099b184 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,12 +29,6 @@ version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bstr" version = "1.7.0" @@ -62,18 +56,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "bitflags", - "strsim", - "textwrap", - "unicode-width", -] - [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -291,6 +273,12 @@ dependencies = [ "libc", ] +[[package]] +name = "lexopt" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baff4b617f7df3d896f97fe922b64817f6cd9a756bb81d40f8883f2f66dcb401" + [[package]] name = "libc" version = "0.2.149" @@ -434,15 +422,16 @@ version = "13.0.0" dependencies = [ "anyhow", "bstr", - "clap", "grep", "ignore", "jemallocator", + "lexopt", "log", "serde", "serde_derive", "serde_json", "termcolor", + "textwrap", "walkdir", ] @@ -498,12 +487,6 @@ dependencies = [ "serde", ] -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - [[package]] name = "syn" version = "2.0.38" @@ -517,21 +500,18 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" +checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" dependencies = [ "winapi-util", ] [[package]] name = "textwrap" -version = "0.11.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "unicode-ident" @@ -539,12 +519,6 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-width" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" - [[package]] name = "walkdir" version = "2.4.0" diff --git a/Cargo.toml b/Cargo.toml index 14bb7c8de9..457f1d8331 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,26 +50,18 @@ members = [ [dependencies] anyhow = "1.0.75" -bstr = "1.6.0" +bstr = "1.7.0" grep = { version = "0.2.12", path = "crates/grep" } ignore = { version = "0.4.19", path = "crates/ignore" } +lexopt = "0.3.0" log = "0.4.5" serde_json = "1.0.23" termcolor = "1.1.0" - -[dependencies.clap] -version = "2.33.0" -default-features = false -features = ["suggestions"] +textwrap = { version = "0.16.0", default-features = false } [target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator] version = "0.5.0" -[build-dependencies.clap] -version = "2.33.0" -default-features = false -features = ["suggestions"] - [dev-dependencies] serde = "1.0.77" serde_derive = "1.0.77" diff --git a/RELEASE-CHECKLIST.md b/RELEASE-CHECKLIST.md index 7d56dfee00..46b25c4aa4 100644 --- a/RELEASE-CHECKLIST.md +++ b/RELEASE-CHECKLIST.md @@ -1,11 +1,12 @@ -Release Checklist ------------------ +# Release Checklist + * Ensure local `master` is up to date with respect to `origin/master`. * Run `cargo update` and review dependency updates. Commit updated `Cargo.lock`. * Run `cargo outdated` and review semver incompatible updates. Unless there is a strong motivation otherwise, review and update every dependency. Also run `--aggressive`, but don't update to crates that are still in beta. +* Update date in `crates/core/flags/doc/template.rg.1`. * Review changes for every crate in `crates` since the last ripgrep release. If the set of changes is non-empty, issue a new release for that crate. Check crates in the following order. After updating a crate, ensure minimal @@ -52,5 +53,6 @@ Release Checklist Unreleased changes. Release notes have not yet been written. ``` -Note that -[`cargo-up` can be found in BurntSushi's dotfiles](https://github.com/BurntSushi/dotfiles/blob/master/bin/cargo-up). +Note that [`cargo-up` can be found in BurntSushi's dotfiles][dotfiles]. + +[dotfiles]: https://github.com/BurntSushi/dotfiles/blob/master/bin/cargo-up diff --git a/build.rs b/build.rs index eae7f8566d..db9584bff5 100644 --- a/build.rs +++ b/build.rs @@ -1,70 +1,24 @@ -use std::{ - env, - fs::{self, File}, - io::{self, Read, Write}, - path::Path, - process, -}; - -use clap::Shell; - -use crate::app::{RGArg, RGArgKind}; - -#[allow(dead_code)] -#[path = "crates/core/app.rs"] -mod app; - fn main() { - // OUT_DIR is set by Cargo and it's where any additional build artifacts - // are written. - let Some(outdir) = env::var_os("OUT_DIR") else { - eprintln!( - "OUT_DIR environment variable not defined. \ - Please file a bug: \ - https://github.com/BurntSushi/ripgrep/issues/new" - ); - process::exit(1); - }; - fs::create_dir_all(&outdir).unwrap(); - - let stamp_path = Path::new(&outdir).join("ripgrep-stamp"); - if let Err(err) = File::create(&stamp_path) { - panic!("failed to write {}: {}", stamp_path.display(), err); - } - if let Err(err) = generate_man_page(&outdir) { - eprintln!("failed to generate man page: {}", err); - } - - // Use clap to build completion files. - let mut app = app::app(); - app.gen_completions("rg", Shell::Bash, &outdir); - app.gen_completions("rg", Shell::Fish, &outdir); - app.gen_completions("rg", Shell::PowerShell, &outdir); - // Note that we do not use clap's support for zsh. Instead, zsh completions - // are manually maintained in `complete/_rg`. - - // Make the current git hash available to the build. - if let Some(rev) = git_revision_hash() { - println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev); - } - // Embed a Windows manifest and set some linker options. The main reason - // for this is to enable long path support on Windows. This still, I - // believe, requires enabling long path support in the registry. But if - // that's enabled, then this will let ripgrep use C:\... style paths that - // are longer than 260 characters. + set_git_revision_hash(); set_windows_exe_options(); } +/// Embed a Windows manifest and set some linker options. +/// +/// The main reason for this is to enable long path support on Windows. This +/// still, I believe, requires enabling long path support in the registry. But +/// if that's enabled, then this will let ripgrep use C:\... style paths that +/// are longer than 260 characters. fn set_windows_exe_options() { static MANIFEST: &str = "pkg/windows/Manifest.xml"; - let Ok(target_os) = env::var("CARGO_CFG_TARGET_OS") else { return }; - let Ok(target_env) = env::var("CARGO_CFG_TARGET_ENV") else { return }; + let Ok(target_os) = std::env::var("CARGO_CFG_TARGET_OS") else { return }; + let Ok(target_env) = std::env::var("CARGO_CFG_TARGET_ENV") else { return }; if !(target_os == "windows" && target_env == "msvc") { return; } - let Ok(mut manifest) = env::current_dir() else { return }; + let Ok(mut manifest) = std::env::current_dir() else { return }; manifest.push(MANIFEST); let Some(manifest) = manifest.to_str() else { return }; @@ -77,189 +31,16 @@ fn set_windows_exe_options() { println!("cargo:rustc-link-arg-bin=rg=/WX"); } -fn git_revision_hash() -> Option { - let output = process::Command::new("git") - .args(&["rev-parse", "--short=10", "HEAD"]) - .output() - .ok()?; - let v = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if v.is_empty() { - None - } else { - Some(v) - } -} - -fn generate_man_page>(outdir: P) -> io::Result<()> { - // If asciidoctor isn't installed, fallback to asciidoc. - if let Err(err) = process::Command::new("asciidoctor").output() { - eprintln!( - "Could not run 'asciidoctor' binary, falling back to 'a2x'." - ); - eprintln!("Error from running 'asciidoctor': {}", err); - return legacy_generate_man_page::

(outdir); - } - // 1. Read asciidoctor template. - // 2. Interpolate template with auto-generated docs. - // 3. Save interpolation to disk. - // 4. Use asciidoctor to convert to man page. - let outdir = outdir.as_ref(); - let cwd = env::current_dir()?; - let tpl_path = cwd.join("doc").join("rg.1.txt.tpl"); - let txt_path = outdir.join("rg.1.txt"); - - let mut tpl = String::new(); - File::open(&tpl_path)?.read_to_string(&mut tpl)?; - let options = - formatted_options()?.replace("{", "{").replace("}", "}"); - tpl = tpl.replace("{OPTIONS}", &options); - - let githash = git_revision_hash(); - let githash = githash.as_ref().map(|x| &**x); - tpl = tpl.replace("{VERSION}", &app::long_version(githash, false)); - - File::create(&txt_path)?.write_all(tpl.as_bytes())?; - let result = process::Command::new("asciidoctor") - .arg("--doctype") - .arg("manpage") - .arg("--backend") - .arg("manpage") - .arg(&txt_path) - .spawn()? - .wait()?; - if !result.success() { - let msg = - format!("'asciidoctor' failed with exit code {:?}", result.code()); - return Err(ioerr(msg)); - } - Ok(()) -} - -fn legacy_generate_man_page>(outdir: P) -> io::Result<()> { - // If asciidoc isn't installed, then don't do anything. - if let Err(err) = process::Command::new("a2x").output() { - eprintln!("Could not run 'a2x' binary, skipping man page generation."); - eprintln!("Error from running 'a2x': {}", err); - return Ok(()); - } - // 1. Read asciidoc template. - // 2. Interpolate template with auto-generated docs. - // 3. Save interpolation to disk. - // 4. Use a2x (part of asciidoc) to convert to man page. - let outdir = outdir.as_ref(); - let cwd = env::current_dir()?; - let tpl_path = cwd.join("doc").join("rg.1.txt.tpl"); - let txt_path = outdir.join("rg.1.txt"); +/// Make the current git hash available to the build as the environment +/// variable `RIPGREP_BUILD_GIT_HASH`. +fn set_git_revision_hash() { + use std::process::Command; - let mut tpl = String::new(); - File::open(&tpl_path)?.read_to_string(&mut tpl)?; - tpl = tpl.replace("{OPTIONS}", &formatted_options()?); - - let githash = git_revision_hash(); - let githash = githash.as_ref().map(|x| &**x); - tpl = tpl.replace("{VERSION}", &app::long_version(githash, false)); - - File::create(&txt_path)?.write_all(tpl.as_bytes())?; - let result = process::Command::new("a2x") - .arg("--no-xmllint") - .arg("--doctype") - .arg("manpage") - .arg("--format") - .arg("manpage") - .arg(&txt_path) - .spawn()? - .wait()?; - if !result.success() { - let msg = format!("'a2x' failed with exit code {:?}", result.code()); - return Err(ioerr(msg)); - } - Ok(()) -} - -fn formatted_options() -> io::Result { - let mut args = app::all_args_and_flags(); - args.sort_by(|x1, x2| x1.name.cmp(&x2.name)); - - let mut formatted = vec![]; - for arg in args { - if arg.hidden { - continue; - } - // ripgrep only has two positional arguments, and probably will only - // ever have two positional arguments, so we just hardcode them into - // the template. - if let app::RGArgKind::Positional { .. } = arg.kind { - continue; - } - formatted.push(formatted_arg(&arg)?); - } - Ok(formatted.join("\n\n")) -} - -fn formatted_arg(arg: &RGArg) -> io::Result { - match arg.kind { - RGArgKind::Positional { .. } => { - panic!("unexpected positional argument") - } - RGArgKind::Switch { long, short, multiple } => { - let mut out = vec![]; - - let mut header = format!("--{}", long); - if let Some(short) = short { - header = format!("-{}, {}", short, header); - } - if multiple { - header = format!("*{}* ...::", header); - } else { - header = format!("*{}*::", header); - } - writeln!(out, "{}", header)?; - writeln!(out, "{}", formatted_doc_txt(arg)?)?; - - Ok(String::from_utf8(out).unwrap()) - } - RGArgKind::Flag { long, short, value_name, multiple, .. } => { - let mut out = vec![]; - - let mut header = format!("--{}", long); - if let Some(short) = short { - header = format!("-{}, {}", short, header); - } - if multiple { - header = format!("*{}* _{}_ ...::", header, value_name); - } else { - header = format!("*{}* _{}_::", header, value_name); - } - writeln!(out, "{}", header)?; - writeln!(out, "{}", formatted_doc_txt(arg)?)?; - - Ok(String::from_utf8(out).unwrap()) - } - } -} - -fn formatted_doc_txt(arg: &RGArg) -> io::Result { - let paragraphs: Vec = arg - .doc_long - .replace("{", "{") - .replace("}", r"}") - // Hack to render ** literally in man page correctly. We can't put - // these crazy +++ in the help text directly, since that shows - // literally in --help output. - .replace("*-g 'foo/**'*", "*-g +++'foo/**'+++*") - .split("\n\n") - .map(|s| s.to_string()) - .collect(); - if paragraphs.is_empty() { - return Err(ioerr(format!("missing docs for --{}", arg.name))); - } - let first = format!(" {}", paragraphs[0].replace("\n", "\n ")); - if paragraphs.len() == 1 { - return Ok(first); + let args = &["rev-parse", "--short=10", "HEAD"]; + let Ok(output) = Command::new("git").args(args).output() else { return }; + let rev = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if rev.is_empty() { + return; } - Ok(format!("{}\n+\n{}", first, paragraphs[1..].join("\n+\n"))) -} - -fn ioerr(msg: String) -> io::Error { - io::Error::new(io::ErrorKind::Other, msg) + println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev); } diff --git a/ci/build-deb b/ci/build-and-publish-deb similarity index 55% rename from ci/build-deb rename to ci/build-and-publish-deb index 18d72b3c26..b5f7263d19 100755 --- a/ci/build-deb +++ b/ci/build-and-publish-deb @@ -5,7 +5,7 @@ D="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" # This script builds a binary dpkg for Debian based distros. It does not # currently run in CI, and is instead run manually and the resulting dpkg is -# uploaded to GitHub via the web UI. +# uploaded to GitHub at the end of this script. # # Note that this requires 'cargo deb', which can be installed with # 'cargo install cargo-deb'. @@ -17,9 +17,15 @@ if ! command -V cargo-deb > /dev/null 2>&1; then exit 1 fi -if ! command -V asciidoctor > /dev/null 2>&1; then - echo "asciidoctor command missing" >&2 - exit 1 +version="$1" +if [ -z "$version" ]; then + echo "missing version" >&2 + echo "Usage: "$(basename "$0")" " >&2 + exit 1 +fi +if ! grep -q "version = \"$version\"" Cargo.toml; then + echo "version does not match Cargo.toml" >&2 + exit 1 fi # 'cargo deb' does not seem to provide a way to specify an asset that is @@ -30,13 +36,20 @@ fi cargo build DEPLOY_DIR=deployment/deb -OUT_DIR="$("$D"/cargo-out-dir target/debug/)" mkdir -p "$DEPLOY_DIR" -# Copy man page and shell completions. -cp "$OUT_DIR"/{rg.1,rg.bash,rg.fish} "$DEPLOY_DIR/" -cp complete/_rg "$DEPLOY_DIR/" +# Generate man page and shell completions. `cargo deb` knows how to find these +# files via the manifest configuration in `Cargo.toml`. +"target/debug/rg" --generate complete-bash > "$DEPLOY_DIR/rg.bash" +"target/debug/rg" --generate complete-fish > "$DEPLOY_DIR/rg.fish" +"target/debug/rg" --generate complete-zsh > "$DEPLOY_DIR/_rg" +"target/debug/rg" --generate man > "$DEPLOY_DIR/rg.1" # Since we're distributing the dpkg, we don't know whether the user will have # PCRE2 installed, so just do a static build. PCRE2_SYS_STATIC=1 cargo deb --target x86_64-unknown-linux-musl +target="target/x86_64-unknown-linux-musl/debian" +deb="$target/ripgrep_$version-1_amd64.deb" +debsum="$deb.sha256" +shasum -a 256 "$deb" > "$debsum" +gh release upload "$version" "$deb" "$debsum" diff --git a/ci/cargo-out-dir b/ci/cargo-out-dir deleted file mode 100755 index 2b08d616a9..0000000000 --- a/ci/cargo-out-dir +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# Finds Cargo's `OUT_DIR` directory from the most recent build. -# -# This requires one parameter corresponding to the target directory -# to search for the build output. - -if [ $# != 1 ]; then - echo "Usage: $(basename "$0") " >&2 - exit 2 -fi - -# This works by finding the most recent stamp file, which is produced by -# every ripgrep build. -target_dir="$1" -find "$target_dir" -name ripgrep-stamp -print0 \ - | xargs -0 ls -t \ - | head -n1 \ - | xargs dirname diff --git a/ci/test-complete b/ci/test-complete index 3143cc4907..3793affcca 100755 --- a/ci/test-complete +++ b/ci/test-complete @@ -19,7 +19,7 @@ get_comp_args() { main() { local diff local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rg" - local _rg="${0:a:h}/../complete/_rg" + local _rg="${0:a:h}/../crates/core/flags/complete/rg.zsh" local -a help_args comp_args [[ -e $rg ]] || rg=${rg/%\/release\/rg/\/debug\/rg} diff --git a/crates/cli/src/wtr.rs b/crates/cli/src/wtr.rs index f6c7306bb7..44eb54d591 100644 --- a/crates/cli/src/wtr.rs +++ b/crates/cli/src/wtr.rs @@ -3,6 +3,7 @@ use std::io::{self, IsTerminal}; use termcolor::{self, HyperlinkSpec}; /// A writer that supports coloring with either line or block buffering. +#[derive(Debug)] pub struct StandardStream(StandardStreamKind); /// Returns a possibly buffered writer to stdout for the given color choice. @@ -57,6 +58,7 @@ pub fn stdout_buffered_block( StandardStream(StandardStreamKind::BlockBuffered(out)) } +#[derive(Debug)] enum StandardStreamKind { LineBuffered(termcolor::StandardStream), BlockBuffered(termcolor::BufferedStandardStream), diff --git a/crates/core/app.rs b/crates/core/app.rs deleted file mode 100644 index 3e0247f44f..0000000000 --- a/crates/core/app.rs +++ /dev/null @@ -1,3251 +0,0 @@ -// This module defines the set of command line arguments that ripgrep supports, -// including some light validation. -// -// This module is purposely written in a bare-bones way, since it is included -// in ripgrep's build.rs file as a way to generate a man page and completion -// files for common shells. -// -// The only other place that ripgrep deals with clap is in src/args.rs, which -// is where we read clap's configuration from the end user's arguments and turn -// it into a ripgrep-specific configuration type that is not coupled with clap. - -use clap::{self, crate_authors, crate_version, App, AppSettings}; - -const ABOUT: &str = " -ripgrep (rg) recursively searches the current directory for a regex pattern. -By default, ripgrep will respect gitignore rules and automatically skip hidden -files/directories and binary files. - -Use -h for short descriptions and --help for more details. - -Project home page: https://github.com/BurntSushi/ripgrep -"; - -const USAGE: &str = " - rg [OPTIONS] PATTERN [PATH ...] - rg [OPTIONS] -e PATTERN ... [PATH ...] - rg [OPTIONS] -f PATTERNFILE ... [PATH ...] - rg [OPTIONS] --files [PATH ...] - rg [OPTIONS] --type-list - command | rg [OPTIONS] PATTERN - rg [OPTIONS] --help - rg [OPTIONS] --version"; - -const TEMPLATE: &str = "\ -{bin} {version} -{author} -{about} - -USAGE:{usage} - -ARGS: -{positionals} - -OPTIONS: -{unified}"; - -/// Build a clap application parameterized by usage strings. -pub fn app() -> App<'static, 'static> { - use std::sync::OnceLock; - - // We need to specify our version in a static because we've painted clap - // into a corner. We've told it that every string we give it will be - // 'static, but we need to build the version string dynamically. We can - // fake the 'static lifetime with lazy_static. - static LONG_VERSION: OnceLock = OnceLock::new(); - let long_version = LONG_VERSION.get_or_init(|| long_version(None, true)); - - let mut app = App::new("ripgrep") - .author(crate_authors!()) - .version(crate_version!()) - .long_version(long_version.as_str()) - .about(ABOUT) - .max_term_width(100) - .setting(AppSettings::UnifiedHelpMessage) - .setting(AppSettings::AllArgsOverrideSelf) - .usage(USAGE) - .template(TEMPLATE) - .help_message("Prints help information. Use --help for more details."); - for arg in all_args_and_flags() { - app = app.arg(arg.claparg); - } - app -} - -/// Return the "long" format of ripgrep's version string. -/// -/// If a revision hash is given, then it is used. If one isn't given, then -/// the RIPGREP_BUILD_GIT_HASH env var is inspected for it. If that isn't set, -/// then a revision hash is not included in the version string returned. -/// -/// If `cpu` is true, then the version string will include the compiled and -/// runtime CPU features. -pub fn long_version(revision_hash: Option<&str>, cpu: bool) -> String { - // Do we have a git hash? - // (Yes, if ripgrep was built on a machine with `git` installed.) - let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) { - None => String::new(), - Some(githash) => format!(" (rev {})", githash), - }; - if !cpu { - format!("{}{}", crate_version!(), hash,) - } else { - let runtime = runtime_cpu_features(); - if runtime.is_empty() { - format!( - "{}{}\n{} (compiled)", - crate_version!(), - hash, - compile_cpu_features().join(" ") - ) - } else { - format!( - "{}{}\n{} (compiled)\n{} (runtime)", - crate_version!(), - hash, - compile_cpu_features().join(" "), - runtime.join(" ") - ) - } - } -} - -/// Returns the relevant CPU features enabled at compile time. -fn compile_cpu_features() -> Vec<&'static str> { - let mut features = vec![]; - if cfg!(feature = "simd-accel") { - features.push("+SIMD"); - } else { - features.push("-SIMD"); - } - if cfg!(feature = "avx-accel") { - features.push("+AVX"); - } else { - features.push("-AVX"); - } - features -} - -/// Returns the relevant CPU features enabled at runtime. -#[cfg(target_arch = "x86_64")] -fn runtime_cpu_features() -> Vec<&'static str> { - // This is kind of a dirty violation of abstraction, since it assumes - // knowledge about what specific SIMD features are being used. - - let mut features = vec![]; - if is_x86_feature_detected!("ssse3") { - features.push("+SIMD"); - } else { - features.push("-SIMD"); - } - if is_x86_feature_detected!("avx2") { - features.push("+AVX"); - } else { - features.push("-AVX"); - } - features -} - -/// Returns the relevant CPU features enabled at runtime. -#[cfg(not(target_arch = "x86_64"))] -fn runtime_cpu_features() -> Vec<&'static str> { - vec![] -} - -/// Arg is a light alias for a clap::Arg that is specialized to compile time -/// string literals. -type Arg = clap::Arg<'static, 'static>; - -/// RGArg is a light wrapper around a clap::Arg and also contains some metadata -/// about the underlying Arg so that it can be inspected for other purposes -/// (e.g., hopefully generating a man page). -/// -/// Note that this type is purposely overly constrained to ripgrep's particular -/// use of clap. -#[allow(dead_code)] -#[derive(Clone)] -pub struct RGArg { - /// The underlying clap argument. - claparg: Arg, - /// The name of this argument. This is always present and is the name - /// used in the code to find the value of an argument at runtime. - pub name: &'static str, - /// A short documentation string describing this argument. This string - /// should fit on a single line and be a complete sentence. - /// - /// This is shown in the `-h` output. - pub doc_short: &'static str, - /// A longer documentation string describing this argument. This usually - /// starts with the contents of `doc_short`. This is also usually many - /// lines, potentially paragraphs, and may contain examples and additional - /// prose. - /// - /// This is shown in the `--help` output. - pub doc_long: &'static str, - /// Whether this flag is hidden or not. - /// - /// This is typically used for uncommon flags that only serve to override - /// other flags. For example, --no-ignore is a prominent flag that disables - /// ripgrep's gitignore functionality, but --ignore re-enables it. Since - /// gitignore support is enabled by default, use of the --ignore flag is - /// somewhat niche and relegated to special cases when users make use of - /// configuration files to set defaults. - /// - /// Generally, these flags should be documented in the documentation for - /// the flag they override. - pub hidden: bool, - /// The type of this argument. - pub kind: RGArgKind, -} - -/// The kind of a ripgrep argument. -/// -/// This can be one of three possibilities: a positional argument, a boolean -/// switch flag or a flag that accepts exactly one argument. Each variant -/// stores argument type specific data. -/// -/// Note that clap supports more types of arguments than this, but we don't -/// (and probably shouldn't) use them in ripgrep. -/// -/// Finally, note that we don't capture *all* state about an argument in this -/// type. Some state is only known to clap. There isn't any particular reason -/// why; the state we do capture is motivated by use cases (like generating -/// documentation). -#[derive(Clone)] -pub enum RGArgKind { - /// A positional argument. - Positional { - /// The name of the value used in the `-h/--help` output. By - /// convention, this is an all-uppercase string. e.g., `PATH` or - /// `PATTERN`. - value_name: &'static str, - /// Whether an argument can be repeated multiple times or not. - /// - /// The only argument this applies to is PATH, where an end user can - /// specify multiple paths for ripgrep to search. - /// - /// If this is disabled, then an argument can only be provided once. - /// For example, PATTERN is one such argument. (Note that the - /// -e/--regexp flag is distinct from the positional PATTERN argument, - /// and it can be provided multiple times.) - multiple: bool, - }, - /// A boolean switch. - Switch { - /// The long name of a flag. This is always non-empty. - long: &'static str, - /// The short name of a flag. This is empty if a flag only has a long - /// name. - short: Option<&'static str>, - /// Whether this switch can be provided multiple times where meaning - /// is attached to the number of times this flag is given. - /// - /// Note that every switch can be provided multiple times. This - /// particular state indicates whether all instances of a switch are - /// relevant or not. - /// - /// For example, the -u/--unrestricted flag can be provided multiple - /// times where each repeated use of it indicates more relaxing of - /// ripgrep's filtering. Conversely, the -i/--ignore-case flag can - /// also be provided multiple times, but it is simply considered either - /// present or not. In these cases, -u/--unrestricted has `multiple` - /// set to `true` while -i/--ignore-case has `multiple` set to `false`. - multiple: bool, - }, - /// A flag the accepts a single value. - Flag { - /// The long name of a flag. This is always non-empty. - long: &'static str, - /// The short name of a flag. This is empty if a flag only has a long - /// name. - short: Option<&'static str>, - /// The name of the value used in the `-h/--help` output. By - /// convention, this is an all-uppercase string. e.g., `PATH` or - /// `PATTERN`. - value_name: &'static str, - /// Whether this flag can be provided multiple times with multiple - /// distinct values. - /// - /// Note that every flag can be provided multiple times. This - /// particular state indicates whether all instances of a flag are - /// relevant or not. - /// - /// For example, the -g/--glob flag can be provided multiple times and - /// all of its values should be interpreted by ripgrep. Conversely, - /// while the -C/--context flag can also be provided multiple times, - /// only its last instance is used while all previous instances are - /// ignored. In these cases, -g/--glob has `multiple` set to `true` - /// while -C/--context has `multiple` set to `false`. - multiple: bool, - /// A set of possible values for this flag. If an end user provides - /// any value other than what's in this set, then clap will report an - /// error. - possible_values: Vec<&'static str>, - }, -} - -impl RGArg { - /// Create a positional argument. - /// - /// The `long_name` parameter is the name of the argument, e.g., `pattern`. - /// The `value_name` parameter is a name that describes the type of - /// argument this flag accepts. It should be in uppercase, e.g., PATH or - /// PATTERN. - fn positional(name: &'static str, value_name: &'static str) -> RGArg { - RGArg { - claparg: Arg::with_name(name).value_name(value_name), - name, - doc_short: "", - doc_long: "", - hidden: false, - kind: RGArgKind::Positional { value_name, multiple: false }, - } - } - - /// Create a boolean switch. - /// - /// The `long_name` parameter is the name of the flag, e.g., `--long-name`. - /// - /// All switches may be repeated an arbitrary number of times. If a switch - /// is truly boolean, that consumers of clap's configuration should only - /// check whether the flag is present or not. Otherwise, consumers may - /// inspect the number of times the switch is used. - fn switch(long_name: &'static str) -> RGArg { - let claparg = Arg::with_name(long_name).long(long_name); - RGArg { - claparg, - name: long_name, - doc_short: "", - doc_long: "", - hidden: false, - kind: RGArgKind::Switch { - long: long_name, - short: None, - multiple: false, - }, - } - } - - /// Create a flag. A flag always accepts exactly one argument. - /// - /// The `long_name` parameter is the name of the flag, e.g., `--long-name`. - /// The `value_name` parameter is a name that describes the type of - /// argument this flag accepts. It should be in uppercase, e.g., PATH or - /// PATTERN. - /// - /// All flags may be repeated an arbitrary number of times. If a flag has - /// only one logical value, that consumers of clap's configuration should - /// only use the last value. - fn flag(long_name: &'static str, value_name: &'static str) -> RGArg { - let claparg = Arg::with_name(long_name) - .long(long_name) - .value_name(value_name) - .takes_value(true) - .number_of_values(1); - RGArg { - claparg, - name: long_name, - doc_short: "", - doc_long: "", - hidden: false, - kind: RGArgKind::Flag { - long: long_name, - short: None, - value_name, - multiple: false, - possible_values: vec![], - }, - } - } - - /// Set the short flag name. - /// - /// This panics if this arg isn't a switch or a flag. - fn short(mut self, name: &'static str) -> RGArg { - match self.kind { - RGArgKind::Positional { .. } => panic!("expected switch or flag"), - RGArgKind::Switch { ref mut short, .. } => { - *short = Some(name); - } - RGArgKind::Flag { ref mut short, .. } => { - *short = Some(name); - } - } - self.claparg = self.claparg.short(name); - self - } - - /// Set the "short" help text. - /// - /// This should be a single line. It is shown in the `-h` output. - fn help(mut self, text: &'static str) -> RGArg { - self.doc_short = text; - self.claparg = self.claparg.help(text); - self - } - - /// Set the "long" help text. - /// - /// This should be at least a single line, usually longer. It is shown in - /// the `--help` output. - fn long_help(mut self, text: &'static str) -> RGArg { - self.doc_long = text; - self.claparg = self.claparg.long_help(text); - self - } - - /// Enable this argument to accept multiple values. - /// - /// Note that while switches and flags can always be repeated an arbitrary - /// number of times, this particular method enables the flag to be - /// logically repeated where each occurrence of the flag may have - /// significance. That is, when this is disabled, then a switch is either - /// present or not and a flag has exactly one value (the last one given). - /// When this is enabled, then a switch has a count corresponding to the - /// number of times it is used and a flag's value is a list of all values - /// given. - /// - /// For the most part, this distinction is resolved by consumers of clap's - /// configuration. - fn multiple(mut self) -> RGArg { - // Why not put `multiple` on RGArg proper? Because it's useful to - // document it distinct for each different kind. See RGArgKind docs. - match self.kind { - RGArgKind::Positional { ref mut multiple, .. } => { - *multiple = true; - } - RGArgKind::Switch { ref mut multiple, .. } => { - *multiple = true; - } - RGArgKind::Flag { ref mut multiple, .. } => { - *multiple = true; - } - } - self.claparg = self.claparg.multiple(true); - self - } - - /// Hide this flag from all documentation. - fn hidden(mut self) -> RGArg { - self.hidden = true; - self.claparg = self.claparg.hidden(true); - self - } - - /// Set the possible values for this argument. If this argument is not - /// a flag, then this panics. - /// - /// If the end user provides any value other than what is given here, then - /// clap will report an error to the user. - /// - /// Note that this will suppress clap's automatic output of possible values - /// when using -h/--help, so users of this method should provide - /// appropriate documentation for the choices in the "long" help text. - fn possible_values(mut self, values: &[&'static str]) -> RGArg { - match self.kind { - RGArgKind::Positional { .. } => panic!("expected flag"), - RGArgKind::Switch { .. } => panic!("expected flag"), - RGArgKind::Flag { ref mut possible_values, .. } => { - *possible_values = values.to_vec(); - self.claparg = self - .claparg - .possible_values(values) - .hide_possible_values(true); - } - } - self - } - - /// Add an alias to this argument. - /// - /// Aliases are not show in the output of -h/--help. - fn alias(mut self, name: &'static str) -> RGArg { - self.claparg = self.claparg.alias(name); - self - } - - /// Permit this flag to have values that begin with a hyphen. - /// - /// This panics if this arg is not a flag. - fn allow_leading_hyphen(mut self) -> RGArg { - match self.kind { - RGArgKind::Positional { .. } => panic!("expected flag"), - RGArgKind::Switch { .. } => panic!("expected flag"), - RGArgKind::Flag { .. } => { - self.claparg = self.claparg.allow_hyphen_values(true); - } - } - self - } - - /// Sets this argument to a required argument, unless one of the given - /// arguments is provided. - fn required_unless(mut self, names: &[&'static str]) -> RGArg { - self.claparg = self.claparg.required_unless_one(names); - self - } - - /// Sets conflicting arguments. That is, if this argument is used whenever - /// any of the other arguments given here are used, then clap will report - /// an error. - fn conflicts(mut self, names: &[&'static str]) -> RGArg { - self.claparg = self.claparg.conflicts_with_all(names); - self - } - - /// Sets an overriding argument. That is, if this argument and the given - /// argument are both provided by an end user, then the "last" one will - /// win. ripgrep will behave as if any previous instantiations did not - /// happen. - fn overrides(mut self, name: &'static str) -> RGArg { - self.claparg = self.claparg.overrides_with(name); - self - } - - /// Sets the default value of this argument when not specified at - /// runtime. - fn default_value(mut self, value: &'static str) -> RGArg { - self.claparg = self.claparg.default_value(value); - self - } - - /// Sets the default value of this argument if and only if the argument - /// given is present. - fn default_value_if( - mut self, - value: &'static str, - arg_name: &'static str, - ) -> RGArg { - self.claparg = self.claparg.default_value_if(arg_name, None, value); - self - } - - /// Indicate that any value given to this argument should be a number. If - /// it's not a number, then clap will report an error to the end user. - fn number(mut self) -> RGArg { - self.claparg = self.claparg.validator(|val| { - val.parse::().map(|_| ()).map_err(|err| err.to_string()) - }); - self - } -} - -// We add an extra space to long descriptions so that a blank line is inserted -// between flag descriptions in --help output. -macro_rules! long { - ($lit:expr) => { - concat!($lit, " ") - }; -} - -/// Generate a sequence of all positional and flag arguments. -pub fn all_args_and_flags() -> Vec { - let mut args = vec![]; - // The positional arguments must be defined first and in order. - arg_pattern(&mut args); - arg_path(&mut args); - // Flags can be defined in any order, but we do it alphabetically. Note - // that each function may define multiple flags. For example, - // `flag_encoding` defines `--encoding` and `--no-encoding`. Most `--no` - // flags are hidden and merely mentioned in the docs of the corresponding - // "positive" flag. - flag_after_context(&mut args); - flag_auto_hybrid_regex(&mut args); - flag_before_context(&mut args); - flag_binary(&mut args); - flag_block_buffered(&mut args); - flag_byte_offset(&mut args); - flag_case_sensitive(&mut args); - flag_color(&mut args); - flag_colors(&mut args); - flag_column(&mut args); - flag_context(&mut args); - flag_context_separator(&mut args); - flag_count(&mut args); - flag_count_matches(&mut args); - flag_crlf(&mut args); - flag_debug(&mut args); - flag_dfa_size_limit(&mut args); - flag_encoding(&mut args); - flag_engine(&mut args); - flag_field_context_separator(&mut args); - flag_field_match_separator(&mut args); - flag_file(&mut args); - flag_files(&mut args); - flag_files_with_matches(&mut args); - flag_files_without_match(&mut args); - flag_fixed_strings(&mut args); - flag_follow(&mut args); - flag_glob(&mut args); - flag_glob_case_insensitive(&mut args); - flag_heading(&mut args); - flag_hidden(&mut args); - flag_hostname_bin(&mut args); - flag_hyperlink_format(&mut args); - flag_iglob(&mut args); - flag_ignore_case(&mut args); - flag_ignore_file(&mut args); - flag_ignore_file_case_insensitive(&mut args); - flag_include_zero(&mut args); - flag_invert_match(&mut args); - flag_json(&mut args); - flag_line_buffered(&mut args); - flag_line_number(&mut args); - flag_line_regexp(&mut args); - flag_max_columns(&mut args); - flag_max_columns_preview(&mut args); - flag_max_count(&mut args); - flag_max_depth(&mut args); - flag_max_filesize(&mut args); - flag_mmap(&mut args); - flag_multiline(&mut args); - flag_multiline_dotall(&mut args); - flag_no_config(&mut args); - flag_no_ignore(&mut args); - flag_no_ignore_dot(&mut args); - flag_no_ignore_exclude(&mut args); - flag_no_ignore_files(&mut args); - flag_no_ignore_global(&mut args); - flag_no_ignore_messages(&mut args); - flag_no_ignore_parent(&mut args); - flag_no_ignore_vcs(&mut args); - flag_no_messages(&mut args); - flag_no_pcre2_unicode(&mut args); - flag_no_require_git(&mut args); - flag_no_unicode(&mut args); - flag_null(&mut args); - flag_null_data(&mut args); - flag_one_file_system(&mut args); - flag_only_matching(&mut args); - flag_path_separator(&mut args); - flag_passthru(&mut args); - flag_pcre2(&mut args); - flag_pcre2_version(&mut args); - flag_pre(&mut args); - flag_pre_glob(&mut args); - flag_pretty(&mut args); - flag_quiet(&mut args); - flag_regex_size_limit(&mut args); - flag_regexp(&mut args); - flag_replace(&mut args); - flag_search_zip(&mut args); - flag_smart_case(&mut args); - flag_sort_files(&mut args); - flag_sort(&mut args); - flag_sortr(&mut args); - flag_stats(&mut args); - flag_stop_on_nonmatch(&mut args); - flag_text(&mut args); - flag_threads(&mut args); - flag_trim(&mut args); - flag_type(&mut args); - flag_type_add(&mut args); - flag_type_clear(&mut args); - flag_type_list(&mut args); - flag_type_not(&mut args); - flag_unrestricted(&mut args); - flag_vimgrep(&mut args); - flag_with_filename(&mut args); - flag_word_regexp(&mut args); - args -} - -fn arg_pattern(args: &mut Vec) { - const SHORT: &str = "A regular expression used for searching."; - const LONG: &str = long!( - "\ -A regular expression used for searching. To match a pattern beginning with a -dash, use the -e/--regexp flag. - -For example, to search for the literal '-foo', you can use this flag: - - rg -e -foo - -You can also use the special '--' delimiter to indicate that no more flags -will be provided. Namely, the following is equivalent to the above: - - rg -- -foo -" - ); - let arg = RGArg::positional("pattern", "PATTERN") - .help(SHORT) - .long_help(LONG) - .required_unless(&[ - "file", - "files", - "regexp", - "type-list", - "pcre2-version", - ]); - args.push(arg); -} - -fn arg_path(args: &mut Vec) { - const SHORT: &str = "A file or directory to search."; - const LONG: &str = long!( - "\ -A file or directory to search. Directories are searched recursively. File \ -paths specified on the command line override glob and ignore rules. \ -" - ); - let arg = RGArg::positional("path", "PATH") - .help(SHORT) - .long_help(LONG) - .multiple(); - args.push(arg); -} - -fn flag_after_context(args: &mut Vec) { - const SHORT: &str = "Show NUM lines after each match."; - const LONG: &str = long!( - "\ -Show NUM lines after each match. - -This overrides the --passthru flag and partially overrides --context. -" - ); - let arg = RGArg::flag("after-context", "NUM") - .short("A") - .help(SHORT) - .long_help(LONG) - .number() - .overrides("passthru"); - args.push(arg); -} - -fn flag_auto_hybrid_regex(args: &mut Vec) { - const SHORT: &str = "Dynamically use PCRE2 if necessary."; - const LONG: &str = long!( - "\ -DEPRECATED. Use --engine instead. - -When this flag is used, ripgrep will dynamically choose between supported regex -engines depending on the features used in a pattern. When ripgrep chooses a -regex engine, it applies that choice for every regex provided to ripgrep (e.g., -via multiple -e/--regexp or -f/--file flags). - -As an example of how this flag might behave, ripgrep will attempt to use -its default finite automata based regex engine whenever the pattern can be -successfully compiled with that regex engine. If PCRE2 is enabled and if the -pattern given could not be compiled with the default regex engine, then PCRE2 -will be automatically used for searching. If PCRE2 isn't available, then this -flag has no effect because there is only one regex engine to choose from. - -In the future, ripgrep may adjust its heuristics for how it decides which -regex engine to use. In general, the heuristics will be limited to a static -analysis of the patterns, and not to any specific runtime behavior observed -while searching files. - -The primary downside of using this flag is that it may not always be obvious -which regex engine ripgrep uses, and thus, the match semantics or performance -profile of ripgrep may subtly and unexpectedly change. However, in many cases, -all regex engines will agree on what constitutes a match and it can be nice -to transparently support more advanced regex features like look-around and -backreferences without explicitly needing to enable them. - -This flag can be disabled with --no-auto-hybrid-regex. -" - ); - let arg = RGArg::switch("auto-hybrid-regex") - .help(SHORT) - .long_help(LONG) - .overrides("no-auto-hybrid-regex") - .overrides("pcre2") - .overrides("no-pcre2") - .overrides("engine"); - args.push(arg); - - let arg = RGArg::switch("no-auto-hybrid-regex") - .hidden() - .overrides("auto-hybrid-regex") - .overrides("pcre2") - .overrides("no-pcre2") - .overrides("engine"); - args.push(arg); -} - -fn flag_before_context(args: &mut Vec) { - const SHORT: &str = "Show NUM lines before each match."; - const LONG: &str = long!( - "\ -Show NUM lines before each match. - -This overrides the --passthru flag and partially overrides --context. -" - ); - let arg = RGArg::flag("before-context", "NUM") - .short("B") - .help(SHORT) - .long_help(LONG) - .number() - .overrides("passthru"); - args.push(arg); -} - -fn flag_binary(args: &mut Vec) { - const SHORT: &str = "Search binary files."; - const LONG: &str = long!( - "\ -Enabling this flag will cause ripgrep to search binary files. By default, -ripgrep attempts to automatically skip binary files in order to improve the -relevance of results and make the search faster. - -Binary files are heuristically detected based on whether they contain a NUL -byte or not. By default (without this flag set), once a NUL byte is seen, -ripgrep will stop searching the file. Usually, NUL bytes occur in the beginning -of most binary files. If a NUL byte occurs after a match, then ripgrep will -still stop searching the rest of the file, but a warning will be printed. - -In contrast, when this flag is provided, ripgrep will continue searching a file -even if a NUL byte is found. In particular, if a NUL byte is found then ripgrep -will continue searching until either a match is found or the end of the file is -reached, whichever comes sooner. If a match is found, then ripgrep will stop -and print a warning saying that the search stopped prematurely. - -If you want ripgrep to search a file without any special NUL byte handling at -all (and potentially print binary data to stdout), then you should use the -'-a/--text' flag. - -The '--binary' flag is a flag for controlling ripgrep's automatic filtering -mechanism. As such, it does not need to be used when searching a file -explicitly or when searching stdin. That is, it is only applicable when -recursively searching a directory. - -Note that when the '-u/--unrestricted' flag is provided for a third time, then -this flag is automatically enabled. - -This flag can be disabled with '--no-binary'. It overrides the '-a/--text' -flag. -" - ); - let arg = RGArg::switch("binary") - .help(SHORT) - .long_help(LONG) - .overrides("no-binary") - .overrides("text") - .overrides("no-text"); - args.push(arg); - - let arg = RGArg::switch("no-binary") - .hidden() - .overrides("binary") - .overrides("text") - .overrides("no-text"); - args.push(arg); -} - -fn flag_block_buffered(args: &mut Vec) { - const SHORT: &str = "Force block buffering."; - const LONG: &str = long!( - "\ -When enabled, ripgrep will use block buffering. That is, whenever a matching -line is found, it will be written to an in-memory buffer and will not be -written to stdout until the buffer reaches a certain size. This is the default -when ripgrep's stdout is redirected to a pipeline or a file. When ripgrep's -stdout is connected to a terminal, line buffering will be used. Forcing block -buffering can be useful when dumping a large amount of contents to a terminal. - -Forceful block buffering can be disabled with --no-block-buffered. Note that -using --no-block-buffered causes ripgrep to revert to its default behavior of -automatically detecting the buffering strategy. To force line buffering, use -the --line-buffered flag. -" - ); - let arg = RGArg::switch("block-buffered") - .help(SHORT) - .long_help(LONG) - .overrides("no-block-buffered") - .overrides("line-buffered") - .overrides("no-line-buffered"); - args.push(arg); - - let arg = RGArg::switch("no-block-buffered") - .hidden() - .overrides("block-buffered") - .overrides("line-buffered") - .overrides("no-line-buffered"); - args.push(arg); -} - -fn flag_byte_offset(args: &mut Vec) { - const SHORT: &str = - "Print the 0-based byte offset for each matching line."; - const LONG: &str = long!( - "\ -Print the 0-based byte offset within the input file before each line of output. -If -o (--only-matching) is specified, print the offset of the matching part -itself. - -If ripgrep does transcoding, then the byte offset is in terms of the result of -transcoding and not the original data. This applies similarly to another -transformation on the source, such as decompression or a --pre filter. Note -that when the PCRE2 regex engine is used, then UTF-8 transcoding is done by -default. -" - ); - let arg = - RGArg::switch("byte-offset").short("b").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_case_sensitive(args: &mut Vec) { - const SHORT: &str = "Search case sensitively (default)."; - const LONG: &str = long!( - "\ -Search case sensitively. - -This overrides the -i/--ignore-case and -S/--smart-case flags. -" - ); - let arg = RGArg::switch("case-sensitive") - .short("s") - .help(SHORT) - .long_help(LONG) - .overrides("ignore-case") - .overrides("smart-case"); - args.push(arg); -} - -fn flag_color(args: &mut Vec) { - const SHORT: &str = "Controls when to use color."; - const LONG: &str = long!( - "\ -This flag controls when to use colors. The default setting is 'auto', which -means ripgrep will try to guess when to use colors. For example, if ripgrep is -printing to a terminal, then it will use colors, but if it is redirected to a -file or a pipe, then it will suppress color output. ripgrep will suppress color -output in some other circumstances as well. For example, if the TERM -environment variable is not set or set to 'dumb', then ripgrep will not use -colors. - -The possible values for this flag are: - - never Colors will never be used. - auto The default. ripgrep tries to be smart. - always Colors will always be used regardless of where output is sent. - ansi Like 'always', but emits ANSI escapes (even in a Windows console). - -When the --vimgrep flag is given to ripgrep, then the default value for the ---color flag changes to 'never'. -" - ); - let arg = RGArg::flag("color", "WHEN") - .help(SHORT) - .long_help(LONG) - .possible_values(&["never", "auto", "always", "ansi"]) - .default_value_if("never", "vimgrep"); - args.push(arg); -} - -fn flag_colors(args: &mut Vec) { - const SHORT: &str = "Configure color settings and styles."; - const LONG: &str = long!( - "\ -This flag specifies color settings for use in the output. This flag may be -provided multiple times. Settings are applied iteratively. Colors are limited -to one of eight choices: red, blue, green, cyan, magenta, yellow, white and -black. Styles are limited to nobold, bold, nointense, intense, nounderline -or underline. - -The format of the flag is '{type}:{attribute}:{value}'. '{type}' should be -one of path, line, column or match. '{attribute}' can be fg, bg or style. -'{value}' is either a color (for fg and bg) or a text style. A special format, -'{type}:none', will clear all color settings for '{type}'. - -For example, the following command will change the match color to magenta and -the background color for line numbers to yellow: - - rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo. - -Extended colors can be used for '{value}' when the terminal supports ANSI color -sequences. These are specified as either 'x' (256-color) or 'x,x,x' (24-bit -truecolor) where x is a number between 0 and 255 inclusive. x may be given as -a normal decimal number or a hexadecimal number, which is prefixed by `0x`. - -For example, the following command will change the match background color to -that represented by the rgb value (0,128,255): - - rg --colors 'match:bg:0,128,255' - -or, equivalently, - - rg --colors 'match:bg:0x0,0x80,0xFF' - -Note that the intense and nointense style flags will have no effect when -used alongside these extended color codes. -" - ); - let arg = RGArg::flag("colors", "COLOR_SPEC") - .help(SHORT) - .long_help(LONG) - .multiple(); - args.push(arg); -} - -fn flag_column(args: &mut Vec) { - const SHORT: &str = "Show column numbers."; - const LONG: &str = long!( - "\ -Show column numbers (1-based). This only shows the column numbers for the first -match on each line. This does not try to account for Unicode. One byte is equal -to one column. This implies --line-number. - -This flag can be disabled with --no-column. -" - ); - let arg = RGArg::switch("column") - .help(SHORT) - .long_help(LONG) - .overrides("no-column"); - args.push(arg); - - let arg = RGArg::switch("no-column").hidden().overrides("column"); - args.push(arg); -} - -fn flag_context(args: &mut Vec) { - const SHORT: &str = "Show NUM lines before and after each match."; - const LONG: &str = long!( - "\ -Show NUM lines before and after each match. This is equivalent to providing -both the -B/--before-context and -A/--after-context flags with the same value. - -This overrides the --passthru flag. -" - ); - let arg = RGArg::flag("context", "NUM") - .short("C") - .help(SHORT) - .long_help(LONG) - .number() - .overrides("passthru"); - args.push(arg); -} - -fn flag_context_separator(args: &mut Vec) { - const SHORT: &str = "Set the context separator string."; - const LONG: &str = long!( - "\ -The string used to separate non-contiguous context lines in the output. This -is only used when one of the context flags is used (-A, -B or -C). Escape -sequences like \\x7F or \\t may be used. The default value is --. - -When the context separator is set to an empty string, then a line break -is still inserted. To completely disable context separators, use the ---no-context-separator flag. -" - ); - - let arg = RGArg::flag("context-separator", "SEPARATOR") - .help(SHORT) - .long_help(LONG) - .overrides("no-context-separator"); - args.push(arg); - - let arg = RGArg::switch("no-context-separator") - .hidden() - .overrides("context-separator"); - args.push(arg); -} - -fn flag_count(args: &mut Vec) { - const SHORT: &str = "Only show the count of matching lines for each file."; - const LONG: &str = long!( - "\ -This flag suppresses normal output and shows the number of lines that match -the given patterns for each file searched. Each file containing a match has its -path and count printed on each line. Note that this reports the number of lines -that match and not the total number of matches, unless -U/--multiline is -enabled. In multiline mode, --count is equivalent to --count-matches. - -If only one file is given to ripgrep, then only the count is printed if there -is a match. The --with-filename flag can be used to force printing the file -path in this case. If you need a count to be printed regardless of whether -there is a match, then use --include-zero. - -This overrides the --count-matches flag. Note that when --count is combined -with --only-matching, then ripgrep behaves as if --count-matches was given. -" - ); - let arg = RGArg::switch("count") - .short("c") - .help(SHORT) - .long_help(LONG) - .overrides("count-matches"); - args.push(arg); -} - -fn flag_count_matches(args: &mut Vec) { - const SHORT: &str = - "Only show the count of individual matches for each file."; - const LONG: &str = long!( - "\ -This flag suppresses normal output and shows the number of individual -matches of the given patterns for each file searched. Each file -containing matches has its path and match count printed on each line. -Note that this reports the total number of individual matches and not -the number of lines that match. - -If only one file is given to ripgrep, then only the count is printed if there -is a match. The --with-filename flag can be used to force printing the file -path in this case. - -This overrides the --count flag. Note that when --count is combined with ---only-matching, then ripgrep behaves as if --count-matches was given. -" - ); - let arg = RGArg::switch("count-matches") - .help(SHORT) - .long_help(LONG) - .overrides("count"); - args.push(arg); -} - -fn flag_crlf(args: &mut Vec) { - const SHORT: &str = "Support CRLF line terminators (useful on Windows)."; - const LONG: &str = long!( - "\ -When enabled, ripgrep will treat CRLF ('\\r\\n') as a line terminator instead -of just '\\n'. - -Principally, this permits '$' in regex patterns to match just before CRLF -instead of just before LF. The underlying regex engine may not support this -natively, so ripgrep will translate all instances of '$' to '(?:\\r??$)'. This -may produce slightly different than desired match offsets. It is intended as a -work-around until the regex engine supports this natively. - -CRLF support can be disabled with --no-crlf. -" - ); - let arg = RGArg::switch("crlf") - .help(SHORT) - .long_help(LONG) - .overrides("no-crlf") - .overrides("null-data"); - args.push(arg); - - let arg = RGArg::switch("no-crlf").hidden().overrides("crlf"); - args.push(arg); -} - -fn flag_debug(args: &mut Vec) { - const SHORT: &str = "Show debug messages."; - const LONG: &str = long!( - "\ -Show debug messages. Please use this when filing a bug report. - -The --debug flag is generally useful for figuring out why ripgrep skipped -searching a particular file. The debug messages should mention all files -skipped and why they were skipped. - -To get even more debug output, use the --trace flag, which implies --debug -along with additional trace data. With --trace, the output could be quite -large and is generally more useful for development. -" - ); - let arg = RGArg::switch("debug").help(SHORT).long_help(LONG); - args.push(arg); - - let arg = RGArg::switch("trace").hidden().overrides("debug"); - args.push(arg); -} - -fn flag_dfa_size_limit(args: &mut Vec) { - const SHORT: &str = "The upper size limit of the regex DFA."; - const LONG: &str = long!( - "\ -The upper size limit of the regex DFA. The default limit is 10M. This should -only be changed on very large regex inputs where the (slower) fallback regex -engine may otherwise be used if the limit is reached. - -The argument accepts the same size suffixes as allowed in with the ---max-filesize flag. -" - ); - let arg = RGArg::flag("dfa-size-limit", "NUM+SUFFIX?") - .help(SHORT) - .long_help(LONG); - args.push(arg); -} - -fn flag_encoding(args: &mut Vec) { - const SHORT: &str = "Specify the text encoding of files to search."; - const LONG: &str = long!( - "\ -Specify the text encoding that ripgrep will use on all files searched. The -default value is 'auto', which will cause ripgrep to do a best effort automatic -detection of encoding on a per-file basis. Automatic detection in this case -only applies to files that begin with a UTF-8 or UTF-16 byte-order mark (BOM). -No other automatic detection is performed. One can also specify 'none' which -will then completely disable BOM sniffing and always result in searching the -raw bytes, including a BOM if it's present, regardless of its encoding. - -Other supported values can be found in the list of labels here: -https://encoding.spec.whatwg.org/#concept-encoding-get - -For more details on encoding and how ripgrep deals with it, see GUIDE.md. - -This flag can be disabled with --no-encoding. -" - ); - let arg = RGArg::flag("encoding", "ENCODING") - .short("E") - .help(SHORT) - .long_help(LONG); - args.push(arg); - - let arg = RGArg::switch("no-encoding").hidden().overrides("encoding"); - args.push(arg); -} - -fn flag_engine(args: &mut Vec) { - const SHORT: &str = "Specify which regexp engine to use."; - const LONG: &str = long!( - "\ -Specify which regular expression engine to use. When you choose a regex engine, -it applies that choice for every regex provided to ripgrep (e.g., via multiple --e/--regexp or -f/--file flags). - -Accepted values are 'default', 'pcre2', or 'auto'. - -The default value is 'default', which is the fastest and should be good for -most use cases. The 'pcre2' engine is generally useful when you want to use -features such as look-around or backreferences. 'auto' will dynamically choose -between supported regex engines depending on the features used in a pattern on -a best effort basis. - -Note that the 'pcre2' engine is an optional ripgrep feature. If PCRE2 wasn't -included in your build of ripgrep, then using this flag will result in ripgrep -printing an error message and exiting. - -This overrides previous uses of --pcre2 and --auto-hybrid-regex flags. -" - ); - let arg = RGArg::flag("engine", "ENGINE") - .help(SHORT) - .long_help(LONG) - .possible_values(&["default", "pcre2", "auto"]) - .default_value("default") - .overrides("pcre2") - .overrides("no-pcre2") - .overrides("auto-hybrid-regex") - .overrides("no-auto-hybrid-regex"); - args.push(arg); -} - -fn flag_field_context_separator(args: &mut Vec) { - const SHORT: &str = "Set the field context separator."; - const LONG: &str = long!( - "\ -Set the field context separator, which is used to delimit file paths, line -numbers, columns and the context itself, when printing contextual lines. The -separator may be any number of bytes, including zero. Escape sequences like -\\x7F or \\t may be used. The '-' character is the default value. -" - ); - let arg = RGArg::flag("field-context-separator", "SEPARATOR") - .help(SHORT) - .long_help(LONG); - args.push(arg); -} - -fn flag_field_match_separator(args: &mut Vec) { - const SHORT: &str = "Set the match separator."; - const LONG: &str = long!( - "\ -Set the field match separator, which is used to delimit file paths, line -numbers, columns and the match itself. The separator may be any number of -bytes, including zero. Escape sequences like \\x7F or \\t may be used. The ':' -character is the default value. -" - ); - let arg = RGArg::flag("field-match-separator", "SEPARATOR") - .help(SHORT) - .long_help(LONG); - args.push(arg); -} - -fn flag_file(args: &mut Vec) { - const SHORT: &str = "Search for patterns from the given file."; - const LONG: &str = long!( - "\ -Search for patterns from the given file, with one pattern per line. When this -flag is used multiple times or in combination with the -e/--regexp flag, -then all patterns provided are searched. Empty pattern lines will match all -input lines, and the newline is not counted as part of the pattern. - -A line is printed if and only if it matches at least one of the patterns. -" - ); - let arg = RGArg::flag("file", "PATTERNFILE") - .short("f") - .help(SHORT) - .long_help(LONG) - .multiple() - .allow_leading_hyphen(); - args.push(arg); -} - -fn flag_files(args: &mut Vec) { - const SHORT: &str = "Print each file that would be searched."; - const LONG: &str = long!( - "\ -Print each file that would be searched without actually performing the search. -This is useful to determine whether a particular file is being searched or not. -" - ); - let arg = RGArg::switch("files") - .help(SHORT) - .long_help(LONG) - // This also technically conflicts with pattern, but the first file - // path will actually be in pattern. - .conflicts(&["file", "regexp", "type-list"]); - args.push(arg); -} - -fn flag_files_with_matches(args: &mut Vec) { - const SHORT: &str = "Print the paths with at least one match."; - const LONG: &str = long!( - "\ -Print the paths with at least one match and suppress match contents. - -This overrides --files-without-match. -" - ); - let arg = RGArg::switch("files-with-matches") - .short("l") - .help(SHORT) - .long_help(LONG) - .overrides("files-without-match"); - args.push(arg); -} - -fn flag_files_without_match(args: &mut Vec) { - const SHORT: &str = "Print the paths that contain zero matches."; - const LONG: &str = long!( - "\ -Print the paths that contain zero matches and suppress match contents. This -inverts/negates the --files-with-matches flag. - -This overrides --files-with-matches. -" - ); - let arg = RGArg::switch("files-without-match") - .help(SHORT) - .long_help(LONG) - .overrides("files-with-matches"); - args.push(arg); -} - -fn flag_fixed_strings(args: &mut Vec) { - const SHORT: &str = "Treat the pattern as a literal string."; - const LONG: &str = long!( - "\ -Treat the pattern as a literal string instead of a regular expression. When -this flag is used, special regular expression meta characters such as .(){}*+ -do not need to be escaped. - -This flag can be disabled with --no-fixed-strings. -" - ); - let arg = RGArg::switch("fixed-strings") - .short("F") - .help(SHORT) - .long_help(LONG) - .overrides("no-fixed-strings"); - args.push(arg); - - let arg = - RGArg::switch("no-fixed-strings").hidden().overrides("fixed-strings"); - args.push(arg); -} - -fn flag_follow(args: &mut Vec) { - const SHORT: &str = "Follow symbolic links."; - const LONG: &str = long!( - "\ -When this flag is enabled, ripgrep will follow symbolic links while traversing -directories. This is disabled by default. Note that ripgrep will check for -symbolic link loops and report errors if it finds one. - -This flag can be disabled with --no-follow. -" - ); - let arg = RGArg::switch("follow") - .short("L") - .help(SHORT) - .long_help(LONG) - .overrides("no-follow"); - args.push(arg); - - let arg = RGArg::switch("no-follow").hidden().overrides("follow"); - args.push(arg); -} - -fn flag_glob(args: &mut Vec) { - const SHORT: &str = "Include or exclude files."; - const LONG: &str = long!( - "\ -Include or exclude files and directories for searching that match the given -glob. This always overrides any other ignore logic. Multiple glob flags may be -used. Globbing rules match .gitignore globs. Precede a glob with a ! to exclude -it. If multiple globs match a file or directory, the glob given later in the -command line takes precedence. - -As an extension, globs support specifying alternatives: *-g ab{c,d}* is -equivalent to *-g abc -g abd*. Empty alternatives like *-g ab{,c}* are not -currently supported. Note that this syntax extension is also currently enabled -in gitignore files, even though this syntax isn't supported by git itself. -ripgrep may disable this syntax extension in gitignore files, but it will -always remain available via the -g/--glob flag. - -When this flag is set, every file and directory is applied to it to test for -a match. So for example, if you only want to search in a particular directory -'foo', then *-g foo* is incorrect because 'foo/bar' does not match the glob -'foo'. Instead, you should use *-g 'foo/**'*. -" - ); - let arg = RGArg::flag("glob", "GLOB") - .short("g") - .help(SHORT) - .long_help(LONG) - .multiple() - .allow_leading_hyphen(); - args.push(arg); -} - -fn flag_glob_case_insensitive(args: &mut Vec) { - const SHORT: &str = "Process all glob patterns case insensitively."; - const LONG: &str = long!( - "\ -Process glob patterns given with the -g/--glob flag case insensitively. This -effectively treats --glob as --iglob. - -This flag can be disabled with the --no-glob-case-insensitive flag. -" - ); - let arg = RGArg::switch("glob-case-insensitive") - .help(SHORT) - .long_help(LONG) - .overrides("no-glob-case-insensitive"); - args.push(arg); - - let arg = RGArg::switch("no-glob-case-insensitive") - .hidden() - .overrides("glob-case-insensitive"); - args.push(arg); -} - -fn flag_heading(args: &mut Vec) { - const SHORT: &str = "Print matches grouped by each file."; - const LONG: &str = long!( - "\ -This flag prints the file path above clusters of matches from each file instead -of printing the file path as a prefix for each matched line. This is the -default mode when printing to a terminal. - -This overrides the --no-heading flag. -" - ); - let arg = RGArg::switch("heading") - .help(SHORT) - .long_help(LONG) - .overrides("no-heading"); - args.push(arg); - - const NO_SHORT: &str = "Don't group matches by each file."; - const NO_LONG: &str = long!( - "\ -Don't group matches by each file. If --no-heading is provided in addition to -the -H/--with-filename flag, then file paths will be printed as a prefix for -every matched line. This is the default mode when not printing to a terminal. - -This overrides the --heading flag. -" - ); - let arg = RGArg::switch("no-heading") - .help(NO_SHORT) - .long_help(NO_LONG) - .overrides("heading"); - args.push(arg); -} - -fn flag_hidden(args: &mut Vec) { - const SHORT: &str = "Search hidden files and directories."; - const LONG: &str = long!( - "\ -Search hidden files and directories. By default, hidden files and directories -are skipped. Note that if a hidden file or a directory is whitelisted in an -ignore file, then it will be searched even if this flag isn't provided. - -A file or directory is considered hidden if its base name starts with a dot -character ('.'). On operating systems which support a `hidden` file attribute, -like Windows, files with this attribute are also considered hidden. - -This flag can be disabled with --no-hidden. -" - ); - let arg = RGArg::switch("hidden") - .short(".") - .help(SHORT) - .long_help(LONG) - .overrides("no-hidden"); - args.push(arg); - - let arg = RGArg::switch("no-hidden").hidden().overrides("hidden"); - args.push(arg); -} - -fn flag_hostname_bin(args: &mut Vec) { - const SHORT: &str = "Run a program to get this system's hostname."; - const LONG: &str = long!( - "\ -This flag controls how ripgrep determines this system's hostname. The flag's -value should correspond to an executable (either a path or something that can -be found via your system's *PATH* environment variable). When set, ripgrep will -run this executable, with no arguments, and treat its output (with leading and -trailing whitespace stripped) as your system's hostname. - -When not set (the default, or the empty string), ripgrep will try to -automatically detect your system's hostname. On Unix, this corresponds -to calling *gethostname*. On Windows, this corresponds to calling -*GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" - -ripgrep uses your system's hostname for producing hyperlinks. -" - ); - let arg = - RGArg::flag("hostname-bin", "COMMAND").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_hyperlink_format(args: &mut Vec) { - const SHORT: &str = "Set the format of hyperlinks to match results."; - const LONG: &str = long!( - "\ -Set the format of hyperlinks to match results. Hyperlinks make certain elements -of ripgrep's output, such as file paths, clickable. This generally only works -in terminal emulators that support OSC-8 hyperlinks. For example, the format -file://{host}{path} will emit an RFC 8089 hyperlink. To see the format that -ripgrep is using, pass the --debug flag. - -Alternatively, a format string may correspond to one of the following aliases: -default, file, grep+, kitty, macvim, none, textmate, vscode, vscode-insiders, -vscodium. The alias will be replaced with a format string that is intended to -work for the corresponding application. - -The following variables are available in the format string: - -{path}: Required. This is replaced with a path to a matching file. The -path is guaranteed to be absolute and percent encoded such that it is valid to -put into a URI. Note that a path is guaranteed to start with a /. - -{host}: Optional. This is replaced with your system's hostname. On Unix, -this corresponds to calling 'gethostname'. On Windows, this corresponds to -calling 'GetComputerNameExW' to fetch the system's \"physical DNS hostname.\" -Alternatively, if --hostname-bin was provided, then the hostname returned from -the output of that program will be returned. If no hostname could be found, -then this variable is replaced with the empty string. - -{line}: Optional. If appropriate, this is replaced with the line number of -a match. If no line number is available (for example, if --no-line-number was -given), then it is automatically replaced with the value 1. - -{column}: Optional, but requires the presence of {line}. If appropriate, this -is replaced with the column number of a match. If no column number is available -(for example, if --no-column was given), then it is automatically replaced with -the value 1. - -{wslprefix}: Optional. This is a special value that is set to -wsl$/WSL_DISTRO_NAME, where WSL_DISTRO_NAME corresponds to the value of -the equivalent environment variable. If the system is not Unix or if the -WSL_DISTRO_NAME environment variable is not set, then this is replaced with the -empty string. - -A format string may be empty. An empty format string is equivalent to the -'none' alias. In this case, hyperlinks will be disabled. - -At present, ripgrep does not enable hyperlinks by default. Users must opt into -them. If you aren't sure what format to use, try 'default'. - -When ripgrep detects a tty on stdout then hyperlinks are automatically -disabled, regardless of the value of this flag. Users can pass '--color always' -to forcefully emit hyperlinks. - -Note that hyperlinks are only written when a path is also in the output -and colors are enabled. To write hyperlinks without colors, you'll need to -configure ripgrep to not colorize anything without actually disabling all ANSI -escape codes completely: - - --colors 'path:none' --colors 'line:none' --colors 'column:none' --colors 'match:none' - -ripgrep works this way because it treats the --color=(never|always|auto) flag -as a proxy for whether ANSI escape codes should be used at all. This means -that environment variables like NO_COLOR=1 and TERM=dumb not only disable -colors, but hyperlinks as well. Similarly, colors and hyperlinks are disabled -when ripgrep is not writing to a tty. (Unless one forces the issue by setting ---color=always.) - -If you're searching a file directly, for example: - - rg foo path/to/file - -then hyperlinks will not be emitted since the path given does not appear -in the output. To make the path appear, and thus also a hyperlink, use the --H/--with-filename flag. - -For more information on hyperlinks in terminal emulators, see: -https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda -" - ); - let arg = - RGArg::flag("hyperlink-format", "FORMAT").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_iglob(args: &mut Vec) { - const SHORT: &str = "Include or exclude files case insensitively."; - const LONG: &str = long!( - "\ -Include or exclude files and directories for searching that match the given -glob. This always overrides any other ignore logic. Multiple glob flags may be -used. Globbing rules match .gitignore globs. Precede a glob with a ! to exclude -it. Globs are matched case insensitively. -" - ); - let arg = RGArg::flag("iglob", "GLOB") - .help(SHORT) - .long_help(LONG) - .multiple() - .allow_leading_hyphen(); - args.push(arg); -} - -fn flag_ignore_case(args: &mut Vec) { - const SHORT: &str = "Case insensitive search."; - const LONG: &str = long!( - "\ -When this flag is provided, the given patterns will be searched case -insensitively. The case insensitivity rules used by ripgrep conform to -Unicode's \"simple\" case folding rules. - -This flag overrides -s/--case-sensitive and -S/--smart-case. -" - ); - let arg = RGArg::switch("ignore-case") - .short("i") - .help(SHORT) - .long_help(LONG) - .overrides("case-sensitive") - .overrides("smart-case"); - args.push(arg); -} - -fn flag_ignore_file(args: &mut Vec) { - const SHORT: &str = "Specify additional ignore files."; - const LONG: &str = long!( - "\ -Specifies a path to one or more .gitignore format rules files. These patterns -are applied after the patterns found in .gitignore and .ignore are applied -and are matched relative to the current working directory. Multiple additional -ignore files can be specified by using the --ignore-file flag several times. -When specifying multiple ignore files, earlier files have lower precedence -than later files. - -If you are looking for a way to include or exclude files and directories -directly on the command line, then use -g instead. -" - ); - let arg = RGArg::flag("ignore-file", "PATH") - .help(SHORT) - .long_help(LONG) - .multiple() - .allow_leading_hyphen(); - args.push(arg); -} - -fn flag_ignore_file_case_insensitive(args: &mut Vec) { - const SHORT: &str = "Process ignore files case insensitively."; - const LONG: &str = long!( - "\ -Process ignore files (.gitignore, .ignore, etc.) case insensitively. Note that -this comes with a performance penalty and is most useful on case insensitive -file systems (such as Windows). - -This flag can be disabled with the --no-ignore-file-case-insensitive flag. -" - ); - let arg = RGArg::switch("ignore-file-case-insensitive") - .help(SHORT) - .long_help(LONG) - .overrides("no-ignore-file-case-insensitive"); - args.push(arg); - - let arg = RGArg::switch("no-ignore-file-case-insensitive") - .hidden() - .overrides("ignore-file-case-insensitive"); - args.push(arg); -} - -fn flag_include_zero(args: &mut Vec) { - const SHORT: &str = "Include files with zero matches in summary"; - const LONG: &str = long!( - "\ -When used with --count or --count-matches, print the number of matches for -each file even if there were zero matches. This is disabled by default but can -be enabled to make ripgrep behave more like grep. -" - ); - let arg = RGArg::switch("include-zero").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_invert_match(args: &mut Vec) { - const SHORT: &str = "Invert matching."; - const LONG: &str = long!( - "\ -Invert matching. Show lines that do not match the given patterns. -" - ); - let arg = - RGArg::switch("invert-match").short("v").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_json(args: &mut Vec) { - const SHORT: &str = "Show search results in a JSON Lines format."; - const LONG: &str = long!( - "\ -Enable printing results in a JSON Lines format. - -When this flag is provided, ripgrep will emit a sequence of messages, each -encoded as a JSON object, where there are five different message types: - -**begin** - A message that indicates a file is being searched and contains at -least one match. - -**end** - A message the indicates a file is done being searched. This message -also include summary statistics about the search for a particular file. - -**match** - A message that indicates a match was found. This includes the text -and offsets of the match. - -**context** - A message that indicates a contextual line was found. This -includes the text of the line, along with any match information if the search -was inverted. - -**summary** - The final message emitted by ripgrep that contains summary -statistics about the search across all files. - -Since file paths or the contents of files are not guaranteed to be valid UTF-8 -and JSON itself must be representable by a Unicode encoding, ripgrep will emit -all data elements as objects with one of two keys: 'text' or 'bytes'. 'text' is -a normal JSON string when the data is valid UTF-8 while 'bytes' is the base64 -encoded contents of the data. - -The JSON Lines format is only supported for showing search results. It cannot -be used with other flags that emit other types of output, such as --files, ---files-with-matches, --files-without-match, --count or --count-matches. -ripgrep will report an error if any of the aforementioned flags are used in -concert with --json. - -Other flags that control aspects of the standard output such as ---only-matching, --heading, --replace, --max-columns, etc., have no effect -when --json is set. - -A more complete description of the JSON format used can be found here: -https://docs.rs/grep-printer/*/grep_printer/struct.JSON.html - -The JSON Lines format can be disabled with --no-json. -" - ); - let arg = RGArg::switch("json") - .help(SHORT) - .long_help(LONG) - .overrides("no-json") - .conflicts(&[ - "count", - "count-matches", - "files", - "files-with-matches", - "files-without-match", - ]); - args.push(arg); - - let arg = RGArg::switch("no-json").hidden().overrides("json"); - args.push(arg); -} - -fn flag_line_buffered(args: &mut Vec) { - const SHORT: &str = "Force line buffering."; - const LONG: &str = long!( - "\ -When enabled, ripgrep will use line buffering. That is, whenever a matching -line is found, it will be flushed to stdout immediately. This is the default -when ripgrep's stdout is connected to a terminal, but otherwise, ripgrep will -use block buffering, which is typically faster. This flag forces ripgrep to -use line buffering even if it would otherwise use block buffering. This is -typically useful in shell pipelines, e.g., -'tail -f something.log | rg foo --line-buffered | rg bar'. - -Forceful line buffering can be disabled with --no-line-buffered. Note that -using --no-line-buffered causes ripgrep to revert to its default behavior of -automatically detecting the buffering strategy. To force block buffering, use -the --block-buffered flag. -" - ); - let arg = RGArg::switch("line-buffered") - .help(SHORT) - .long_help(LONG) - .overrides("no-line-buffered") - .overrides("block-buffered") - .overrides("no-block-buffered"); - args.push(arg); - - let arg = RGArg::switch("no-line-buffered") - .hidden() - .overrides("line-buffered") - .overrides("block-buffered") - .overrides("no-block-buffered"); - args.push(arg); -} - -fn flag_line_number(args: &mut Vec) { - const SHORT: &str = "Show line numbers."; - const LONG: &str = long!( - "\ -Show line numbers (1-based). This is enabled by default when searching in a -terminal. - -This flag overrides --no-line-number. -" - ); - let arg = RGArg::switch("line-number") - .short("n") - .help(SHORT) - .long_help(LONG) - .overrides("no-line-number"); - args.push(arg); - - const NO_SHORT: &str = "Suppress line numbers."; - const NO_LONG: &str = long!( - "\ -Suppress line numbers. This is enabled by default when not searching in a -terminal. - -This flag overrides --line-number. -" - ); - let arg = RGArg::switch("no-line-number") - .short("N") - .help(NO_SHORT) - .long_help(NO_LONG) - .overrides("line-number"); - args.push(arg); -} - -fn flag_line_regexp(args: &mut Vec) { - const SHORT: &str = "Only show matches surrounded by line boundaries."; - const LONG: &str = long!( - "\ -Only show matches surrounded by line boundaries. This is equivalent to putting -^...$ around all of the search patterns. In other words, this only prints lines -where the entire line participates in a match. - -This overrides the --word-regexp flag. -" - ); - let arg = RGArg::switch("line-regexp") - .short("x") - .help(SHORT) - .long_help(LONG) - .overrides("word-regexp"); - args.push(arg); -} - -fn flag_max_columns(args: &mut Vec) { - const SHORT: &str = "Don't print lines longer than this limit."; - const LONG: &str = long!( - "\ -Don't print lines longer than this limit in bytes. Longer lines are omitted, -and only the number of matches in that line is printed. - -When this flag is omitted or is set to 0, then it has no effect. -" - ); - let arg = RGArg::flag("max-columns", "NUM") - .short("M") - .help(SHORT) - .long_help(LONG) - .number(); - args.push(arg); -} - -fn flag_max_columns_preview(args: &mut Vec) { - const SHORT: &str = "Print a preview for lines exceeding the limit."; - const LONG: &str = long!( - "\ -When the '--max-columns' flag is used, ripgrep will by default completely -replace any line that is too long with a message indicating that a matching -line was removed. When this flag is combined with '--max-columns', a preview -of the line (corresponding to the limit size) is shown instead, where the part -of the line exceeding the limit is not shown. - -If the '--max-columns' flag is not set, then this has no effect. - -This flag can be disabled with '--no-max-columns-preview'. -" - ); - let arg = RGArg::switch("max-columns-preview") - .help(SHORT) - .long_help(LONG) - .overrides("no-max-columns-preview"); - args.push(arg); - - let arg = RGArg::switch("no-max-columns-preview") - .hidden() - .overrides("max-columns-preview"); - args.push(arg); -} - -fn flag_max_count(args: &mut Vec) { - const SHORT: &str = "Limit the number of matches."; - const LONG: &str = long!( - "\ -Limit the number of matching lines per file searched to NUM. -" - ); - let arg = RGArg::flag("max-count", "NUM") - .short("m") - .help(SHORT) - .long_help(LONG) - .number(); - args.push(arg); -} - -fn flag_max_depth(args: &mut Vec) { - const SHORT: &str = "Descend at most NUM directories."; - const LONG: &str = long!( - "\ -Limit the depth of directory traversal to NUM levels beyond the paths given. A -value of zero only searches the explicitly given paths themselves. - -For example, 'rg --max-depth 0 dir/' is a no-op because dir/ will not be -descended into. 'rg --max-depth 1 dir/' will search only the direct children of -'dir'. -" - ); - let arg = RGArg::flag("max-depth", "NUM") - .help(SHORT) - .long_help(LONG) - .alias("maxdepth") - .number(); - args.push(arg); -} - -fn flag_max_filesize(args: &mut Vec) { - const SHORT: &str = "Ignore files larger than NUM in size."; - const LONG: &str = long!( - "\ -Ignore files larger than NUM in size. This does not apply to directories. - -The input format accepts suffixes of K, M or G which correspond to kilobytes, -megabytes and gigabytes, respectively. If no suffix is provided the input is -treated as bytes. - -Examples: --max-filesize 50K or --max-filesize 80M -" - ); - let arg = - RGArg::flag("max-filesize", "NUM+SUFFIX?").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_mmap(args: &mut Vec) { - const SHORT: &str = "Search using memory maps when possible."; - const LONG: &str = long!( - "\ -Search using memory maps when possible. This is enabled by default when ripgrep -thinks it will be faster. - -Memory map searching doesn't currently support all options, so if an -incompatible option (e.g., --context) is given with --mmap, then memory maps -will not be used. - -Note that ripgrep may abort unexpectedly when --mmap if it searches a file that -is simultaneously truncated. - -This flag overrides --no-mmap. -" - ); - let arg = - RGArg::switch("mmap").help(SHORT).long_help(LONG).overrides("no-mmap"); - args.push(arg); - - const NO_SHORT: &str = "Never use memory maps."; - const NO_LONG: &str = long!( - "\ -Never use memory maps, even when they might be faster. - -This flag overrides --mmap. -" - ); - let arg = RGArg::switch("no-mmap") - .help(NO_SHORT) - .long_help(NO_LONG) - .overrides("mmap"); - args.push(arg); -} - -fn flag_multiline(args: &mut Vec) { - const SHORT: &str = "Enable matching across multiple lines."; - const LONG: &str = long!( - "\ -Enable matching across multiple lines. - -When multiline mode is enabled, ripgrep will lift the restriction that a match -cannot include a line terminator. For example, when multiline mode is not -enabled (the default), then the regex '\\p{any}' will match any Unicode -codepoint other than '\\n'. Similarly, the regex '\\n' is explicitly forbidden, -and if you try to use it, ripgrep will return an error. However, when multiline -mode is enabled, '\\p{any}' will match any Unicode codepoint, including '\\n', -and regexes like '\\n' are permitted. - -An important caveat is that multiline mode does not change the match semantics -of '.'. Namely, in most regex matchers, a '.' will by default match any -character other than '\\n', and this is true in ripgrep as well. In order to -make '.' match '\\n', you must enable the \"dot all\" flag inside the regex. -For example, both '(?s).' and '(?s:.)' have the same semantics, where '.' will -match any character, including '\\n'. Alternatively, the '--multiline-dotall' -flag may be passed to make the \"dot all\" behavior the default. This flag only -applies when multiline search is enabled. - -There is no limit on the number of the lines that a single match can span. - -**WARNING**: Because of how the underlying regex engine works, multiline -searches may be slower than normal line-oriented searches, and they may also -use more memory. In particular, when multiline mode is enabled, ripgrep -requires that each file it searches is laid out contiguously in memory -(either by reading it onto the heap or by memory-mapping it). Things that -cannot be memory-mapped (such as stdin) will be consumed until EOF before -searching can begin. In general, ripgrep will only do these things when -necessary. Specifically, if the --multiline flag is provided but the regex -does not contain patterns that would match '\\n' characters, then ripgrep -will automatically avoid reading each file into memory before searching it. -Nevertheless, if you only care about matches spanning at most one line, then it -is always better to disable multiline mode. - -This flag can be disabled with --no-multiline. - -This overrides the --stop-on-nonmatch flag. -" - ); - let arg = RGArg::switch("multiline") - .short("U") - .help(SHORT) - .long_help(LONG) - .overrides("no-multiline") - .overrides("stop-on-nonmatch"); - args.push(arg); - - let arg = RGArg::switch("no-multiline").hidden().overrides("multiline"); - args.push(arg); -} - -fn flag_multiline_dotall(args: &mut Vec) { - const SHORT: &str = "Make '.' match new lines when multiline is enabled."; - const LONG: &str = long!( - "\ -This flag enables \"dot all\" in your regex pattern, which causes '.' to match -newlines when multiline searching is enabled. This flag has no effect if -multiline searching isn't enabled with the --multiline flag. - -Normally, a '.' will match any character except newlines. While this behavior -typically isn't relevant for line-oriented matching (since matches can span at -most one line), this can be useful when searching with the -U/--multiline flag. -By default, the multiline mode runs without this flag. - -This flag is generally intended to be used in an alias or your ripgrep config -file if you prefer \"dot all\" semantics by default. Note that regardless of -whether this flag is used, \"dot all\" semantics can still be controlled via -inline flags in the regex pattern itself, e.g., '(?s:.)' always enables \"dot -all\" whereas '(?-s:.)' always disables \"dot all\". - -This flag can be disabled with --no-multiline-dotall. -" - ); - let arg = RGArg::switch("multiline-dotall") - .help(SHORT) - .long_help(LONG) - .overrides("no-multiline-dotall"); - args.push(arg); - - let arg = RGArg::switch("no-multiline-dotall") - .hidden() - .overrides("multiline-dotall"); - args.push(arg); -} - -fn flag_no_config(args: &mut Vec) { - const SHORT: &str = "Never read configuration files."; - const LONG: &str = long!( - "\ -Never read configuration files. When this flag is present, ripgrep will not -respect the RIPGREP_CONFIG_PATH environment variable. - -If ripgrep ever grows a feature to automatically read configuration files in -pre-defined locations, then this flag will also disable that behavior as well. -" - ); - let arg = RGArg::switch("no-config").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_no_ignore(args: &mut Vec) { - const SHORT: &str = "Don't respect ignore files."; - const LONG: &str = long!( - "\ -Don't respect ignore files (.gitignore, .ignore, etc.). This implies ---no-ignore-dot, --no-ignore-exclude, --no-ignore-global, no-ignore-parent and ---no-ignore-vcs. - -This does *not* imply --no-ignore-files, since --ignore-file is specified -explicitly as a command line argument. - -When given only once, the -u flag is identical in behavior to --no-ignore and -can be considered an alias. However, subsequent -u flags have additional -effects; see --unrestricted. - -This flag can be disabled with the --ignore flag. -" - ); - let arg = RGArg::switch("no-ignore") - .help(SHORT) - .long_help(LONG) - .overrides("ignore"); - args.push(arg); - - let arg = RGArg::switch("ignore").hidden().overrides("no-ignore"); - args.push(arg); -} - -fn flag_no_ignore_dot(args: &mut Vec) { - const SHORT: &str = "Don't respect .ignore files."; - const LONG: &str = long!( - "\ -Don't respect .ignore files. - -This does *not* affect whether ripgrep will ignore files and directories -whose names begin with a dot. For that, see the -./--hidden flag. - -This flag can be disabled with the --ignore-dot flag. -" - ); - let arg = RGArg::switch("no-ignore-dot") - .help(SHORT) - .long_help(LONG) - .overrides("ignore-dot"); - args.push(arg); - - let arg = RGArg::switch("ignore-dot").hidden().overrides("no-ignore-dot"); - args.push(arg); -} - -fn flag_no_ignore_exclude(args: &mut Vec) { - const SHORT: &str = "Don't respect local exclusion files."; - const LONG: &str = long!( - "\ -Don't respect ignore files that are manually configured for the repository -such as git's '.git/info/exclude'. - -This flag can be disabled with the --ignore-exclude flag. -" - ); - let arg = RGArg::switch("no-ignore-exclude") - .help(SHORT) - .long_help(LONG) - .overrides("ignore-exclude"); - args.push(arg); - - let arg = RGArg::switch("ignore-exclude") - .hidden() - .overrides("no-ignore-exclude"); - args.push(arg); -} - -fn flag_no_ignore_files(args: &mut Vec) { - const SHORT: &str = "Don't respect --ignore-file arguments."; - const LONG: &str = long!( - "\ -When set, any --ignore-file flags, even ones that come after this flag, are -ignored. - -This flag can be disabled with the --ignore-files flag. -" - ); - let arg = RGArg::switch("no-ignore-files") - .help(SHORT) - .long_help(LONG) - .overrides("ignore-files"); - args.push(arg); - - let arg = - RGArg::switch("ignore-files").hidden().overrides("no-ignore-files"); - args.push(arg); -} - -fn flag_no_ignore_global(args: &mut Vec) { - const SHORT: &str = "Don't respect global ignore files."; - const LONG: &str = long!( - "\ -Don't respect ignore files that come from \"global\" sources such as git's -`core.excludesFile` configuration option (which defaults to -`$HOME/.config/git/ignore`). - -This flag can be disabled with the --ignore-global flag. -" - ); - let arg = RGArg::switch("no-ignore-global") - .help(SHORT) - .long_help(LONG) - .overrides("ignore-global"); - args.push(arg); - - let arg = - RGArg::switch("ignore-global").hidden().overrides("no-ignore-global"); - args.push(arg); -} - -fn flag_no_ignore_messages(args: &mut Vec) { - const SHORT: &str = "Suppress gitignore parse error messages."; - const LONG: &str = long!( - "\ -Suppresses all error messages related to parsing ignore files such as .ignore -or .gitignore. - -This flag can be disabled with the --ignore-messages flag. -" - ); - let arg = RGArg::switch("no-ignore-messages") - .help(SHORT) - .long_help(LONG) - .overrides("ignore-messages"); - args.push(arg); - - let arg = RGArg::switch("ignore-messages") - .hidden() - .overrides("no-ignore-messages"); - args.push(arg); -} - -fn flag_no_ignore_parent(args: &mut Vec) { - const SHORT: &str = "Don't respect ignore files in parent directories."; - const LONG: &str = long!( - "\ -Don't respect ignore files (.gitignore, .ignore, etc.) in parent directories. - -This flag can be disabled with the --ignore-parent flag. -" - ); - let arg = RGArg::switch("no-ignore-parent") - .help(SHORT) - .long_help(LONG) - .overrides("ignore-parent"); - args.push(arg); - - let arg = - RGArg::switch("ignore-parent").hidden().overrides("no-ignore-parent"); - args.push(arg); -} - -fn flag_no_ignore_vcs(args: &mut Vec) { - const SHORT: &str = "Don't respect VCS ignore files."; - const LONG: &str = long!( - "\ -Don't respect version control ignore files (.gitignore, etc.). This implies ---no-ignore-parent for VCS files. Note that .ignore files will continue to be -respected. - -This flag can be disabled with the --ignore-vcs flag. -" - ); - let arg = RGArg::switch("no-ignore-vcs") - .help(SHORT) - .long_help(LONG) - .overrides("ignore-vcs"); - args.push(arg); - - let arg = RGArg::switch("ignore-vcs").hidden().overrides("no-ignore-vcs"); - args.push(arg); -} - -fn flag_no_messages(args: &mut Vec) { - const SHORT: &str = "Suppress some error messages."; - const LONG: &str = long!( - "\ -Suppress all error messages related to opening and reading files. Error -messages related to the syntax of the pattern given are still shown. - -This flag can be disabled with the --messages flag. -" - ); - let arg = RGArg::switch("no-messages") - .help(SHORT) - .long_help(LONG) - .overrides("messages"); - args.push(arg); - - let arg = RGArg::switch("messages").hidden().overrides("no-messages"); - args.push(arg); -} - -fn flag_no_pcre2_unicode(args: &mut Vec) { - const SHORT: &str = "Disable Unicode mode for PCRE2 matching."; - const LONG: &str = long!( - "\ -DEPRECATED. Use --no-unicode instead. - -This flag is now an alias for --no-unicode. And --pcre2-unicode is an alias -for --unicode. -" - ); - let arg = RGArg::switch("no-pcre2-unicode") - .help(SHORT) - .long_help(LONG) - .overrides("pcre2-unicode") - .overrides("unicode"); - args.push(arg); - - let arg = RGArg::switch("pcre2-unicode") - .hidden() - .overrides("no-pcre2-unicode") - .overrides("no-unicode"); - args.push(arg); -} - -fn flag_no_require_git(args: &mut Vec) { - const SHORT: &str = "Do not require a git repository to use gitignores."; - const LONG: &str = long!( - "\ -By default, ripgrep will only respect global gitignore rules, .gitignore rules -and local exclude rules if ripgrep detects that you are searching inside a -git repository. This flag allows you to relax this restriction such that -ripgrep will respect all git related ignore rules regardless of whether you're -searching in a git repository or not. - -This flag can be disabled with --require-git. -" - ); - let arg = RGArg::switch("no-require-git") - .help(SHORT) - .long_help(LONG) - .overrides("require-git"); - args.push(arg); - - let arg = - RGArg::switch("require-git").hidden().overrides("no-require-git"); - args.push(arg); -} - -fn flag_no_unicode(args: &mut Vec) { - const SHORT: &str = "Disable Unicode mode."; - const LONG: &str = long!( - "\ -By default, ripgrep will enable \"Unicode mode\" in all of its regexes. This -has a number of consequences: - -* '.' will only match valid UTF-8 encoded scalar values. -* Classes like '\\w', '\\s', '\\d' are all Unicode aware and much bigger - than their ASCII only versions. -* Case insensitive matching will use Unicode case folding. -* A large array of classes like '\\p{Emoji}' are available. -* Word boundaries ('\\b' and '\\B') use the Unicode definition of a word - character. - -In some cases it can be desirable to turn these things off. The --no-unicode -flag will do exactly that. - -For PCRE2 specifically, Unicode mode represents a critical trade off in the -user experience of ripgrep. In particular, unlike the default regex engine, -PCRE2 does not support the ability to search possibly invalid UTF-8 with -Unicode features enabled. Instead, PCRE2 *requires* that everything it searches -when Unicode mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for -the purposes of ripgrep, we only discuss UTF-8.) This means that if you have -PCRE2's Unicode mode enabled and you attempt to search invalid UTF-8, then -the search for that file will halt and print an error. For this reason, when -PCRE2's Unicode mode is enabled, ripgrep will automatically \"fix\" invalid -UTF-8 sequences by replacing them with the Unicode replacement codepoint. This -penalty does not occur when using the default regex engine. - -If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode -is enabled, then pass the --no-encoding flag to disable all transcoding. - -The --no-unicode flag can be disabled with --unicode. Note that ---no-pcre2-unicode and --pcre2-unicode are aliases for --no-unicode and ---unicode, respectively. -" - ); - let arg = RGArg::switch("no-unicode") - .help(SHORT) - .long_help(LONG) - .overrides("unicode") - .overrides("pcre2-unicode"); - args.push(arg); - - let arg = RGArg::switch("unicode") - .hidden() - .overrides("no-unicode") - .overrides("no-pcre2-unicode"); - args.push(arg); -} - -fn flag_null(args: &mut Vec) { - const SHORT: &str = "Print a NUL byte after file paths."; - const LONG: &str = long!( - "\ -Whenever a file path is printed, follow it with a NUL byte. This includes -printing file paths before matches, and when printing a list of matching files -such as with --count, --files-with-matches and --files. This option is useful -for use with xargs. -" - ); - let arg = RGArg::switch("null").short("0").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_null_data(args: &mut Vec) { - const SHORT: &str = "Use NUL as a line terminator instead of \\n."; - const LONG: &str = long!( - "\ -Enabling this option causes ripgrep to use NUL as a line terminator instead of -the default of '\\n'. - -This is useful when searching large binary files that would otherwise have very -long lines if '\\n' were used as the line terminator. In particular, ripgrep -requires that, at a minimum, each line must fit into memory. Using NUL instead -can be a useful stopgap to keep memory requirements low and avoid OOM (out of -memory) conditions. - -This is also useful for processing NUL delimited data, such as that emitted -when using ripgrep's -0/--null flag or find's --print0 flag. - -Using this flag implies -a/--text. -" - ); - let arg = RGArg::switch("null-data") - .help(SHORT) - .long_help(LONG) - .overrides("crlf"); - args.push(arg); -} - -fn flag_one_file_system(args: &mut Vec) { - const SHORT: &str = - "Do not descend into directories on other file systems."; - const LONG: &str = long!( - "\ -When enabled, ripgrep will not cross file system boundaries relative to where -the search started from. - -Note that this applies to each path argument given to ripgrep. For example, in -the command 'rg --one-file-system /foo/bar /quux/baz', ripgrep will search both -'/foo/bar' and '/quux/baz' even if they are on different file systems, but will -not cross a file system boundary when traversing each path's directory tree. - -This is similar to find's '-xdev' or '-mount' flag. - -This flag can be disabled with --no-one-file-system. -" - ); - let arg = RGArg::switch("one-file-system") - .help(SHORT) - .long_help(LONG) - .overrides("no-one-file-system"); - args.push(arg); - - let arg = RGArg::switch("no-one-file-system") - .hidden() - .overrides("one-file-system"); - args.push(arg); -} - -fn flag_only_matching(args: &mut Vec) { - const SHORT: &str = "Print only matched parts of a line."; - const LONG: &str = long!( - "\ -Print only the matched (non-empty) parts of a matching line, with each such -part on a separate output line. -" - ); - let arg = - RGArg::switch("only-matching").short("o").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_path_separator(args: &mut Vec) { - const SHORT: &str = "Set the path separator."; - const LONG: &str = long!( - "\ -Set the path separator to use when printing file paths. This defaults to your -platform's path separator, which is / on Unix and \\ on Windows. This flag is -intended for overriding the default when the environment demands it (e.g., -cygwin). A path separator is limited to a single byte. -" - ); - let arg = - RGArg::flag("path-separator", "SEPARATOR").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_passthru(args: &mut Vec) { - const SHORT: &str = "Print both matching and non-matching lines."; - const LONG: &str = long!( - "\ -Print both matching and non-matching lines. - -Another way to achieve a similar effect is by modifying your pattern to match -the empty string. For example, if you are searching using 'rg foo' then using -'rg \"^|foo\"' instead will emit every line in every file searched, but only -occurrences of 'foo' will be highlighted. This flag enables the same behavior -without needing to modify the pattern. - -This overrides the --context, --after-context and --before-context flags. -" - ); - let arg = RGArg::switch("passthru") - .help(SHORT) - .long_help(LONG) - .alias("passthrough") - .overrides("after-context") - .overrides("before-context") - .overrides("context"); - args.push(arg); -} - -fn flag_pcre2(args: &mut Vec) { - const SHORT: &str = "Enable PCRE2 matching."; - const LONG: &str = long!( - "\ -When this flag is present, ripgrep will use the PCRE2 regex engine instead of -its default regex engine. - -This is generally useful when you want to use features such as look-around -or backreferences. - -Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in -your build of ripgrep, then using this flag will result in ripgrep printing -an error message and exiting. PCRE2 may also have worse user experience in -some cases, since it has fewer introspection APIs than ripgrep's default regex -engine. For example, if you use a '\\n' in a PCRE2 regex without the -'-U/--multiline' flag, then ripgrep will silently fail to match anything -instead of reporting an error immediately (like it does with the default -regex engine). - -Related flags: --no-pcre2-unicode - -This flag can be disabled with --no-pcre2. -" - ); - let arg = RGArg::switch("pcre2") - .short("P") - .help(SHORT) - .long_help(LONG) - .overrides("no-pcre2") - .overrides("auto-hybrid-regex") - .overrides("no-auto-hybrid-regex") - .overrides("engine"); - args.push(arg); - - let arg = RGArg::switch("no-pcre2") - .hidden() - .overrides("pcre2") - .overrides("auto-hybrid-regex") - .overrides("no-auto-hybrid-regex") - .overrides("engine"); - args.push(arg); -} - -fn flag_pcre2_version(args: &mut Vec) { - const SHORT: &str = "Print the version of PCRE2 that ripgrep uses."; - const LONG: &str = long!( - "\ -When this flag is present, ripgrep will print the version of PCRE2 in use, -along with other information, and then exit. If PCRE2 is not available, then -ripgrep will print an error message and exit with an error code. -" - ); - let arg = RGArg::switch("pcre2-version").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_pre(args: &mut Vec) { - const SHORT: &str = "search outputs of COMMAND FILE for each FILE"; - const LONG: &str = long!( - "\ -For each input FILE, search the standard output of COMMAND FILE rather than the -contents of FILE. This option expects the COMMAND program to either be an -absolute path or to be available in your PATH. Either an empty string COMMAND -or the '--no-pre' flag will disable this behavior. - - WARNING: When this flag is set, ripgrep will unconditionally spawn a - process for every file that is searched. Therefore, this can incur an - unnecessarily large performance penalty if you don't otherwise need the - flexibility offered by this flag. One possible mitigation to this is to use - the '--pre-glob' flag to limit which files a preprocessor is run with. - -A preprocessor is not run when ripgrep is searching stdin. - -When searching over sets of files that may require one of several decoders -as preprocessors, COMMAND should be a wrapper program or script which first -classifies FILE based on magic numbers/content or based on the FILE name and -then dispatches to an appropriate preprocessor. Each COMMAND also has its -standard input connected to FILE for convenience. - -For example, a shell script for COMMAND might look like: - - case \"$1\" in - *.pdf) - exec pdftotext \"$1\" - - ;; - *) - case $(file \"$1\") in - *Zstandard*) - exec pzstd -cdq - ;; - *) - exec cat - ;; - esac - ;; - esac - -The above script uses `pdftotext` to convert a PDF file to plain text. For -all other files, the script uses the `file` utility to sniff the type of the -file based on its contents. If it is a compressed file in the Zstandard format, -then `pzstd` is used to decompress the contents to stdout. - -This overrides the -z/--search-zip flag. -" - ); - let arg = RGArg::flag("pre", "COMMAND") - .help(SHORT) - .long_help(LONG) - .overrides("no-pre") - .overrides("search-zip"); - args.push(arg); - - let arg = RGArg::switch("no-pre").hidden().overrides("pre"); - args.push(arg); -} - -fn flag_pre_glob(args: &mut Vec) { - const SHORT: &str = - "Include or exclude files from a preprocessing command."; - const LONG: &str = long!( - "\ -This flag works in conjunction with the --pre flag. Namely, when one or more ---pre-glob flags are given, then only files that match the given set of globs -will be handed to the command specified by the --pre flag. Any non-matching -files will be searched without using the preprocessor command. - -This flag is useful when searching many files with the --pre flag. Namely, -it permits the ability to avoid process overhead for files that don't need -preprocessing. For example, given the following shell script, 'pre-pdftotext': - - #!/bin/sh - - pdftotext \"$1\" - - -then it is possible to use '--pre pre-pdftotext --pre-glob \'*.pdf\'' to make -it so ripgrep only executes the 'pre-pdftotext' command on files with a '.pdf' -extension. - -Multiple --pre-glob flags may be used. Globbing rules match .gitignore globs. -Precede a glob with a ! to exclude it. - -This flag has no effect if the --pre flag is not used. -" - ); - let arg = RGArg::flag("pre-glob", "GLOB") - .help(SHORT) - .long_help(LONG) - .multiple() - .allow_leading_hyphen(); - args.push(arg); -} - -fn flag_pretty(args: &mut Vec) { - const SHORT: &str = "Alias for --color always --heading --line-number."; - const LONG: &str = long!( - "\ -This is a convenience alias for '--color always --heading --line-number'. This -flag is useful when you still want pretty output even if you're piping ripgrep -to another program or file. For example: 'rg -p foo | less -R'. -" - ); - let arg = RGArg::switch("pretty").short("p").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_quiet(args: &mut Vec) { - const SHORT: &str = "Do not print anything to stdout."; - const LONG: &str = long!( - "\ -Do not print anything to stdout. If a match is found in a file, then ripgrep -will stop searching. This is useful when ripgrep is used only for its exit -code (which will be an error if no matches are found). - -When --files is used, ripgrep will stop finding files after finding the -first file that does not match any ignore rules. -" - ); - let arg = RGArg::switch("quiet").short("q").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_regex_size_limit(args: &mut Vec) { - const SHORT: &str = "The upper size limit of the compiled regex."; - const LONG: &str = long!( - "\ -The upper size limit of the compiled regex. The default limit is 10M. - -The argument accepts the same size suffixes as allowed in the --max-filesize -flag. -" - ); - let arg = RGArg::flag("regex-size-limit", "NUM+SUFFIX?") - .help(SHORT) - .long_help(LONG); - args.push(arg); -} - -fn flag_regexp(args: &mut Vec) { - const SHORT: &str = "A pattern to search for."; - const LONG: &str = long!( - "\ -A pattern to search for. This option can be provided multiple times, where -all patterns given are searched. Lines matching at least one of the provided -patterns are printed. This flag can also be used when searching for patterns -that start with a dash. - -For example, to search for the literal '-foo', you can use this flag: - - rg -e -foo - -You can also use the special '--' delimiter to indicate that no more flags -will be provided. Namely, the following is equivalent to the above: - - rg -- -foo -" - ); - let arg = RGArg::flag("regexp", "PATTERN") - .short("e") - .help(SHORT) - .long_help(LONG) - .multiple() - .allow_leading_hyphen(); - args.push(arg); -} - -fn flag_replace(args: &mut Vec) { - const SHORT: &str = "Replace matches with the given text."; - const LONG: &str = long!( - "\ -Replace every match with the text given when printing results. Neither this -flag nor any other ripgrep flag will modify your files. - -Capture group indices (e.g., $5) and names (e.g., $foo) are supported in the -replacement string. Capture group indices are numbered based on the position of -the opening parenthesis of the group, where the leftmost such group is $1. The -special $0 group corresponds to the entire match. - -The name of a group is formed by taking the longest string of letters, numbers -and underscores (i.e. [_0-9A-Za-z]) after the $. For example, $1a will be -replaced with the group named '1a', not the group at index 1. If the group's -name contains characters that aren't letters, numbers or underscores, or you -want to immediately follow the group with another string, the name should be -put inside braces. For example, ${1}a will take the content of the group at -index 1 and append 'a' to the end of it. - -If an index or name does not refer to a valid capture group, it will be -replaced with an empty string. - -In shells such as Bash and zsh, you should wrap the pattern in single quotes -instead of double quotes. Otherwise, capture group indices will be replaced by -expanded shell variables which will most likely be empty. - -To write a literal '$', use '$$'. - -Note that the replacement by default replaces each match, and NOT the entire -line. To replace the entire line, you should match the entire line. - -This flag can be used with the -o/--only-matching flag. -" - ); - let arg = RGArg::flag("replace", "REPLACEMENT_TEXT") - .short("r") - .help(SHORT) - .long_help(LONG) - .allow_leading_hyphen(); - args.push(arg); -} - -fn flag_search_zip(args: &mut Vec) { - const SHORT: &str = "Search in compressed files."; - const LONG: &str = long!( - "\ -Search in compressed files. Currently gzip, bzip2, xz, LZ4, LZMA, Brotli and -Zstd files are supported. This option expects the decompression binaries to be -available in your PATH. - -This flag can be disabled with --no-search-zip. -" - ); - let arg = RGArg::switch("search-zip") - .short("z") - .help(SHORT) - .long_help(LONG) - .overrides("no-search-zip") - .overrides("pre"); - args.push(arg); - - let arg = RGArg::switch("no-search-zip").hidden().overrides("search-zip"); - args.push(arg); -} - -fn flag_smart_case(args: &mut Vec) { - const SHORT: &str = "Smart case search."; - const LONG: &str = long!( - "\ -Searches case insensitively if the pattern is all lowercase. Search case -sensitively otherwise. - -A pattern is considered all lowercase if both of the following rules hold: - -First, the pattern contains at least one literal character. For example, 'a\\w' -contains a literal ('a') but just '\\w' does not. - -Second, of the literals in the pattern, none of them are considered to be -uppercase according to Unicode. For example, 'foo\\pL' has no uppercase -literals but 'Foo\\pL' does. - -This overrides the -s/--case-sensitive and -i/--ignore-case flags. -" - ); - let arg = RGArg::switch("smart-case") - .short("S") - .help(SHORT) - .long_help(LONG) - .overrides("case-sensitive") - .overrides("ignore-case"); - args.push(arg); -} - -fn flag_sort_files(args: &mut Vec) { - const SHORT: &str = "DEPRECATED"; - const LONG: &str = long!( - "\ -DEPRECATED: Use --sort or --sortr instead. - -Sort results by file path. Note that this currently disables all parallelism -and runs search in a single thread. - -This flag can be disabled with --no-sort-files. -" - ); - let arg = RGArg::switch("sort-files") - .help(SHORT) - .long_help(LONG) - .hidden() - .overrides("no-sort-files") - .overrides("sort") - .overrides("sortr"); - args.push(arg); - - let arg = RGArg::switch("no-sort-files") - .hidden() - .overrides("sort-files") - .overrides("sort") - .overrides("sortr"); - args.push(arg); -} - -fn flag_sort(args: &mut Vec) { - const SHORT: &str = - "Sort results in ascending order. Implies --threads=1."; - const LONG: &str = long!( - "\ -This flag enables sorting of results in ascending order. The possible values -for this flag are: - - none (Default) Do not sort results. Fastest. Can be multi-threaded. - path Sort by file path. Always single-threaded. - modified Sort by the last modified time on a file. Always single-threaded. - accessed Sort by the last accessed time on a file. Always single-threaded. - created Sort by the creation time on a file. Always single-threaded. - -If the chosen (manually or by-default) sorting criteria isn't available on your -system (for example, creation time is not available on ext4 file systems), then -ripgrep will attempt to detect this, print an error and exit without searching. - -To sort results in reverse or descending order, use the --sortr flag. Also, -this flag overrides --sortr. - -Note that sorting results currently always forces ripgrep to abandon -parallelism and run in a single thread. -" - ); - let arg = RGArg::flag("sort", "SORTBY") - .help(SHORT) - .long_help(LONG) - .possible_values(&["path", "modified", "accessed", "created", "none"]) - .overrides("sortr") - .overrides("sort-files") - .overrides("no-sort-files"); - args.push(arg); -} - -fn flag_sortr(args: &mut Vec) { - const SHORT: &str = - "Sort results in descending order. Implies --threads=1."; - const LONG: &str = long!( - "\ -This flag enables sorting of results in descending order. The possible values -for this flag are: - - none (Default) Do not sort results. Fastest. Can be multi-threaded. - path Sort by file path. Always single-threaded. - modified Sort by the last modified time on a file. Always single-threaded. - accessed Sort by the last accessed time on a file. Always single-threaded. - created Sort by the creation time on a file. Always single-threaded. - -If the chosen (manually or by-default) sorting criteria isn't available on your -system (for example, creation time is not available on ext4 file systems), then -ripgrep will attempt to detect this, print an error and exit without searching. - -To sort results in ascending order, use the --sort flag. Also, this flag -overrides --sort. - -Note that sorting results currently always forces ripgrep to abandon -parallelism and run in a single thread. -" - ); - let arg = RGArg::flag("sortr", "SORTBY") - .help(SHORT) - .long_help(LONG) - .possible_values(&["path", "modified", "accessed", "created", "none"]) - .overrides("sort") - .overrides("sort-files") - .overrides("no-sort-files"); - args.push(arg); -} - -fn flag_stats(args: &mut Vec) { - const SHORT: &str = "Print statistics about this ripgrep search."; - const LONG: &str = long!( - "\ -Print aggregate statistics about this ripgrep search. When this flag is -present, ripgrep will print the following stats to stdout at the end of the -search: number of matched lines, number of files with matches, number of files -searched, and the time taken for the entire search to complete. - -This set of aggregate statistics may expand over time. - -Note that this flag has no effect if --files, --files-with-matches or ---files-without-match is passed. - -This flag can be disabled with --no-stats. -" - ); - let arg = RGArg::switch("stats") - .help(SHORT) - .long_help(LONG) - .overrides("no-stats"); - args.push(arg); - - let arg = RGArg::switch("no-stats").hidden().overrides("stats"); - args.push(arg); -} - -fn flag_stop_on_nonmatch(args: &mut Vec) { - const SHORT: &str = "Stop searching after a non-match."; - const LONG: &str = long!( - "\ -Enabling this option will cause ripgrep to stop reading a file once it -encounters a non-matching line after it has encountered a matching line. -This is useful if it is expected that all matches in a given file will be on -sequential lines, for example due to the lines being sorted. - -This overrides the -U/--multiline flag. -" - ); - let arg = RGArg::switch("stop-on-nonmatch") - .help(SHORT) - .long_help(LONG) - .overrides("multiline"); - args.push(arg); -} - -fn flag_text(args: &mut Vec) { - const SHORT: &str = "Search binary files as if they were text."; - const LONG: &str = long!( - "\ -Search binary files as if they were text. When this flag is present, ripgrep's -binary file detection is disabled. This means that when a binary file is -searched, its contents may be printed if there is a match. This may cause -escape codes to be printed that alter the behavior of your terminal. - -When binary file detection is enabled it is imperfect. In general, it uses -a simple heuristic. If a NUL byte is seen during search, then the file is -considered binary and search stops (unless this flag is present). -Alternatively, if the '--binary' flag is used, then ripgrep will only quit -when it sees a NUL byte after it sees a match (or searches the entire file). - -This flag can be disabled with '--no-text'. It overrides the '--binary' flag. -" - ); - let arg = RGArg::switch("text") - .short("a") - .help(SHORT) - .long_help(LONG) - .overrides("no-text") - .overrides("binary") - .overrides("no-binary"); - args.push(arg); - - let arg = RGArg::switch("no-text") - .hidden() - .overrides("text") - .overrides("binary") - .overrides("no-binary"); - args.push(arg); -} - -fn flag_threads(args: &mut Vec) { - const SHORT: &str = "The approximate number of threads to use."; - const LONG: &str = long!( - "\ -The approximate number of threads to use. A value of 0 (which is the default) -causes ripgrep to choose the thread count using heuristics. -" - ); - let arg = - RGArg::flag("threads", "NUM").short("j").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_trim(args: &mut Vec) { - const SHORT: &str = "Trim prefixed whitespace from matches."; - const LONG: &str = long!( - "\ -When set, all ASCII whitespace at the beginning of each line printed will be -trimmed. - -This flag can be disabled with --no-trim. -" - ); - let arg = - RGArg::switch("trim").help(SHORT).long_help(LONG).overrides("no-trim"); - args.push(arg); - - let arg = RGArg::switch("no-trim").hidden().overrides("trim"); - args.push(arg); -} - -fn flag_type(args: &mut Vec) { - const SHORT: &str = "Only search files matching TYPE."; - const LONG: &str = long!( - "\ -Only search files matching TYPE. Multiple type flags may be provided. Use the ---type-list flag to list all available types. - -This flag supports the special value 'all', which will behave as if --type -was provided for every file type supported by ripgrep (including any custom -file types). The end result is that '--type all' causes ripgrep to search in -\"whitelist\" mode, where it will only search files it recognizes via its type -definitions. -" - ); - let arg = RGArg::flag("type", "TYPE") - .short("t") - .help(SHORT) - .long_help(LONG) - .multiple(); - args.push(arg); -} - -fn flag_type_add(args: &mut Vec) { - const SHORT: &str = "Add a new glob for a file type."; - const LONG: &str = long!( - "\ -Add a new glob for a particular file type. Only one glob can be added at a -time. Multiple --type-add flags can be provided. Unless --type-clear is used, -globs are added to any existing globs defined inside of ripgrep. - -Note that this MUST be passed to every invocation of ripgrep. Type settings are -NOT persisted. See CONFIGURATION FILES for a workaround. - -Example: - - rg --type-add 'foo:*.foo' -tfoo PATTERN. - ---type-add can also be used to include rules from other types with the special -include directive. The include directive permits specifying one or more other -type names (separated by a comma) that have been defined and its rules will -automatically be imported into the type specified. For example, to create a -type called src that matches C++, Python and Markdown files, one can use: - - --type-add 'src:include:cpp,py,md' - -Additional glob rules can still be added to the src type by using the ---type-add flag again: - - --type-add 'src:include:cpp,py,md' --type-add 'src:*.foo' - -Note that type names must consist only of Unicode letters or numbers. -Punctuation characters are not allowed. -" - ); - let arg = RGArg::flag("type-add", "TYPE_SPEC") - .help(SHORT) - .long_help(LONG) - .multiple(); - args.push(arg); -} - -fn flag_type_clear(args: &mut Vec) { - const SHORT: &str = "Clear globs for a file type."; - const LONG: &str = long!( - "\ -Clear the file type globs previously defined for TYPE. This only clears the -default type definitions that are found inside of ripgrep. - -Note that this MUST be passed to every invocation of ripgrep. Type settings are -NOT persisted. See CONFIGURATION FILES for a workaround. -" - ); - let arg = RGArg::flag("type-clear", "TYPE") - .help(SHORT) - .long_help(LONG) - .multiple(); - args.push(arg); -} - -fn flag_type_not(args: &mut Vec) { - const SHORT: &str = "Do not search files matching TYPE."; - const LONG: &str = long!( - "\ -Do not search files matching TYPE. Multiple type-not flags may be provided. Use -the --type-list flag to list all available types. -" - ); - let arg = RGArg::flag("type-not", "TYPE") - .short("T") - .help(SHORT) - .long_help(LONG) - .multiple(); - args.push(arg); -} - -fn flag_type_list(args: &mut Vec) { - const SHORT: &str = "Show all supported file types."; - const LONG: &str = long!( - "\ -Show all supported file types and their corresponding globs. -" - ); - let arg = RGArg::switch("type-list") - .help(SHORT) - .long_help(LONG) - // This also technically conflicts with PATTERN, but the first file - // path will actually be in PATTERN. - .conflicts(&["file", "files", "pattern", "regexp"]); - args.push(arg); -} - -fn flag_unrestricted(args: &mut Vec) { - const SHORT: &str = "Reduce the level of \"smart\" searching."; - const LONG: &str = long!( - "\ -Reduce the level of \"smart\" searching. A single -u won't respect .gitignore -(etc.) files (--no-ignore). Two -u flags will additionally search hidden files -and directories (-./--hidden). Three -u flags will additionally search binary -files (--binary). - -'rg -uuu' is roughly equivalent to 'grep -r'. -" - ); - let arg = RGArg::switch("unrestricted") - .short("u") - .help(SHORT) - .long_help(LONG) - .multiple(); - args.push(arg); -} - -fn flag_vimgrep(args: &mut Vec) { - const SHORT: &str = "Show results in vim compatible format."; - const LONG: &str = long!( - "\ -Show results with every match on its own line, including line numbers and -column numbers. With this option, a line with more than one match will be -printed more than once. -" - ); - let arg = RGArg::switch("vimgrep").help(SHORT).long_help(LONG); - args.push(arg); -} - -fn flag_with_filename(args: &mut Vec) { - const SHORT: &str = "Print the file path with the matched lines."; - const LONG: &str = long!( - "\ -Display the file path for matches. This is the default when more than one -file is searched. If --heading is enabled (the default when printing to a -terminal), the file path will be shown above clusters of matches from each -file; otherwise, the file name will be shown as a prefix for each matched line. - -This flag overrides --no-filename. -" - ); - let arg = RGArg::switch("with-filename") - .short("H") - .help(SHORT) - .long_help(LONG) - .overrides("no-filename"); - args.push(arg); - - const NO_SHORT: &str = "Never print the file path with the matched lines."; - const NO_LONG: &str = long!( - "\ -Never print the file path with the matched lines. This is the default when -ripgrep is explicitly instructed to search one file or stdin. - -This flag overrides --with-filename. -" - ); - let arg = RGArg::switch("no-filename") - .short("I") - .help(NO_SHORT) - .long_help(NO_LONG) - .overrides("with-filename"); - args.push(arg); -} - -fn flag_word_regexp(args: &mut Vec) { - const SHORT: &str = "Only show matches surrounded by word boundaries."; - const LONG: &str = long!( - "\ -Only show matches surrounded by word boundaries. This is roughly equivalent to -putting \\b before and after all of the search patterns. - -This overrides the --line-regexp flag. -" - ); - let arg = RGArg::switch("word-regexp") - .short("w") - .help(SHORT) - .long_help(LONG) - .overrides("line-regexp"); - args.push(arg); -} diff --git a/crates/core/args.rs b/crates/core/args.rs deleted file mode 100644 index 1e781b4602..0000000000 --- a/crates/core/args.rs +++ /dev/null @@ -1,1975 +0,0 @@ -use std::{ - collections::HashSet, - env, - ffi::{OsStr, OsString}, - io::{self, IsTerminal, Write}, - path::{Path, PathBuf}, - sync::Arc, -}; - -use { - clap, - grep::{ - cli, - matcher::LineTerminator, - printer::{ - default_color_specs, ColorSpecs, HyperlinkConfig, - HyperlinkEnvironment, HyperlinkFormat, JSONBuilder, PathPrinter, - PathPrinterBuilder, Standard, StandardBuilder, Stats, Summary, - SummaryBuilder, SummaryKind, JSON, - }, - regex::{ - RegexMatcher as RustRegexMatcher, - RegexMatcherBuilder as RustRegexMatcherBuilder, - }, - searcher::{ - BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder, - }, - }, - ignore::{ - overrides::{Override, OverrideBuilder}, - types::{FileTypeDef, Types, TypesBuilder}, - {Walk, WalkBuilder, WalkParallel}, - }, - termcolor::{BufferWriter, ColorChoice, WriteColor}, -}; - -#[cfg(feature = "pcre2")] -use grep::pcre2::{ - RegexMatcher as PCRE2RegexMatcher, - RegexMatcherBuilder as PCRE2RegexMatcherBuilder, -}; - -use crate::{ - app, config, - logger::Logger, - messages::{set_ignore_messages, set_messages}, - search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder}, - subject::{Subject, SubjectBuilder}, -}; - -/// The command that ripgrep should execute based on the command line -/// configuration. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum Command { - /// Search using exactly one thread. - Search, - /// Search using possibly many threads. - SearchParallel, - /// The command line parameters suggest that a search should occur, but - /// ripgrep knows that a match can never be found (e.g., no given patterns - /// or --max-count=0). - SearchNever, - /// Show the files that would be searched, but don't actually search them, - /// and use exactly one thread. - Files, - /// Show the files that would be searched, but don't actually search them, - /// and perform directory traversal using possibly many threads. - FilesParallel, - /// List all file type definitions configured, including the default file - /// types and any additional file types added to the command line. - Types, - /// Print the version of PCRE2 in use. - PCRE2Version, -} - -impl Command { - /// Returns true if and only if this command requires executing a search. - fn is_search(&self) -> bool { - use self::Command::*; - - match *self { - Search | SearchParallel => true, - SearchNever | Files | FilesParallel | Types | PCRE2Version => { - false - } - } - } -} - -/// The primary configuration object used throughout ripgrep. It provides a -/// high-level convenient interface to the provided command line arguments. -/// -/// An `Args` object is cheap to clone and can be used from multiple threads -/// simultaneously. -#[derive(Clone, Debug)] -pub struct Args(Arc); - -#[derive(Clone, Debug)] -struct ArgsImp { - /// Mid-to-low level routines for extracting CLI arguments. - matches: ArgMatches, - /// The command we want to execute. - command: Command, - /// The number of threads to use. This is based in part on available - /// threads, in part on the number of threads requested and in part on the - /// command we're running. - threads: usize, - /// A matcher built from the patterns. - /// - /// It's important that this is only built once, since building this goes - /// through regex compilation and various types of analyses. That is, if - /// you need many of these (one per thread, for example), it is better to - /// build it once and then clone it. - matcher: PatternMatcher, - /// The paths provided at the command line. This is guaranteed to be - /// non-empty. (If no paths are provided, then a default path is created.) - paths: Vec, - /// Returns true if and only if `paths` had to be populated with a single - /// default path. - using_default_path: bool, -} - -impl Args { - /// Parse the command line arguments for this process. - /// - /// If a CLI usage error occurred, then exit the process and print a usage - /// or error message. Similarly, if the user requested the version of - /// ripgrep, then print the version and exit. - /// - /// Also, initialize a global logger. - pub fn parse() -> anyhow::Result { - // We parse the args given on CLI. This does not include args from - // the config. We use the CLI args as an initial configuration while - // trying to parse config files. If a config file exists and has - // arguments, then we re-parse argv, otherwise we just use the matches - // we have here. - let early_matches = ArgMatches::new(clap_matches(env::args_os())?); - set_messages(!early_matches.is_present("no-messages")); - set_ignore_messages(!early_matches.is_present("no-ignore-messages")); - - if let Err(err) = Logger::init() { - anyhow::bail!("failed to initialize logger: {err}"); - } - if early_matches.is_present("trace") { - log::set_max_level(log::LevelFilter::Trace); - } else if early_matches.is_present("debug") { - log::set_max_level(log::LevelFilter::Debug); - } else { - log::set_max_level(log::LevelFilter::Warn); - } - - let matches = early_matches.reconfigure()?; - // The logging level may have changed if we brought in additional - // arguments from a configuration file, so recheck it and set the log - // level as appropriate. - if matches.is_present("trace") { - log::set_max_level(log::LevelFilter::Trace); - } else if matches.is_present("debug") { - log::set_max_level(log::LevelFilter::Debug); - } else { - log::set_max_level(log::LevelFilter::Warn); - } - set_messages(!matches.is_present("no-messages")); - set_ignore_messages(!matches.is_present("no-ignore-messages")); - matches.to_args() - } - - /// Return direct access to command line arguments. - fn matches(&self) -> &ArgMatches { - &self.0.matches - } - - /// Return the matcher builder from the patterns. - fn matcher(&self) -> &PatternMatcher { - &self.0.matcher - } - - /// Return the paths found in the command line arguments. This is - /// guaranteed to be non-empty. In the case where no explicit arguments are - /// provided, a single default path is provided automatically. - fn paths(&self) -> &[PathBuf] { - &self.0.paths - } - - /// Returns true if and only if `paths` had to be populated with a default - /// path, which occurs only when no paths were given as command line - /// arguments. - pub fn using_default_path(&self) -> bool { - self.0.using_default_path - } - - /// Return the printer that should be used for formatting the output of - /// search results. - /// - /// The returned printer will write results to the given writer. - fn printer(&self, wtr: W) -> anyhow::Result> { - match self.matches().output_kind() { - OutputKind::Standard => { - let separator_search = self.command() == Command::Search; - self.matches() - .printer_standard(self.paths(), wtr, separator_search) - .map(Printer::Standard) - } - OutputKind::Summary => self - .matches() - .printer_summary(self.paths(), wtr) - .map(Printer::Summary), - OutputKind::JSON => { - self.matches().printer_json(wtr).map(Printer::JSON) - } - } - } -} - -/// High level public routines for building data structures used by ripgrep -/// from command line arguments. -impl Args { - /// Create a new buffer writer for multi-threaded printing with color - /// support. - pub fn buffer_writer(&self) -> anyhow::Result { - let mut wtr = BufferWriter::stdout(self.matches().color_choice()); - wtr.separator(self.matches().file_separator()?); - Ok(wtr) - } - - /// Return the high-level command that ripgrep should run. - pub fn command(&self) -> Command { - self.0.command - } - - /// Builder a path printer that can be used for printing just file paths, - /// with optional color support. - /// - /// The printer will print paths to the given writer. - pub fn path_printer( - &self, - wtr: W, - ) -> anyhow::Result> { - let mut builder = PathPrinterBuilder::new(); - builder - .color_specs(self.matches().color_specs()?) - .hyperlink(self.matches().hyperlink_config()?) - .separator(self.matches().path_separator()?) - .terminator(self.matches().path_terminator().unwrap_or(b'\n')); - Ok(builder.build(wtr)) - } - - /// Returns true if and only if ripgrep should be "quiet." - pub fn quiet(&self) -> bool { - self.matches().is_present("quiet") - } - - /// Returns true if and only if the search should quit after finding the - /// first match. - pub fn quit_after_match(&self) -> anyhow::Result { - Ok(self.matches().is_present("quiet") && self.stats()?.is_none()) - } - - /// Build a worker for executing searches. - /// - /// Search results are written to the given writer. - pub fn search_worker( - &self, - wtr: W, - ) -> anyhow::Result> { - let matches = self.matches(); - let matcher = self.matcher().clone(); - let printer = self.printer(wtr)?; - let searcher = matches.searcher(self.paths())?; - let mut builder = SearchWorkerBuilder::new(); - builder - .json_stats(matches.is_present("json")) - .preprocessor(matches.preprocessor())? - .preprocessor_globs(matches.preprocessor_globs()?) - .search_zip(matches.is_present("search-zip")) - .binary_detection_implicit(matches.binary_detection_implicit()) - .binary_detection_explicit(matches.binary_detection_explicit()); - Ok(builder.build(matcher, searcher, printer)) - } - - /// Returns a zero value for tracking statistics if and only if it has been - /// requested. - /// - /// When this returns a `Stats` value, then it is guaranteed that the - /// search worker will be configured to track statistics as well. - pub fn stats(&self) -> anyhow::Result> { - Ok(if self.command().is_search() && self.matches().stats() { - Some(Stats::new()) - } else { - None - }) - } - - /// Return a builder for constructing subjects. A subject represents a - /// single unit of something to search. Typically, this corresponds to a - /// file or a stream such as stdin. - pub fn subject_builder(&self) -> SubjectBuilder { - let mut builder = SubjectBuilder::new(); - builder.strip_dot_prefix(self.using_default_path()); - builder - } - - /// Execute the given function with a writer to stdout that enables color - /// support based on the command line configuration. - pub fn stdout(&self) -> cli::StandardStream { - let color = self.matches().color_choice(); - if self.matches().is_present("line-buffered") { - cli::stdout_buffered_line(color) - } else if self.matches().is_present("block-buffered") { - cli::stdout_buffered_block(color) - } else { - cli::stdout(color) - } - } - - /// Return the type definitions compiled into ripgrep. - /// - /// If there was a problem reading and parsing the type definitions, then - /// this returns an error. - pub fn type_defs(&self) -> anyhow::Result> { - Ok(self.matches().types()?.definitions().to_vec()) - } - - /// Return a walker that never uses additional threads. - pub fn walker(&self) -> anyhow::Result { - Ok(self - .matches() - .walker_builder(self.paths(), self.0.threads)? - .build()) - } - - /// Returns true if and only if `stat`-related sorting is required - pub fn needs_stat_sort(&self) -> bool { - return self.matches().sort_by().map_or( - false, - |sort_by| match sort_by.kind { - SortByKind::LastModified - | SortByKind::Created - | SortByKind::LastAccessed => sort_by.check().is_ok(), - _ => false, - }, - ); - } - - /// Sort subjects if a sorter is specified, but only if the sort requires - /// stat calls. Non-stat related sorts are handled during file traversal - /// - /// This function assumes that it is known that a stat-related sort is - /// required, and does not check for it again. - /// - /// It is important that that precondition is fulfilled, since this function - /// consumes the subjects iterator, and is therefore a blocking function. - pub fn sort_by_stat(&self, subjects: I) -> Vec - where - I: Iterator, - { - let sorter = match self.matches().sort_by() { - Ok(v) => v, - Err(_) => return subjects.collect(), - }; - use SortByKind::*; - let mut keyed = match sorter.kind { - LastModified => load_timestamps(subjects, |m| m.modified()), - LastAccessed => load_timestamps(subjects, |m| m.accessed()), - Created => load_timestamps(subjects, |m| m.created()), - _ => return subjects.collect(), - }; - keyed.sort_by(|a, b| sort_by_option(&a.0, &b.0, sorter.reverse)); - keyed.into_iter().map(|v| v.1).collect() - } - - /// Return a parallel walker that may use additional threads. - pub fn walker_parallel(&self) -> anyhow::Result { - Ok(self - .matches() - .walker_builder(self.paths(), self.0.threads)? - .build_parallel()) - } -} - -/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to -/// the parsed arguments. -#[derive(Clone, Debug)] -struct ArgMatches(clap::ArgMatches<'static>); - -/// The output format. Generally, this corresponds to the printer that ripgrep -/// uses to show search results. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum OutputKind { - /// Classic grep-like or ack-like format. - Standard, - /// Show matching files and possibly the number of matches in each file. - Summary, - /// Emit match information in the JSON Lines format. - JSON, -} - -/// The sort criteria, if present. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -struct SortBy { - /// Whether to reverse the sort criteria (i.e., descending order). - reverse: bool, - /// The actual sorting criteria. - kind: SortByKind, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum SortByKind { - /// No sorting at all. - None, - /// Sort by path. - Path, - /// Sort by last modified time. - LastModified, - /// Sort by last accessed time. - LastAccessed, - /// Sort by creation time. - Created, -} - -impl SortBy { - fn asc(kind: SortByKind) -> SortBy { - SortBy { reverse: false, kind } - } - - fn desc(kind: SortByKind) -> SortBy { - SortBy { reverse: true, kind } - } - - fn none() -> SortBy { - SortBy::asc(SortByKind::None) - } - - /// Try to check that the sorting criteria selected is actually supported. - /// If it isn't, then an error is returned. - fn check(&self) -> anyhow::Result<()> { - match self.kind { - SortByKind::None | SortByKind::Path => {} - SortByKind::LastModified => { - env::current_exe()?.metadata()?.modified()?; - } - SortByKind::LastAccessed => { - env::current_exe()?.metadata()?.accessed()?; - } - SortByKind::Created => { - env::current_exe()?.metadata()?.created()?; - } - } - Ok(()) - } - - /// Load sorters only if they are applicable at the walk stage. - /// - /// In particular, sorts that involve `stat` calls are not loaded because - /// the walk inherently assumes that parent directories are aware of all its - /// decendent properties, but `stat` does not work that way. - fn configure_builder_sort(self, builder: &mut WalkBuilder) { - use SortByKind::*; - match self.kind { - Path if self.reverse => { - builder.sort_by_file_name(|a, b| a.cmp(b).reverse()); - } - Path => { - builder.sort_by_file_name(|a, b| a.cmp(b)); - } - // these use `stat` calls and will be sorted in Args::sort_by_stat() - LastModified | LastAccessed | Created | None => {} - }; - } -} - -impl SortByKind { - fn new(kind: &str) -> SortByKind { - match kind { - "none" => SortByKind::None, - "path" => SortByKind::Path, - "modified" => SortByKind::LastModified, - "accessed" => SortByKind::LastAccessed, - "created" => SortByKind::Created, - _ => SortByKind::None, - } - } -} - -/// Encoding mode the searcher will use. -#[derive(Clone, Debug)] -enum EncodingMode { - /// Use an explicit encoding forcefully, but let BOM sniffing override it. - Some(Encoding), - /// Use only BOM sniffing to auto-detect an encoding. - Auto, - /// Use no explicit encoding and disable all BOM sniffing. This will - /// always result in searching the raw bytes, regardless of their - /// true encoding. - Disabled, -} - -impl ArgMatches { - /// Create an ArgMatches from clap's parse result. - fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches { - ArgMatches(clap_matches) - } - - /// Run clap and return the matches using a config file if present. If clap - /// determines a problem with the user provided arguments (or if --help or - /// --version are given), then an error/usage/version will be printed and - /// the process will exit. - /// - /// If there are no additional arguments from the environment (e.g., a - /// config file), then the given matches are returned as is. - fn reconfigure(self) -> anyhow::Result { - // If the end user says no config, then respect it. - if self.is_present("no-config") { - log::debug!( - "not reading config files because --no-config is present" - ); - return Ok(self); - } - // If the user wants ripgrep to use a config file, then parse args - // from that first. - let mut args = config::args(); - if args.is_empty() { - return Ok(self); - } - let mut cliargs = env::args_os(); - if let Some(bin) = cliargs.next() { - args.insert(0, bin); - } - args.extend(cliargs); - log::debug!("final argv: {:?}", args); - Ok(ArgMatches(clap_matches(args)?)) - } - - /// Convert the result of parsing CLI arguments into ripgrep's higher level - /// configuration structure. - fn to_args(self) -> anyhow::Result { - // We compute these once since they could be large. - let patterns = self.patterns()?; - let matcher = self.matcher(&patterns)?; - let mut paths = self.paths(); - let using_default_path = if paths.is_empty() { - paths.push(self.path_default()); - true - } else { - false - }; - // Now figure out the number of threads we'll use and which - // command will run. - let is_one_search = self.is_one_search(&paths); - let threads = if is_one_search { 1 } else { self.threads()? }; - if threads == 1 { - log::debug!("running in single threaded mode"); - } else { - log::debug!("running with {threads} threads for parallelism"); - } - let command = if self.is_present("pcre2-version") { - Command::PCRE2Version - } else if self.is_present("type-list") { - Command::Types - } else if self.is_present("files") { - if threads == 1 { - Command::Files - } else { - Command::FilesParallel - } - } else if self.can_never_match(&patterns) { - Command::SearchNever - } else if threads == 1 { - Command::Search - } else { - Command::SearchParallel - }; - Ok(Args(Arc::new(ArgsImp { - matches: self, - command, - threads, - matcher, - paths, - using_default_path, - }))) - } -} - -/// High level routines for converting command line arguments into various -/// data structures used by ripgrep. -/// -/// Methods are sorted alphabetically. -impl ArgMatches { - /// Return the matcher that should be used for searching. - /// - /// If there was a problem building the matcher (e.g., a syntax error), - /// then this returns an error. - fn matcher(&self, patterns: &[String]) -> anyhow::Result { - if self.is_present("pcre2") { - self.matcher_engine("pcre2", patterns) - } else if self.is_present("auto-hybrid-regex") { - self.matcher_engine("auto", patterns) - } else { - let engine = self.value_of_lossy("engine").unwrap(); - self.matcher_engine(&engine, patterns) - } - } - - /// Return the matcher that should be used for searching using engine - /// as the engine for the patterns. - /// - /// If there was a problem building the matcher (e.g., a syntax error), - /// then this returns an error. - fn matcher_engine( - &self, - engine: &str, - patterns: &[String], - ) -> anyhow::Result { - match engine { - "default" => { - let matcher = match self.matcher_rust(patterns) { - Ok(matcher) => matcher, - Err(err) => { - anyhow::bail!(suggest(err.to_string())); - } - }; - Ok(PatternMatcher::RustRegex(matcher)) - } - #[cfg(feature = "pcre2")] - "pcre2" => { - let matcher = self.matcher_pcre2(patterns)?; - Ok(PatternMatcher::PCRE2(matcher)) - } - #[cfg(not(feature = "pcre2"))] - "pcre2" => anyhow::bail!( - "PCRE2 is not available in this build of ripgrep", - ), - "auto" => { - let rust_err = match self.matcher_rust(patterns) { - Ok(matcher) => { - return Ok(PatternMatcher::RustRegex(matcher)); - } - Err(err) => err, - }; - log::debug!( - "error building Rust regex in hybrid mode:\n{}", - rust_err, - ); - - let pcre_err = match self.matcher_engine("pcre2", patterns) { - Ok(matcher) => return Ok(matcher), - Err(err) => err, - }; - let divider = "~".repeat(79); - anyhow::bail!( - "regex could not be compiled with either the default \ - regex engine or with PCRE2.\n\n\ - default regex engine error:\n\ - {divider}\n\ - {rust_err}\n\ - {divider}\n\n\ - PCRE2 regex engine error:\n{pcre_err}", - ); - } - _ => anyhow::bail!("unrecognized regex engine '{engine}'"), - } - } - - /// Build a matcher using Rust's regex engine. - /// - /// If there was a problem building the matcher (such as a regex syntax - /// error), then an error is returned. - fn matcher_rust( - &self, - patterns: &[String], - ) -> anyhow::Result { - let mut builder = RustRegexMatcherBuilder::new(); - builder - .case_smart(self.case_smart()) - .case_insensitive(self.case_insensitive()) - .multi_line(true) - .unicode(self.unicode()) - .octal(false) - .fixed_strings(self.is_present("fixed-strings")) - .whole_line(self.is_present("line-regexp")) - .word(self.is_present("word-regexp")); - if self.is_present("multiline") { - builder.dot_matches_new_line(self.is_present("multiline-dotall")); - if self.is_present("crlf") { - builder.crlf(true).line_terminator(None); - } - } else { - builder.line_terminator(Some(b'\n')).dot_matches_new_line(false); - if self.is_present("crlf") { - builder.crlf(true); - } - // We don't need to set this in multiline mode since mulitline - // matchers don't use optimizations related to line terminators. - // Moreover, a mulitline regex used with --null-data should - // be allowed to match NUL bytes explicitly, which this would - // otherwise forbid. - if self.is_present("null-data") { - builder.line_terminator(Some(b'\x00')); - } - } - if let Some(limit) = self.regex_size_limit()? { - builder.size_limit(limit); - } - if let Some(limit) = self.dfa_size_limit()? { - builder.dfa_size_limit(limit); - } - match builder.build_many(patterns) { - Ok(m) => Ok(m), - Err(err) => anyhow::bail!(suggest_multiline(err.to_string())), - } - } - - /// Build a matcher using PCRE2. - /// - /// If there was a problem building the matcher (such as a regex syntax - /// error), then an error is returned. - #[cfg(feature = "pcre2")] - fn matcher_pcre2( - &self, - patterns: &[String], - ) -> anyhow::Result { - let mut builder = PCRE2RegexMatcherBuilder::new(); - builder - .case_smart(self.case_smart()) - .caseless(self.case_insensitive()) - .multi_line(true) - .fixed_strings(self.is_present("fixed-strings")) - .whole_line(self.is_present("line-regexp")) - .word(self.is_present("word-regexp")); - // For whatever reason, the JIT craps out during regex compilation with - // a "no more memory" error on 32 bit systems. So don't use it there. - if cfg!(target_pointer_width = "64") { - builder - .jit_if_available(true) - // The PCRE2 docs say that 32KB is the default, and that 1MB - // should be big enough for anything. But let's crank it to - // 10MB. - .max_jit_stack_size(Some(10 * (1 << 20))); - } - if self.unicode() { - builder.utf(true).ucp(true); - } - if self.is_present("multiline") { - builder.dotall(self.is_present("multiline-dotall")); - } - if self.is_present("crlf") { - builder.crlf(true); - } - Ok(builder.build_many(patterns)?) - } - - /// Build a JSON printer that writes results to the given writer. - fn printer_json(&self, wtr: W) -> anyhow::Result> { - let mut builder = JSONBuilder::new(); - builder - .pretty(false) - .max_matches(self.max_count()?) - .always_begin_end(false); - Ok(builder.build(wtr)) - } - - /// Build a Standard printer that writes results to the given writer. - /// - /// The given paths are used to configure aspects of the printer. - /// - /// If `separator_search` is true, then the returned printer will assume - /// the responsibility of printing a separator between each set of - /// search results, when appropriate (e.g., when contexts are enabled). - /// When it's set to false, the caller is responsible for handling - /// separators. - /// - /// In practice, we want the printer to handle it in the single threaded - /// case but not in the multi-threaded case. - fn printer_standard( - &self, - paths: &[PathBuf], - wtr: W, - separator_search: bool, - ) -> anyhow::Result> { - let mut builder = StandardBuilder::new(); - builder - .color_specs(self.color_specs()?) - .hyperlink(self.hyperlink_config()?) - .stats(self.stats()) - .heading(self.heading()) - .path(self.with_filename(paths)) - .only_matching(self.is_present("only-matching")) - .per_match(self.is_present("vimgrep")) - .per_match_one_line(true) - .replacement(self.replacement()) - .max_columns(self.max_columns()?) - .max_columns_preview(self.max_columns_preview()) - .max_matches(self.max_count()?) - .column(self.column()) - .byte_offset(self.is_present("byte-offset")) - .trim_ascii(self.is_present("trim")) - .separator_search(None) - .separator_context(self.context_separator()) - .separator_field_match(self.field_match_separator()) - .separator_field_context(self.field_context_separator()) - .separator_path(self.path_separator()?) - .path_terminator(self.path_terminator()); - if separator_search { - builder.separator_search(self.file_separator()?); - } - Ok(builder.build(wtr)) - } - - /// Build a Summary printer that writes results to the given writer. - /// - /// The given paths are used to configure aspects of the printer. - /// - /// This panics if the output format is not `OutputKind::Summary`. - fn printer_summary( - &self, - paths: &[PathBuf], - wtr: W, - ) -> anyhow::Result> { - let mut builder = SummaryBuilder::new(); - builder - .kind(self.summary_kind().expect("summary format")) - .color_specs(self.color_specs()?) - .hyperlink(self.hyperlink_config()?) - .stats(self.stats()) - .path(self.with_filename(paths)) - .max_matches(self.max_count()?) - .exclude_zero(!self.is_present("include-zero")) - .separator_field(b":".to_vec()) - .separator_path(self.path_separator()?) - .path_terminator(self.path_terminator()); - Ok(builder.build(wtr)) - } - - /// Build a searcher from the command line parameters. - fn searcher(&self, paths: &[PathBuf]) -> anyhow::Result { - let (ctx_before, ctx_after) = self.contexts()?; - let line_term = if self.is_present("crlf") { - LineTerminator::crlf() - } else if self.is_present("null-data") { - LineTerminator::byte(b'\x00') - } else { - LineTerminator::byte(b'\n') - }; - let mut builder = SearcherBuilder::new(); - builder - .line_terminator(line_term) - .invert_match(self.is_present("invert-match")) - .line_number(self.line_number(paths)) - .multi_line(self.is_present("multiline")) - .before_context(ctx_before) - .after_context(ctx_after) - .passthru(self.is_present("passthru")) - .memory_map(self.mmap_choice(paths)) - .stop_on_nonmatch(self.is_present("stop-on-nonmatch")); - match self.encoding()? { - EncodingMode::Some(enc) => { - builder.encoding(Some(enc)); - } - EncodingMode::Auto => {} // default for the searcher - EncodingMode::Disabled => { - builder.bom_sniffing(false); - } - } - Ok(builder.build()) - } - - /// Return a builder for recursively traversing a directory while - /// respecting ignore rules. - /// - /// If there was a problem parsing the CLI arguments necessary for - /// constructing the builder, then this returns an error. - fn walker_builder( - &self, - paths: &[PathBuf], - threads: usize, - ) -> anyhow::Result { - let mut builder = WalkBuilder::new(&paths[0]); - for path in &paths[1..] { - builder.add(path); - } - if !self.no_ignore_files() { - for path in self.ignore_paths() { - if let Some(err) = builder.add_ignore(path) { - ignore_message!("{}", err); - } - } - } - builder - .max_depth(self.usize_of("max-depth")?) - .follow_links(self.is_present("follow")) - .max_filesize(self.max_file_size()?) - .threads(threads) - .same_file_system(self.is_present("one-file-system")) - .skip_stdout(!self.is_present("files")) - .overrides(self.overrides()?) - .types(self.types()?) - .hidden(!self.hidden()) - .parents(!self.no_ignore_parent()) - .ignore(!self.no_ignore_dot()) - .git_global(!self.no_ignore_vcs() && !self.no_ignore_global()) - .git_ignore(!self.no_ignore_vcs()) - .git_exclude(!self.no_ignore_vcs() && !self.no_ignore_exclude()) - .require_git(!self.is_present("no-require-git")) - .ignore_case_insensitive(self.ignore_file_case_insensitive()); - if !self.no_ignore() && !self.no_ignore_dot() { - builder.add_custom_ignore_filename(".rgignore"); - } - self.sort_by()?.configure_builder_sort(&mut builder); - Ok(builder) - } -} - -/// Mid level routines for converting command line arguments into various types -/// of data structures. -/// -/// Methods are sorted alphabetically. -impl ArgMatches { - /// Returns the form of binary detection to perform on files that are - /// implicitly searched via recursive directory traversal. - fn binary_detection_implicit(&self) -> BinaryDetection { - let none = self.is_present("text") || self.is_present("null-data"); - let convert = - self.is_present("binary") || self.unrestricted_count() >= 3; - if none { - BinaryDetection::none() - } else if convert { - BinaryDetection::convert(b'\x00') - } else { - BinaryDetection::quit(b'\x00') - } - } - - /// Returns the form of binary detection to perform on files that are - /// explicitly searched via the user invoking ripgrep on a particular - /// file or files or stdin. - /// - /// In general, this should never be BinaryDetection::quit, since that acts - /// as a filter (but quitting immediately once a NUL byte is seen), and we - /// should never filter out files that the user wants to explicitly search. - fn binary_detection_explicit(&self) -> BinaryDetection { - let none = self.is_present("text") || self.is_present("null-data"); - if none { - BinaryDetection::none() - } else { - BinaryDetection::convert(b'\x00') - } - } - - /// Returns true if the command line configuration implies that a match - /// can never be shown. - fn can_never_match(&self, patterns: &[String]) -> bool { - patterns.is_empty() || self.max_count().ok() == Some(Some(0)) - } - - /// Returns true if and only if case should be ignore. - /// - /// If --case-sensitive is present, then case is never ignored, even if - /// --ignore-case is present. - fn case_insensitive(&self) -> bool { - self.is_present("ignore-case") && !self.is_present("case-sensitive") - } - - /// Returns true if and only if smart case has been enabled. - /// - /// If either --ignore-case of --case-sensitive are present, then smart - /// case is disabled. - fn case_smart(&self) -> bool { - self.is_present("smart-case") - && !self.is_present("ignore-case") - && !self.is_present("case-sensitive") - } - - /// Returns the user's color choice based on command line parameters and - /// environment. - fn color_choice(&self) -> ColorChoice { - let preference = match self.value_of_lossy("color") { - None => "auto".to_string(), - Some(v) => v, - }; - if preference == "always" { - ColorChoice::Always - } else if preference == "ansi" { - ColorChoice::AlwaysAnsi - } else if preference == "auto" { - if std::io::stdout().is_terminal() || self.is_present("pretty") { - ColorChoice::Auto - } else { - ColorChoice::Never - } - } else { - ColorChoice::Never - } - } - - /// Returns the color specifications given by the user on the CLI. - /// - /// If the was a problem parsing any of the provided specs, then an error - /// is returned. - fn color_specs(&self) -> anyhow::Result { - // Start with a default set of color specs. - let mut specs = default_color_specs(); - for spec_str in self.values_of_lossy_vec("colors") { - specs.push(spec_str.parse()?); - } - Ok(ColorSpecs::new(&specs)) - } - - /// Returns true if and only if column numbers should be shown. - fn column(&self) -> bool { - if self.is_present("no-column") { - return false; - } - self.is_present("column") || self.is_present("vimgrep") - } - - /// Returns the before and after contexts from the command line. - /// - /// If a context setting was absent, then `0` is returned. - /// - /// If there was a problem parsing the values from the user as an integer, - /// then an error is returned. - fn contexts(&self) -> anyhow::Result<(usize, usize)> { - let both = self.usize_of("context")?.unwrap_or(0); - let after = self.usize_of("after-context")?.unwrap_or(both); - let before = self.usize_of("before-context")?.unwrap_or(both); - Ok((before, after)) - } - - /// Returns the unescaped context separator in UTF-8 bytes. - /// - /// If one was not provided, the default `--` is returned. - /// If --no-context-separator is passed, None is returned. - fn context_separator(&self) -> Option> { - let nosep = self.is_present("no-context-separator"); - let sep = self.value_of_os("context-separator"); - match (nosep, sep) { - (true, _) => None, - (false, None) => Some(b"--".to_vec()), - (false, Some(sep)) => Some(cli::unescape_os(&sep)), - } - } - - /// Returns whether the -c/--count or the --count-matches flags were - /// passed from the command line. - /// - /// If --count-matches and --invert-match were passed in, behave - /// as if --count and --invert-match were passed in (i.e. rg will - /// count inverted matches as per existing behavior). - fn counts(&self) -> (bool, bool) { - let count = self.is_present("count"); - let count_matches = self.is_present("count-matches"); - let invert_matches = self.is_present("invert-match"); - let only_matching = self.is_present("only-matching"); - if count_matches && invert_matches { - // Treat `-v --count-matches` as `-v -c`. - (true, false) - } else if count && only_matching { - // Treat `-c --only-matching` as `--count-matches`. - (false, true) - } else { - (count, count_matches) - } - } - - /// Parse the dfa-size-limit argument option into a byte count. - fn dfa_size_limit(&self) -> anyhow::Result> { - let r = self.parse_human_readable_size("dfa-size-limit")?; - u64_to_usize("dfa-size-limit", r) - } - - /// Returns the encoding mode to use. - /// - /// This only returns an encoding if one is explicitly specified. Otherwise - /// if set to automatic, the Searcher will do BOM sniffing for UTF-16 - /// and transcode seamlessly. If disabled, no BOM sniffing nor transcoding - /// will occur. - fn encoding(&self) -> anyhow::Result { - if self.is_present("no-encoding") { - return Ok(EncodingMode::Auto); - } - - let label = match self.value_of_lossy("encoding") { - None => return Ok(EncodingMode::Auto), - Some(label) => label, - }; - - if label == "auto" { - return Ok(EncodingMode::Auto); - } else if label == "none" { - return Ok(EncodingMode::Disabled); - } - - Ok(EncodingMode::Some(Encoding::new(&label)?)) - } - - /// Return the file separator to use based on the CLI configuration. - fn file_separator(&self) -> anyhow::Result>> { - // File separators are only used for the standard grep-line format. - if self.output_kind() != OutputKind::Standard { - return Ok(None); - } - - let (ctx_before, ctx_after) = self.contexts()?; - Ok(if self.heading() { - Some(b"".to_vec()) - } else if ctx_before > 0 || ctx_after > 0 { - self.context_separator() - } else { - None - }) - } - - /// Returns true if and only if matches should be grouped with file name - /// headings. - fn heading(&self) -> bool { - if self.is_present("no-heading") || self.is_present("vimgrep") { - false - } else { - std::io::stdout().is_terminal() - || self.is_present("heading") - || self.is_present("pretty") - } - } - - /// Returns true if and only if hidden files/directories should be - /// searched. - fn hidden(&self) -> bool { - self.is_present("hidden") || self.unrestricted_count() >= 2 - } - - /// Returns the hyperlink pattern to use. A default pattern suitable - /// for the current system is used if the value is not set. - /// - /// If an invalid pattern is provided, then an error is returned. - fn hyperlink_config(&self) -> anyhow::Result { - let mut env = HyperlinkEnvironment::new(); - env.host(hostname(self.value_of_os("hostname-bin"))) - .wsl_prefix(wsl_prefix()); - let fmt: HyperlinkFormat = - match self.value_of_lossy("hyperlink-format") { - None => "none".parse().unwrap(), - Some(format) => match format.parse() { - Ok(format) => format, - Err(err) => { - anyhow::bail!("invalid hyperlink format: {err}"); - } - }, - }; - log::debug!("hyperlink format: {:?}", fmt.to_string()); - Ok(HyperlinkConfig::new(env, fmt)) - } - - /// Returns true if ignore files should be processed case insensitively. - fn ignore_file_case_insensitive(&self) -> bool { - self.is_present("ignore-file-case-insensitive") - } - - /// Return all of the ignore file paths given on the command line. - fn ignore_paths(&self) -> Vec { - let paths = match self.values_of_os("ignore-file") { - None => return vec![], - Some(paths) => paths, - }; - paths.map(|p| Path::new(p).to_path_buf()).collect() - } - - /// Returns true if and only if ripgrep is invoked in a way where it knows - /// it search exactly one thing. - fn is_one_search(&self, paths: &[PathBuf]) -> bool { - if paths.len() != 1 { - return false; - } - self.is_only_stdin(paths) || paths[0].is_file() - } - - /// Returns true if and only if we're only searching a single thing and - /// that thing is stdin. - fn is_only_stdin(&self, paths: &[PathBuf]) -> bool { - paths == [Path::new("-")] - } - - /// Returns true if and only if we should show line numbers. - fn line_number(&self, paths: &[PathBuf]) -> bool { - if self.output_kind() == OutputKind::Summary { - return false; - } - if self.is_present("no-line-number") { - return false; - } - if self.output_kind() == OutputKind::JSON { - return true; - } - - // A few things can imply counting line numbers. In particular, we - // generally want to show line numbers by default when printing to a - // tty for human consumption, except for one interesting case: when - // we're only searching stdin. This makes pipelines work as expected. - (std::io::stdout().is_terminal() && !self.is_only_stdin(paths)) - || self.is_present("line-number") - || self.is_present("column") - || self.is_present("pretty") - || self.is_present("vimgrep") - } - - /// The maximum number of columns allowed on each line. - /// - /// If `0` is provided, then this returns `None`. - fn max_columns(&self) -> anyhow::Result> { - Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64)) - } - - /// Returns true if and only if a preview should be shown for lines that - /// exceed the maximum column limit. - fn max_columns_preview(&self) -> bool { - self.is_present("max-columns-preview") - } - - /// The maximum number of matches permitted. - fn max_count(&self) -> anyhow::Result> { - Ok(self.usize_of("max-count")?.map(|n| n as u64)) - } - - /// Parses the max-filesize argument option into a byte count. - fn max_file_size(&self) -> anyhow::Result> { - self.parse_human_readable_size("max-filesize") - } - - /// Returns whether we should attempt to use memory maps or not. - fn mmap_choice(&self, paths: &[PathBuf]) -> MmapChoice { - // SAFETY: Memory maps are difficult to impossible to encapsulate - // safely in a portable way that doesn't simultaneously negate some of - // the benfits of using memory maps. For ripgrep's use, we never mutate - // a memory map and generally never store the contents of memory map - // in a data structure that depends on immutability. Generally - // speaking, the worst thing that can happen is a SIGBUS (if the - // underlying file is truncated while reading it), which will cause - // ripgrep to abort. This reasoning should be treated as suspect. - let maybe = unsafe { MmapChoice::auto() }; - let never = MmapChoice::never(); - if self.is_present("no-mmap") { - never - } else if self.is_present("mmap") { - maybe - } else if paths.len() <= 10 && paths.iter().all(|p| p.is_file()) { - // If we're only searching a few paths and all of them are - // files, then memory maps are probably faster. - maybe - } else { - never - } - } - - /// Returns true if ignore files should be ignored. - fn no_ignore(&self) -> bool { - self.is_present("no-ignore") || self.unrestricted_count() >= 1 - } - - /// Returns true if .ignore files should be ignored. - fn no_ignore_dot(&self) -> bool { - self.is_present("no-ignore-dot") || self.no_ignore() - } - - /// Returns true if local exclude (ignore) files should be ignored. - fn no_ignore_exclude(&self) -> bool { - self.is_present("no-ignore-exclude") || self.no_ignore() - } - - /// Returns true if explicitly given ignore files should be ignored. - fn no_ignore_files(&self) -> bool { - // We don't look at no-ignore here because --no-ignore is explicitly - // documented to not override --ignore-file. We could change this, but - // it would be a fairly severe breaking change. - self.is_present("no-ignore-files") - } - - /// Returns true if global ignore files should be ignored. - fn no_ignore_global(&self) -> bool { - self.is_present("no-ignore-global") || self.no_ignore() - } - - /// Returns true if parent ignore files should be ignored. - fn no_ignore_parent(&self) -> bool { - self.is_present("no-ignore-parent") || self.no_ignore() - } - - /// Returns true if VCS ignore files should be ignored. - fn no_ignore_vcs(&self) -> bool { - self.is_present("no-ignore-vcs") || self.no_ignore() - } - - /// Determine the type of output we should produce. - fn output_kind(&self) -> OutputKind { - if self.is_present("quiet") { - // While we don't technically print results (or aggregate results) - // in quiet mode, we still support the --stats flag, and those - // stats are computed by the Summary printer for now. - return OutputKind::Summary; - } else if self.is_present("json") { - return OutputKind::JSON; - } - - let (count, count_matches) = self.counts(); - let summary = count - || count_matches - || self.is_present("files-with-matches") - || self.is_present("files-without-match"); - if summary { - OutputKind::Summary - } else { - OutputKind::Standard - } - } - - /// Builds the set of glob overrides from the command line flags. - fn overrides(&self) -> anyhow::Result { - let globs = self.values_of_lossy_vec("glob"); - let iglobs = self.values_of_lossy_vec("iglob"); - if globs.is_empty() && iglobs.is_empty() { - return Ok(Override::empty()); - } - - let mut builder = OverrideBuilder::new(current_dir()?); - // Make all globs case insensitive with --glob-case-insensitive. - if self.is_present("glob-case-insensitive") { - builder.case_insensitive(true).unwrap(); - } - for glob in globs { - builder.add(&glob)?; - } - // This only enables case insensitivity for subsequent globs. - builder.case_insensitive(true).unwrap(); - for glob in iglobs { - builder.add(&glob)?; - } - Ok(builder.build()?) - } - - /// Return all file paths that ripgrep should search. - /// - /// If no paths were given, then this returns an empty list. - fn paths(&self) -> Vec { - let mut paths: Vec = match self.values_of_os("path") { - None => vec![], - Some(paths) => paths.map(|p| Path::new(p).to_path_buf()).collect(), - }; - // If --file, --files or --regexp is given, then the first path is - // always in `pattern`. - if self.is_present("file") - || self.is_present("files") - || self.is_present("regexp") - { - if let Some(path) = self.value_of_os("pattern") { - paths.insert(0, Path::new(path).to_path_buf()); - } - } - paths - } - - /// Return the default path that ripgrep should search. This should only - /// be used when ripgrep is not otherwise given at least one file path - /// as a positional argument. - fn path_default(&self) -> PathBuf { - let file_is_stdin = self - .values_of_os("file") - .map_or(false, |mut files| files.any(|f| f == "-")); - let search_cwd = !cli::is_readable_stdin() - || (self.is_present("file") && file_is_stdin) - || self.is_present("files") - || self.is_present("type-list") - || self.is_present("pcre2-version"); - if search_cwd { - Path::new("./").to_path_buf() - } else { - Path::new("-").to_path_buf() - } - } - - /// Returns the unescaped path separator as a single byte, if one exists. - /// - /// If the provided path separator is more than a single byte, then an - /// error is returned. - fn path_separator(&self) -> anyhow::Result> { - let sep = match self.value_of_os("path-separator") { - None => return Ok(None), - Some(sep) => cli::unescape_os(&sep), - }; - if sep.is_empty() { - Ok(None) - } else if sep.len() > 1 { - anyhow::bail!( - "A path separator must be exactly one byte, but \ - the given separator is {} bytes: {}\n\ - In some shells on Windows '/' is automatically \ - expanded. Use '//' instead.", - sep.len(), - cli::escape(&sep), - ) - } else { - Ok(Some(sep[0])) - } - } - - /// Returns the byte that should be used to terminate paths. - /// - /// Typically, this is only set to `\x00` when the --null flag is provided, - /// and `None` otherwise. - fn path_terminator(&self) -> Option { - if self.is_present("null") { - Some(b'\x00') - } else { - None - } - } - - /// Returns the unescaped field context separator. If one wasn't specified, - /// then '-' is used as the default. - fn field_context_separator(&self) -> Vec { - match self.value_of_os("field-context-separator") { - None => b"-".to_vec(), - Some(sep) => cli::unescape_os(&sep), - } - } - - /// Returns the unescaped field match separator. If one wasn't specified, - /// then ':' is used as the default. - fn field_match_separator(&self) -> Vec { - match self.value_of_os("field-match-separator") { - None => b":".to_vec(), - Some(sep) => cli::unescape_os(&sep), - } - } - - /// Get a sequence of all available patterns from the command line. - /// This includes reading the -e/--regexp and -f/--file flags. - /// - /// If any pattern is invalid UTF-8, then an error is returned. - fn patterns(&self) -> anyhow::Result> { - if self.is_present("files") || self.is_present("type-list") { - return Ok(vec![]); - } - let mut seen = HashSet::new(); - let mut pats = vec![]; - let mut add = |pat: String| { - if !seen.contains(&pat) { - seen.insert(pat.clone()); - pats.push(pat); - } - }; - match self.values_of_os("regexp") { - None => { - if self.values_of_os("file").is_none() { - if let Some(os_pat) = self.value_of_os("pattern") { - add(self.pattern_from_os_str(os_pat)?); - } - } - } - Some(os_pats) => { - for os_pat in os_pats { - add(self.pattern_from_os_str(os_pat)?); - } - } - } - if let Some(paths) = self.values_of_os("file") { - for path in paths { - if path == "-" { - let it = cli::patterns_from_stdin()? - .into_iter() - .map(|p| self.pattern_from_string(p)); - for pat in it { - add(pat); - } - } else { - let it = cli::patterns_from_path(path)? - .into_iter() - .map(|p| self.pattern_from_string(p)); - for pat in it { - add(pat); - } - } - } - } - Ok(pats) - } - - /// Converts an OsStr pattern to a String pattern. The pattern is escaped - /// if -F/--fixed-strings is set. - /// - /// If the pattern is not valid UTF-8, then an error is returned. - fn pattern_from_os_str(&self, pat: &OsStr) -> anyhow::Result { - let s = cli::pattern_from_os(pat)?; - Ok(self.pattern_from_str(s)) - } - - /// Converts a &str pattern to a String pattern. The pattern is escaped - /// if -F/--fixed-strings is set. - fn pattern_from_str(&self, pat: &str) -> String { - self.pattern_from_string(pat.to_string()) - } - - /// Applies additional processing on the given pattern if necessary - /// (such as escaping meta characters or turning it into a line regex). - fn pattern_from_string(&self, pat: String) -> String { - if pat.is_empty() { - // This would normally just be an empty string, which works on its - // own, but if the patterns are joined in a set of alternations, - // then you wind up with `foo|`, which is currently invalid in - // Rust's regex engine. - "(?:)".to_string() - } else { - pat - } - } - - /// Returns the preprocessor command if one was specified. - fn preprocessor(&self) -> Option { - let path = match self.value_of_os("pre") { - None => return None, - Some(path) => path, - }; - if path.is_empty() { - return None; - } - Some(Path::new(path).to_path_buf()) - } - - /// Builds the set of globs for filtering files to apply to the --pre - /// flag. If no --pre-globs are available, then this always returns an - /// empty set of globs. - fn preprocessor_globs(&self) -> anyhow::Result { - let globs = self.values_of_lossy_vec("pre-glob"); - if globs.is_empty() { - return Ok(Override::empty()); - } - let mut builder = OverrideBuilder::new(current_dir()?); - for glob in globs { - builder.add(&glob)?; - } - Ok(builder.build()?) - } - - /// Parse the regex-size-limit argument option into a byte count. - fn regex_size_limit(&self) -> anyhow::Result> { - let r = self.parse_human_readable_size("regex-size-limit")?; - u64_to_usize("regex-size-limit", r) - } - - /// Returns the replacement string as UTF-8 bytes if it exists. - fn replacement(&self) -> Option> { - self.value_of_lossy("replace").map(|s| s.into_bytes()) - } - - /// Returns the sorting criteria based on command line parameters. - fn sort_by(&self) -> anyhow::Result { - // For backcompat, continue supporting deprecated --sort-files flag. - if self.is_present("sort-files") { - return Ok(SortBy::asc(SortByKind::Path)); - } - let sortby = match self.value_of_lossy("sort") { - None => match self.value_of_lossy("sortr") { - None => return Ok(SortBy::none()), - Some(choice) => SortBy::desc(SortByKind::new(&choice)), - }, - Some(choice) => SortBy::asc(SortByKind::new(&choice)), - }; - Ok(sortby) - } - - /// Returns true if and only if aggregate statistics for a search should - /// be tracked. - /// - /// Generally, this is only enabled when explicitly requested by in the - /// command line arguments via the --stats flag, but this can also be - /// enabled implicitly via the output format, e.g., for JSON Lines. - fn stats(&self) -> bool { - self.output_kind() == OutputKind::JSON || self.is_present("stats") - } - - /// When the output format is `Summary`, this returns the type of summary - /// output to show. - /// - /// This returns `None` if the output format is not `Summary`. - fn summary_kind(&self) -> Option { - let (count, count_matches) = self.counts(); - if self.is_present("quiet") { - Some(SummaryKind::Quiet) - } else if count_matches { - Some(SummaryKind::CountMatches) - } else if count { - Some(SummaryKind::Count) - } else if self.is_present("files-with-matches") { - Some(SummaryKind::PathWithMatch) - } else if self.is_present("files-without-match") { - Some(SummaryKind::PathWithoutMatch) - } else { - None - } - } - - /// Return the number of threads that should be used for parallelism. - fn threads(&self) -> anyhow::Result { - if self.sort_by()?.kind != SortByKind::None { - return Ok(1); - } - let threads = self.usize_of("threads")?.unwrap_or(0); - let available = - std::thread::available_parallelism().map_or(1, |n| n.get()); - Ok(if threads == 0 { std::cmp::min(12, available) } else { threads }) - } - - /// Builds a file type matcher from the command line flags. - fn types(&self) -> anyhow::Result { - let mut builder = TypesBuilder::new(); - builder.add_defaults(); - for ty in self.values_of_lossy_vec("type-clear") { - builder.clear(&ty); - } - for def in self.values_of_lossy_vec("type-add") { - builder.add_def(&def)?; - } - for ty in self.values_of_lossy_vec("type") { - builder.select(&ty); - } - for ty in self.values_of_lossy_vec("type-not") { - builder.negate(&ty); - } - builder.build().map_err(From::from) - } - - /// Returns the number of times the `unrestricted` flag is provided. - fn unrestricted_count(&self) -> u64 { - self.occurrences_of("unrestricted") - } - - /// Returns true if and only if Unicode mode should be enabled. - fn unicode(&self) -> bool { - // Unicode mode is enabled by default, so only disable it when - // --no-unicode is given explicitly. - !(self.is_present("no-unicode") || self.is_present("no-pcre2-unicode")) - } - - /// Returns true if and only if file names containing each match should - /// be emitted. - fn with_filename(&self, paths: &[PathBuf]) -> bool { - if self.is_present("no-filename") { - false - } else { - let path_stdin = Path::new("-"); - self.is_present("with-filename") - || self.is_present("vimgrep") - || paths.len() > 1 - || paths - .get(0) - .map_or(false, |p| p != path_stdin && p.is_dir()) - } - } -} - -/// Lower level generic helper methods for teasing values out of clap. -impl ArgMatches { - /// Like values_of_lossy, but returns an empty vec if the flag is not - /// present. - fn values_of_lossy_vec(&self, name: &str) -> Vec { - self.values_of_lossy(name).unwrap_or_else(Vec::new) - } - - /// Safely reads an arg value with the given name, and if it's present, - /// tries to parse it as a usize value. - /// - /// If the number is zero, then it is considered absent and `None` is - /// returned. - fn usize_of_nonzero(&self, name: &str) -> anyhow::Result> { - let n = match self.usize_of(name)? { - None => return Ok(None), - Some(n) => n, - }; - Ok(if n == 0 { None } else { Some(n) }) - } - - /// Safely reads an arg value with the given name, and if it's present, - /// tries to parse it as a usize value. - fn usize_of(&self, name: &str) -> anyhow::Result> { - match self.value_of_lossy(name) { - None => Ok(None), - Some(v) => v.parse().map(Some).map_err(From::from), - } - } - - /// Parses an argument of the form `[0-9]+(KMG)?`. - /// - /// If the aforementioned format is not recognized, then this returns an - /// error. - fn parse_human_readable_size( - &self, - arg_name: &str, - ) -> anyhow::Result> { - let size = match self.value_of_lossy(arg_name) { - None => return Ok(None), - Some(size) => size, - }; - Ok(Some(cli::parse_human_readable_size(&size)?)) - } -} - -/// The following methods mostly dispatch to the underlying clap methods -/// directly. Methods that would otherwise get a single value will fetch all -/// values and return the last one. (Clap returns the first one.) We only -/// define the ones we need. -impl ArgMatches { - fn is_present(&self, name: &str) -> bool { - self.0.is_present(name) - } - - fn occurrences_of(&self, name: &str) -> u64 { - self.0.occurrences_of(name) - } - - fn value_of_lossy(&self, name: &str) -> Option { - self.0.value_of_lossy(name).map(|s| s.into_owned()) - } - - fn values_of_lossy(&self, name: &str) -> Option> { - self.0.values_of_lossy(name) - } - - fn value_of_os(&self, name: &str) -> Option<&OsStr> { - self.0.value_of_os(name) - } - - fn values_of_os(&self, name: &str) -> Option> { - self.0.values_of_os(name) - } -} - -/// Inspect an error resulting from building a Rust regex matcher, and if it's -/// believed to correspond to a syntax error that another engine could handle, -/// then add a message to suggest the use of the engine flag. -fn suggest(msg: String) -> String { - if let Some(pcre_msg) = suggest_pcre2(&msg) { - return pcre_msg; - } - msg -} - -/// Inspect an error resulting from building a Rust regex matcher, and if it's -/// believed to correspond to a syntax error that PCRE2 could handle, then -/// add a message to suggest the use of -P/--pcre2. -fn suggest_pcre2(msg: &str) -> Option { - #[cfg(feature = "pcre2")] - fn suggest(msg: &str) -> Option { - if !msg.contains("backreferences") && !msg.contains("look-around") { - None - } else { - Some(format!( - "{} - -Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences -and look-around.", - msg - )) - } - } - - #[cfg(not(feature = "pcre2"))] - fn suggest(_: &str) -> Option { - None - } - - suggest(msg) -} - -fn suggest_multiline(msg: String) -> String { - if msg.contains("the literal") && msg.contains("not allowed") { - format!( - "{msg} - -Consider enabling multiline mode with the --multiline flag (or -U for short). -When multiline mode is enabled, new line characters can be matched.", - ) - } else { - msg - } -} - -/// Convert the result of parsing a human readable file size to a `usize`, -/// failing if the type does not fit. -fn u64_to_usize( - arg_name: &str, - value: Option, -) -> anyhow::Result> { - use std::usize; - - let Some(value) = value else { return Ok(None) }; - usize::try_from(value) - .map_err(|_| anyhow::anyhow!("number too large for {arg_name}")) - .map(Some) -} - -/// Sorts by an optional parameter. -// -/// If parameter is found to be `None`, both entries compare equal. -fn sort_by_option( - p1: &Option, - p2: &Option, - reverse: bool, -) -> std::cmp::Ordering { - match (p1, p2, reverse) { - (Some(p1), Some(p2), true) => p1.cmp(&p2).reverse(), - (Some(p1), Some(p2), false) => p1.cmp(&p2), - _ => std::cmp::Ordering::Equal, - } -} - -/// Returns a clap matches object if the given arguments parse successfully. -/// -/// Otherwise, if an error occurred, then it is returned unless the error -/// corresponds to a `--help` or `--version` request. In which case, the -/// corresponding output is printed and the current process is exited -/// successfully. -fn clap_matches(args: I) -> anyhow::Result> -where - I: IntoIterator, - T: Into + Clone, -{ - let err = match app::app().get_matches_from_safe(args) { - Ok(matches) => return Ok(matches), - Err(err) => err, - }; - if err.use_stderr() { - return Err(err.into()); - } - // Explicitly ignore any error returned by write!. The most likely error - // at this point is a broken pipe error, in which case, we want to ignore - // it and exit quietly. - // - // (This is the point of this helper function. clap's functionality for - // doing this will panic on a broken pipe error.) - let _ = write!(io::stdout(), "{}", err); - std::process::exit(0); -} - -/// Attempts to discover the current working directory. This mostly just defers -/// to the standard library, however, such things will fail if ripgrep is in -/// a directory that no longer exists. We attempt some fallback mechanisms, -/// such as querying the PWD environment variable, but otherwise return an -/// error. -fn current_dir() -> anyhow::Result { - let err = match env::current_dir() { - Err(err) => err, - Ok(cwd) => return Ok(cwd), - }; - if let Some(cwd) = env::var_os("PWD") { - if !cwd.is_empty() { - return Ok(PathBuf::from(cwd)); - } - } - anyhow::bail!( - "failed to get current working directory: {err} \ - --- did your CWD get deleted?", - ) -} - -/// Retrieves the hostname that ripgrep should use wherever a hostname is -/// required. Currently, that's just in the hyperlink format. -/// -/// This works by first running the given binary program (if present and with -/// no arguments) to get the hostname after trimming leading and trailing -/// whitespace. If that fails for any reason, then it falls back to getting -/// the hostname via platform specific means (e.g., `gethostname` on Unix). -/// -/// The purpose of `bin` is to make it possible for end users to override how -/// ripgrep determines the hostname. -fn hostname(bin: Option<&OsStr>) -> Option { - let Some(bin) = bin else { return platform_hostname() }; - let bin = match grep::cli::resolve_binary(bin) { - Ok(bin) => bin, - Err(err) => { - log::debug!( - "failed to run command '{bin:?}' to get hostname \ - (falling back to platform hostname): {err}", - ); - return platform_hostname(); - } - }; - let mut cmd = std::process::Command::new(&bin); - cmd.stdin(std::process::Stdio::null()); - let rdr = match grep::cli::CommandReader::new(&mut cmd) { - Ok(rdr) => rdr, - Err(err) => { - log::debug!( - "failed to spawn command '{bin:?}' to get \ - hostname (falling back to platform hostname): {err}", - ); - return platform_hostname(); - } - }; - let out = match io::read_to_string(rdr) { - Ok(out) => out, - Err(err) => { - log::debug!( - "failed to read output from command '{bin:?}' to get \ - hostname (falling back to platform hostname): {err}", - ); - return platform_hostname(); - } - }; - let hostname = out.trim(); - if hostname.is_empty() { - log::debug!( - "output from command '{bin:?}' is empty after trimming \ - leading and trailing whitespace (falling back to \ - platform hostname)", - ); - return platform_hostname(); - } - Some(hostname.to_string()) -} - -/// Attempts to get the hostname by using platform specific routines. For -/// example, this will do `gethostname` on Unix and `GetComputerNameExW` on -/// Windows. -fn platform_hostname() -> Option { - let hostname_os = match grep::cli::hostname() { - Ok(x) => x, - Err(err) => { - log::debug!("could not get hostname: {}", err); - return None; - } - }; - let Some(hostname) = hostname_os.to_str() else { - log::debug!( - "got hostname {:?}, but it's not valid UTF-8", - hostname_os - ); - return None; - }; - Some(hostname.to_string()) -} - -/// Returns a value that is meant to fill in the `{wslprefix}` variable for -/// a user given hyperlink format. A WSL prefix is a share/network like thing -/// that is meant to permit Windows applications to open files stored within -/// a WSL drive. -/// -/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running -/// in a Unix environment, then this returns None. -/// -/// See: -fn wsl_prefix() -> Option { - if !cfg!(unix) { - return None; - } - let distro_os = env::var_os("WSL_DISTRO_NAME")?; - let Some(distro) = distro_os.to_str() else { - log::debug!( - "found WSL_DISTRO_NAME={:?}, but value is not UTF-8", - distro_os - ); - return None; - }; - Some(format!("wsl$/{distro}")) -} - -/// Tries to assign a timestamp to every `Subject` in the vector to help with -/// sorting Subjects by time. -fn load_timestamps( - subjects: impl Iterator, - get_time: G, -) -> Vec<(Option, Subject)> -where - G: Fn(&std::fs::Metadata) -> io::Result, -{ - subjects - .map(|s| (s.path().metadata().and_then(|m| get_time(&m)).ok(), s)) - .collect() -} diff --git a/crates/core/flags/complete/bash.rs b/crates/core/flags/complete/bash.rs new file mode 100644 index 0000000000..a390061bd4 --- /dev/null +++ b/crates/core/flags/complete/bash.rs @@ -0,0 +1,107 @@ +/*! +Provides completions for ripgrep's CLI for the bash shell. +*/ + +use crate::flags::defs::FLAGS; + +const TEMPLATE_FULL: &'static str = " +_rg() { + local i cur prev opts cmds + COMPREPLY=() + cur=\"${COMP_WORDS[COMP_CWORD]}\" + prev=\"${COMP_WORDS[COMP_CWORD-1]}\" + cmd=\"\" + opts=\"\" + + for i in ${COMP_WORDS[@]}; do + case \"${i}\" in + rg) + cmd=\"rg\" + ;; + *) + ;; + esac + done + + case \"${cmd}\" in + rg) + opts=\"!OPTS!\" + if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then + COMPREPLY=($(compgen -W \"${opts}\" -- \"${cur}\")) + return 0 + fi + case \"${prev}\" in +!CASES! + esac + COMPREPLY=($(compgen -W \"${opts}\" -- \"${cur}\")) + return 0 + ;; + esac +} + +complete -F _rg -o bashdefault -o default rg +"; + +const TEMPLATE_CASE: &'static str = " + !FLAG!) + COMPREPLY=($(compgen -f \"${cur}\")) + return 0 + ;; +"; + +const TEMPLATE_CASE_CHOICES: &'static str = " + !FLAG!) + COMPREPLY=($(compgen -W \"!CHOICES!\" -- \"${cur}\")) + return 0 + ;; +"; + +/// Generate completions for Bash. +/// +/// Note that these completions are based on what was produced for ripgrep <=13 +/// using Clap 2.x. Improvements on this are welcome. +pub(crate) fn generate() -> String { + let mut opts = String::new(); + for flag in FLAGS.iter() { + opts.push_str("--"); + opts.push_str(flag.name_long()); + opts.push(' '); + if let Some(short) = flag.name_short() { + opts.push('-'); + opts.push(char::from(short)); + opts.push(' '); + } + if let Some(name) = flag.name_negated() { + opts.push_str("--"); + opts.push_str(name); + opts.push(' '); + } + } + opts.push_str(" ..."); + + let mut cases = String::new(); + for flag in FLAGS.iter() { + let template = if !flag.doc_choices().is_empty() { + let choices = flag.doc_choices().join(" "); + TEMPLATE_CASE_CHOICES.trim_end().replace("!CHOICES!", &choices) + } else { + TEMPLATE_CASE.trim_end().to_string() + }; + let name = format!("--{}", flag.name_long()); + cases.push_str(&template.replace("!FLAG!", &name)); + if let Some(short) = flag.name_short() { + let name = format!("-{}", char::from(short)); + cases.push_str(&template.replace("!FLAG!", &name)); + } + if let Some(negated) = flag.name_negated() { + let name = format!("--{negated}"); + cases.push_str(&template.replace("!FLAG!", &name)); + } + } + + TEMPLATE_FULL + .replace("!OPTS!", &opts) + .replace("!CASES!", &cases) + .trim_start() + .to_string() +} diff --git a/crates/core/flags/complete/fish.rs b/crates/core/flags/complete/fish.rs new file mode 100644 index 0000000000..6b28421f35 --- /dev/null +++ b/crates/core/flags/complete/fish.rs @@ -0,0 +1,47 @@ +/*! +Provides completions for ripgrep's CLI for the fish shell. +*/ + +use crate::flags::defs::FLAGS; + +const TEMPLATE: &'static str = + "complete -c rg -n '__fish_use_subcommand' !SHORT! !LONG! !DOC!\n"; +const TEMPLATE_CHOICES: &'static str = + "complete -c rg -n '__fish_use_subcommand' !SHORT! !LONG! !DOC! -r -f -a '!CHOICES!'\n"; + +/// Generate completions for Fish. +/// +/// Note that these completions are based on what was produced for ripgrep <=13 +/// using Clap 2.x. Improvements on this are welcome. +pub(crate) fn generate() -> String { + let mut out = String::new(); + for flag in FLAGS.iter() { + let short = match flag.name_short() { + None => "".to_string(), + Some(byte) => format!("-s {}", char::from(byte)), + }; + let long = format!("-l '{}'", flag.name_long().replace("'", "\\'")); + let doc = format!("-d '{}'", flag.doc_short().replace("'", "\\'")); + let template = if flag.doc_choices().is_empty() { + TEMPLATE.to_string() + } else { + TEMPLATE_CHOICES + .replace("!CHOICES!", &flag.doc_choices().join(" ")) + }; + out.push_str( + &template + .replace("!SHORT!", &short) + .replace("!LONG!", &long) + .replace("!DOC!", &doc), + ); + if let Some(negated) = flag.name_negated() { + out.push_str( + &template + .replace("!SHORT!", "") + .replace("!LONG!", &negated) + .replace("!DOC!", &doc), + ); + } + } + out +} diff --git a/crates/core/flags/complete/mod.rs b/crates/core/flags/complete/mod.rs new file mode 100644 index 0000000000..7d2fb606b0 --- /dev/null +++ b/crates/core/flags/complete/mod.rs @@ -0,0 +1,8 @@ +/*! +Modules for generating completions for various shells. +*/ + +pub(super) mod bash; +pub(super) mod fish; +pub(super) mod powershell; +pub(super) mod zsh; diff --git a/crates/core/flags/complete/powershell.rs b/crates/core/flags/complete/powershell.rs new file mode 100644 index 0000000000..e8a89e2ea0 --- /dev/null +++ b/crates/core/flags/complete/powershell.rs @@ -0,0 +1,86 @@ +/*! +Provides completions for ripgrep's CLI for PowerShell. +*/ + +use crate::flags::defs::FLAGS; + +const TEMPLATE: &'static str = " +using namespace System.Management.Automation +using namespace System.Management.Automation.Language + +Register-ArgumentCompleter -Native -CommandName 'rg' -ScriptBlock { + param($wordToComplete, $commandAst, $cursorPosition) + $commandElements = $commandAst.CommandElements + $command = @( + 'rg' + for ($i = 1; $i -lt $commandElements.Count; $i++) { + $element = $commandElements[$i] + if ($element -isnot [StringConstantExpressionAst] -or + $element.StringConstantType -ne [StringConstantType]::BareWord -or + $element.Value.StartsWith('-')) { + break + } + $element.Value + }) -join ';' + + $completions = @(switch ($command) { + 'rg' { +!FLAGS! + } + }) + + $completions.Where{ $_.CompletionText -like \"$wordToComplete*\" } | + Sort-Object -Property ListItemText +} +"; + +const TEMPLATE_FLAG: &'static str = + "[CompletionResult]::new('!DASH_NAME!', '!NAME!', [CompletionResultType]::ParameterName, '!DOC!')"; + +/// Generate completions for PowerShell. +/// +/// Note that these completions are based on what was produced for ripgrep <=13 +/// using Clap 2.x. Improvements on this are welcome. +pub(crate) fn generate() -> String { + let mut flags = String::new(); + for (i, flag) in FLAGS.iter().enumerate() { + let doc = flag.doc_short().replace("'", "''"); + + let dash_name = format!("--{}", flag.name_long()); + let name = flag.name_long(); + if i > 0 { + flags.push('\n'); + } + flags.push_str(" "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &name) + .replace("!DOC!", &doc), + ); + + if let Some(byte) = flag.name_short() { + let dash_name = format!("-{}", char::from(byte)); + let name = char::from(byte).to_string(); + flags.push_str("\n "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &name) + .replace("!DOC!", &doc), + ); + } + + if let Some(negated) = flag.name_negated() { + let dash_name = format!("--{}", negated); + flags.push_str("\n "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &negated) + .replace("!DOC!", &doc), + ); + } + } + TEMPLATE.trim_start().replace("!FLAGS!", &flags) +} diff --git a/complete/_rg b/crates/core/flags/complete/rg.zsh similarity index 96% rename from complete/_rg rename to crates/core/flags/complete/rg.zsh index 7fd6c542b0..0d44ce7dd7 100644 --- a/complete/_rg +++ b/crates/core/flags/complete/rg.zsh @@ -73,6 +73,7 @@ _rg() { {-c,--count}'[only show count of matching lines for each file]' '--count-matches[only show count of individual matches for each file]' '--include-zero[include files with zero matches in summary]' + $no"--no-include-zero[don't include files with zero matches in summary]" + '(encoding)' # Encoding options {-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings' @@ -108,6 +109,15 @@ _rg() { {-L,--follow}'[follow symlinks]' $no"--no-follow[don't follow symlinks]" + + '(generate)' # Options for generating ancillary data + '--generate=[generate man page or completion scripts]:when:(( + man\:"man page" + complete-bash\:"shell completions for bash" + complete-zsh\:"shell completions for zsh" + complete-fish\:"shell completions for fish" + complete-powershell\:"shell completions for PowerShell" + ))' + + glob # File-glob options '*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob' '*--iglob=[include/exclude files matching specified case-insensitive glob]:glob' @@ -125,8 +135,8 @@ _rg() { $no"--no-hidden[don't search hidden files and directories]" + '(hybrid)' # hybrid regex options - '--auto-hybrid-regex[dynamically use PCRE2 if necessary]' - $no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]" + '--auto-hybrid-regex[DEPRECATED: dynamically use PCRE2 if necessary]' + $no"--no-auto-hybrid-regex[DEPRECATED: don't dynamically use PCRE2 if necessary]" + '(ignore)' # Ignore-file options "(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]" @@ -183,6 +193,7 @@ _rg() { + '(max-depth)' # Directory-depth options '--max-depth=[specify max number of directories to descend]:number of directories' + '--maxdepth=[alias for --max-depth]:number of directories' '!--maxdepth=:number of directories' + '(messages)' # Error-message options @@ -210,15 +221,15 @@ _rg() { + '(passthru)' # Pass-through options '(--vimgrep)--passthru[show both matching and non-matching lines]' - '!(--vimgrep)--passthrough' + '(--vimgrep)--passthrough[alias for --passthru]' + '(pcre2)' # PCRE2 options {-P,--pcre2}'[enable matching with PCRE2]' $no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]' + '(pcre2-unicode)' # PCRE2 Unicode options - $no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]' - '(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]' + $no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[DEPRECATED: enable PCRE2 Unicode mode (with -P)]' + '(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[DEPRECATED: disable PCRE2 Unicode mode (with -P)]' + '(pre)' # Preprocessing options '(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e' @@ -252,7 +263,8 @@ _rg() { accessed\:"sort by last accessed time" created\:"sort by creation time" ))' - '!(threads)--sort-files[sort results by file path (disables parallelism)]' + '(threads)--sort-files[DEPRECATED: sort results by file path (disables parallelism)]' + $no"--no-sort-files[DEPRECATED: do not sort results]" + '(stats)' # Statistics options '(--files file-match)--stats[show search statistics]' @@ -293,6 +305,7 @@ _rg() { + misc # Other options — no need to separate these at the moment '(-b --byte-offset)'{-b,--byte-offset}'[show 0-based byte offset for each matching line]' + $no"--no-byte-offset[don't show byte offsets for each matching line]" '--color=[specify when to use colors in output]:when:(( never\:"never use colors" auto\:"use colors or not based on stdout, TERM, etc." @@ -312,6 +325,7 @@ _rg() { "(1 stats)--files[show each file that would be searched (but don't search)]" '*--ignore-file=[specify additional ignore file]:ignore file:_files' '(-v --invert-match)'{-v,--invert-match}'[invert matching]' + $no"--no-invert-match[do not invert matching]" '(-M --max-columns)'{-M+,--max-columns=}'[specify max length of lines to print]:number of bytes' '(-m --max-count)'{-m+,--max-count=}'[specify max number of matches per file]:number of matches' '--max-filesize=[specify size above which files should be ignored]:file size (bytes)' diff --git a/crates/core/flags/complete/zsh.rs b/crates/core/flags/complete/zsh.rs new file mode 100644 index 0000000000..59876f18ae --- /dev/null +++ b/crates/core/flags/complete/zsh.rs @@ -0,0 +1,23 @@ +/*! +Provides completions for ripgrep's CLI for the zsh shell. + +Unlike completion short for other shells (at time of writing), zsh's +completions for ripgrep are maintained by hand. This is because: + +1. They are lovingly written by an expert in such things. +2. Are much higher in quality than the ones below that are auto-generated. +Namely, the zsh completions take application level context about flag +compatibility into account. +3. There is a CI script that fails if a new flag is added to ripgrep that +isn't included in the zsh completions. +4. There is a wealth of documentation in the zsh script explaining how it +works and how it can be extended. + +In principle, I'd be open to maintaining any completion script by hand so +long as it meets criteria 3 and 4 above. +*/ + +/// Generate completions for zsh. +pub(crate) fn generate() -> String { + include_str!("rg.zsh").to_string() +} diff --git a/crates/core/config.rs b/crates/core/flags/config.rs similarity index 95% rename from crates/core/config.rs rename to crates/core/flags/config.rs index fadcbd3781..a081fe7937 100644 --- a/crates/core/config.rs +++ b/crates/core/flags/config.rs @@ -1,6 +1,9 @@ -// This module provides routines for reading ripgrep config "rc" files. The -// primary output of these routines is a sequence of arguments, where each -// argument corresponds precisely to one shell argument. +/*! +This module provides routines for reading ripgrep config "rc" files. + +The primary output of these routines is a sequence of arguments, where each +argument corresponds precisely to one shell argument. +*/ use std::{ ffi::OsString, diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs new file mode 100644 index 0000000000..f07d7f90e3 --- /dev/null +++ b/crates/core/flags/defs.rs @@ -0,0 +1,7625 @@ +/*! +Defines all of the flags available in ripgrep. + +Each flag corresponds to a unit struct with a corresponding implementation +of `Flag`. Note that each implementation of `Flag` might actually have many +possible manifestations of the same "flag." That is, each implementation of +`Flag` can have the following flags available to an end user of ripgrep: + +* The long flag name. +* An optional short flag name. +* An optional negated long flag name. +* An arbitrarily long list of aliases. + +The idea is that even though there are multiple flags that a user can type, +one implementation of `Flag` corresponds to a single _logical_ flag inside of +ripgrep. For example, `-E`, `--encoding` and `--no-encoding` all manipulate the +same encoding state in ripgrep. +*/ + +use std::path::PathBuf; + +use {anyhow::Context as AnyhowContext, bstr::ByteVec}; + +use crate::flags::{ + lowargs::{ + BinaryMode, BoundaryMode, BufferMode, CaseMode, ColorChoice, + ContextMode, EncodingMode, EngineChoice, GenerateMode, LoggingMode, + LowArgs, MmapMode, Mode, PatternSource, SearchMode, SortMode, + SortModeKind, SpecialMode, TypeChange, + }, + Category, Flag, FlagValue, +}; + +#[cfg(test)] +use crate::flags::parse::parse_low_raw; + +/// A list of all flags in ripgrep via implementations of `Flag`. +/// +/// The order of these flags matter. It determines the order of the flags in +/// the generated documentation (`-h`, `--help` and the man page) within each +/// category. (This is why the deprecated flags are last.) +pub(super) const FLAGS: &[&dyn Flag] = &[ + // -e/--regexp and -f/--file should come before anything else in the + // same category. + &Regexp, + &File, + &AfterContext, + &BeforeContext, + &Binary, + &BlockBuffered, + &ByteOffset, + &CaseSensitive, + &Color, + &Colors, + &Column, + &Context, + &ContextSeparator, + &Count, + &CountMatches, + &Crlf, + &Debug, + &DfaSizeLimit, + &Encoding, + &Engine, + &FieldContextSeparator, + &FieldMatchSeparator, + &Files, + &FilesWithMatches, + &FilesWithoutMatch, + &FixedStrings, + &Follow, + &Generate, + &Glob, + &GlobCaseInsensitive, + &Heading, + &Help, + &Hidden, + &HostnameBin, + &HyperlinkFormat, + &IGlob, + &IgnoreCase, + &IgnoreFile, + &IgnoreFileCaseInsensitive, + &IncludeZero, + &InvertMatch, + &JSON, + &LineBuffered, + &LineNumber, + &LineNumberNo, + &LineRegexp, + &MaxColumns, + &MaxColumnsPreview, + &MaxCount, + &MaxDepth, + &MaxFilesize, + &Mmap, + &Multiline, + &MultilineDotall, + &NoConfig, + &NoIgnore, + &NoIgnoreDot, + &NoIgnoreExclude, + &NoIgnoreFiles, + &NoIgnoreGlobal, + &NoIgnoreMessages, + &NoIgnoreParent, + &NoIgnoreVcs, + &NoMessages, + &NoRequireGit, + &NoUnicode, + &Null, + &NullData, + &OneFileSystem, + &OnlyMatching, + &PathSeparator, + &Passthru, + &PCRE2, + &PCRE2Version, + &Pre, + &PreGlob, + &Pretty, + &Quiet, + &RegexSizeLimit, + &Replace, + &SearchZip, + &SmartCase, + &Sort, + &Sortr, + &Stats, + &StopOnNonmatch, + &Text, + &Threads, + &Trace, + &Trim, + &Type, + &TypeNot, + &TypeAdd, + &TypeClear, + &TypeList, + &Unrestricted, + &Version, + &Vimgrep, + &WithFilename, + &WithFilenameNo, + &WordRegexp, + // DEPRECATED (make them show up last in their respective categories) + &AutoHybridRegex, + &NoPcre2Unicode, + &SortFiles, +]; + +/// -A/--after-context +#[derive(Debug)] +struct AfterContext; + +impl Flag for AfterContext { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'A') + } + fn name_long(&self) -> &'static str { + "after-context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Show NUM lines after each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines after each match. +.sp +This overrides the \flag{passthru} flag and partially overrides the +\flag{context} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.context.set_after(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_after_context() { + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_after(lines); + mode + }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse_low_raw(["--after-context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["--after-context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-A", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-A5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-A5", "-A10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse_low_raw(["-A5", "-A0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let args = parse_low_raw(["-A5", "--passthru"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); + + let args = parse_low_raw(["--passthru", "-A5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let n = usize::MAX.to_string(); + let args = parse_low_raw(["--after-context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse_low_raw(["--after-context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --auto-hybrid-regex +#[derive(Debug)] +struct AutoHybridRegex; + +impl Flag for AutoHybridRegex { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "auto-hybrid-regex" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-auto-hybrid-regex") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + "(DEPRECATED) Use PCRE2 if appropriate." + } + fn doc_long(&self) -> &'static str { + r" +DEPRECATED. Use \flag{engine} instead. +.sp +When this flag is used, ripgrep will dynamically choose between supported regex +engines depending on the features used in a pattern. When ripgrep chooses a +regex engine, it applies that choice for every regex provided to ripgrep (e.g., +via multiple \flag{regexp} or \flag{file} flags). +.sp +As an example of how this flag might behave, ripgrep will attempt to use +its default finite automata based regex engine whenever the pattern can be +successfully compiled with that regex engine. If PCRE2 is enabled and if the +pattern given could not be compiled with the default regex engine, then PCRE2 +will be automatically used for searching. If PCRE2 isn't available, then this +flag has no effect because there is only one regex engine to choose from. +.sp +In the future, ripgrep may adjust its heuristics for how it decides which +regex engine to use. In general, the heuristics will be limited to a static +analysis of the patterns, and not to any specific runtime behavior observed +while searching files. +.sp +The primary downside of using this flag is that it may not always be obvious +which regex engine ripgrep uses, and thus, the match semantics or performance +profile of ripgrep may subtly and unexpectedly change. However, in many cases, +all regex engines will agree on what constitutes a match and it can be nice +to transparently support more advanced regex features like look-around and +backreferences without explicitly needing to enable them. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let mode = if v.unwrap_switch() { + EngineChoice::Auto + } else { + EngineChoice::Default + }; + args.engine = mode; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_auto_hybrid_regex() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--no-auto-hybrid-regex"]) + .unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = + parse_low_raw(["--no-auto-hybrid-regex", "--auto-hybrid-regex"]) + .unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = parse_low_raw(["--auto-hybrid-regex", "-P"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse_low_raw(["-P", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--engine=auto", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--engine=default", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--engine=default"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); +} + +/// -B/--before-context +#[derive(Debug)] +struct BeforeContext; + +impl Flag for BeforeContext { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'B') + } + fn name_long(&self) -> &'static str { + "before-context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Show NUM lines before each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines before each match. +.sp +This overrides the \flag{passthru} flag and partially overrides the +\flag{context} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.context.set_before(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_before_context() { + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_before(lines); + mode + }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse_low_raw(["--before-context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["--before-context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-B", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-B5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-B5", "-B10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse_low_raw(["-B5", "-B0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let args = parse_low_raw(["-B5", "--passthru"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); + + let args = parse_low_raw(["--passthru", "-B5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let n = usize::MAX.to_string(); + let args = parse_low_raw(["--before-context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse_low_raw(["--before-context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --binary +#[derive(Debug)] +struct Binary; + +impl Flag for Binary { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "binary" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-binary") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + "Search binary files." + } + fn doc_long(&self) -> &'static str { + r" +Enabling this flag will cause ripgrep to search binary files. By default, +ripgrep attempts to automatically skip binary files in order to improve the +relevance of results and make the search faster. +.sp +Binary files are heuristically detected based on whether they contain a +\fBNUL\fP byte or not. By default (without this flag set), once a \fBNUL\fP +byte is seen, ripgrep will stop searching the file. Usually, \fBNUL\fP bytes +occur in the beginning of most binary files. If a \fBNUL\fP byte occurs after +a match, then ripgrep will not print the match, stop searching that file, and +emit a warning that some matches are being suppressed. +.sp +In contrast, when this flag is provided, ripgrep will continue searching a +file even if a \fBNUL\fP byte is found. In particular, if a \fBNUL\fP byte is +found then ripgrep will continue searching until either a match is found or +the end of the file is reached, whichever comes sooner. If a match is found, +then ripgrep will stop and print a warning saying that the search stopped +prematurely. +.sp +If you want ripgrep to search a file without any special \fBNUL\fP byte +handling at all (and potentially print binary data to stdout), then you should +use the \flag{text} flag. +.sp +The \flag{binary} flag is a flag for controlling ripgrep's automatic filtering +mechanism. As such, it does not need to be used when searching a file +explicitly or when searching stdin. That is, it is only applicable when +recursively searching a directory. +.sp +When the \flag{unrestricted} flag is provided for a third time, then this flag +is automatically enabled. +.sp +This flag overrides the \flag{text} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.binary = if v.unwrap_switch() { + BinaryMode::SearchAndSuppress + } else { + BinaryMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_binary() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse_low_raw(["--binary", "--no-binary"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--no-binary", "--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse_low_raw(["--binary", "-a"]).unwrap(); + assert_eq!(BinaryMode::AsText, args.binary); + + let args = parse_low_raw(["-a", "--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse_low_raw(["-a", "--no-binary"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); +} + +/// --block-buffered +#[derive(Debug)] +struct BlockBuffered; + +impl Flag for BlockBuffered { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "block-buffered" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-block-buffered") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Force block buffering." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will use block buffering. That is, whenever a matching +line is found, it will be written to an in-memory buffer and will not be +written to stdout until the buffer reaches a certain size. This is the default +when ripgrep's stdout is redirected to a pipeline or a file. When ripgrep's +stdout is connected to a terminal, line buffering will be used by default. +Forcing block buffering can be useful when dumping a large amount of contents +to a terminal. +.sp +This overrides the \flag{line-buffered} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.buffer = if v.unwrap_switch() { + BufferMode::Block + } else { + BufferMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_block_buffered() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BufferMode::Auto, args.buffer); + + let args = parse_low_raw(["--block-buffered"]).unwrap(); + assert_eq!(BufferMode::Block, args.buffer); + + let args = + parse_low_raw(["--block-buffered", "--no-block-buffered"]).unwrap(); + assert_eq!(BufferMode::Auto, args.buffer); + + let args = parse_low_raw(["--block-buffered", "--line-buffered"]).unwrap(); + assert_eq!(BufferMode::Line, args.buffer); +} + +/// --byte-offset +#[derive(Debug)] +struct ByteOffset; + +impl Flag for ByteOffset { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'b') + } + fn name_long(&self) -> &'static str { + "byte-offset" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-byte-offset") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Print the byte offset for each matching line." + } + fn doc_long(&self) -> &'static str { + r" +Print the 0-based byte offset within the input file before each line of output. +If \flag{only-matching} is specified, print the offset of the matched text +itself. +.sp +If ripgrep does transcoding, then the byte offset is in terms of the result +of transcoding and not the original data. This applies similarly to other +transformations on the data, such as decompression or a \flag{pre} filter. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.byte_offset = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_byte_offset() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.byte_offset); + + let args = parse_low_raw(["--byte-offset"]).unwrap(); + assert_eq!(true, args.byte_offset); + + let args = parse_low_raw(["-b"]).unwrap(); + assert_eq!(true, args.byte_offset); + + let args = parse_low_raw(["--byte-offset", "--no-byte-offset"]).unwrap(); + assert_eq!(false, args.byte_offset); + + let args = parse_low_raw(["--no-byte-offset", "-b"]).unwrap(); + assert_eq!(true, args.byte_offset); +} + +/// -s/--case-sensitive +#[derive(Debug)] +struct CaseSensitive; + +impl Flag for CaseSensitive { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b's') + } + fn name_long(&self) -> &'static str { + "case-sensitive" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Search case sensitively (default)." + } + fn doc_long(&self) -> &'static str { + r" +Execute the search case sensitively. This is the default mode. +.sp +This is a global option that applies to all patterns given to ripgrep. +Individual patterns can still be matched case insensitively by using inline +regex flags. For example, \fB(?i)abc\fP will match \fBabc\fP case insensitively +even when this flag is used. +.sp +This flag overrides the \flag{ignore-case} and \flag{smart-case} flags. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "flag has no negation"); + args.case = CaseMode::Sensitive; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_case_sensitive() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["--case-sensitive"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["-s"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); +} + +/// --color +#[derive(Debug)] +struct Color; + +impl Flag for Color { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "color" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("WHEN") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "When to use color." + } + fn doc_long(&self) -> &'static str { + r" +This flag controls when to use colors. The default setting is \fBauto\fP, which +means ripgrep will try to guess when to use colors. For example, if ripgrep is +printing to a terminal, then it will use colors, but if it is redirected to a +file or a pipe, then it will suppress color output. +.sp +ripgrep will suppress color output by default in some other circumstances as +well. These include, but are not limited to: +.sp +.IP \(bu 3n +When the \fBTERM\fP environment variable is not set or set to \fBdumb\fP. +.sp +.IP \(bu 3n +When the \fBNO_COLOR\fP environment variable is set (regardless of value). +.sp +.IP \(bu 3n +When flags that imply no use for colors are given. For example, +\flag{vimgrep} and \flag{json}. +. +.PP +The possible values for this flag are: +.sp +.IP \fBnever\fP 10n +Colors will never be used. +.sp +.IP \fBauto\fP 10n +The default. ripgrep tries to be smart. +.sp +.IP \fBalways\fP 10n +Colors will always be used regardless of where output is sent. +.sp +.IP \fBansi\fP 10n +Like 'always', but emits ANSI escapes (even in a Windows console). +. +.PP +This flag also controls whether hyperlinks are emitted. For example, when +a hyperlink format is specified, hyperlinks won't be used when color is +suppressed. If one wants to emit hyperlinks but no colors, then one must use +the \flag{colors} flag to manually set all color styles to \fBnone\fP: +.sp +.EX + \-\-colors 'path:none' \\ + \-\-colors 'line:none' \\ + \-\-colors 'column:none' \\ + \-\-colors 'match:none' +.EE +.sp +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["never", "auto", "always", "ansi"] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.color = match convert::str(&v.unwrap_value())? { + "never" => ColorChoice::Never, + "auto" => ColorChoice::Auto, + "always" => ColorChoice::Always, + "ansi" => ColorChoice::Ansi, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_color() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ColorChoice::Auto, args.color); + + let args = parse_low_raw(["--color", "never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = parse_low_raw(["--color", "auto"]).unwrap(); + assert_eq!(ColorChoice::Auto, args.color); + + let args = parse_low_raw(["--color", "always"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + + let args = parse_low_raw(["--color", "ansi"]).unwrap(); + assert_eq!(ColorChoice::Ansi, args.color); + + let args = parse_low_raw(["--color=never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = + parse_low_raw(["--color", "always", "--color", "never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = + parse_low_raw(["--color", "never", "--color", "always"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + + let result = parse_low_raw(["--color", "foofoo"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse_low_raw(["--color", "Always"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --colors +#[derive(Debug)] +struct Colors; + +impl Flag for Colors { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "colors" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("COLOR_SPEC") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Configure color settings and styles." + } + fn doc_long(&self) -> &'static str { + r" +This flag specifies color settings for use in the output. This flag may be +provided multiple times. Settings are applied iteratively. Pre-existing color +labels are limited to one of eight choices: \fBred\fP, \fBblue\fP, \fBgreen\fP, +\fBcyan\fP, \fBmagenta\fP, \fByellow\fP, \fBwhite\fP and \fBblack\fP. Styles +are limited to \fBnobold\fP, \fBbold\fP, \fBnointense\fP, \fBintense\fP, +\fBnounderline\fP or \fBunderline\fP. +.sp +The format of the flag is +\fB{\fP\fItype\fP\fB}:{\fP\fIattribute\fP\fB}:{\fP\fIvalue\fP\fB}\fP. +\fItype\fP should be one of \fBpath\fP, \fBline\fP, \fBcolumn\fP or +\fBmatch\fP. \fIattribute\fP can be \fBfg\fP, \fBbg\fP or \fBstyle\fP. +\fIvalue\fP is either a color (for \fBfg\fP and \fBbg\fP) or a text style. A +special format, \fB{\fP\fItype\fP\fB}:none\fP, will clear all color settings +for \fItype\fP. +.sp +For example, the following command will change the match color to magenta and +the background color for line numbers to yellow: +.sp +.EX + rg \-\-colors 'match:fg:magenta' \-\-colors 'line:bg:yellow' +.EE +.sp +Extended colors can be used for \fIvalue\fP when the terminal supports +ANSI color sequences. These are specified as either \fIx\fP (256-color) or +.IB x , x , x +(24-bit truecolor) where \fIx\fP is a number between \fB0\fP and \fB255\fP +inclusive. \fIx\fP may be given as a normal decimal number or a hexadecimal +number, which is prefixed by \fB0x\fP. +.sp +For example, the following command will change the match background color to +that represented by the rgb value (0,128,255): +.sp +.EX + rg \-\-colors 'match:bg:0,128,255' +.EE +.sp +or, equivalently, +.sp +.EX + rg \-\-colors 'match:bg:0x0,0x80,0xFF' +.EE +.sp +Note that the \fBintense\fP and \fBnointense\fP styles will have no effect when +used alongside these extended color codes. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + let v = convert::str(&v)?; + args.colors.push(v.parse()?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_colors() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert!(args.colors.is_empty()); + + let args = parse_low_raw(["--colors", "match:fg:magenta"]).unwrap(); + assert_eq!(args.colors, vec!["match:fg:magenta".parse().unwrap()]); + + let args = parse_low_raw([ + "--colors", + "match:fg:magenta", + "--colors", + "line:bg:yellow", + ]) + .unwrap(); + assert_eq!( + args.colors, + vec![ + "match:fg:magenta".parse().unwrap(), + "line:bg:yellow".parse().unwrap() + ] + ); +} + +/// --column +#[derive(Debug)] +struct Column; + +impl Flag for Column { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "column" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-column") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Show column numbers." + } + fn doc_long(&self) -> &'static str { + r" +Show column numbers (1-based). This only shows the column numbers for the first +match on each line. This does not try to account for Unicode. One byte is equal +to one column. This implies \flag{line-number}. +.sp +When \flag{only-matching} is used, then the column numbers written correspond +to the start of each match. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.column = Some(v.unwrap_switch()); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_column() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.column); + + let args = parse_low_raw(["--column"]).unwrap(); + assert_eq!(Some(true), args.column); + + let args = parse_low_raw(["--column", "--no-column"]).unwrap(); + assert_eq!(Some(false), args.column); + + let args = parse_low_raw(["--no-column", "--column"]).unwrap(); + assert_eq!(Some(true), args.column); +} + +/// -C/--context +#[derive(Debug)] +struct Context; + +impl Flag for Context { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'C') + } + fn name_long(&self) -> &'static str { + "context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Show NUM lines before and after each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines before and after each match. This is equivalent to +providing both the \flag{before-context} and \flag{after-context} flags with +the same value. +.sp +This overrides the \flag{passthru} flag. The \flag{after-context} and +\flag{before-context} flags both partially override this flag, regardless of +the order. For example, \fB\-A2 \-C1\fP is equivalent to \fB\-A2 \-B1\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.context.set_both(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_context() { + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_both(lines); + mode + }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse_low_raw(["--context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["--context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-C", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-C5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-C5", "-C10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse_low_raw(["-C5", "-C0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let args = parse_low_raw(["-C5", "--passthru"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); + + let args = parse_low_raw(["--passthru", "-C5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let n = usize::MAX.to_string(); + let args = parse_low_raw(["--context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse_low_raw(["--context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } + + // Test the interaction between -A/-B and -C. Basically, -A/-B always + // partially overrides -C, regardless of where they appear relative to + // each other. This behavior is also how GNU grep works, and it also makes + // logical sense to me: -A/-B are the more specific flags. + let args = parse_low_raw(["-A1", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((5, 1), args.context.get_limited()); + + let args = parse_low_raw(["-B1", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((1, 5), args.context.get_limited()); + + let args = parse_low_raw(["-A1", "-B2", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(2); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((2, 1), args.context.get_limited()); + + // These next three are like the ones above, but with -C before -A/-B. This + // tests that -A and -B only partially override -C. That is, -C1 -A2 is + // equivalent to -B1 -A2. + let args = parse_low_raw(["-C5", "-A1"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((5, 1), args.context.get_limited()); + + let args = parse_low_raw(["-C5", "-B1"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((1, 5), args.context.get_limited()); + + let args = parse_low_raw(["-C5", "-A1", "-B2"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(2); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((2, 1), args.context.get_limited()); +} + +/// --context-separator +#[derive(Debug)] +struct ContextSeparator; + +impl Flag for ContextSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "context-separator" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-context-separator") + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the separator for contextual chunks." + } + fn doc_long(&self) -> &'static str { + r" +The string used to separate non-contiguous context lines in the output. This is +only used when one of the context flags is used (that is, \flag{after-context}, +\flag{before-context} or \flag{context}). Escape sequences like \fB\\x7F\fP or +\fB\\t\fP may be used. The default value is \fB\-\-\fP. +.sp +When the context separator is set to an empty string, then a line break +is still inserted. To completely disable context separators, use the +\flag-negate{context-separator} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + use crate::flags::lowargs::ContextSeparator as Separator; + + args.context_separator = match v { + FlagValue::Switch(true) => { + unreachable!("flag can only be disabled") + } + FlagValue::Switch(false) => Separator::disabled(), + FlagValue::Value(v) => Separator::new(&v)?, + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_context_separator() { + use bstr::BString; + + use crate::flags::lowargs::ContextSeparator as Separator; + + let getbytes = |ctxsep: Separator| ctxsep.into_bytes().map(BString::from); + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Some(BString::from("--")), getbytes(args.context_separator)); + + let args = parse_low_raw(["--context-separator", "XYZ"]).unwrap(); + assert_eq!(Some(BString::from("XYZ")), getbytes(args.context_separator)); + + let args = parse_low_raw(["--no-context-separator"]).unwrap(); + assert_eq!(None, getbytes(args.context_separator)); + + let args = parse_low_raw([ + "--context-separator", + "XYZ", + "--no-context-separator", + ]) + .unwrap(); + assert_eq!(None, getbytes(args.context_separator)); + + let args = parse_low_raw([ + "--no-context-separator", + "--context-separator", + "XYZ", + ]) + .unwrap(); + assert_eq!(Some(BString::from("XYZ")), getbytes(args.context_separator)); + + // This checks that invalid UTF-8 can be used. This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse_low_raw(["--context-separator", r"\xFF"]).unwrap(); + assert_eq!(Some(BString::from(b"\xFF")), getbytes(args.context_separator)); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse_low_raw([ + OsStr::from_bytes(b"--context-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// -c/--count +#[derive(Debug)] +struct Count; + +impl Flag for Count { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'c') + } + fn name_long(&self) -> &'static str { + "count" + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Show count of matching lines for each file." + } + fn doc_long(&self) -> &'static str { + r" +This flag suppresses normal output and shows the number of lines that match the +given patterns for each file searched. Each file containing a match has its +path and count printed on each line. Note that unless \flag{multiline} +is enabled, this reports the number of lines that match and not the total +number of matches. In multiline mode, \flag{count} is equivalent to +\flag{count-matches}. +.sp +If only one file is given to ripgrep, then only the count is printed if there +is a match. The \flag{with-filename} flag can be used to force printing the +file path in this case. If you need a count to be printed regardless of whether +there is a match, then use \flag{include-zero}. +.sp +This overrides the \flag{count-matches} flag. Note that when \flag{count} +is combined with \flag{only-matching}, then ripgrep behaves as if +\flag{count-matches} was given. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--count can only be enabled"); + args.mode.update(Mode::Search(SearchMode::Count)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_count() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--count"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Count), args.mode); + + let args = parse_low_raw(["-c"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Count), args.mode); + + let args = parse_low_raw(["--count-matches", "--count"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Count), args.mode); + + let args = parse_low_raw(["--count-matches", "-c"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Count), args.mode); +} + +/// --count-matches +#[derive(Debug)] +struct CountMatches; + +impl Flag for CountMatches { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "count-matches" + } + fn doc_variable(&self) -> Option<&'static str> { + None + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Show count of every match for each file." + } + fn doc_long(&self) -> &'static str { + r" +This flag suppresses normal output and shows the number of individual matches +of the given patterns for each file searched. Each file containing matches has +its path and match count printed on each line. Note that this reports the total +number of individual matches and not the number of lines that match. +.sp +If only one file is given to ripgrep, then only the count is printed if there +is a match. The \flag{with-filename} flag can be used to force printing the +file path in this case. +.sp +This overrides the \flag{count} flag. Note that when \flag{count} is combined +with \flag{only-matching}, then ripgrep behaves as if \flag{count-matches} was +given. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--count-matches can only be enabled"); + args.mode.update(Mode::Search(SearchMode::CountMatches)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_count_matches() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--count-matches"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::CountMatches), args.mode); + + let args = parse_low_raw(["--count", "--count-matches"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::CountMatches), args.mode); + + let args = parse_low_raw(["-c", "--count-matches"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::CountMatches), args.mode); +} + +/// --crlf +#[derive(Debug)] +struct Crlf; + +impl Flag for Crlf { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "crlf" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-crlf") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Use CRLF line terminators (nice for Windows)." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will treat CRLF (\fB\\r\\n\fP) as a line terminator +instead of just \fB\\n\fP. +.sp +Principally, this permits the line anchor assertions \fB^\fP and \fB$\fP in +regex patterns to treat CRLF, CR or LF as line terminators instead of just LF. +Note that they will never match between a CR and a LF. CRLF is treated as one +single line terminator. +.sp +When using the default regex engine, CRLF support can also be enabled inside +the pattern with the \fBR\fP flag. For example, \fB(?R:$)\fP will match just +before either CR or LF, but never between CR and LF. +.sp +This flag overrides \flag{null-data}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.crlf = v.unwrap_switch(); + if args.crlf { + args.null_data = false; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_crlf() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.crlf); + + let args = parse_low_raw(["--crlf"]).unwrap(); + assert_eq!(true, args.crlf); + assert_eq!(false, args.null_data); + + let args = parse_low_raw(["--crlf", "--null-data"]).unwrap(); + assert_eq!(false, args.crlf); + assert_eq!(true, args.null_data); + + let args = parse_low_raw(["--null-data", "--crlf"]).unwrap(); + assert_eq!(true, args.crlf); + assert_eq!(false, args.null_data); + + let args = parse_low_raw(["--null-data", "--no-crlf"]).unwrap(); + assert_eq!(false, args.crlf); + assert_eq!(true, args.null_data); + + let args = parse_low_raw(["--null-data", "--crlf", "--no-crlf"]).unwrap(); + assert_eq!(false, args.crlf); + assert_eq!(false, args.null_data); +} + +/// --debug +#[derive(Debug)] +struct Debug; + +impl Flag for Debug { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "debug" + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Show debug messages." + } + fn doc_long(&self) -> &'static str { + r" +Show debug messages. Please use this when filing a bug report. +.sp +The \flag{debug} flag is generally useful for figuring out why ripgrep skipped +searching a particular file. The debug messages should mention all files +skipped and why they were skipped. +.sp +To get even more debug output, use the \flag{trace} flag, which implies +\flag{debug} along with additional trace data. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--debug can only be enabled"); + args.logging = Some(LoggingMode::Debug); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_debug() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.logging); + + let args = parse_low_raw(["--debug"]).unwrap(); + assert_eq!(Some(LoggingMode::Debug), args.logging); + + let args = parse_low_raw(["--trace", "--debug"]).unwrap(); + assert_eq!(Some(LoggingMode::Debug), args.logging); +} + +/// --dfa-size-limit +#[derive(Debug)] +struct DfaSizeLimit; + +impl Flag for DfaSizeLimit { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "dfa-size-limit" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM+SUFFIX?") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"The upper size limit of the regex DFA." + } + fn doc_long(&self) -> &'static str { + r" +The upper size limit of the regex DFA. The default limit is something generous +for any single pattern or for many smallish patterns. This should only be +changed on very large regex inputs where the (slower) fallback regex engine may +otherwise be used if the limit is reached. +.sp +The input format accepts suffixes of \fBK\fP, \fBM\fP or \fBG\fP which +correspond to kilobytes, megabytes and gigabytes, respectively. If no suffix is +provided the input is treated as bytes. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + args.dfa_size_limit = Some(convert::human_readable_usize(&v)?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_dfa_size_limit() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.dfa_size_limit); + + #[cfg(target_pointer_width = "64")] + { + let args = parse_low_raw(["--dfa-size-limit", "9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.dfa_size_limit); + + let args = parse_low_raw(["--dfa-size-limit=9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.dfa_size_limit); + + let args = + parse_low_raw(["--dfa-size-limit=9G", "--dfa-size-limit=0"]) + .unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + } + + let args = parse_low_raw(["--dfa-size-limit=0K"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let args = parse_low_raw(["--dfa-size-limit=0M"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let args = parse_low_raw(["--dfa-size-limit=0G"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let result = parse_low_raw(["--dfa-size-limit", "9999999999999999999999"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse_low_raw(["--dfa-size-limit", "9999999999999999G"]); + assert!(result.is_err(), "{result:?}"); +} + +/// -E/--encoding +#[derive(Debug)] +struct Encoding; + +impl Flag for Encoding { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'E') + } + fn name_long(&self) -> &'static str { + "encoding" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-encoding") + } + fn doc_variable(&self) -> Option<&'static str> { + Some("ENCODING") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Specify the text encoding of files to search." + } + fn doc_long(&self) -> &'static str { + r" +Specify the text encoding that ripgrep will use on all files searched. The +default value is \fBauto\fP, which will cause ripgrep to do a best effort +automatic detection of encoding on a per-file basis. Automatic detection in +this case only applies to files that begin with a UTF-8 or UTF-16 byte-order +mark (BOM). No other automatic detection is performed. One can also specify +\fBnone\fP which will then completely disable BOM sniffing and always result +in searching the raw bytes, including a BOM if it's present, regardless of its +encoding. +.sp +Other supported values can be found in the list of labels here: +\fIhttps://encoding.spec.whatwg.org/#concept-encoding-get\fP. +.sp +For more details on encoding and how ripgrep deals with it, see \fBGUIDE.md\fP. +.sp +The encoding detection that ripgrep uses can be reverted to its automatic mode +via the \flag-negate{encoding} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let value = match v { + FlagValue::Value(v) => v, + FlagValue::Switch(true) => { + unreachable!("--encoding must accept a value") + } + FlagValue::Switch(false) => { + args.encoding = EncodingMode::Auto; + return Ok(()); + } + }; + let label = convert::str(&value)?; + args.encoding = match label { + "auto" => EncodingMode::Auto, + "none" => EncodingMode::Disabled, + _ => EncodingMode::Some(grep::searcher::Encoding::new(label)?), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_encoding() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse_low_raw(["--encoding", "auto"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse_low_raw(["--encoding", "none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["--encoding=none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["-E", "none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["-Enone"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["-E", "none", "--no-encoding"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse_low_raw(["--no-encoding", "-E", "none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["-E", "utf-16"]).unwrap(); + let enc = grep::searcher::Encoding::new("utf-16").unwrap(); + assert_eq!(EncodingMode::Some(enc), args.encoding); + + let args = parse_low_raw(["-E", "utf-16", "--no-encoding"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let result = parse_low_raw(["-E", "foo"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --engine +#[derive(Debug)] +struct Engine; + +impl Flag for Engine { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "engine" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("ENGINE") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Specify which regex engine to use." + } + fn doc_long(&self) -> &'static str { + r" +Specify which regular expression engine to use. When you choose a regex engine, +it applies that choice for every regex provided to ripgrep (e.g., via multiple +\flag{regexp} or \flag{file} flags). +.sp +Accepted values are \fBdefault\fP, \fBpcre2\fP, or \fBauto\fP. +.sp +The default value is \fBdefault\fP, which is usually the fastest and should be +good for most use cases. The \fBpcre2\fP engine is generally useful when you +want to use features such as look-around or backreferences. \fBauto\fP will +dynamically choose between supported regex engines depending on the features +used in a pattern on a best effort basis. +.sp +Note that the \fBpcre2\fP engine is an optional ripgrep feature. If PCRE2 +wasn't included in your build of ripgrep, then using this flag will result in +ripgrep printing an error message and exiting. +.sp +This overrides previous uses of the \flag{pcre2} and \flag{auto-hybrid-regex} +flags. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["default", "pcre2", "auto"] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + let string = convert::str(&v)?; + args.engine = match string { + "default" => EngineChoice::Default, + "pcre2" => EngineChoice::PCRE2, + "auto" => EngineChoice::Auto, + _ => anyhow::bail!("unrecognized regex engine '{string}'"), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_engine() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["--engine", "pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse_low_raw(["--engine=pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--engine=pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = + parse_low_raw(["--engine=pcre2", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--engine=auto"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--engine=default"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = + parse_low_raw(["--engine=pcre2", "--no-auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); +} + +/// --field-context-separator +#[derive(Debug)] +struct FieldContextSeparator; + +impl Flag for FieldContextSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "field-context-separator" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the field context separator." + } + fn doc_long(&self) -> &'static str { + r" +Set the field context separator. This separator is only used when printing +contextual lines. It is used to delimit file paths, line numbers, columns and +the contextual line itself. The separator may be any number of bytes, including +zero. Escape sequences like \fB\\x7F\fP or \fB\\t\fP may be used. +.sp +The \fB-\fP character is the default value. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + use crate::flags::lowargs::FieldContextSeparator as Separator; + + args.field_context_separator = Separator::new(&v.unwrap_value())?; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_field_context_separator() { + use bstr::BString; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BString::from("-"), args.field_context_separator.into_bytes()); + + let args = parse_low_raw(["--field-context-separator", "XYZ"]).unwrap(); + assert_eq!( + BString::from("XYZ"), + args.field_context_separator.into_bytes() + ); + + let args = parse_low_raw(["--field-context-separator=XYZ"]).unwrap(); + assert_eq!( + BString::from("XYZ"), + args.field_context_separator.into_bytes() + ); + + let args = parse_low_raw([ + "--field-context-separator", + "XYZ", + "--field-context-separator", + "ABC", + ]) + .unwrap(); + assert_eq!( + BString::from("ABC"), + args.field_context_separator.into_bytes() + ); + + let args = parse_low_raw(["--field-context-separator", r"\t"]).unwrap(); + assert_eq!(BString::from("\t"), args.field_context_separator.into_bytes()); + + let args = parse_low_raw(["--field-context-separator", r"\x00"]).unwrap(); + assert_eq!( + BString::from("\x00"), + args.field_context_separator.into_bytes() + ); + + // This checks that invalid UTF-8 can be used. This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse_low_raw(["--field-context-separator", r"\xFF"]).unwrap(); + assert_eq!( + BString::from(b"\xFF"), + args.field_context_separator.into_bytes() + ); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse_low_raw([ + OsStr::from_bytes(b"--field-context-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --field-match-separator +#[derive(Debug)] +struct FieldMatchSeparator; + +impl Flag for FieldMatchSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "field-match-separator" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the field match separator." + } + fn doc_long(&self) -> &'static str { + r" +Set the field match separator. This separator is only used when printing +matching lines. It is used to delimit file paths, line numbers, columns and the +matching line itself. The separator may be any number of bytes, including zero. +Escape sequences like \fB\\x7F\fP or \fB\\t\fP may be used. +.sp +The \fB:\fP character is the default value. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + use crate::flags::lowargs::FieldMatchSeparator as Separator; + + args.field_match_separator = Separator::new(&v.unwrap_value())?; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_field_match_separator() { + use bstr::BString; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BString::from(":"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw(["--field-match-separator", "XYZ"]).unwrap(); + assert_eq!(BString::from("XYZ"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw(["--field-match-separator=XYZ"]).unwrap(); + assert_eq!(BString::from("XYZ"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw([ + "--field-match-separator", + "XYZ", + "--field-match-separator", + "ABC", + ]) + .unwrap(); + assert_eq!(BString::from("ABC"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw(["--field-match-separator", r"\t"]).unwrap(); + assert_eq!(BString::from("\t"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw(["--field-match-separator", r"\x00"]).unwrap(); + assert_eq!(BString::from("\x00"), args.field_match_separator.into_bytes()); + + // This checks that invalid UTF-8 can be used. This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse_low_raw(["--field-match-separator", r"\xFF"]).unwrap(); + assert_eq!( + BString::from(b"\xFF"), + args.field_match_separator.into_bytes() + ); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse_low_raw([ + OsStr::from_bytes(b"--field-match-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// -f/--file +#[derive(Debug)] +struct File; + +impl Flag for File { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'f') + } + fn name_long(&self) -> &'static str { + "file" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("PATTERNFILE") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"Search for patterns from the given file." + } + fn doc_long(&self) -> &'static str { + r" +Search for patterns from the given file, with one pattern per line. When this +flag is used multiple times or in combination with the \flag{regexp} flag, then +all patterns provided are searched. Empty pattern lines will match all input +lines, and the newline is not counted as part of the pattern. +.sp +A line is printed if and only if it matches at least one of the patterns. +.sp +When \fIPATTERNFILE\fP is \fB-\fP, then \fBstdin\fP will be read for the +patterns. +.sp +When \flag{file} or \flag{regexp} is used, then ripgrep treats all positional +arguments as files or directories to search. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let path = PathBuf::from(v.unwrap_value()); + args.patterns.push(PatternSource::File(path)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_file() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.patterns); + + let args = parse_low_raw(["--file", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse_low_raw(["--file=foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse_low_raw(["-f", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse_low_raw(["-ffoo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse_low_raw(["--file", "-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse_low_raw(["--file=-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse_low_raw(["-f", "-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse_low_raw(["-f-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse_low_raw(["--file=foo", "--file", "bar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::File(PathBuf::from("foo")), + PatternSource::File(PathBuf::from("bar")) + ], + args.patterns + ); + + // We permit path arguments to be invalid UTF-8. So test that. Some of + // these cases are tricky and depend on lexopt doing the right thing. + // + // We probably should add tests for this handling on Windows too, but paths + // that are invalid UTF-16 appear incredibly rare in the Windows world. + #[cfg(unix)] + { + use std::{ + ffi::{OsStr, OsString}, + os::unix::ffi::{OsStrExt, OsStringExt}, + }; + + let bytes = &[b'A', 0xFF, b'Z'][..]; + let path = PathBuf::from(OsString::from_vec(bytes.to_vec())); + + let args = parse_low_raw([ + OsStr::from_bytes(b"--file"), + OsStr::from_bytes(bytes), + ]) + .unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let args = parse_low_raw([ + OsStr::from_bytes(b"-f"), + OsStr::from_bytes(bytes), + ]) + .unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let mut bytes = b"--file=A".to_vec(); + bytes.push(0xFF); + bytes.push(b'Z'); + let args = parse_low_raw([OsStr::from_bytes(&bytes)]).unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let mut bytes = b"-fA".to_vec(); + bytes.push(0xFF); + bytes.push(b'Z'); + let args = parse_low_raw([OsStr::from_bytes(&bytes)]).unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + } +} + +/// --files +#[derive(Debug)] +struct Files; + +impl Flag for Files { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "files" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Print each file that would be searched." + } + fn doc_long(&self) -> &'static str { + r" +Print each file that would be searched without actually performing the search. +This is useful to determine whether a particular file is being searched or not. +.sp +This overrides \flag{type-list}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch()); + args.mode.update(Mode::Files); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_files() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--files"]).unwrap(); + assert_eq!(Mode::Files, args.mode); +} + +/// -l/--files-with-matches +#[derive(Debug)] +struct FilesWithMatches; + +impl Flag for FilesWithMatches { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'l') + } + fn name_long(&self) -> &'static str { + "files-with-matches" + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Print the paths with at least one match." + } + fn doc_long(&self) -> &'static str { + r" +Print only the paths with at least one match and suppress match contents. +.sp +This overrides \flag{files-without-match}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--files-with-matches can only be enabled"); + args.mode.update(Mode::Search(SearchMode::FilesWithMatches)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_files_with_matches() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--files-with-matches"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); + + let args = parse_low_raw(["-l"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); +} + +/// -l/--files-without-match +#[derive(Debug)] +struct FilesWithoutMatch; + +impl Flag for FilesWithoutMatch { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "files-without-match" + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Print the paths that contain zero matches." + } + fn doc_long(&self) -> &'static str { + r" +Print the paths that contain zero matches and suppress match contents. +.sp +This overrides \flag{files-with-matches}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!( + v.unwrap_switch(), + "--files-without-match can only be enabled" + ); + args.mode.update(Mode::Search(SearchMode::FilesWithoutMatch)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_files_without_match() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--files-without-match"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithoutMatch), args.mode); + + let args = + parse_low_raw(["--files-with-matches", "--files-without-match"]) + .unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithoutMatch), args.mode); + + let args = + parse_low_raw(["--files-without-match", "--files-with-matches"]) + .unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); +} + +/// -F/--fixed-strings +#[derive(Debug)] +struct FixedStrings; + +impl Flag for FixedStrings { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'F') + } + fn name_long(&self) -> &'static str { + "fixed-strings" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-fixed-strings") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Treat all patterns as literals." + } + fn doc_long(&self) -> &'static str { + r" +Treat all patterns as literals instead of as regular expressions. When this +flag is used, special regular expression meta characters such as \fB.(){}*+\fP +should not need be escaped. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.fixed_strings = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_fixed_strings() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.fixed_strings); + + let args = parse_low_raw(["--fixed-strings"]).unwrap(); + assert_eq!(true, args.fixed_strings); + + let args = parse_low_raw(["-F"]).unwrap(); + assert_eq!(true, args.fixed_strings); + + let args = parse_low_raw(["-F", "--no-fixed-strings"]).unwrap(); + assert_eq!(false, args.fixed_strings); + + let args = parse_low_raw(["--no-fixed-strings", "-F"]).unwrap(); + assert_eq!(true, args.fixed_strings); +} + +/// -L/--follow +#[derive(Debug)] +struct Follow; + +impl Flag for Follow { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'L') + } + fn name_long(&self) -> &'static str { + "follow" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-follow") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Follow symbolic links." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to follow symbolic links while traversing +directories. This behavior is disabled by default. Note that ripgrep will +check for symbolic link loops and report errors if it finds one. ripgrep will +also report errors for broken links. To suppress error messages, use the +\flag{no-messages} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.follow = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_follow() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.follow); + + let args = parse_low_raw(["--follow"]).unwrap(); + assert_eq!(true, args.follow); + + let args = parse_low_raw(["-L"]).unwrap(); + assert_eq!(true, args.follow); + + let args = parse_low_raw(["-L", "--no-follow"]).unwrap(); + assert_eq!(false, args.follow); + + let args = parse_low_raw(["--no-follow", "-L"]).unwrap(); + assert_eq!(true, args.follow); +} + +/// --generate +#[derive(Debug)] +struct Generate; + +impl Flag for Generate { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "generate" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("KIND") + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Generate man pages and completion scripts." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to generate some special kind of output identified +by \fIKIND\fP and then quit without searching. \fIKIND\fP can be one of the +following values: +.sp +.TP 15 +\fBman\fP +Generates a manual page for ripgrep in the \fBroff\fP format. +.TP 15 +\fBcomplete\-bash\fP +Generates a completion script for the \fBbash\fP shell. +.TP 15 +\fBcomplete\-zsh\fP +Generates a completion script for the \fBzsh\fP shell. +.TP 15 +\fBcomplete\-fish\fP +Generates a completion script for the \fBfish\fP shell. +.TP 15 +\fBcomplete\-powershell\fP +Generates a completion script for PowerShell. +.PP +The output is written to \fBstdout\fP. The list above may expand over time. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &[ + "man", + "complete-bash", + "complete-zsh", + "complete-fish", + "complete-powershell", + ] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let genmode = match convert::str(&v.unwrap_value())? { + "man" => GenerateMode::Man, + "complete-bash" => GenerateMode::CompleteBash, + "complete-zsh" => GenerateMode::CompleteZsh, + "complete-fish" => GenerateMode::CompleteFish, + "complete-powershell" => GenerateMode::CompletePowerShell, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + args.mode.update(Mode::Generate(genmode)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_generate() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--generate", "man"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::Man), args.mode); + + let args = parse_low_raw(["--generate", "complete-bash"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::CompleteBash), args.mode); + + let args = parse_low_raw(["--generate", "complete-zsh"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::CompleteZsh), args.mode); + + let args = parse_low_raw(["--generate", "complete-fish"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::CompleteFish), args.mode); + + let args = parse_low_raw(["--generate", "complete-powershell"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::CompletePowerShell), args.mode); + + let args = + parse_low_raw(["--generate", "complete-bash", "--generate=man"]) + .unwrap(); + assert_eq!(Mode::Generate(GenerateMode::Man), args.mode); + + let args = parse_low_raw(["--generate", "man", "-l"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); + + // An interesting quirk of how the modes override each other that lets + // you get back to the "default" mode of searching. + let args = + parse_low_raw(["--generate", "man", "--json", "--no-json"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); +} + +/// -g/--glob +#[derive(Debug)] +struct Glob; + +impl Flag for Glob { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'g') + } + fn name_long(&self) -> &'static str { + "glob" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("GLOB") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Include or exclude file paths." + } + fn doc_long(&self) -> &'static str { + r#" +Include or exclude files and directories for searching that match the given +glob. This always overrides any other ignore logic. Multiple glob flags may +be used. Globbing rules match \fB.gitignore\fP globs. Precede a glob with a +\fB!\fP to exclude it. If multiple globs match a file or directory, the glob +given later in the command line takes precedence. +.sp +As an extension, globs support specifying alternatives: +.BI "\-g '" ab{c,d}* ' +is equivalent to +.BI "\-g " "abc " "\-g " abd. +Empty alternatives like +.BI "\-g '" ab{,c} ' +are not currently supported. Note that this syntax extension is also currently +enabled in \fBgitignore\fP files, even though this syntax isn't supported by +git itself. ripgrep may disable this syntax extension in gitignore files, but +it will always remain available via the \flag{glob} flag. +.sp +When this flag is set, every file and directory is applied to it to test for +a match. For example, if you only want to search in a particular directory +\fIfoo\fP, then +.BI "\-g " foo +is incorrect because \fIfoo/bar\fP does not match +the glob \fIfoo\fP. Instead, you should use +.BI "\-g '" foo/** '. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let glob = convert::string(v.unwrap_value())?; + args.globs.push(glob); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_glob() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.globs); + + let args = parse_low_raw(["--glob", "foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.globs); + + let args = parse_low_raw(["--glob=foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.globs); + + let args = parse_low_raw(["-g", "foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.globs); + + let args = parse_low_raw(["-gfoo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.globs); + + let args = parse_low_raw(["--glob", "-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.globs); + + let args = parse_low_raw(["--glob=-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.globs); + + let args = parse_low_raw(["-g", "-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.globs); + + let args = parse_low_raw(["-g-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.globs); +} + +/// --glob-case-insensitive +#[derive(Debug)] +struct GlobCaseInsensitive; + +impl Flag for GlobCaseInsensitive { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "glob-case-insensitive" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-glob-case-insensitive") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Process all glob patterns case insensitively." + } + fn doc_long(&self) -> &'static str { + r" +Process all glob patterns given with the \flag{glob} flag case insensitively. +This effectively treats \flag{glob} as \flag{iglob}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.glob_case_insensitive = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_glob_case_insensitive() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.glob_case_insensitive); + + let args = parse_low_raw(["--glob-case-insensitive"]).unwrap(); + assert_eq!(true, args.glob_case_insensitive); + + let args = parse_low_raw([ + "--glob-case-insensitive", + "--no-glob-case-insensitive", + ]) + .unwrap(); + assert_eq!(false, args.glob_case_insensitive); + + let args = parse_low_raw([ + "--no-glob-case-insensitive", + "--glob-case-insensitive", + ]) + .unwrap(); + assert_eq!(true, args.glob_case_insensitive); +} + +/// --heading +#[derive(Debug)] +struct Heading; + +impl Flag for Heading { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "heading" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-heading") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print matches grouped by each file." + } + fn doc_long(&self) -> &'static str { + r" +This flag prints the file path above clusters of matches from each file instead +of printing the file path as a prefix for each matched line. This is the +default mode when printing to a terminal. +.sp +When \fBstdout\fP is not a terminal, then ripgrep will default to the standard +grep-like format. Once can force this format in Unix-like environments by +piping the output of ripgrep to \fBcat\fP. For example, \fBrg\fP \fIfoo\fP \fB| +cat\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.heading = Some(v.unwrap_switch()); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_heading() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.heading); + + let args = parse_low_raw(["--heading"]).unwrap(); + assert_eq!(Some(true), args.heading); + + let args = parse_low_raw(["--no-heading"]).unwrap(); + assert_eq!(Some(false), args.heading); + + let args = parse_low_raw(["--heading", "--no-heading"]).unwrap(); + assert_eq!(Some(false), args.heading); + + let args = parse_low_raw(["--no-heading", "--heading"]).unwrap(); + assert_eq!(Some(true), args.heading); +} + +/// -h/--help +#[derive(Debug)] +struct Help; + +impl Flag for Help { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "help" + } + fn name_short(&self) -> Option { + Some(b'h') + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Show help output." + } + fn doc_long(&self) -> &'static str { + r" +This flag prints the help output for ripgrep. +.sp +Unlike most other flags, the behavior of the short flag, \fB\-h\fP, and the +long flag, \fB\-\-help\fP, is different. The short flag will show a condensed +help output while the long flag will show a verbose help output. The verbose +help output has complete documentation, where as the condensed help output will +show only a single line for every flag. +" + } + + fn update(&self, v: FlagValue, _: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--help has no negation"); + // Since this flag has different semantics for -h and --help and the + // Flag trait doesn't support encoding this sort of thing, we handle it + // as a special case in the parser. + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_help() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.special); + + let args = parse_low_raw(["-h"]).unwrap(); + assert_eq!(Some(SpecialMode::HelpShort), args.special); + + let args = parse_low_raw(["--help"]).unwrap(); + assert_eq!(Some(SpecialMode::HelpLong), args.special); + + let args = parse_low_raw(["-h", "--help"]).unwrap(); + assert_eq!(Some(SpecialMode::HelpLong), args.special); + + let args = parse_low_raw(["--help", "-h"]).unwrap(); + assert_eq!(Some(SpecialMode::HelpShort), args.special); +} + +/// -./--hidden +#[derive(Debug)] +struct Hidden; + +impl Flag for Hidden { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'.') + } + fn name_long(&self) -> &'static str { + "hidden" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-hidden") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Search hidden files and directories." + } + fn doc_long(&self) -> &'static str { + r#" +Search hidden files and directories. By default, hidden files and directories +are skipped. Note that if a hidden file or a directory is whitelisted in +an ignore file, then it will be searched even if this flag isn't provided. +Similarly if a hidden file or directory is given explicitly as an argumnet to +ripgrep. +.sp +A file or directory is considered hidden if its base name starts with a dot +character (\fB.\fP). On operating systems which support a "hidden" file +attribute, like Windows, files with this attribute are also considered hidden. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.hidden = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_hidden() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.hidden); + + let args = parse_low_raw(["--hidden"]).unwrap(); + assert_eq!(true, args.hidden); + + let args = parse_low_raw(["-."]).unwrap(); + assert_eq!(true, args.hidden); + + let args = parse_low_raw(["-.", "--no-hidden"]).unwrap(); + assert_eq!(false, args.hidden); + + let args = parse_low_raw(["--no-hidden", "-."]).unwrap(); + assert_eq!(true, args.hidden); +} + +/// --hostname-bin +#[derive(Debug)] +struct HostnameBin; + +impl Flag for HostnameBin { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "hostname-bin" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("COMMAND") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Run a program to get this system's hostname." + } + fn doc_long(&self) -> &'static str { + r#" +This flag controls how ripgrep determines this system's hostname. The flag's +value should correspond to an executable (either a path or something that can +be found via your system's \fBPATH\fP environment variable). When set, ripgrep +will run this executable, with no arguments, and treat its output (with leading +and trailing whitespace stripped) as your system's hostname. +.sp +When not set (the default, or the empty string), ripgrep will try to +automatically detect your system's hostname. On Unix, this corresponds +to calling \fBgethostname\fP. On Windows, this corresponds to calling +\fBGetComputerNameExW\fP to fetch the system's "physical DNS hostname." +.sp +ripgrep uses your system's hostname for producing hyperlinks. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let path = PathBuf::from(v.unwrap_value()); + args.hostname_bin = + if path.as_os_str().is_empty() { None } else { Some(path) }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_hostname_bin() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.hostname_bin); + + let args = parse_low_raw(["--hostname-bin", "foo"]).unwrap(); + assert_eq!(Some(PathBuf::from("foo")), args.hostname_bin); + + let args = parse_low_raw(["--hostname-bin=foo"]).unwrap(); + assert_eq!(Some(PathBuf::from("foo")), args.hostname_bin); +} + +/// --hyperlink-format +#[derive(Debug)] +struct HyperlinkFormat; + +impl Flag for HyperlinkFormat { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "hyperlink-format" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("FORMAT") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the format of hyperlinks." + } + fn doc_long(&self) -> &'static str { + r#" +Set the format of hyperlinks to use when printing results. Hyperlinks make +certain elements of ripgrep's output, such as file paths, clickable. This +generally only works in terminal emulators that support OSC-8 hyperlinks. For +example, the format \fBfile://{host}{path}\fP will emit an RFC 8089 hyperlink. +To see the format that ripgrep is using, pass the \flag{debug} flag. +.sp +Alternatively, a format string may correspond to one of the following aliases: +\fBdefault\fP, \fBnone\fP, \fBfile\fP, \fBgrep+\fP, \fBkitty\fP, \fBmacvim\fP, +\fBtextmate\fP, \fBvscode\fP, \fBvscode-insiders\fP, \fBvscodium\fP. The +alias will be replaced with a format string that is intended to work for the +corresponding application. +.sp +The following variables are available in the format string: +.sp +.TP 12 +\fB{path}\fP +Required. This is replaced with a path to a matching file. The path is +guaranteed to be absolute and percent encoded such that it is valid to put into +a URI. Note that a path is guaranteed to start with a /. +.TP 12 +\fB{host}\fP +Optional. This is replaced with your system's hostname. On Unix, this +corresponds to calling \fBgethostname\fP. On Windows, this corresponds to +calling \fBGetComputerNameExW\fP to fetch the system's "physical DNS hostname." +Alternatively, if \flag{hostname-bin} was provided, then the hostname returned +from the output of that program will be returned. If no hostname could be +found, then this variable is replaced with the empty string. +.TP 12 +\fB{line}\fP +Optional. If appropriate, this is replaced with the line number of a match. If +no line number is available (for example, if \fB\-\-no\-line\-number\fP was +given), then it is automatically replaced with the value 1. +.TP 12 +\fB{column}\fP +Optional, but requires the presence of \fB{line}\fP. If appropriate, this is +replaced with the column number of a match. If no column number is available +(for example, if \fB\-\-no\-column\fP was given), then it is automatically +replaced with the value 1. +.TP 12 +\fB{wslprefix}\fP +Optional. This is a special value that is set to +\fBwsl$/\fP\fIWSL_DISTRO_NAME\fP, where \fIWSL_DISTRO_NAME\fP corresponds to +the value of the equivalent environment variable. If the system is not Unix +or if the \fIWSL_DISTRO_NAME\fP environment variable is not set, then this is +replaced with the empty string. +.PP +A format string may be empty. An empty format string is equivalent to the +\fBnone\fP alias. In this case, hyperlinks will be disabled. +.sp +At present, ripgrep does not enable hyperlinks by default. Users must opt into +them. If you aren't sure what format to use, try \fBdefault\fP. +.sp +Like colors, when ripgrep detects that stdout is not connected to a tty, then +hyperlinks are automatically disabled, regardless of the value of this flag. +Users can pass \fB\-\-color=always\fP to forcefully emit hyperlinks. +.sp +Note that hyperlinks are only written when a path is also in the output +and colors are enabled. To write hyperlinks without colors, you'll need to +configure ripgrep to not colorize anything without actually disabling all ANSI +escape codes completely: +.sp +.EX + \-\-colors 'path:none' \\ + \-\-colors 'line:none' \\ + \-\-colors 'column:none' \\ + \-\-colors 'match:none' +.EE +.sp +ripgrep works this way because it treats the \flag{color} flag as a proxy for +whether ANSI escape codes should be used at all. This means that environment +variables like \fBNO_COLOR=1\fP and \fBTERM=dumb\fP not only disable colors, +but hyperlinks as well. Similarly, colors and hyperlinks are disabled when +ripgrep is not writing to a tty. (Unless one forces the issue by setting +\fB\-\-color=always\fP.) +.sp +If you're searching a file directly, for example: +.sp +.EX + rg foo path/to/file +.EE +.sp +then hyperlinks will not be emitted since the path given does not appear +in the output. To make the path appear, and thus also a hyperlink, use the +\flag{with-filename} flag. +.sp +For more information on hyperlinks in terminal emulators, see: +https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + let string = convert::str(&v)?; + let format = string.parse().context("invalid hyperlink format")?; + args.hyperlink_format = format; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_hyperlink_format() { + let parseformat = |format: &str| { + format.parse::().unwrap() + }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(parseformat("none"), args.hyperlink_format); + + let args = parse_low_raw(["--hyperlink-format", "default"]).unwrap(); + #[cfg(windows)] + assert_eq!(parseformat("file://{path}"), args.hyperlink_format); + #[cfg(not(windows))] + assert_eq!(parseformat("file://{host}{path}"), args.hyperlink_format); + + let args = parse_low_raw(["--hyperlink-format", "file"]).unwrap(); + assert_eq!(parseformat("file://{host}{path}"), args.hyperlink_format); + + let args = parse_low_raw([ + "--hyperlink-format", + "file", + "--hyperlink-format=grep+", + ]) + .unwrap(); + assert_eq!(parseformat("grep+://{path}:{line}"), args.hyperlink_format); + + let args = + parse_low_raw(["--hyperlink-format", "file://{host}{path}#{line}"]) + .unwrap(); + assert_eq!( + parseformat("file://{host}{path}#{line}"), + args.hyperlink_format + ); + + let result = parse_low_raw(["--hyperlink-format", "file://heythere"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --iglob +#[derive(Debug)] +struct IGlob; + +impl Flag for IGlob { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "iglob" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("GLOB") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Include/exclude paths case insensitively." + } + fn doc_long(&self) -> &'static str { + r" +Include or exclude files and directories for searching that match the given +glob. This always overrides any other ignore logic. Multiple glob flags may +be used. Globbing rules match \fB.gitignore\fP globs. Precede a glob with a +\fB!\fP to exclude it. If multiple globs match a file or directory, the glob +given later in the command line takes precedence. Globs used via this flag are +matched case insensitively. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let glob = convert::string(v.unwrap_value())?; + args.iglobs.push(glob); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_iglob() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.iglobs); + + let args = parse_low_raw(["--iglob", "foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.iglobs); + + let args = parse_low_raw(["--iglob=foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.iglobs); + + let args = parse_low_raw(["--iglob", "-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.iglobs); + + let args = parse_low_raw(["--iglob=-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.iglobs); +} + +/// -i/--ignore-case +#[derive(Debug)] +struct IgnoreCase; + +impl Flag for IgnoreCase { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'i') + } + fn name_long(&self) -> &'static str { + "ignore-case" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Case insensitive search." + } + fn doc_long(&self) -> &'static str { + r#" +When this flag is provided, all patterns will be searched case insensitively. +The case insensitivity rules used by ripgrep's default regex engine conform to +Unicode's "simple" case folding rules. +.sp +This is a global option that applies to all patterns given to ripgrep. +Individual patterns can still be matched case sensitively by using +inline regex flags. For example, \fB(?\-i)abc\fP will match \fBabc\fP +case sensitively even when this flag is used. +.sp +This flag overrides \flag{case-sensitive} and flag{smart-case}. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "flag has no negation"); + args.case = CaseMode::Insensitive; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_ignore_case() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["--ignore-case"]).unwrap(); + assert_eq!(CaseMode::Insensitive, args.case); + + let args = parse_low_raw(["-i"]).unwrap(); + assert_eq!(CaseMode::Insensitive, args.case); + + let args = parse_low_raw(["-i", "-s"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["-s", "-i"]).unwrap(); + assert_eq!(CaseMode::Insensitive, args.case); +} + +/// --ignore-file +#[derive(Debug)] +struct IgnoreFile; + +impl Flag for IgnoreFile { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "ignore-file" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("PATH") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Specify additional ignore files." + } + fn doc_long(&self) -> &'static str { + r" +Specifies a path to one or more \fBgitignore\fP formatted rules files. +These patterns are applied after the patterns found in \fB.gitignore\fP, +\fB.rgignore\fP and \fB.ignore\fP are applied and are matched relative to the +current working directory. Multiple additional ignore files can be specified +by using this flag repeatedly. When specifying multiple ignore files, earlier +files have lower precedence than later files. +.sp +If you are looking for a way to include or exclude files and directories +directly on the command line, then use \flag{glob} instead. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let path = PathBuf::from(v.unwrap_value()); + args.ignore_file.push(path); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_ignore_file() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.ignore_file); + + let args = parse_low_raw(["--ignore-file", "foo"]).unwrap(); + assert_eq!(vec![PathBuf::from("foo")], args.ignore_file); + + let args = parse_low_raw(["--ignore-file", "foo", "--ignore-file", "bar"]) + .unwrap(); + assert_eq!( + vec![PathBuf::from("foo"), PathBuf::from("bar")], + args.ignore_file + ); +} + +/// --ignore-file-case-insensitive +#[derive(Debug)] +struct IgnoreFileCaseInsensitive; + +impl Flag for IgnoreFileCaseInsensitive { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "ignore-file-case-insensitive" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-ignore-file-case-insensitive") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Process ignore files case insensitively." + } + fn doc_long(&self) -> &'static str { + r" +Process ignore files (\fB.gitignore\fP, \fB.ignore\fP, etc.) case +insensitively. Note that this comes with a performance penalty and is most +useful on case insensitive file systems (such as Windows). +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.ignore_file_case_insensitive = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_ignore_file_case_insensitive() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.ignore_file_case_insensitive); + + let args = parse_low_raw(["--ignore-file-case-insensitive"]).unwrap(); + assert_eq!(true, args.ignore_file_case_insensitive); + + let args = parse_low_raw([ + "--ignore-file-case-insensitive", + "--no-ignore-file-case-insensitive", + ]) + .unwrap(); + assert_eq!(false, args.ignore_file_case_insensitive); + + let args = parse_low_raw([ + "--no-ignore-file-case-insensitive", + "--ignore-file-case-insensitive", + ]) + .unwrap(); + assert_eq!(true, args.ignore_file_case_insensitive); +} + +/// --include-zero +#[derive(Debug)] +struct IncludeZero; + +impl Flag for IncludeZero { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "include-zero" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-include-zero") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Include zero matches in summary output." + } + fn doc_long(&self) -> &'static str { + r" +When used with \flag{count} or \flag{count-matches}, this causes ripgrep to +print the number of matches for each file even if there were zero matches. This +is disabled by default but can be enabled to make ripgrep behave more like +grep. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.include_zero = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_include_zero() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.include_zero); + + let args = parse_low_raw(["--include-zero"]).unwrap(); + assert_eq!(true, args.include_zero); + + let args = parse_low_raw(["--include-zero", "--no-include-zero"]).unwrap(); + assert_eq!(false, args.include_zero); +} + +/// -v/--invert-match +#[derive(Debug)] +struct InvertMatch; + +impl Flag for InvertMatch { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'v') + } + fn name_long(&self) -> &'static str { + "invert-match" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-invert-match") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Invert matching." + } + fn doc_long(&self) -> &'static str { + r" +This flag inverts matching. That is, instead of printing lines that match, +ripgrep will print lines that don't match. +.sp +Note that this only inverts line-by-line matching. For example, combining this +flag with \flag{files-with-matches} will emit files that contain any lines +that do not match the patterns given. That's not the same as, for example, +\flag{files-without-match}, which will emit files that do not contain any +matching lines. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.invert_match = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_invert_match() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.invert_match); + + let args = parse_low_raw(["--invert-match"]).unwrap(); + assert_eq!(true, args.invert_match); + + let args = parse_low_raw(["-v"]).unwrap(); + assert_eq!(true, args.invert_match); + + let args = parse_low_raw(["-v", "--no-invert-match"]).unwrap(); + assert_eq!(false, args.invert_match); +} + +/// --json +#[derive(Debug)] +struct JSON; + +impl Flag for JSON { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "json" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-json") + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Show search results in a JSON Lines format." + } + fn doc_long(&self) -> &'static str { + r" +Enable printing results in a JSON Lines format. +.sp +When this flag is provided, ripgrep will emit a sequence of messages, each +encoded as a JSON object, where there are five different message types: +.sp +.TP 12 +\fBbegin\fP +A message that indicates a file is being searched and contains at least one +match. +.TP 12 +\fBend\fP +A message the indicates a file is done being searched. This message also +include summary statistics about the search for a particular file. +.TP 12 +\fBmatch\fP +A message that indicates a match was found. This includes the text and offsets +of the match. +.TP 12 +\fBcontext\fP +A message that indicates a contextual line was found. This includes the text of +the line, along with any match information if the search was inverted. +.TP 12 +\fBsummary\fP +The final message emitted by ripgrep that contains summary statistics about the +search across all files. +.PP +Since file paths or the contents of files are not guaranteed to be valid +UTF-8 and JSON itself must be representable by a Unicode encoding, ripgrep +will emit all data elements as objects with one of two keys: \fBtext\fP or +\fBbytes\fP. \fBtext\fP is a normal JSON string when the data is valid UTF-8 +while \fBbytes\fP is the base64 encoded contents of the data. +.sp +The JSON Lines format is only supported for showing search results. It cannot +be used with other flags that emit other types of output, such as \flag{files}, +\flag{files-with-matches}, \flag{files-without-match}, \flag{count} or +\flag{count-matches}. ripgrep will report an error if any of the aforementioned +flags are used in concert with \flag{json}. +.sp +Other flags that control aspects of the standard output such as +\flag{only-matching}, \flag{heading}, \flag{replace}, \flag{max-columns}, etc., +have no effect when \flag{json} is set. +.sp +A more complete description of the JSON format used can be found here: +\fIhttps://docs.rs/grep-printer/*/grep_printer/struct.JSON.html\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + if v.unwrap_switch() { + args.mode.update(Mode::Search(SearchMode::JSON)); + } else if matches!(args.mode, Mode::Search(SearchMode::JSON)) { + // --no-json only reverts to the default mode if the mode is + // JSON, otherwise it's a no-op. + args.mode.update(Mode::Search(SearchMode::Standard)); + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_json() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--json"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::JSON), args.mode); + + let args = parse_low_raw(["--json", "--no-json"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--json", "--files", "--no-json"]).unwrap(); + assert_eq!(Mode::Files, args.mode); + + let args = parse_low_raw(["--json", "-l", "--no-json"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); +} + +/// --line-buffered +#[derive(Debug)] +struct LineBuffered; + +impl Flag for LineBuffered { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "line-buffered" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-line-buffered") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Force line buffering." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will always use line buffering. That is, whenever a +matching line is found, it will be flushed to stdout immediately. This is +the default when ripgrep's stdout is connected to a terminal, but otherwise, +ripgrep will use block buffering, which is typically faster. This flag forces +ripgrep to use line buffering even if it would otherwise use block buffering. +This is typically useful in shell pipelines, for example: +.sp +.EX + tail -f something.log | rg foo --line-buffered | rg bar +.EE +.sp +This overrides the \flag{block-buffered} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.buffer = if v.unwrap_switch() { + BufferMode::Line + } else { + BufferMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_line_buffered() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BufferMode::Auto, args.buffer); + + let args = parse_low_raw(["--line-buffered"]).unwrap(); + assert_eq!(BufferMode::Line, args.buffer); + + let args = + parse_low_raw(["--line-buffered", "--no-line-buffered"]).unwrap(); + assert_eq!(BufferMode::Auto, args.buffer); + + let args = parse_low_raw(["--line-buffered", "--block-buffered"]).unwrap(); + assert_eq!(BufferMode::Block, args.buffer); +} + +/// -n/--line-number +#[derive(Debug)] +struct LineNumber; + +impl Flag for LineNumber { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'n') + } + fn name_long(&self) -> &'static str { + "line-number" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Show line numbers." + } + fn doc_long(&self) -> &'static str { + r" +Show line numbers (1-based). This is enabled by default stdout is connected to +a tty. +.sp +This flag can be disabled by \flag{no-line-number}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--line-number has no automatic negation"); + args.line_number = Some(true); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_line_number() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.line_number); + + let args = parse_low_raw(["--line-number"]).unwrap(); + assert_eq!(Some(true), args.line_number); + + let args = parse_low_raw(["-n"]).unwrap(); + assert_eq!(Some(true), args.line_number); + + let args = parse_low_raw(["-n", "--no-line-number"]).unwrap(); + assert_eq!(Some(false), args.line_number); +} + +/// -N/--no-line-number +#[derive(Debug)] +struct LineNumberNo; + +impl Flag for LineNumberNo { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'N') + } + fn name_long(&self) -> &'static str { + "no-line-number" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Suppress line numbers." + } + fn doc_long(&self) -> &'static str { + r" +Suppress line numbers. Line numbers are off by default when stdout is not +connected to a tty. +.sp +Line numbers can be forcefully turned on by \flag{line-number}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!( + v.unwrap_switch(), + "--no-line-number has no automatic negation" + ); + args.line_number = Some(false); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_line_number() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.line_number); + + let args = parse_low_raw(["--no-line-number"]).unwrap(); + assert_eq!(Some(false), args.line_number); + + let args = parse_low_raw(["-N"]).unwrap(); + assert_eq!(Some(false), args.line_number); + + let args = parse_low_raw(["-N", "--line-number"]).unwrap(); + assert_eq!(Some(true), args.line_number); +} + +/// -x/--line-regexp +#[derive(Debug)] +struct LineRegexp; + +impl Flag for LineRegexp { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'x') + } + fn name_long(&self) -> &'static str { + "line-regexp" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Show matches surrounded by line boundaries." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will only show matches surrounded by line boundaries. +This is equivalent to surrounding every pattern with \fB^\fP and \fB$\fP. In +other words, this only prints lines where the entire line participates in a +match. +.sp +This overrides the \flag{word-regexp} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--line-regexp has no negation"); + args.boundary = Some(BoundaryMode::Line); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_line_regexp() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.boundary); + + let args = parse_low_raw(["--line-regexp"]).unwrap(); + assert_eq!(Some(BoundaryMode::Line), args.boundary); + + let args = parse_low_raw(["-x"]).unwrap(); + assert_eq!(Some(BoundaryMode::Line), args.boundary); +} + +/// -M/--max-columns +#[derive(Debug)] +struct MaxColumns; + +impl Flag for MaxColumns { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'M') + } + fn name_long(&self) -> &'static str { + "max-columns" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Omit lines longer than this limit." + } + fn doc_long(&self) -> &'static str { + r" +When given, ripgrep will omit lines longer than this limit in bytes. Instead of +printing long lines, only the number of matches in that line is printed. +.sp +When this flag is omitted or is set to \fB0\fP, then it has no effect. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let max = convert::u64(&v.unwrap_value())?; + args.max_columns = if max == 0 { None } else { Some(max) }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_columns() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.max_columns); + + let args = parse_low_raw(["--max-columns", "5"]).unwrap(); + assert_eq!(Some(5), args.max_columns); + + let args = parse_low_raw(["-M", "5"]).unwrap(); + assert_eq!(Some(5), args.max_columns); + + let args = parse_low_raw(["-M5"]).unwrap(); + assert_eq!(Some(5), args.max_columns); + + let args = parse_low_raw(["--max-columns", "5", "-M0"]).unwrap(); + assert_eq!(None, args.max_columns); +} + +/// --max-columns-preview +#[derive(Debug)] +struct MaxColumnsPreview; + +impl Flag for MaxColumnsPreview { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "max-columns-preview" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-max-columns-preview") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Show preview for lines exceeding the limit." + } + fn doc_long(&self) -> &'static str { + r" +Prints a preview for lines exceeding the configured max column limit. +.sp +When the \flag{max-columns} flag is used, ripgrep will by default completely +replace any line that is too long with a message indicating that a matching +line was removed. When this flag is combined with \flag{max-columns}, a preview +of the line (corresponding to the limit size) is shown instead, where the part +of the line exceeding the limit is not shown. +.sp +If the \flag{max-columns} flag is not set, then this has no effect. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.max_columns_preview = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_columns_preview() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.max_columns_preview); + + let args = parse_low_raw(["--max-columns-preview"]).unwrap(); + assert_eq!(true, args.max_columns_preview); + + let args = + parse_low_raw(["--max-columns-preview", "--no-max-columns-preview"]) + .unwrap(); + assert_eq!(false, args.max_columns_preview); +} + +/// -m/--max-count +#[derive(Debug)] +struct MaxCount; + +impl Flag for MaxCount { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'm') + } + fn name_long(&self) -> &'static str { + "max-count" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Limit the number of matching lines." + } + fn doc_long(&self) -> &'static str { + r" +Limit the number of matching lines per file searched to \fINUM\fP. +.sp +Note that \fB0\fP is a legal value but not likely to be useful. When used, +ripgrep won't search anything. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.max_count = Some(convert::u64(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_count() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.max_count); + + let args = parse_low_raw(["--max-count", "5"]).unwrap(); + assert_eq!(Some(5), args.max_count); + + let args = parse_low_raw(["-m", "5"]).unwrap(); + assert_eq!(Some(5), args.max_count); + + let args = parse_low_raw(["-m", "5", "--max-count=10"]).unwrap(); + assert_eq!(Some(10), args.max_count); + let args = parse_low_raw(["-m0"]).unwrap(); + assert_eq!(Some(0), args.max_count); +} + +/// --max-depth +#[derive(Debug)] +struct MaxDepth; + +impl Flag for MaxDepth { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "max-depth" + } + fn aliases(&self) -> &'static [&'static str] { + &["maxdepth"] + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Descend at most NUM directories." + } + fn doc_long(&self) -> &'static str { + r" +This flag limits the depth of directory traversal to \fINUM\fP levels beyond +the paths given. A value of \fB0\fP only searches the explicitly given paths +themselves. +.sp +For example, \fBrg --max-depth 0 \fP\fIdir/\fP is a no-op because \fIdir/\fP +will not be descended into. \fBrg --max-depth 1 \fP\fIdir/\fP will search only +the direct children of \fIdir\fP. +.sp +An alternative spelling for this flag is \fB\-\-maxdepth\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.max_depth = Some(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_depth() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.max_depth); + + let args = parse_low_raw(["--max-depth", "5"]).unwrap(); + assert_eq!(Some(5), args.max_depth); + + let args = parse_low_raw(["--max-depth", "5", "--max-depth=10"]).unwrap(); + assert_eq!(Some(10), args.max_depth); + + let args = parse_low_raw(["--max-depth", "0"]).unwrap(); + assert_eq!(Some(0), args.max_depth); + + let args = parse_low_raw(["--maxdepth", "5"]).unwrap(); + assert_eq!(Some(5), args.max_depth); +} + +/// --max-filesize +#[derive(Debug)] +struct MaxFilesize; + +impl Flag for MaxFilesize { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "max-filesize" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM+SUFFIX?") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Ignore files larger than NUM in size." + } + fn doc_long(&self) -> &'static str { + r" +Ignore files larger than \fINUM\fP in size. This does not apply to directories. +.sp +The input format accepts suffixes of \fBK\fP, \fBM\fP or \fBG\fP which +correspond to kilobytes, megabytes and gigabytes, respectively. If no suffix is +provided the input is treated as bytes. +.sp +Examples: \fB\-\-max-filesize 50K\fP or \fB\-\-max\-filesize 80M\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + args.max_filesize = Some(convert::human_readable_u64(&v)?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_filesize() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.max_filesize); + + let args = parse_low_raw(["--max-filesize", "1024"]).unwrap(); + assert_eq!(Some(1024), args.max_filesize); + + let args = parse_low_raw(["--max-filesize", "1K"]).unwrap(); + assert_eq!(Some(1024), args.max_filesize); + + let args = + parse_low_raw(["--max-filesize", "1K", "--max-filesize=1M"]).unwrap(); + assert_eq!(Some(1024 * 1024), args.max_filesize); +} + +/// --mmap +#[derive(Debug)] +struct Mmap; + +impl Flag for Mmap { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "mmap" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-mmap") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Search with memory maps when possible." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will search using memory maps when possible. This is +enabled by default when ripgrep thinks it will be faster. +.sp +Memory map searching cannot be used in all circumstances. For example, when +searching virtual files or streams likes \fBstdin\fP. In such cases, memory +maps will not be used even when this flag is enabled. +.sp +Note that ripgrep may abort unexpectedly when memory maps are used if it +searches a file that is simultaneously truncated. Users can opt out of this +possibility by disabling memory maps. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.mmap = if v.unwrap_switch() { + MmapMode::AlwaysTryMmap + } else { + MmapMode::Never + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_mmap() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(MmapMode::Auto, args.mmap); + + let args = parse_low_raw(["--mmap"]).unwrap(); + assert_eq!(MmapMode::AlwaysTryMmap, args.mmap); + + let args = parse_low_raw(["--no-mmap"]).unwrap(); + assert_eq!(MmapMode::Never, args.mmap); + + let args = parse_low_raw(["--mmap", "--no-mmap"]).unwrap(); + assert_eq!(MmapMode::Never, args.mmap); + + let args = parse_low_raw(["--no-mmap", "--mmap"]).unwrap(); + assert_eq!(MmapMode::AlwaysTryMmap, args.mmap); +} + +/// -U/--multiline +#[derive(Debug)] +struct Multiline; + +impl Flag for Multiline { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'U') + } + fn name_long(&self) -> &'static str { + "multiline" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-multiline") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Enable searching across multiple lines." + } + fn doc_long(&self) -> &'static str { + r#" +This flag enable searching across multiple lines. +.sp +When multiline mode is enabled, ripgrep will lift the restriction that a +match cannot include a line terminator. For example, when multiline mode +is not enabled (the default), then the regex \fB\\p{any}\fP will match any +Unicode codepoint other than \fB\\n\fP. Similarly, the regex \fB\\n\fP is +explicitly forbidden, and if you try to use it, ripgrep will return an error. +However, when multiline mode is enabled, \fB\\p{any}\fP will match any Unicode +codepoint, including \fB\\n\fP, and regexes like \fB\\n\fP are permitted. +.sp +An important caveat is that multiline mode does not change the match semantics +of \fB.\fP. Namely, in most regex matchers, a \fB.\fP will by default match any +character other than \fB\\n\fP, and this is true in ripgrep as well. In order +to make \fB.\fP match \fB\\n\fP, you must enable the "dot all" flag inside the +regex. For example, both \fB(?s).\fP and \fB(?s:.)\fP have the same semantics, +where \fB.\fP will match any character, including \fB\\n\fP. Alternatively, the +\flag{multiline-dotall} flag may be passed to make the "dot all" behavior the +default. This flag only applies when multiline search is enabled. +.sp +There is no limit on the number of the lines that a single match can span. +.sp +\fBWARNING\fP: Because of how the underlying regex engine works, multiline +searches may be slower than normal line-oriented searches, and they may also +use more memory. In particular, when multiline mode is enabled, ripgrep +requires that each file it searches is laid out contiguously in memory (either +by reading it onto the heap or by memory-mapping it). Things that cannot be +memory-mapped (such as \fBstdin\fP) will be consumed until EOF before searching +can begin. In general, ripgrep will only do these things when necessary. +Specifically, if the \flag{multiline} flag is provided but the regex does +not contain patterns that would match \fB\\n\fP characters, then ripgrep +will automatically avoid reading each file into memory before searching it. +Nevertheless, if you only care about matches spanning at most one line, then it +is always better to disable multiline mode. +.sp +This overrides the \flag{stop-on-nonmatch} flag. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.multiline = v.unwrap_switch(); + if args.multiline { + args.stop_on_nonmatch = false; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_multiline() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.multiline); + + let args = parse_low_raw(["--multiline"]).unwrap(); + assert_eq!(true, args.multiline); + + let args = parse_low_raw(["-U"]).unwrap(); + assert_eq!(true, args.multiline); + + let args = parse_low_raw(["-U", "--no-multiline"]).unwrap(); + assert_eq!(false, args.multiline); +} + +/// --multiline-dotall +#[derive(Debug)] +struct MultilineDotall; + +impl Flag for MultilineDotall { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "multiline-dotall" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-multiline-dotall") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Make '.' match line terminators." + } + fn doc_long(&self) -> &'static str { + r#" +This flag enables "dot all" mode in all regex patterns. This causes \fB.\fP to +match line terminators when multiline searching is enabled. This flag has no +effect if multiline searching isn't enabled with the \flag{multiline} flag. +.sp +Normally, a \fB.\fP will match any character except line terminators. While +this behavior typically isn't relevant for line-oriented matching (since +matches can span at most one line), this can be useful when searching with the +\flag{multiline} flag. By default, multiline mode runs without "dot all" mode +enabled. +.sp +This flag is generally intended to be used in an alias or your ripgrep config +file if you prefer "dot all" semantics by default. Note that regardless of +whether this flag is used, "dot all" semantics can still be controlled via +inline flags in the regex pattern itself, e.g., \fB(?s:.)\fP always enables +"dot all" whereas \fB(?-s:.)\fP always disables "dot all". Moreover, you +can use character classes like \fB\\p{any}\fP to match any Unicode codepoint +regardless of whether "dot all" mode is enabled or not. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.multiline_dotall = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_multiline_dotall() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.multiline_dotall); + + let args = parse_low_raw(["--multiline-dotall"]).unwrap(); + assert_eq!(true, args.multiline_dotall); + + let args = parse_low_raw(["--multiline-dotall", "--no-multiline-dotall"]) + .unwrap(); + assert_eq!(false, args.multiline_dotall); +} + +/// --no-config +#[derive(Debug)] +struct NoConfig; + +impl Flag for NoConfig { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-config" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Never read configuration files." + } + fn doc_long(&self) -> &'static str { + r" +When set, ripgrep will never read configuration files. When this flag is +present, ripgrep will not respect the \fBRIPGREP_CONFIG_PATH\fP environment +variable. +.sp +If ripgrep ever grows a feature to automatically read configuration files in +pre-defined locations, then this flag will also disable that behavior as well. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--no-config has no negation"); + args.no_config = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_config() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_config); + + let args = parse_low_raw(["--no-config"]).unwrap(); + assert_eq!(true, args.no_config); +} + +/// --no-ignore +#[derive(Debug)] +struct NoIgnore; + +impl Flag for NoIgnore { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use ignore files." + } + fn doc_long(&self) -> &'static str { + r" +When set, ignore files such as \fB.gitignore\fP, \fB.ignore\fP and +\fB.rgignore\fP will not be respected. This implies \flag{no-ignore-dot}, +\flag{no-ignore-exclude}, \flag{no-ignore-global}, \flag{no-ignore-parent} and +\flag{no-ignore-vcs}. +.sp +This does not imply \flag{no-ignore-files}, since \flag{ignore-file} is +specified explicitly as a command line argument. +.sp +When given only once, the \flag{unrestricted} flag is identical in +behavior to this flag and can be considered an alias. However, subsequent +\flag{unrestricted} flags have additional effects. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let yes = v.unwrap_switch(); + args.no_ignore_dot = yes; + args.no_ignore_exclude = yes; + args.no_ignore_global = yes; + args.no_ignore_parent = yes; + args.no_ignore_vcs = yes; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_dot); + assert_eq!(false, args.no_ignore_exclude); + assert_eq!(false, args.no_ignore_global); + assert_eq!(false, args.no_ignore_parent); + assert_eq!(false, args.no_ignore_vcs); + + let args = parse_low_raw(["--no-ignore"]).unwrap(); + assert_eq!(true, args.no_ignore_dot); + assert_eq!(true, args.no_ignore_exclude); + assert_eq!(true, args.no_ignore_global); + assert_eq!(true, args.no_ignore_parent); + assert_eq!(true, args.no_ignore_vcs); + + let args = parse_low_raw(["--no-ignore", "--ignore"]).unwrap(); + assert_eq!(false, args.no_ignore_dot); + assert_eq!(false, args.no_ignore_exclude); + assert_eq!(false, args.no_ignore_global); + assert_eq!(false, args.no_ignore_parent); + assert_eq!(false, args.no_ignore_vcs); +} + +/// --no-ignore-dot +#[derive(Debug)] +struct NoIgnoreDot; + +impl Flag for NoIgnoreDot { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-dot" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-dot") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use .ignore or .rgignore files." + } + fn doc_long(&self) -> &'static str { + r" +Don't respect filter rules from \fB.ignore\fP or \fB.rgignore\fP files. +.sp +This does not impact whether ripgrep will ignore files and directories whose +names begin with a dot. For that, see the \flag{hidden} flag. This flag also +does not impact whether filter rules from \fB.gitignore\fP files are respected. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_dot = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_dot() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_dot); + + let args = parse_low_raw(["--no-ignore-dot"]).unwrap(); + assert_eq!(true, args.no_ignore_dot); + + let args = parse_low_raw(["--no-ignore-dot", "--ignore-dot"]).unwrap(); + assert_eq!(false, args.no_ignore_dot); +} + +/// --no-ignore-exclude +#[derive(Debug)] +struct NoIgnoreExclude; + +impl Flag for NoIgnoreExclude { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-exclude" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-exclude") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use local exclusion files." + } + fn doc_long(&self) -> &'static str { + r" +Don't respect filter rules from files that are manually configured for the repository. +For example, this includes \fBgit\fP's \fB.git/info/exclude\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_exclude = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_exclude() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_exclude); + + let args = parse_low_raw(["--no-ignore-exclude"]).unwrap(); + assert_eq!(true, args.no_ignore_exclude); + + let args = + parse_low_raw(["--no-ignore-exclude", "--ignore-exclude"]).unwrap(); + assert_eq!(false, args.no_ignore_exclude); +} + +/// --no-ignore-files +#[derive(Debug)] +struct NoIgnoreFiles; + +impl Flag for NoIgnoreFiles { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-files" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-files") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use --ignore-file arguments." + } + fn doc_long(&self) -> &'static str { + r" +When set, any \flag{ignore-file} flags, even ones that come after this flag, +are ignored. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_files = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_files() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_files); + + let args = parse_low_raw(["--no-ignore-files"]).unwrap(); + assert_eq!(true, args.no_ignore_files); + + let args = parse_low_raw(["--no-ignore-files", "--ignore-files"]).unwrap(); + assert_eq!(false, args.no_ignore_files); +} + +/// --no-ignore-global +#[derive(Debug)] +struct NoIgnoreGlobal; + +impl Flag for NoIgnoreGlobal { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-global" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-global") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use global ignore files." + } + fn doc_long(&self) -> &'static str { + r#" +Don't respect filter rules from ignore files that come from "global" sources +such as \fBgit\fP's \fBcore.excludesFile\fP configuration option (which +defaults to \fB$HOME/.config/git/ignore\fP). +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_global = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_global() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_global); + + let args = parse_low_raw(["--no-ignore-global"]).unwrap(); + assert_eq!(true, args.no_ignore_global); + + let args = + parse_low_raw(["--no-ignore-global", "--ignore-global"]).unwrap(); + assert_eq!(false, args.no_ignore_global); +} + +/// --no-ignore-messages +#[derive(Debug)] +struct NoIgnoreMessages; + +impl Flag for NoIgnoreMessages { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-messages" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-messages") + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Suppress gitignore parse error messages." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is enabled, all error messages related to parsing ignore files +are suppressed. By default, error messages are printed to stderr. In cases +where these errors are expected, this flag can be used to avoid seeing the +noise produced by the messages. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_messages = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_messages() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_messages); + + let args = parse_low_raw(["--no-ignore-messages"]).unwrap(); + assert_eq!(true, args.no_ignore_messages); + + let args = + parse_low_raw(["--no-ignore-messages", "--ignore-messages"]).unwrap(); + assert_eq!(false, args.no_ignore_messages); +} + +/// --no-ignore-parent +#[derive(Debug)] +struct NoIgnoreParent; + +impl Flag for NoIgnoreParent { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-parent" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-parent") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use ignore files in parent directories." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is set, filter rules from ignore files found in parent +directories are not respected. By default, ripgrep will ascend the parent +directories of the current working directory to look for any applicable ignore +files that should be applied. In some cases this may not be desirable. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_parent = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_parent() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_parent); + + let args = parse_low_raw(["--no-ignore-parent"]).unwrap(); + assert_eq!(true, args.no_ignore_parent); + + let args = + parse_low_raw(["--no-ignore-parent", "--ignore-parent"]).unwrap(); + assert_eq!(false, args.no_ignore_parent); +} + +/// --no-ignore-vcs +#[derive(Debug)] +struct NoIgnoreVcs; + +impl Flag for NoIgnoreVcs { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-vcs" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-vcs") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use ignore files from source control." + } + fn doc_long(&self) -> &'static str { + r" +When given, filter rules from source control ignore files (e.g., \fB.gitignore\fP) +are not respected. By default, ripgrep respects \fBgit\fP's ignore rules for +automatic filtering. In some cases, it may not be desirable to respect the +source control's ignore rules and instead only respect rules in \fB.ignore\fP +or \fB.rgignore\fP. +.sp +This flag implies \flag{no-ignore-parent} for source control ignore files as +well. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_vcs = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_vcs() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_vcs); + + let args = parse_low_raw(["--no-ignore-vcs"]).unwrap(); + assert_eq!(true, args.no_ignore_vcs); + + let args = parse_low_raw(["--no-ignore-vcs", "--ignore-vcs"]).unwrap(); + assert_eq!(false, args.no_ignore_vcs); +} + +/// --no-messages +#[derive(Debug)] +struct NoMessages; + +impl Flag for NoMessages { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-messages" + } + fn name_negated(&self) -> Option<&'static str> { + Some("messages") + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Suppress some error messages." + } + fn doc_long(&self) -> &'static str { + r" +This flag suppresses some error messages. Specifically, messages related to +the failed opening and reading of files. Error messages related to the syntax +of the pattern are still shown. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_messages = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_messages() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_messages); + + let args = parse_low_raw(["--no-messages"]).unwrap(); + assert_eq!(true, args.no_messages); + + let args = parse_low_raw(["--no-messages", "--messages"]).unwrap(); + assert_eq!(false, args.no_messages); +} + +/// --no-pcre2-unicode +#[derive(Debug)] +struct NoPcre2Unicode; + +impl Flag for NoPcre2Unicode { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-pcre2-unicode" + } + fn name_negated(&self) -> Option<&'static str> { + Some("pcre2-unicode") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"(DEPRECATED) Disable Unicode mode for PCRE2." + } + fn doc_long(&self) -> &'static str { + r" +DEPRECATED. Use \flag{no-unicode} instead. +.sp +Note that Unicode mode is enabled by default. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_unicode = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_pcre2_unicode() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_unicode); + + let args = parse_low_raw(["--no-pcre2-unicode"]).unwrap(); + assert_eq!(true, args.no_unicode); + + let args = + parse_low_raw(["--no-pcre2-unicode", "--pcre2-unicode"]).unwrap(); + assert_eq!(false, args.no_unicode); +} + +/// --no-require-git +#[derive(Debug)] +struct NoRequireGit; + +impl Flag for NoRequireGit { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-require-git" + } + fn name_negated(&self) -> Option<&'static str> { + Some("require-git") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Use .gitignore outside of git repositories." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is given, source control ignore files such as \fB.gitignore\fP +are respect even if no \fBgit\fP repository is present. +.sp +By default, ripgrep will only respect filter rules from source control ignore +files when ripgrep detects that the search is executed inside a source control +repository. For example, when a \fB.git\fP directory is observed. +.sp +This flag relaxes the default restriction. For example, it might be useful when +the contents of a \fBgit\fP repository are stored or copied somewhere, but +where the repository state is absent. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_require_git = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_require_git() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_require_git); + + let args = parse_low_raw(["--no-require-git"]).unwrap(); + assert_eq!(true, args.no_require_git); + + let args = parse_low_raw(["--no-require-git", "--require-git"]).unwrap(); + assert_eq!(false, args.no_require_git); +} + +/// --no-unicode +#[derive(Debug)] +struct NoUnicode; + +impl Flag for NoUnicode { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-unicode" + } + fn name_negated(&self) -> Option<&'static str> { + Some("unicode") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Disable Unicode mode." + } + fn doc_long(&self) -> &'static str { + r#" +This flag disables Unicode mode for all patterns given to ripgrep. +.sp +By default, ripgrep will enable "Unicode mode" in all of its regexes. This has +a number of consequences: +.sp +.IP \(bu 3n +\fB.\fP will only match valid UTF-8 encoded Unicode scalar values. +.sp +.IP \(bu 3n +Classes like \fB\\w\fP, \fB\\s\fP, \fB\\d\fP are all Unicode aware and much +bigger than their ASCII only versions. +.sp +.IP \(bu 3n +Case insensitive matching will use Unicode case folding. +.sp +.IP \(bu 3n +A large array of classes like \fB\\p{Emoji}\fP are available. (Although the +specific set of classes available varies based on the regex engine. In general, +the default regex engine has more classes available to it.) +.sp +.IP \(bu 3n +Word boundaries (\fB\\b\fP and \fB\\B\fP) use the Unicode definition of a word +character. +.PP +In some cases it can be desirable to turn these things off. This flag will do +exactly that. For example, Unicode mode can sometimes have a negative impact +on performance, especially when things like \fB\\w\fP are used frequently +(including via bounded repetitions like \fB\\w{100}\fP) when only their ASCII +interpretation is needed. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_unicode = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_unicode() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_unicode); + + let args = parse_low_raw(["--no-unicode"]).unwrap(); + assert_eq!(true, args.no_unicode); + + let args = parse_low_raw(["--no-unicode", "--unicode"]).unwrap(); + assert_eq!(false, args.no_unicode); + + let args = parse_low_raw(["--no-unicode", "--pcre2-unicode"]).unwrap(); + assert_eq!(false, args.no_unicode); + + let args = parse_low_raw(["--no-pcre2-unicode", "--unicode"]).unwrap(); + assert_eq!(false, args.no_unicode); +} + +/// -0/--null +#[derive(Debug)] +struct Null; + +impl Flag for Null { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'0') + } + fn name_long(&self) -> &'static str { + "null" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print a NUL byte after file paths." + } + fn doc_long(&self) -> &'static str { + r" +Whenever a file path is printed, follow it with a \fBNUL\fP byte. This includes +printing file paths before matches, and when printing a list of matching files +such as with \flag{count}, \flag{files-with-matches} and \flag{files}. This +option is useful for use with \fBxargs\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--null has no negation"); + args.null = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_null() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.null); + + let args = parse_low_raw(["--null"]).unwrap(); + assert_eq!(true, args.null); + + let args = parse_low_raw(["-0"]).unwrap(); + assert_eq!(true, args.null); +} + +/// --null-data +#[derive(Debug)] +struct NullData; + +impl Flag for NullData { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "null-data" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Use NUL as a line terminator." + } + fn doc_long(&self) -> &'static str { + r" +Enabling this flag causes ripgrep to use \fBNUL\fP as a line terminator instead +of the default of \fP\\n\fP. +.sp +This is useful when searching large binary files that would otherwise have +very long lines if \fB\\n\fP were used as the line terminator. In particular, +ripgrep requires that, at a minimum, each line must fit into memory. Using +\fBNUL\fP instead can be a useful stopgap to keep memory requirements low and +avoid OOM (out of memory) conditions. +.sp +This is also useful for processing NUL delimited data, such as that emitted +when using ripgrep's \flag{null} flag or \fBfind\fP's \fB\-\-print0\fP flag. +.sp +Using this flag implies \flag{text}. It also overrides \flag{crlf}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--null-data has no negation"); + args.crlf = false; + args.null_data = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_null_data() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.null_data); + + let args = parse_low_raw(["--null-data"]).unwrap(); + assert_eq!(true, args.null_data); + + let args = parse_low_raw(["--null-data", "--crlf"]).unwrap(); + assert_eq!(false, args.null_data); + assert_eq!(true, args.crlf); + + let args = parse_low_raw(["--crlf", "--null-data"]).unwrap(); + assert_eq!(true, args.null_data); + assert_eq!(false, args.crlf); + + let args = parse_low_raw(["--null-data", "--no-crlf"]).unwrap(); + assert_eq!(true, args.null_data); + assert_eq!(false, args.crlf); +} + +/// --one-file-system +#[derive(Debug)] +struct OneFileSystem; + +impl Flag for OneFileSystem { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "one-file-system" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-one-file-system") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Skip directories on other file systems." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will not cross file system boundaries relative to where +the search started from. +.sp +Note that this applies to each path argument given to ripgrep. For example, in +the command +.sp +.EX + rg \-\-one\-file\-system /foo/bar /quux/baz +.EE +.sp +ripgrep will search both \fI/foo/bar\fP and \fI/quux/baz\fP even if they are +on different file systems, but will not cross a file system boundary when +traversing each path's directory tree. +.sp +This is similar to \fBfind\fP's \fB\-xdev\fP or \fB\-mount\fP flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.one_file_system = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_one_file_system() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.one_file_system); + + let args = parse_low_raw(["--one-file-system"]).unwrap(); + assert_eq!(true, args.one_file_system); + + let args = + parse_low_raw(["--one-file-system", "--no-one-file-system"]).unwrap(); + assert_eq!(false, args.one_file_system); +} + +/// -o/--only-matching +#[derive(Debug)] +struct OnlyMatching; + +impl Flag for OnlyMatching { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'o') + } + fn name_long(&self) -> &'static str { + "only-matching" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print only matched parts of a line." + } + fn doc_long(&self) -> &'static str { + r" +Print only the matched (non-empty) parts of a matching line, with each such +part on a separate output line. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--only-matching does not have a negation"); + args.only_matching = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_only_matching() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.only_matching); + + let args = parse_low_raw(["--only-matching"]).unwrap(); + assert_eq!(true, args.only_matching); + + let args = parse_low_raw(["-o"]).unwrap(); + assert_eq!(true, args.only_matching); +} + +/// --path-separator +#[derive(Debug)] +struct PathSeparator; + +impl Flag for PathSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "path-separator" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the path separator for printing paths." + } + fn doc_long(&self) -> &'static str { + r" +Set the path separator to use when printing file paths. This defaults to your +platform's path separator, which is \fB/\fP on Unix and \fB\\\fP on Windows. +This flag is intended for overriding the default when the environment demands +it (e.g., cygwin). A path separator is limited to a single byte. +.sp +Setting this flag to an empty string reverts it to its default behavior. That +is, the path separator is automatically chosen based on the environment. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let s = convert::string(v.unwrap_value())?; + let raw = Vec::unescape_bytes(&s); + args.path_separator = if raw.is_empty() { + None + } else if raw.len() == 1 { + Some(raw[0]) + } else { + anyhow::bail!( + "A path separator must be exactly one byte, but \ + the given separator is {len} bytes: {sep}\n\ + In some shells on Windows '/' is automatically \ + expanded. Use '//' instead.", + len = raw.len(), + sep = s, + ) + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_path_separator() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.path_separator); + + let args = parse_low_raw(["--path-separator", "/"]).unwrap(); + assert_eq!(Some(b'/'), args.path_separator); + + let args = parse_low_raw(["--path-separator", r"\"]).unwrap(); + assert_eq!(Some(b'\\'), args.path_separator); + + let args = parse_low_raw(["--path-separator", r"\x00"]).unwrap(); + assert_eq!(Some(0), args.path_separator); + + let args = parse_low_raw(["--path-separator", r"\0"]).unwrap(); + assert_eq!(Some(0), args.path_separator); + + let args = parse_low_raw(["--path-separator", "\x00"]).unwrap(); + assert_eq!(Some(0), args.path_separator); + + let args = parse_low_raw(["--path-separator", "\0"]).unwrap(); + assert_eq!(Some(0), args.path_separator); + + let args = + parse_low_raw(["--path-separator", r"\x00", "--path-separator=/"]) + .unwrap(); + assert_eq!(Some(b'/'), args.path_separator); + + let result = parse_low_raw(["--path-separator", "foo"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse_low_raw(["--path-separator", r"\\x00"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --passthru +#[derive(Debug)] +struct Passthru; + +impl Flag for Passthru { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "passthru" + } + fn aliases(&self) -> &'static [&'static str] { + &["passthrough"] + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print both matching and non-matching lines." + } + fn doc_long(&self) -> &'static str { + r#" +Print both matching and non-matching lines. +.sp +Another way to achieve a similar effect is by modifying your pattern to match +the empty string. For example, if you are searching using \fBrg\fP \fIfoo\fP, +then using \fBrg\fP \fB'^|\fP\fIfoo\fP\fB'\fP instead will emit every line in +every file searched, but only occurrences of \fIfoo\fP will be highlighted. +This flag enables the same behavior without needing to modify the pattern. +.sp +An alternative spelling for this flag is \fB\-\-passthrough\fP. +.sp +This overrides the \flag{context}, \flag{after-context} and +\flag{before-context} flags. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--passthru has no negation"); + args.context = ContextMode::Passthru; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_passthru() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse_low_raw(["--passthru"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); + + let args = parse_low_raw(["--passthrough"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); +} + +/// -P/--pcre2 +#[derive(Debug)] +struct PCRE2; + +impl Flag for PCRE2 { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'P') + } + fn name_long(&self) -> &'static str { + "pcre2" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-pcre2") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Enable PCRE2 matching." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is present, ripgrep will use the PCRE2 regex engine instead of +its default regex engine. +.sp +This is generally useful when you want to use features such as look-around +or backreferences. +.sp +Using this flag is the same as passing \fB\-\-engine=pcre2\fP. Users may +instead elect to use \fB\-\-engine=auto\fP to ask ripgrep to automatically +select the right regex engine based on the patterns given. This flag and the +\flag{engine} flag override one another. +.sp +Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in +your build of ripgrep, then using this flag will result in ripgrep printing +an error message and exiting. PCRE2 may also have worse user experience in +some cases, since it has fewer introspection APIs than ripgrep's default +regex engine. For example, if you use a \fB\\n\fP in a PCRE2 regex without +the \flag{multiline} flag, then ripgrep will silently fail to match anything +instead of reporting an error immediately (like it does with the default regex +engine). +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.engine = if v.unwrap_switch() { + EngineChoice::PCRE2 + } else { + EngineChoice::Default + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pcre2() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["--pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse_low_raw(["-P"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse_low_raw(["-P", "--no-pcre2"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["--engine=auto", "-P", "--no-pcre2"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["-P", "--engine=auto"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); +} + +/// --pcre2-version +#[derive(Debug)] +struct PCRE2Version; + +impl Flag for PCRE2Version { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "pcre2-version" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Print the version of PCRE2 that ripgrep uses." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is present, ripgrep will print the version of PCRE2 in use, +along with other information, and then exit. If PCRE2 is not available, then +ripgrep will print an error message and exit with an error code. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--pcre2-version has no negation"); + args.special = Some(SpecialMode::VersionPCRE2); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pcre2_version() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.special); + + let args = parse_low_raw(["--pcre2-version"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionPCRE2), args.special); +} + +/// --pre +#[derive(Debug)] +struct Pre; + +impl Flag for Pre { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "pre" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-pre") + } + fn doc_variable(&self) -> Option<&'static str> { + Some("COMMAND") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"Search output of COMMAND for each PATH." + } + fn doc_long(&self) -> &'static str { + r#" +For each input \fIPATH\fP, this flag causes ripgrep to search the standard +output of \fICOMMAND\fP \fIPATH\fP instead of the contents of \fIPATH\fP. This +option expects the \fICOMMAND\fP program to either be an absolute path or to +be available in your \fBPATH\fP. Either an empty string \fICOMMAND\fP or the +\fB\-\-no\-pre\fP flag will disable this behavior. +.sp +.TP 12 +\fBWARNING\fP +When this flag is set, ripgrep will unconditionally spawn a process for every +file that is searched. Therefore, this can incur an unnecessarily large +performance penalty if you don't otherwise need the flexibility offered by this +flag. One possible mitigation to this is to use the \flag{pre-glob} flag to +limit which files a preprocessor is run with. +.PP +A preprocessor is not run when ripgrep is searching stdin. +.sp +When searching over sets of files that may require one of several +preprocessors, \fICOMMAND\fP should be a wrapper program which first classifies +\fIPATH\fP based on magic numbers/content or based on the \fIPATH\fP name and +then dispatches to an appropriate preprocessor. Each \fICOMMAND\fP also has its +standard input connected to \fIPATH\fP for convenience. +.sp +For example, a shell script for \fICOMMAND\fP might look like: +.sp +.EX + case "$1" in + *.pdf) + exec pdftotext "$1" - + ;; + *) + case $(file "$1") in + *Zstandard*) + exec pzstd -cdq + ;; + *) + exec cat + ;; + esac + ;; + esac +.EE +.sp +The above script uses \fBpdftotext\fP to convert a PDF file to plain text. For +all other files, the script uses the \fBfile\fP utility to sniff the type of +the file based on its contents. If it is a compressed file in the Zstandard +format, then \fBpzstd\fP is used to decompress the contents to stdout. +.sp +This overrides the \flag{search-zip} flag. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let path = match v { + FlagValue::Value(v) => PathBuf::from(v), + FlagValue::Switch(yes) => { + assert!(!yes, "there is no affirmative switch for --pre"); + args.pre = None; + return Ok(()); + } + }; + args.pre = if path.as_os_str().is_empty() { None } else { Some(path) }; + if args.pre.is_some() { + args.search_zip = false; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pre() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.pre); + + let args = parse_low_raw(["--pre", "foo/bar"]).unwrap(); + assert_eq!(Some(PathBuf::from("foo/bar")), args.pre); + + let args = parse_low_raw(["--pre", ""]).unwrap(); + assert_eq!(None, args.pre); + + let args = parse_low_raw(["--pre", "foo/bar", "--pre", ""]).unwrap(); + assert_eq!(None, args.pre); + + let args = parse_low_raw(["--pre", "foo/bar", "--pre="]).unwrap(); + assert_eq!(None, args.pre); + + let args = parse_low_raw(["--pre", "foo/bar", "--no-pre"]).unwrap(); + assert_eq!(None, args.pre); +} + +/// --pre-glob +#[derive(Debug)] +struct PreGlob; + +impl Flag for PreGlob { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "pre-glob" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("GLOB") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"Include or exclude files from a preprocessor." + } + fn doc_long(&self) -> &'static str { + r#" +This flag works in conjunction with the \flag{pre} flag. Namely, when one or +more \flag{pre-glob} flags are given, then only files that match the given set +of globs will be handed to the command specified by the \flag{pre} flag. Any +non-matching files will be searched without using the preprocessor command. +.sp +This flag is useful when searching many files with the \flag{pre} flag. +Namely, it provides the ability to avoid process overhead for files that +don't need preprocessing. For example, given the following shell script, +\fIpre-pdftotext\fP: +.sp +.EX + #!/bin/sh + pdftotext "$1" - +.EE +.sp +then it is possible to use \fB\-\-pre\fP \fIpre-pdftotext\fP \fB--pre-glob +'\fP\fI*.pdf\fP\fB'\fP to make it so ripgrep only executes the +\fIpre-pdftotext\fP command on files with a \fI.pdf\fP extension. +.sp +Multiple \flag{pre-glob} flags may be used. Globbing rules match +\fBgitignore\fP globs. Precede a glob with a \fB!\fP to exclude it. +.sp +This flag has no effect if the \flag{pre} flag is not used. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let glob = convert::string(v.unwrap_value())?; + args.pre_glob.push(glob); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pre_glob() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.pre_glob); + + let args = parse_low_raw(["--pre-glob", "*.pdf"]).unwrap(); + assert_eq!(vec!["*.pdf".to_string()], args.pre_glob); + + let args = + parse_low_raw(["--pre-glob", "*.pdf", "--pre-glob=foo"]).unwrap(); + assert_eq!(vec!["*.pdf".to_string(), "foo".to_string()], args.pre_glob); +} + +/// -p/--pretty +#[derive(Debug)] +struct Pretty; + +impl Flag for Pretty { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'p') + } + fn name_long(&self) -> &'static str { + "pretty" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Alias for colors, headings and line numbers." + } + fn doc_long(&self) -> &'static str { + r" +This is a convenience alias for \fB\-\-color=always \-\-heading +\-\-line\-number\fP. This flag is useful when you still want pretty output even +if you're piping ripgrep to another program or file. For example: \fBrg -p +\fP\fIfoo\fP \fB| less -R\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--pretty has no negation"); + args.color = ColorChoice::Always; + args.heading = Some(true); + args.line_number = Some(true); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pretty() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ColorChoice::Auto, args.color); + assert_eq!(None, args.heading); + assert_eq!(None, args.line_number); + + let args = parse_low_raw(["--pretty"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + assert_eq!(Some(true), args.heading); + assert_eq!(Some(true), args.line_number); + + let args = parse_low_raw(["-p"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + assert_eq!(Some(true), args.heading); + assert_eq!(Some(true), args.line_number); +} + +/// -q/--quiet +#[derive(Debug)] +struct Quiet; + +impl Flag for Quiet { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'q') + } + fn name_long(&self) -> &'static str { + "quiet" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Do not print anything to stdout." + } + fn doc_long(&self) -> &'static str { + r" +Do not print anything to stdout. If a match is found in a file, then ripgrep +will stop searching. This is useful when ripgrep is used only for its exit code +(which will be an error code if no matches are found). +.sp +When \flag{files} is used, ripgrep will stop finding files after finding the +first file that does not match any ignore rules. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--quiet has no negation"); + args.quiet = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_quiet() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.quiet); + + let args = parse_low_raw(["--quiet"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q"]).unwrap(); + assert_eq!(true, args.quiet); + + // flags like -l and --json cannot override -q, regardless of order + let args = parse_low_raw(["-q", "--json"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q", "--files-with-matches"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q", "--files-without-match"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q", "--count"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q", "--count-matches"]).unwrap(); + assert_eq!(true, args.quiet); +} + +/// --regex-size-limit +#[derive(Debug)] +struct RegexSizeLimit; + +impl Flag for RegexSizeLimit { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "regex-size-limit" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM+SUFFIX?") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"The size limit of the compiled regex." + } + fn doc_long(&self) -> &'static str { + r" +The size limit of the compiled regex, where the compiled regex generally +corresponds to a single object in memory that can match all of the patterns +provided to ripgrep. The default limit is generous enough that most reasonable +patterns (or even a small number of them) should fit. +.sp +This useful to change when you explicitly want to let ripgrep spend potentially +much more time and/or memory building a regex matcher. +.sp +The input format accepts suffixes of \fBK\fP, \fBM\fP or \fBG\fP which +correspond to kilobytes, megabytes and gigabytes, respectively. If no suffix is +provided the input is treated as bytes. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + args.regex_size_limit = Some(convert::human_readable_usize(&v)?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_regex_size_limit() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.regex_size_limit); + + #[cfg(target_pointer_width = "64")] + { + let args = parse_low_raw(["--regex-size-limit", "9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.regex_size_limit); + + let args = parse_low_raw(["--regex-size-limit=9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.regex_size_limit); + + let args = + parse_low_raw(["--regex-size-limit=9G", "--regex-size-limit=0"]) + .unwrap(); + assert_eq!(Some(0), args.regex_size_limit); + } + + let args = parse_low_raw(["--regex-size-limit=0K"]).unwrap(); + assert_eq!(Some(0), args.regex_size_limit); + + let args = parse_low_raw(["--regex-size-limit=0M"]).unwrap(); + assert_eq!(Some(0), args.regex_size_limit); + + let args = parse_low_raw(["--regex-size-limit=0G"]).unwrap(); + assert_eq!(Some(0), args.regex_size_limit); + + let result = + parse_low_raw(["--regex-size-limit", "9999999999999999999999"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse_low_raw(["--regex-size-limit", "9999999999999999G"]); + assert!(result.is_err(), "{result:?}"); +} + +/// -e/--regexp +#[derive(Debug)] +struct Regexp; + +impl Flag for Regexp { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'e') + } + fn name_long(&self) -> &'static str { + "regexp" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("PATTERN") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"A pattern to search for." + } + fn doc_long(&self) -> &'static str { + r" +A pattern to search for. This option can be provided multiple times, where +all patterns given are searched, in addition to any patterns provided by +\flag{file}. Lines matching at least one of the provided patterns are printed. +This flag can also be used when searching for patterns that start with a dash. +.sp +For example, to search for the literal \fB\-foo\fP: +.sp +.EX + rg \-e \-foo +.EE +.sp +You can also use the special \fB\-\-\fP delimiter to indicate that no more +flags will be provided. Namely, the following is equivalent to the above: +.sp +.EX + rg \-\- \-foo +.EE +.sp +When \flag{file} or \flag{regexp} is used, then ripgrep treats all positional +arguments as files or directories to search. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let regexp = convert::string(v.unwrap_value())?; + args.patterns.push(PatternSource::Regexp(regexp)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_regexp() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.patterns); + + let args = parse_low_raw(["--regexp", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse_low_raw(["--regexp=foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse_low_raw(["-e", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse_low_raw(["-efoo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse_low_raw(["--regexp", "-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse_low_raw(["--regexp=-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse_low_raw(["-e", "-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse_low_raw(["-e-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse_low_raw(["--regexp=foo", "--regexp", "bar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::Regexp("bar".to_string()) + ], + args.patterns + ); + + // While we support invalid UTF-8 arguments in general, patterns must be + // valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let bytes = &[b'A', 0xFF, b'Z'][..]; + let result = parse_low_raw([ + OsStr::from_bytes(b"-e"), + OsStr::from_bytes(bytes), + ]); + assert!(result.is_err(), "{result:?}"); + } + + // Check that combining -e/--regexp and -f/--file works as expected. + let args = parse_low_raw(["-efoo", "-fbar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::File(PathBuf::from("bar")) + ], + args.patterns + ); + + let args = parse_low_raw(["-efoo", "-fbar", "-equux"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::File(PathBuf::from("bar")), + PatternSource::Regexp("quux".to_string()), + ], + args.patterns + ); +} + +/// -r/--replace +#[derive(Debug)] +struct Replace; + +impl Flag for Replace { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'r') + } + fn name_long(&self) -> &'static str { + "replace" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("REPLACEMENT") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Replace matches with the given text." + } + fn doc_long(&self) -> &'static str { + r#" +Replaces every match with the text given when printing results. Neither this +flag nor any other ripgrep flag will modify your files. +.sp +Capture group indices (e.g., \fB$\fP\fI5\fP) and names (e.g., \fB$\fP\fIfoo\fP) +are supported in the replacement string. Capture group indices are numbered +based on the position of the opening parenthesis of the group, where the +leftmost such group is \fB$\fP\fI1\fP. The special \fB$\fP\fI0\fP group +corresponds to the entire match. +.sp +The name of a group is formed by taking the longest string of letters, numbers +and underscores (i.e. \fB[_0-9A-Za-z]\fP) after the \fB$\fP. For example, +\fB$\fP\fI1a\fP will be replaced with the group named \fI1a\fP, not the +group at index \fI1\fP. If the group's name contains characters that aren't +letters, numbers or underscores, or you want to immediately follow the group +with another string, the name should be put inside braces. For example, +\fB${\fP\fI1\fP\fB}\fP\fIa\fP will take the content of the group at index +\fI1\fP and append \fIa\fP to the end of it. +.sp +If an index or name does not refer to a valid capture group, it will be +replaced with an empty string. +.sp +In shells such as Bash and zsh, you should wrap the pattern in single quotes +instead of double quotes. Otherwise, capture group indices will be replaced by +expanded shell variables which will most likely be empty. +.sp +To write a literal \fB$\fP, use \fB$$\fP. +.sp +Note that the replacement by default replaces each match, and not the entire +line. To replace the entire line, you should match the entire line. +.sp +This flag can be used with the \flag{only-matching} flag. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.replace = Some(convert::string(v.unwrap_value())?.into()); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_replace() { + use bstr::BString; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.replace); + + let args = parse_low_raw(["--replace", "foo"]).unwrap(); + assert_eq!(Some(BString::from("foo")), args.replace); + + let args = parse_low_raw(["--replace", "-foo"]).unwrap(); + assert_eq!(Some(BString::from("-foo")), args.replace); + + let args = parse_low_raw(["-r", "foo"]).unwrap(); + assert_eq!(Some(BString::from("foo")), args.replace); + + let args = parse_low_raw(["-r", "foo", "-rbar"]).unwrap(); + assert_eq!(Some(BString::from("bar")), args.replace); + + let args = parse_low_raw(["-r", "foo", "-r", ""]).unwrap(); + assert_eq!(Some(BString::from("")), args.replace); +} + +/// -z/--search-zip +#[derive(Debug)] +struct SearchZip; + +impl Flag for SearchZip { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'z') + } + fn name_long(&self) -> &'static str { + "search-zip" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-search-zip") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"Search in compressed files." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to search in compressed files. Currently gzip, +bzip2, xz, LZ4, LZMA, Brotli and Zstd files are supported. This option expects +the decompression binaries (such as \fBgzip\fP) to be available in your +\fBPATH\fP. +.sp +Note that this flag does not make ripgrep search archive formats as directory +trees. It only makes ripgrep detect compressed files and then decompress them +before searching their contents as it would any other file. +.sp +This overrides the \flag{pre} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.search_zip = if v.unwrap_switch() { + args.pre = None; + true + } else { + false + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_search_zip() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.search_zip); + + let args = parse_low_raw(["--search-zip"]).unwrap(); + assert_eq!(true, args.search_zip); + + let args = parse_low_raw(["-z"]).unwrap(); + assert_eq!(true, args.search_zip); + + let args = parse_low_raw(["-z", "--no-search-zip"]).unwrap(); + assert_eq!(false, args.search_zip); + + let args = parse_low_raw(["--pre=foo", "--no-search-zip"]).unwrap(); + assert_eq!(Some(PathBuf::from("foo")), args.pre); + assert_eq!(false, args.search_zip); + + let args = parse_low_raw(["--pre=foo", "--search-zip"]).unwrap(); + assert_eq!(None, args.pre); + assert_eq!(true, args.search_zip); + + let args = parse_low_raw(["--pre=foo", "-z", "--no-search-zip"]).unwrap(); + assert_eq!(None, args.pre); + assert_eq!(false, args.search_zip); +} + +/// -S/--smart-case +#[derive(Debug)] +struct SmartCase; + +impl Flag for SmartCase { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'S') + } + fn name_long(&self) -> &'static str { + "smart-case" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Smart case search." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to searches case insensitively if the pattern is +all lowercase. Otherwise, ripgrep will search case sensitively. +.sp +A pattern is considered all lowercase if both of the following rules hold: +.sp +.IP \(bu 3n +First, the pattern contains at least one literal character. For example, +\fBa\\w\fP contains a literal (\fBa\fP) but just \fB\\w\fP does not. +.sp +.IP \(bu 3n +Second, of the literals in the pattern, none of them are considered to be +uppercase according to Unicode. For example, \fBfoo\\pL\fP has no uppercase +literals but \fBFoo\\pL\fP does. +.PP +This overrides the \flag{case-sensitive} and \flag{ignore-case} flags. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--smart-case flag has no negation"); + args.case = CaseMode::Smart; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_smart_case() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["--smart-case"]).unwrap(); + assert_eq!(CaseMode::Smart, args.case); + + let args = parse_low_raw(["-S"]).unwrap(); + assert_eq!(CaseMode::Smart, args.case); + + let args = parse_low_raw(["-S", "-s"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["-S", "-i"]).unwrap(); + assert_eq!(CaseMode::Insensitive, args.case); + + let args = parse_low_raw(["-s", "-S"]).unwrap(); + assert_eq!(CaseMode::Smart, args.case); + + let args = parse_low_raw(["-i", "-S"]).unwrap(); + assert_eq!(CaseMode::Smart, args.case); +} + +/// --sort-files +#[derive(Debug)] +struct SortFiles; + +impl Flag for SortFiles { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "sort-files" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-sort-files") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"(DEPRECATED) Sort results by file path." + } + fn doc_long(&self) -> &'static str { + r" +DEPRECATED. Use \fB\-\-sort=path\fP instead. +.sp +This flag instructs ripgrep to sort search results by file path +lexicographically in ascending order. Note that this currently disables all +parallelism and runs search in a single thread. +.sp +This flag overrides \flag{sort} and \flag{sortr}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.sort = if v.unwrap_switch() { + Some(SortMode { reverse: false, kind: SortModeKind::Path }) + } else { + None + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_sort_files() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort-files"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sort-files", "--no-sort-files"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort", "created", "--sort-files"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sort-files", "--sort", "created"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Created }), + args.sort + ); + + let args = parse_low_raw(["--sortr", "created", "--sort-files"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sort-files", "--sortr", "created"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: true, kind: SortModeKind::Created }), + args.sort + ); + + let args = parse_low_raw(["--sort=path", "--no-sort-files"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sortr=path", "--no-sort-files"]).unwrap(); + assert_eq!(None, args.sort); +} + +/// --sort +#[derive(Debug)] +struct Sort; + +impl Flag for Sort { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "sort" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SORTBY") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Sort results in ascending order." + } + fn doc_long(&self) -> &'static str { + r" +This flag enables sorting of results in ascending order. The possible values +for this flag are: +.sp +.IP 12 +\fBnone\fP +(Default) Do not sort results. Fastest. Can be multi-threaded. +.TP 12 +\fBpath\fP +Sort by file path. Always single-threaded. +.TP 12 +\fBmodified\fP +Sort by the last modified time on a file. Always single-threaded. +.TP 12 +\fBaccessed\fP +Sort by the last accessed time on a file. Always single-threaded. +.TP 12 +\fBcreated\fP +Sort by the creation time on a file. Always single-threaded. +.PP +If the chosen (manually or by-default) sorting criteria isn't available on your +system (for example, creation time is not available on ext4 file systems), then +ripgrep will attempt to detect this, print an error and exit without searching. +.sp +To sort results in reverse or descending order, use the \flag{sortr} flag. Also, +this flag overrides \flag{sortr}. +.sp +Note that sorting results currently always forces ripgrep to abandon +parallelism and run in a single thread. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["none", "path", "modified", "accessed", "created"] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let kind = match convert::str(&v.unwrap_value())? { + "none" => { + args.sort = None; + return Ok(()); + } + "path" => SortModeKind::Path, + "modified" => SortModeKind::LastModified, + "accessed" => SortModeKind::LastAccessed, + "created" => SortModeKind::Created, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + args.sort = Some(SortMode { reverse: false, kind }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_sort() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort", "path"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sort", "path", "--sort=created"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Created }), + args.sort + ); + + let args = parse_low_raw(["--sort=none"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort", "path", "--sort=none"]).unwrap(); + assert_eq!(None, args.sort); +} + +/// --sortr +#[derive(Debug)] +struct Sortr; + +impl Flag for Sortr { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "sortr" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SORTBY") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Sort results in descending order." + } + fn doc_long(&self) -> &'static str { + r" +This flag enables sorting of results in descending order. The possible values +for this flag are: +.sp +.IP 12 +\fBnone\fP +(Default) Do not sort results. Fastest. Can be multi-threaded. +.TP 12 +\fBpath\fP +Sort by file path. Always single-threaded. +.TP 12 +\fBmodified\fP +Sort by the last modified time on a file. Always single-threaded. +.TP 12 +\fBaccessed\fP +Sort by the last accessed time on a file. Always single-threaded. +.TP 12 +\fBcreated\fP +Sort by the creation time on a file. Always single-threaded. +.PP +If the chosen (manually or by-default) sorting criteria isn't available on your +system (for example, creation time is not available on ext4 file systems), then +ripgrep will attempt to detect this, print an error and exit without searching. +.sp +To sort results in ascending order, use the \flag{sort} flag. Also, this flag +overrides \flag{sort}. +.sp +Note that sorting results currently always forces ripgrep to abandon +parallelism and run in a single thread. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["none", "path", "modified", "accessed", "created"] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let kind = match convert::str(&v.unwrap_value())? { + "none" => { + args.sort = None; + return Ok(()); + } + "path" => SortModeKind::Path, + "modified" => SortModeKind::LastModified, + "accessed" => SortModeKind::LastAccessed, + "created" => SortModeKind::Created, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + args.sort = Some(SortMode { reverse: true, kind }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_sortr() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sortr", "path"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: true, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sortr", "path", "--sortr=created"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: true, kind: SortModeKind::Created }), + args.sort + ); + + let args = parse_low_raw(["--sortr=none"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sortr", "path", "--sortr=none"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort=path", "--sortr=path"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: true, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sortr=path", "--sort=path"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); +} + +/// --stats +#[derive(Debug)] +struct Stats; + +impl Flag for Stats { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "stats" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-stats") + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Print statistics about the search." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will print aggregate statistics about the search. When +this flag is present, ripgrep will print at least the following stats to +stdout at the end of the search: number of matched lines, number of files with +matches, number of files searched, and the time taken for the entire search to +complete. +.sp +This set of aggregate statistics may expand over time. +.sp +Note that this flag has no effect if \flag{files}, \flag{files-with-matches} or +\flag{files-without-match} is passed. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.stats = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_stats() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.stats); + + let args = parse_low_raw(["--stats"]).unwrap(); + assert_eq!(true, args.stats); + + let args = parse_low_raw(["--stats", "--no-stats"]).unwrap(); + assert_eq!(false, args.stats); +} + +/// --stop-on-nonmatch +#[derive(Debug)] +struct StopOnNonmatch; + +impl Flag for StopOnNonmatch { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "stop-on-nonmatch" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Stop searching after a non-match." + } + fn doc_long(&self) -> &'static str { + r" +Enabling this option will cause ripgrep to stop reading a file once it +encounters a non-matching line after it has encountered a matching line. +This is useful if it is expected that all matches in a given file will be on +sequential lines, for example due to the lines being sorted. +.sp +This overrides the \flag{multiline} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--stop-on-nonmatch has no negation"); + args.stop_on_nonmatch = true; + args.multiline = false; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_stop_on_nonmatch() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.stop_on_nonmatch); + + let args = parse_low_raw(["--stop-on-nonmatch"]).unwrap(); + assert_eq!(true, args.stop_on_nonmatch); + + let args = parse_low_raw(["--stop-on-nonmatch", "-U"]).unwrap(); + assert_eq!(true, args.multiline); + assert_eq!(false, args.stop_on_nonmatch); + + let args = parse_low_raw(["-U", "--stop-on-nonmatch"]).unwrap(); + assert_eq!(false, args.multiline); + assert_eq!(true, args.stop_on_nonmatch); + + let args = + parse_low_raw(["--stop-on-nonmatch", "--no-multiline"]).unwrap(); + assert_eq!(false, args.multiline); + assert_eq!(true, args.stop_on_nonmatch); +} + +/// -a/--text +#[derive(Debug)] +struct Text; + +impl Flag for Text { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'a') + } + fn name_long(&self) -> &'static str { + "text" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-text") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Search binary files as if they were text." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to search binary files as if they were text. When +this flag is present, ripgrep's binary file detection is disabled. This means +that when a binary file is searched, its contents may be printed if there is +a match. This may cause escape codes to be printed that alter the behavior of +your terminal. +.sp +When binary file detection is enabled, it is imperfect. In general, it uses +a simple heuristic. If a \fBNUL\fP byte is seen during search, then the file +is considered binary and searching stops (unless this flag is present). +Alternatively, if the \flag{binary} flag is used, then ripgrep will only quit +when it sees a \fBNUL\fP byte after it sees a match (or searches the entire +file). +.sp +This flag overrides the \flag{binary} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.binary = if v.unwrap_switch() { + BinaryMode::AsText + } else { + BinaryMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_text() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--text"]).unwrap(); + assert_eq!(BinaryMode::AsText, args.binary); + + let args = parse_low_raw(["-a"]).unwrap(); + assert_eq!(BinaryMode::AsText, args.binary); + + let args = parse_low_raw(["-a", "--no-text"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["-a", "--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse_low_raw(["--binary", "-a"]).unwrap(); + assert_eq!(BinaryMode::AsText, args.binary); + + let args = parse_low_raw(["-a", "--no-binary"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--binary", "--no-text"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); +} + +/// -j/--threads +#[derive(Debug)] +struct Threads; + +impl Flag for Threads { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'j') + } + fn name_long(&self) -> &'static str { + "threads" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Set the approximate number of threads to use." + } + fn doc_long(&self) -> &'static str { + r" +This flag sets the approximate number of threads to use. A value of \fB0\fP +(which is the default) causes ripgrep to choose the thread count using +heuristics. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let threads = convert::usize(&v.unwrap_value())?; + args.threads = if threads == 0 { None } else { Some(threads) }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_threads() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.threads); + + let args = parse_low_raw(["--threads", "5"]).unwrap(); + assert_eq!(Some(5), args.threads); + + let args = parse_low_raw(["-j", "5"]).unwrap(); + assert_eq!(Some(5), args.threads); + + let args = parse_low_raw(["-j5"]).unwrap(); + assert_eq!(Some(5), args.threads); + + let args = parse_low_raw(["-j5", "-j10"]).unwrap(); + assert_eq!(Some(10), args.threads); + + let args = parse_low_raw(["-j5", "-j0"]).unwrap(); + assert_eq!(None, args.threads); +} + +/// --trace +#[derive(Debug)] +struct Trace; + +impl Flag for Trace { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "trace" + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Show trace messages." + } + fn doc_long(&self) -> &'static str { + r" +Show trace messages. This shows even more detail than the \flag{debug} +flag. Generally, one should only use this if \flag{debug} doesn't emit the +information you're looking for. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--trace can only be enabled"); + args.logging = Some(LoggingMode::Trace); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_trace() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.logging); + + let args = parse_low_raw(["--trace"]).unwrap(); + assert_eq!(Some(LoggingMode::Trace), args.logging); + + let args = parse_low_raw(["--debug", "--trace"]).unwrap(); + assert_eq!(Some(LoggingMode::Trace), args.logging); +} + +/// --trim +#[derive(Debug)] +struct Trim; + +impl Flag for Trim { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "trim" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-trim") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Trim prefix whitespace from matches." + } + fn doc_long(&self) -> &'static str { + r" +When set, all ASCII whitespace at the beginning of each line printed will be +removed. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.trim = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_trim() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.trim); + + let args = parse_low_raw(["--trim"]).unwrap(); + assert_eq!(true, args.trim); + + let args = parse_low_raw(["--trim", "--no-trim"]).unwrap(); + assert_eq!(false, args.trim); +} + +/// -t/--type +#[derive(Debug)] +struct Type; + +impl Flag for Type { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b't') + } + fn name_long(&self) -> &'static str { + "type" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("TYPE") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Only search files matching TYPE." + } + fn doc_long(&self) -> &'static str { + r#" +This flag limits ripgrep to searching files matching \fITYPE\fP. Multiple +\flag{type} flags may be provided. +.sp +This flag supports the special value \fBall\fP, which will behave as if +\flag{type} was provided for every file type supported by ripgrep (including +any custom file types). The end result is that \fB\-\-type=all\fP causes +ripgrep to search in "whitelist" mode, where it will only search files it +recognizes via its type definitions. +.sp +To see the list of available file types, use the \flag{type-list} flag. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.type_changes.push(TypeChange::Select { + name: convert::string(v.unwrap_value())?, + }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type() { + let select = |name: &str| TypeChange::Select { name: name.to_string() }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.type_changes); + + let args = parse_low_raw(["--type", "rust"]).unwrap(); + assert_eq!(vec![select("rust")], args.type_changes); + + let args = parse_low_raw(["-t", "rust"]).unwrap(); + assert_eq!(vec![select("rust")], args.type_changes); + + let args = parse_low_raw(["-trust"]).unwrap(); + assert_eq!(vec![select("rust")], args.type_changes); + + let args = parse_low_raw(["-trust", "-tpython"]).unwrap(); + assert_eq!(vec![select("rust"), select("python")], args.type_changes); + + let args = parse_low_raw(["-tabcdefxyz"]).unwrap(); + assert_eq!(vec![select("abcdefxyz")], args.type_changes); +} + +/// --type-add +#[derive(Debug)] +struct TypeAdd; + +impl Flag for TypeAdd { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "type-add" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("TYPESPEC") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Add a new glob for a file type." + } + fn doc_long(&self) -> &'static str { + r" +This flag adds a new glob for a particular file type. Only one glob can be +added at a time. Multiple \flag{type-add} flags can be provided. Unless +\flag{type-clear} is used, globs are added to any existing globs defined inside +of ripgrep. +.sp +Note that this must be passed to every invocation of ripgrep. Type settings are +not persisted. See \fBCONFIGURATION FILES\fP for a workaround. +.sp +Example: +.sp +.EX + rg \-\-type\-add 'foo:*.foo' -tfoo \fIPATTERN\fP +.EE +.sp +This flag can also be used to include rules from other types with the special +include directive. The include directive permits specifying one or more other +type names (separated by a comma) that have been defined and its rules will +automatically be imported into the type specified. For example, to create a +type called src that matches C++, Python and Markdown files, one can use: +.sp +.EX + \-\-type\-add 'src:include:cpp,py,md' +.EE +.sp +Additional glob rules can still be added to the src type by using this flag +again: +.sp +.EX + \-\-type\-add 'src:include:cpp,py,md' \-\-type\-add 'src:*.foo' +.EE +.sp +Note that type names must consist only of Unicode letters or numbers. +Punctuation characters are not allowed. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.type_changes + .push(TypeChange::Add { def: convert::string(v.unwrap_value())? }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type_add() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.type_changes); + + let args = parse_low_raw(["--type-add", "foo"]).unwrap(); + assert_eq!( + vec![TypeChange::Add { def: "foo".to_string() }], + args.type_changes + ); + + let args = parse_low_raw(["--type-add", "foo", "--type-add=bar"]).unwrap(); + assert_eq!( + vec![ + TypeChange::Add { def: "foo".to_string() }, + TypeChange::Add { def: "bar".to_string() } + ], + args.type_changes + ); +} + +/// --type-clear +#[derive(Debug)] +struct TypeClear; + +impl Flag for TypeClear { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "type-clear" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("TYPE") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Clear globs for a file type." + } + fn doc_long(&self) -> &'static str { + r" +Clear the file type globs previously defined for \fITYPE\fP. This clears any +previously defined globs for the \fITYPE\fP, but globs can be added after this +flag. +.sp +Note that this must be passed to every invocation of ripgrep. Type settings are +not persisted. See \fBCONFIGURATION FILES\fP for a workaround. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.type_changes.push(TypeChange::Clear { + name: convert::string(v.unwrap_value())?, + }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type_clear() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.type_changes); + + let args = parse_low_raw(["--type-clear", "foo"]).unwrap(); + assert_eq!( + vec![TypeChange::Clear { name: "foo".to_string() }], + args.type_changes + ); + + let args = + parse_low_raw(["--type-clear", "foo", "--type-clear=bar"]).unwrap(); + assert_eq!( + vec![ + TypeChange::Clear { name: "foo".to_string() }, + TypeChange::Clear { name: "bar".to_string() } + ], + args.type_changes + ); +} + +/// --type-not +#[derive(Debug)] +struct TypeNot; + +impl Flag for TypeNot { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'T') + } + fn name_long(&self) -> &'static str { + "type-not" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("TYPE") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Do not search files matching TYPE." + } + fn doc_long(&self) -> &'static str { + r#" +Do not search files matching \fITYPE\fP. Multiple \flag{type-not} flags may be +provided. Use the \flag{type-list} flag to list all available types. +.sp +This flag supports the special value \fBall\fP, which will behave +as if \flag{type-not} was provided for every file type supported by +ripgrep (including any custom file types). The end result is that +\fB\-\-type\-not=all\fP causes ripgrep to search in "blacklist" mode, where it +will only search files that are unrecognized by its type definitions. +.sp +To see the list of available file types, use the \flag{type-list} flag. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.type_changes.push(TypeChange::Negate { + name: convert::string(v.unwrap_value())?, + }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type_not() { + let select = |name: &str| TypeChange::Select { name: name.to_string() }; + let negate = |name: &str| TypeChange::Negate { name: name.to_string() }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.type_changes); + + let args = parse_low_raw(["--type-not", "rust"]).unwrap(); + assert_eq!(vec![negate("rust")], args.type_changes); + + let args = parse_low_raw(["-T", "rust"]).unwrap(); + assert_eq!(vec![negate("rust")], args.type_changes); + + let args = parse_low_raw(["-Trust"]).unwrap(); + assert_eq!(vec![negate("rust")], args.type_changes); + + let args = parse_low_raw(["-Trust", "-Tpython"]).unwrap(); + assert_eq!(vec![negate("rust"), negate("python")], args.type_changes); + + let args = parse_low_raw(["-Tabcdefxyz"]).unwrap(); + assert_eq!(vec![negate("abcdefxyz")], args.type_changes); + + let args = parse_low_raw(["-Trust", "-ttoml", "-Tjson"]).unwrap(); + assert_eq!( + vec![negate("rust"), select("toml"), negate("json")], + args.type_changes + ); +} + +/// --type-list +#[derive(Debug)] +struct TypeList; + +impl Flag for TypeList { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "type-list" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Show all supported file types." + } + fn doc_long(&self) -> &'static str { + r" +Show all supported file types and their corresponding globs. This takes any +\flag{type-add} and \flag{type-clear} flags given into account. Each type is +printed on its own line, followed by a \fB:\fP and then a comma-delimited list +of globs for that type on the same line. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--type-list has no negation"); + args.mode.update(Mode::Types); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type_list() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--type-list"]).unwrap(); + assert_eq!(Mode::Types, args.mode); +} + +/// -u/--unrestricted +#[derive(Debug)] +struct Unrestricted; + +impl Flag for Unrestricted { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'u') + } + fn name_long(&self) -> &'static str { + "unrestricted" + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r#"Reduce the level of "smart" filtering."# + } + fn doc_long(&self) -> &'static str { + r#" +This flag reduces the level of "smart" filtering. Repeated uses (up to 3) reduces +the filtering even more. When repeated three times, ripgrep will search every +file in a directory tree. +.sp +A single \flag{unrestricted} flag is equivalent to \flag{no-ignore}. Two +\flag{unrestricted} flags is equivalent to \flag{no-ignore} \flag{hidden}. +Three \flag{unrestricted} flags is equivalent to \flag{no-ignore} \flag{hidden} +\flag{binary}. +.sp +The only filtering ripgrep still does when \fB-uuu\fP is given is to skip +symbolic links and to avoid printing matches from binary files. Symbolic links +can be followed via the \flag{follow} flag, and binary files can be treated as +text files via the \flag{text} flag. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--unrestricted has no negation"); + args.unrestricted = args.unrestricted.saturating_add(1); + anyhow::ensure!( + args.unrestricted <= 3, + "flag can only be repeated up to 3 times" + ); + if args.unrestricted == 1 { + NoIgnore.update(FlagValue::Switch(true), args)?; + } else if args.unrestricted == 2 { + Hidden.update(FlagValue::Switch(true), args)?; + } else { + assert_eq!(args.unrestricted, 3); + Binary.update(FlagValue::Switch(true), args)?; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_unrestricted() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_vcs); + assert_eq!(false, args.hidden); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--unrestricted"]).unwrap(); + assert_eq!(true, args.no_ignore_vcs); + assert_eq!(false, args.hidden); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--unrestricted", "-u"]).unwrap(); + assert_eq!(true, args.no_ignore_vcs); + assert_eq!(true, args.hidden); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["-uuu"]).unwrap(); + assert_eq!(true, args.no_ignore_vcs); + assert_eq!(true, args.hidden); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let result = parse_low_raw(["-uuuu"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --version +#[derive(Debug)] +struct Version; + +impl Flag for Version { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'V') + } + fn name_long(&self) -> &'static str { + "version" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Print ripgrep's version." + } + fn doc_long(&self) -> &'static str { + r" +This flag prints ripgrep's version. This also may print other relevant +information, such as the presence of target specific optimizations and the +\fBgit\fP revision that this build of ripgrep was compiled from. +" + } + + fn update(&self, v: FlagValue, _: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--version has no negation"); + // Since this flag has different semantics for -V and --version and the + // Flag trait doesn't support encoding this sort of thing, we handle it + // as a special case in the parser. + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_version() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.special); + + let args = parse_low_raw(["-V"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionShort), args.special); + + let args = parse_low_raw(["--version"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionLong), args.special); + + let args = parse_low_raw(["-V", "--version"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionLong), args.special); + + let args = parse_low_raw(["--version", "-V"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionShort), args.special); +} + +/// --vimgrep +#[derive(Debug)] +struct Vimgrep; + +impl Flag for Vimgrep { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "vimgrep" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print results im a vim compatible format." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to print results with every match on its own line, +including line numbers and column numbers. +.sp +With this option, a line with more than one match will be printed in its +entirety more than once. For that reason, the total amount of output as a +result of this flag can be quadratic in the size of the input. For example, +if the pattern matches every byte in an input file, then each line will be +repeated for every byte matched. For this reason, users should only use this +flag when there is no other choice. Editor integrations should prefer some +other way of reading results from ripgrep, such as via the \flag{json} flag. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &[] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--vimgrep has no negation"); + args.vimgrep = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_vimgrep() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.vimgrep); + + let args = parse_low_raw(["--vimgrep"]).unwrap(); + assert_eq!(true, args.vimgrep); +} + +/// --with-filename +#[derive(Debug)] +struct WithFilename; + +impl Flag for WithFilename { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'H') + } + fn name_long(&self) -> &'static str { + "with-filename" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print the file path with each matching line." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to print the file path for each matching line. +This is the default when more than one file is searched. If \flag{heading} is +enabled (the default when printing to a terminal), the file path will be shown +above clusters of matches from each file; otherwise, the file name will be +shown as a prefix for each matched line. +.sp +This flag overrides \flag{no-filename}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--with-filename has no defined negation"); + args.with_filename = Some(true); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_with_filename() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.with_filename); + + let args = parse_low_raw(["--with-filename"]).unwrap(); + assert_eq!(Some(true), args.with_filename); + + let args = parse_low_raw(["-H"]).unwrap(); + assert_eq!(Some(true), args.with_filename); +} + +/// --no-filename +#[derive(Debug)] +struct WithFilenameNo; + +impl Flag for WithFilenameNo { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'I') + } + fn name_long(&self) -> &'static str { + "no-filename" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Never print the path with each matching line." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to never print the file path with each matching +line. This is the default when ripgrep is explicitly instructed to search one +file or stdin. +.sp +This flag overrides \flag{with-filename}. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &[] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--no-filename has no defined negation"); + args.with_filename = Some(false); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_with_filename_no() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.with_filename); + + let args = parse_low_raw(["--no-filename"]).unwrap(); + assert_eq!(Some(false), args.with_filename); + + let args = parse_low_raw(["-I"]).unwrap(); + assert_eq!(Some(false), args.with_filename); + + let args = parse_low_raw(["-I", "-H"]).unwrap(); + assert_eq!(Some(true), args.with_filename); + + let args = parse_low_raw(["-H", "-I"]).unwrap(); + assert_eq!(Some(false), args.with_filename); +} + +/// -w/--word-regexp +#[derive(Debug)] +struct WordRegexp; + +impl Flag for WordRegexp { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'w') + } + fn name_long(&self) -> &'static str { + "word-regexp" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Show matches surrounded by word boundaries." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will only show matches surrounded by word boundaries. +This is equivalent to surrounding every pattern with \fB\\b{start-half}\fP +and \fB\\b{end-half}\fP. +.sp +This overrides the \flag{line-regexp} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--word-regexp has no negation"); + args.boundary = Some(BoundaryMode::Word); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_word_regexp() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.boundary); + + let args = parse_low_raw(["--word-regexp"]).unwrap(); + assert_eq!(Some(BoundaryMode::Word), args.boundary); + + let args = parse_low_raw(["-w"]).unwrap(); + assert_eq!(Some(BoundaryMode::Word), args.boundary); + + let args = parse_low_raw(["-x", "-w"]).unwrap(); + assert_eq!(Some(BoundaryMode::Word), args.boundary); + + let args = parse_low_raw(["-w", "-x"]).unwrap(); + assert_eq!(Some(BoundaryMode::Line), args.boundary); +} + +mod convert { + use std::ffi::{OsStr, OsString}; + + use anyhow::Context; + + pub(super) fn str(v: &OsStr) -> anyhow::Result<&str> { + let Some(s) = v.to_str() else { + anyhow::bail!("value is not valid UTF-8") + }; + Ok(s) + } + + pub(super) fn string(v: OsString) -> anyhow::Result { + let Ok(s) = v.into_string() else { + anyhow::bail!("value is not valid UTF-8") + }; + Ok(s) + } + + pub(super) fn usize(v: &OsStr) -> anyhow::Result { + str(v)?.parse().context("value is not a valid number") + } + + pub(super) fn u64(v: &OsStr) -> anyhow::Result { + str(v)?.parse().context("value is not a valid number") + } + + pub(super) fn human_readable_u64(v: &OsStr) -> anyhow::Result { + grep::cli::parse_human_readable_size(str(v)?).context("invalid size") + } + + pub(super) fn human_readable_usize(v: &OsStr) -> anyhow::Result { + let size = human_readable_u64(v)?; + let Ok(size) = usize::try_from(size) else { + anyhow::bail!("size is too big") + }; + Ok(size) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn available_shorts() { + let mut total = vec![false; 128]; + for byte in 0..=0x7F { + match byte { + b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => { + total[usize::from(byte)] = true + } + _ => continue, + } + } + + let mut taken = vec![false; 128]; + for flag in FLAGS.iter() { + let Some(short) = flag.name_short() else { continue }; + taken[usize::from(short)] = true; + } + + for byte in 0..=0x7F { + if total[usize::from(byte)] && !taken[usize::from(byte)] { + eprintln!("{}", char::from(byte)); + } + } + } + + #[test] + fn shorts_all_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let Some(byte) = flag.name_short() else { continue }; + let long = flag.name_long(); + assert!( + byte.is_ascii_alphanumeric() || byte == b'.', + "\\x{byte:0X} is not a valid short flag for {long}", + ) + } + } + + #[test] + fn longs_all_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let long = flag.name_long(); + let count = long.chars().count(); + assert!(count >= 2, "flag '{long}' is less than 2 characters"); + assert!( + long.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'), + "flag '{long}' does not match ^[-0-9A-Za-z]+$", + ); + for alias in flag.aliases() { + let count = alias.chars().count(); + assert!( + count >= 2, + "flag '{long}' has alias '{alias}' that is \ + less than 2 characters", + ); + assert!( + alias + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-'), + "flag '{long}' has alias '{alias}' that does not \ + match ^[-0-9A-Za-z]+$", + ); + } + let Some(negated) = flag.name_negated() else { continue }; + let count = negated.chars().count(); + assert!( + count >= 2, + "flag '{long}' has negation '{negated}' that is \ + less than 2 characters", + ); + assert!( + negated.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'), + "flag '{long}' has negation '{negated}' that \ + does not match ^[-0-9A-Za-z]+$", + ); + } + } + + #[test] + fn shorts_no_duplicates() { + let mut taken = vec![false; 128]; + for flag in FLAGS.iter() { + let Some(short) = flag.name_short() else { continue }; + let long = flag.name_long(); + assert!( + !taken[usize::from(short)], + "flag {long} has duplicate short flag {}", + char::from(short) + ); + taken[usize::from(short)] = true; + } + } + + #[test] + fn longs_no_duplicates() { + use std::collections::BTreeSet; + + let mut taken = BTreeSet::new(); + for flag in FLAGS.iter() { + let long = flag.name_long(); + assert!(taken.insert(long), "flag {long} has a duplicate name"); + for alias in flag.aliases() { + assert!( + taken.insert(alias), + "flag {long} has an alias {alias} that is duplicative" + ); + } + let Some(negated) = flag.name_negated() else { continue }; + assert!( + taken.insert(negated), + "negated flag {negated} has a duplicate name" + ); + } + } + + #[test] + fn non_switches_have_variable_names() { + for flag in FLAGS.iter() { + if flag.is_switch() { + continue; + } + let long = flag.name_long(); + assert!( + flag.doc_variable().is_some(), + "flag '{long}' should have a variable name" + ); + } + } + + #[test] + fn switches_have_no_choices() { + for flag in FLAGS.iter() { + if !flag.is_switch() { + continue; + } + let long = flag.name_long(); + let choices = flag.doc_choices(); + assert!( + choices.is_empty(), + "switch flag '{long}' \ + should not have any choices but has some: {choices:?}", + ); + } + } + + #[test] + fn choices_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let long = flag.name_long(); + for choice in flag.doc_choices() { + assert!( + choice.chars().all(|c| c.is_ascii_alphanumeric() + || c == '-' + || c == ':'), + "choice '{choice}' for flag '{long}' does not match \ + ^[-:0-9A-Za-z]+$", + ) + } + } + } +} diff --git a/crates/core/flags/doc/help.rs b/crates/core/flags/doc/help.rs new file mode 100644 index 0000000000..353d1808b5 --- /dev/null +++ b/crates/core/flags/doc/help.rs @@ -0,0 +1,259 @@ +/*! +Provides routines for generating ripgrep's "short" and "long" help +documentation. + +The short version is used when the `-h` flag is given, while the long version +is used when the `--help` flag is given. +*/ + +use std::{collections::BTreeMap, fmt::Write}; + +use crate::flags::{defs::FLAGS, doc::version, Category, Flag}; + +const TEMPLATE_SHORT: &'static str = include_str!("template.short.help"); +const TEMPLATE_LONG: &'static str = include_str!("template.long.help"); + +/// Wraps `std::write!` and asserts there is no failure. +/// +/// We only write to `String` in this module. +macro_rules! write { + ($($tt:tt)*) => { std::write!($($tt)*).unwrap(); } +} + +/// Generate short documentation, i.e., for `-h`. +pub(crate) fn generate_short() -> String { + let mut cats: BTreeMap, Vec)> = + BTreeMap::new(); + let (mut maxcol1, mut maxcol2) = (0, 0); + for flag in FLAGS.iter().copied() { + let columns = + cats.entry(flag.doc_category()).or_insert((vec![], vec![])); + let (col1, col2) = generate_short_flag(flag); + maxcol1 = maxcol1.max(col1.len()); + maxcol2 = maxcol2.max(col2.len()); + columns.0.push(col1); + columns.1.push(col2); + } + let mut out = + TEMPLATE_SHORT.replace("!!VERSION!!", &version::generate_digits()); + for (cat, (col1, col2)) in cats.iter() { + let var = format!("!!{name}!!", name = cat.as_str()); + let val = format_short_columns(col1, col2, maxcol1, maxcol2); + out = out.replace(&var, &val); + } + out +} + +/// Generate short for a single flag. +/// +/// The first element corresponds to the flag name while the second element +/// corresponds to the documentation string. +fn generate_short_flag(flag: &dyn Flag) -> (String, String) { + let (mut col1, mut col2) = (String::new(), String::new()); + + // Some of the variable names are fine for longer form + // docs, but they make the succinct short help very noisy. + // So just shorten some of them. + let var = flag.doc_variable().map(|s| { + let mut s = s.to_string(); + s = s.replace("SEPARATOR", "SEP"); + s = s.replace("REPLACEMENT", "TEXT"); + s = s.replace("NUM+SUFFIX?", "NUM"); + s + }); + + // Generate the first column, the flag name. + if let Some(byte) = flag.name_short() { + let name = char::from(byte); + write!(col1, r"-{name}"); + write!(col1, r", "); + } + write!(col1, r"--{name}", name = flag.name_long()); + if let Some(var) = var.as_ref() { + write!(col1, r"={var}"); + } + + // And now the second column, with the description. + write!(col2, "{}", flag.doc_short()); + + (col1, col2) +} + +/// Write two columns of documentation. +/// +/// `maxcol1` should be the maximum length (in bytes) of the first column, +/// while `maxcol2` should be the maximum length (in bytes) of the second +/// column. +fn format_short_columns( + col1: &[String], + col2: &[String], + maxcol1: usize, + _maxcol2: usize, +) -> String { + assert_eq!(col1.len(), col2.len(), "columns must have equal length"); + const PAD: usize = 2; + let mut out = String::new(); + for (i, (c1, c2)) in col1.iter().zip(col2.iter()).enumerate() { + if i > 0 { + write!(out, "\n"); + } + + let pad = maxcol1 - c1.len() + PAD; + write!(out, " "); + write!(out, "{c1}"); + write!(out, "{}", " ".repeat(pad)); + write!(out, "{c2}"); + } + out +} + +/// Generate long documentation, i.e., for `--help`. +pub(crate) fn generate_long() -> String { + let mut cats = BTreeMap::new(); + for flag in FLAGS.iter().copied() { + let mut cat = cats.entry(flag.doc_category()).or_insert(String::new()); + if !cat.is_empty() { + write!(cat, "\n\n"); + } + generate_long_flag(flag, &mut cat); + } + + let mut out = + TEMPLATE_LONG.replace("!!VERSION!!", &version::generate_digits()); + for (cat, value) in cats.iter() { + let var = format!("!!{name}!!", name = cat.as_str()); + out = out.replace(&var, value); + } + out +} + +/// Write generated documentation for `flag` to `out`. +fn generate_long_flag(flag: &dyn Flag, out: &mut String) { + if let Some(byte) = flag.name_short() { + let name = char::from(byte); + write!(out, r" -{name}"); + if let Some(var) = flag.doc_variable() { + write!(out, r" {var}"); + } + write!(out, r", "); + } else { + write!(out, r" "); + } + + let name = flag.name_long(); + write!(out, r"--{name}"); + if let Some(var) = flag.doc_variable() { + write!(out, r"={var}"); + } + write!(out, "\n"); + + let doc = flag.doc_long().trim(); + let doc = super::render_custom_markup(doc, "flag", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + unreachable!(r"found unrecognized \flag{{{name}}} in --help docs") + }; + if let Some(name) = flag.name_short() { + write!(out, r"-{}/", char::from(name)); + } + write!(out, r"--{}", flag.name_long()); + }); + let doc = super::render_custom_markup(&doc, "flag-negate", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + unreachable!( + r"found unrecognized \flag-negate{{{name}}} in --help docs" + ) + }; + let Some(name) = flag.name_negated() else { + let long = flag.name_long(); + unreachable!( + "found \\flag-negate{{{long}}} in --help docs but \ + {long} does not have a negation" + ); + }; + write!(out, r"--{name}"); + }); + + let mut cleaned = remove_roff(&doc); + if let Some(negated) = flag.name_negated() { + // Flags that can be negated that aren't switches, like + // --context-separator, are somewhat weird. Because of that, the docs + // for those flags should discuss the semantics of negation explicitly. + // But for switches, the behavior is always the same. + if flag.is_switch() { + write!(cleaned, "\n\nThis flag can be disabled with --{negated}."); + } + } + let indent = " ".repeat(8); + let wrapopts = textwrap::Options::new(71) + // Normally I'd be fine with breaking at hyphens, but ripgrep's docs + // includes a lot of flag names, and they in turn contain hyphens. + // Breaking flag names across lines is not great. + .word_splitter(textwrap::WordSplitter::NoHyphenation); + for (i, paragraph) in cleaned.split("\n\n").enumerate() { + if i > 0 { + write!(out, "\n\n"); + } + let mut new = paragraph.to_string(); + if paragraph.lines().all(|line| line.starts_with(" ")) { + // Re-indent but don't refill so as to preserve line breaks + // in code/shell example snippets. + new = textwrap::indent(&new, &indent); + } else { + new = new.replace("\n", " "); + new = textwrap::refill(&new, &wrapopts); + new = textwrap::indent(&new, &indent); + } + write!(out, "{}", new.trim_end()); + } +} + +/// Removes roff syntax from `v` such that the result is approximately plain +/// text readable. +/// +/// This is basically a mish mash of heuristics based on the specific roff used +/// in the docs for the flags in this tool. If new kinds of roff are used in +/// the docs, then this may need to be updated to handle them. +fn remove_roff(v: &str) -> String { + let mut lines = vec![]; + for line in v.trim().lines() { + assert!(!line.is_empty(), "roff should have no empty lines"); + if line.starts_with(".") { + if line.starts_with(".IP ") { + let item_label = line + .split(" ") + .nth(1) + .expect("first argument to .IP") + .replace(r"\(bu", r"•") + .replace(r"\fB", "") + .replace(r"\fP", ":"); + lines.push(format!("{item_label}")); + } else if line.starts_with(".IB ") || line.starts_with(".BI ") { + let pieces = line + .split_whitespace() + .skip(1) + .collect::>() + .concat(); + lines.push(format!("{pieces}")); + } else if line.starts_with(".sp") + || line.starts_with(".PP") + || line.starts_with(".TP") + { + lines.push("".to_string()); + } + } else if line.starts_with(r"\fB") && line.ends_with(r"\fP") { + let line = line.replace(r"\fB", "").replace(r"\fP", ""); + lines.push(format!("{line}:")); + } else { + lines.push(line.to_string()); + } + } + // Squash multiple adjacent paragraph breaks into one. + lines.dedup_by(|l1, l2| l1.is_empty() && l2.is_empty()); + lines + .join("\n") + .replace(r"\fB", "") + .replace(r"\fI", "") + .replace(r"\fP", "") + .replace(r"\-", "-") + .replace(r"\\", r"\") +} diff --git a/crates/core/flags/doc/man.rs b/crates/core/flags/doc/man.rs new file mode 100644 index 0000000000..e0ed13bae6 --- /dev/null +++ b/crates/core/flags/doc/man.rs @@ -0,0 +1,110 @@ +/*! +Provides routines for generating ripgrep's man page in `roff` format. +*/ + +use std::{collections::BTreeMap, fmt::Write}; + +use crate::flags::{defs::FLAGS, doc::version, Flag}; + +const TEMPLATE: &'static str = include_str!("template.rg.1"); + +/// Wraps `std::write!` and asserts there is no failure. +/// +/// We only write to `String` in this module. +macro_rules! write { + ($($tt:tt)*) => { std::write!($($tt)*).unwrap(); } +} + +/// Wraps `std::writeln!` and asserts there is no failure. +/// +/// We only write to `String` in this module. +macro_rules! writeln { + ($($tt:tt)*) => { std::writeln!($($tt)*).unwrap(); } +} + +/// Returns a `roff` formatted string corresponding to ripgrep's entire man +/// page. +pub(crate) fn generate() -> String { + let mut cats = BTreeMap::new(); + for flag in FLAGS.iter().copied() { + let mut cat = cats.entry(flag.doc_category()).or_insert(String::new()); + if !cat.is_empty() { + writeln!(cat, ".sp"); + } + generate_flag(flag, &mut cat); + } + + let mut out = TEMPLATE.replace("!!VERSION!!", &version::generate_digits()); + for (cat, value) in cats.iter() { + let var = format!("!!{name}!!", name = cat.as_str()); + out = out.replace(&var, value); + } + out +} + +/// Writes `roff` formatted documentation for `flag` to `out`. +fn generate_flag(flag: &'static dyn Flag, out: &mut String) { + if let Some(byte) = flag.name_short() { + let name = char::from(byte); + write!(out, r"\fB\-{name}\fP"); + if let Some(var) = flag.doc_variable() { + write!(out, r" \fI{var}\fP"); + } + write!(out, r", "); + } + + let name = flag.name_long(); + write!(out, r"\fB\-\-{name}\fP"); + if let Some(var) = flag.doc_variable() { + write!(out, r"=\fI{var}\fP"); + } + write!(out, "\n"); + + writeln!(out, ".RS 4"); + let doc = flag.doc_long().trim(); + // Convert \flag{foo} into something nicer. + let doc = super::render_custom_markup(doc, "flag", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + unreachable!(r"found unrecognized \flag{{{name}}} in roff docs") + }; + out.push_str(r"\fB"); + if let Some(name) = flag.name_short() { + write!(out, r"\-{}/", char::from(name)); + } + write!(out, r"\-\-{}", flag.name_long()); + out.push_str(r"\fP"); + }); + // Convert \flag-negate{foo} into something nicer. + let doc = super::render_custom_markup(&doc, "flag-negate", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + unreachable!( + r"found unrecognized \flag-negate{{{name}}} in roff docs" + ) + }; + let Some(name) = flag.name_negated() else { + let long = flag.name_long(); + unreachable!( + "found \\flag-negate{{{long}}} in roff docs but \ + {long} does not have a negation" + ); + }; + out.push_str(r"\fB"); + write!(out, r"\-\-{name}"); + out.push_str(r"\fP"); + }); + writeln!(out, "{doc}"); + if let Some(negated) = flag.name_negated() { + // Flags that can be negated that aren't switches, like + // --context-separator, are somewhat weird. Because of that, the docs + // for those flags should discuss the semantics of negation explicitly. + // But for switches, the behavior is always the same. + if flag.is_switch() { + writeln!(out, ".sp"); + writeln!( + out, + r"This flag can be disabled with \fB\-\-{negated}\fP." + ); + } + } + writeln!(out, ".RE"); +} diff --git a/crates/core/flags/doc/mod.rs b/crates/core/flags/doc/mod.rs new file mode 100644 index 0000000000..c52a024f74 --- /dev/null +++ b/crates/core/flags/doc/mod.rs @@ -0,0 +1,38 @@ +/*! +Modules for generating documentation for ripgrep's flags. +*/ + +pub(crate) mod help; +pub(crate) mod man; +pub(crate) mod version; + +/// Searches for `\tag{...}` occurrences in `doc` and calls `replacement` for +/// each such tag found. +/// +/// The first argument given to `replacement` is the tag value, `...`. The +/// second argument is the buffer that accumulates the full replacement text. +/// +/// Since this function is only intended to be used on doc strings written into +/// the program source code, callers should panic in `replacement` if there are +/// any errors or unexpected circumstances. +fn render_custom_markup( + mut doc: &str, + tag: &str, + mut replacement: impl FnMut(&str, &mut String), +) -> String { + let mut out = String::with_capacity(doc.len()); + let tag_prefix = format!(r"\{tag}{{"); + while let Some(offset) = doc.find(&tag_prefix) { + out.push_str(&doc[..offset]); + + let start = offset + tag_prefix.len(); + let Some(end) = doc[start..].find('}').map(|i| start + i) else { + unreachable!(r"found {tag_prefix} without closing }}"); + }; + let name = &doc[start..end]; + replacement(name, &mut out); + doc = &doc[end + 1..]; + } + out.push_str(doc); + out +} diff --git a/crates/core/flags/doc/template.long.help b/crates/core/flags/doc/template.long.help new file mode 100644 index 0000000000..3a32594b7e --- /dev/null +++ b/crates/core/flags/doc/template.long.help @@ -0,0 +1,61 @@ +ripgrep !!VERSION!! +Andrew Gallant + +ripgrep (rg) recursively searches the current directory for a regex pattern. +By default, ripgrep will respect gitignore rules and automatically skip hidden +files/directories and binary files. + +Use -h for short descriptions and --help for more details. + +Project home page: https://github.com/BurntSushi/ripgrep + +USAGE: + rg [OPTIONS] PATTERN [PATH ...] + rg [OPTIONS] -e PATTERN ... [PATH ...] + rg [OPTIONS] -f PATTERNFILE ... [PATH ...] + rg [OPTIONS] --files [PATH ...] + rg [OPTIONS] --type-list + command | rg [OPTIONS] PATTERN + rg [OPTIONS] --help + rg [OPTIONS] --version + +POSITIONAL ARGUMENTS: + + A regular expression used for searching. To match a pattern beginning + with a dash, use the -e/--regexp flag. + + For example, to search for the literal '-foo', you can use this flag: + + rg -e -foo + + You can also use the special '--' delimiter to indicate that no more + flags will be provided. Namely, the following is equivalent to the + above: + + rg -- -foo + + ... + A file or directory to search. Directories are searched recursively. + File paths specified on the command line override glob and ignore + rules. + +INPUT OPTIONS: +!!input!! + +SEARCH OPTIONS: +!!search!! + +FILTER OPTIONS: +!!filter!! + +OUTPUT OPTIONS: +!!output!! + +OUTPUT MODES: +!!output-modes!! + +LOGGING OPTIONS: +!!logging!! + +OTHER BEHAVIORS: +!!other-behaviors!! diff --git a/crates/core/flags/doc/template.rg.1 b/crates/core/flags/doc/template.rg.1 new file mode 100644 index 0000000000..5c7fbdb8f9 --- /dev/null +++ b/crates/core/flags/doc/template.rg.1 @@ -0,0 +1,415 @@ +.TH RG 1 2023-11-13 "!!VERSION!!" "User Commands" +. +. +.SH NAME +rg \- recursively search the current directory for lines matching a pattern +. +. +.SH SYNOPSIS +.\" I considered using GNU troff's .SY and .YS "synopsis" macros here, but it +.\" looks like they aren't portable. Specifically, they don't appear to be in +.\" BSD's mdoc used on macOS. +.sp +\fBrg\fP [\fIOPTIONS\fP] \fIPATTERN\fP [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-e\fP \fIPATTERN\fP... [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-f\fP \fIPATTERNFILE\fP... [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-files\fP [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-type\-list\fP +.sp +\fIcommand\fP | \fBrg\fP [\fIOPTIONS\fP] \fIPATTERN\fP +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-help\fP +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-version\fP +. +. +.SH DESCRIPTION +ripgrep (rg) recursively searches the current directory for a regex pattern. +By default, ripgrep will respect your \fB.gitignore\fP and automatically skip +hidden files/directories and binary files. +.sp +ripgrep's default regex engine uses finite automata and guarantees linear +time searching. Because of this, features like backreferences and arbitrary +look-around are not supported. However, if ripgrep is built with PCRE2, +then the \fB\-P/\-\-pcre2\fP flag can be used to enable backreferences and +look-around. +.sp +ripgrep supports configuration files. Set \fBRIPGREP_CONFIG_PATH\fP to a +configuration file. The file can specify one shell argument per line. Lines +starting with \fB#\fP are ignored. For more details, see \fBCONFIGURATION +FILES\fP below. +.sp +ripgrep will automatically detect if stdin exists and search stdin for a regex +pattern, e.g. \fBls | rg foo\fP. In some environments, stdin may exist when +it shouldn't. To turn off stdin detection, one can explicitly specify the +directory to search, e.g. \fBrg foo ./\fP. +.sp +Tip: to disable all smart filtering and make ripgrep behave a bit more like +classical grep, use \fBrg -uuu\fP. +. +. +.SH REGEX SYNTAX +ripgrep uses Rust's regex engine by default, which documents its syntax: +\fIhttps://docs.rs/regex/1.*/regex/#syntax\fP +.sp +ripgrep uses byte-oriented regexes, which has some additional documentation: +\fIhttps://docs.rs/regex/1.*/regex/bytes/index.html#syntax\fP +.sp +To a first approximation, ripgrep uses Perl-like regexes without look-around or +backreferences. This makes them very similar to the "extended" (ERE) regular +expressions supported by *egrep*, but with a few additional features like +Unicode character classes. +.sp +If you're using ripgrep with the \fB\-P/\-\-pcre2\fP flag, then please consult +\fIhttps://www.pcre.org\fP or the PCRE2 man pages for documentation on the +supported syntax. +. +. +.SH POSITIONAL ARGUMENTS +.TP 12 +\fIPATTERN\fP +A regular expression used for searching. To match a pattern beginning with a +dash, use the \fB\-e/\-\-regexp\fP option. +.TP 12 +\fIPATH\fP +A file or directory to search. Directories are searched recursively. File paths +specified explicitly on the command line override glob and ignore rules. +. +. +.SH OPTIONS +This section documents all flags that ripgrep accepts. Flags are grouped into +categories below according to their function. +.sp +Note that many options can be turned on and off. In some cases, those flags are +not listed explicitly below. For example, the \fB\-\-column\fP flag (listed +below) enables column numbers in ripgrep's output, but the \fB\-\-no\-column\fP +flag (not listed below) disables them. The reverse can also exist. For example, +the \fB\-\-no\-ignore\fP flag (listed below) disables ripgrep's \fBgitignore\fP +logic, but the \fB\-\-ignore\fP flag (not listed below) enables it. These +flags are useful for overriding a ripgrep configuration file (or alias) on the +command line. Each flag's documentation notes whether an inverted flag exists. +In all cases, the flag specified last takes precedence. +. +.SS INPUT OPTIONS +!!input!! +. +.SS SEARCH OPTIONS +!!search!! +. +.SS FILTER OPTIONS +!!filter!! +. +.SS OUTPUT OPTIONS +!!output!! +. +.SS OUTPUT MODES +!!output-modes!! +. +.SS LOGGING OPTIONS +!!logging!! +. +.SS OTHER BEHAVIORS +!!other-behaviors!! +. +. +.SH EXIT STATUS +If ripgrep finds a match, then the exit status of the program is \fB0\fP. +If no match could be found, then the exit status is \fB1\fP. If an error +occurred, then the exit status is always \fB2\fP unless ripgrep was run with +the \fB\-q/\-\-quiet\fP flag and a match was found. In summary: +.sp +.IP \(bu 3n +\fB0\fP exit status occurs only when at least one match was found, and if +no error occurred, unless \fB\-q/\-\-quiet\fP was given. +. +.IP \(bu 3n +\fB1\fP exit status occurs only when no match was found and no error occurred. +. +.IP \(bu 3n +\fB2\fP exit status occurs when an error occurred. This is true for both +catastrophic errors (e.g., a regex syntax error) and for soft errors (e.g., +unable to read a file). +. +. +.SH AUTOMATIC FILTERING +ripgrep does a fair bit of automatic filtering by default. This section +describes that filtering and how to control it. +.sp +\fBTIP\fP: To disable automatic filtering, use \fBrg -uuu\fP. +.sp +ripgrep's automatic "smart" filtering is one of the most apparent +differentiating features between ripgrep and other tools like \fBgrep\fP. As +such, its behavior may be surprising to users that aren't expecting it. +.sp +ripgrep does four types of filtering automatically: +.sp +. +.IP 1. 3n +Files and directories that match ignore rules are not searched. +.IP 2. 3n +Hidden files and directories are not searched. +.IP 3. 3n +Binary files (files with a \fBNUL\fP byte) are not searched. +.IP 4. 3n +Symbolic links are not followed. +.PP +The first type of filtering is the most sophisticated. ripgrep will attempt to +respect your \fBgitignore\fP rules as faithfully as possible. In particular, +this includes the following: +. +.IP \(bu 3n +Any global rules, e.g., in \fB$HOME/.config/git/ignore\fP. +. +.IP \(bu 3n +Any rules in relevant \fB.gitignore\fP files. +. +.IP \(bu 3n +Any local rules, e.g., in \fB.git/info/exclude\fP. +.PP +In some cases, ripgrep and \fBgit\fP will not always be in sync in terms +of which files are ignored. For example, a file that is ignored via +\fB.gitignore\fP but is tracked by \fBgit\fP would not be searched by ripgrep +even though \fBgit\fP tracks it. This is unlikely to ever be fixed. Instead, +you should either make sure your exclude rules match the files you track +precisely, or otherwise use \fBgit grep\fP for search. +.sp +Additional ignore rules can be provided outside of a \fBgit\fP context: +. +.IP \(bu 3n +Any rules in \fB.ignore\fP. +. +.IP \(bu 3n +Any rules in \fB.rgignore\fP. +. +.IP \(bu 3n +Any rules in files specified with the \fB\-\-ignore\-file\fP flag. +.PP +The precedence of ignore rules is as follows, with later items overriding +earlier items: +. +.IP \(bu 3n +Files given by \fB\-\-ignore\-file\fP. +. +.IP \(bu 3n +Global gitignore rules, e.g., from \fB$HOME/.config/git/ignore\fP. +. +.IP \(bu 3n +Local rules from \fB.git/info/exclude\fP. +. +.IP \(bu 3n +Rules from \fB.gitignore\fP. +. +.IP \(bu 3n +Rules from \fB.ignore\fP. +. +.IP \(bu 3n +Rules from \fB.rgignore\fP. +.PP +So for example, if \fIfoo\fP were in a \fB.gitignore\fP and \fB!\fP\fIfoo\fP +were in an \fB.rgignore\fP, then \fIfoo\fP would not be ignored since +\fB.rgignore\fP takes precedence over \fB.gitignore\fP. +.sp +Each of the types of filtering can be configured via command line flags: +. +.IP \(bu 3n +There are several flags starting with \fB\-\-no\-ignore\fP that toggle which, +if any, ignore rules are respected. \fB\-\-no\-ignore\fP by itself will disable +all +of them. +. +.IP \(bu 3n +\fB\-./\-\-hidden\fP will force ripgrep to search hidden files and directories. +. +.IP \(bu 3n +\fB\-\-binary\fP will force ripgrep to search binary files. +. +.IP \(bu 3n +\fB\-L/\-\-follow\fP will force ripgrep to follow symlinks. +.PP +As a special short hand, the \fB\-u\fP flag can be specified up to three times. +Each additional time incrementally decreases filtering: +. +.IP \(bu 3n +\fB\-u\fP is equivalent to \fB\-\-no\-ignore\fP. +. +.IP \(bu 3n +\fB\-uu\fP is equivalent to \fB\-\-no\-ignore \-\-hidden\fP. +. +.IP \(bu 3n +\fB\-uuu\fP is equivalent to \fB\-\-no\-ignore \-\-hidden \-\-binary\fP. +.PP +In particular, \fBrg -uuu\fP should search the same exact content as \fBgrep +-r\fP. +. +. +.SH CONFIGURATION FILES +ripgrep supports reading configuration files that change ripgrep's default +behavior. The format of the configuration file is an "rc" style and is very +simple. It is defined by two rules: +. +.IP 1. 3n +Every line is a shell argument, after trimming whitespace. +. +.IP 2. 3n +Lines starting with \fB#\fP (optionally preceded by any amount of whitespace) +are ignored. +.PP +ripgrep will look for a single configuration file if and only if the +\fBRIPGREP_CONFIG_PATH\fP environment variable is set and is non-empty. +ripgrep will parse arguments from this file on startup and will behave as if +the arguments in this file were prepended to any explicit arguments given to +ripgrep on the command line. Note though that the \fBrg\fP command you run +must still be valid. That is, it must always contain at least one pattern at +the command line, even if the configuration file uses the \fB\-e/\-\-regexp\fP +flag. +.sp +For example, if your ripgreprc file contained a single line: +.sp +.EX + \-\-smart\-case +.EE +.sp +then the following command +.sp +.EX + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo +.EE +.sp +would behave identically to the following command: +.sp +.EX + rg \-\-smart-case foo +.EE +.sp +Another example is adding types, like so: +.sp +.EX + \-\-type-add + web:*.{html,css,js}* +.EE +.sp +The above would behave identically to the following command: +.sp +.EX + rg \-\-type\-add 'web:*.{html,css,js}*' foo +.EE +.sp +The same applies to using globs. This: +.sp +.EX + \-\-glob=!.git +.EE +.sp +or this: +.sp +.EX + \-\-glob + !.git +.EE +.sp +would behave identically to the following command: +.sp +.EX + rg \-\-glob '!.git' foo +.EE +.sp +The bottom line is that every shell argument needs to be on its own line. So +for example, a config file containing +.sp +.EX + \-j 4 +.EE +.sp +is probably not doing what you intend. Instead, you want +.sp +.EX + \-j + 4 +.EE +.sp +or +.sp +.EX + \-j4 +.EE +.sp +ripgrep also provides a flag, \fB\-\-no\-config\fP, that when present will +suppress any and all support for configuration. This includes any future +support for auto-loading configuration files from pre-determined paths. +.sp +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. That is, assuming +your config file contains only \fB\-\-smart\-case\fP, then this command: +.sp +.EX + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo \-\-case\-sensitive +.EE +.sp +is exactly equivalent to +.sp +.EX + rg \-\-smart\-case foo \-\-case\-sensitive +.EE +.sp +in which case, the \fB\-\-case\-sensitive\fP flag would override the +\fB\-\-smart\-case\fP flag. +. +. +.SH SHELL COMPLETION +Shell completion files are included in the release tarball for Bash, Fish, Zsh +and PowerShell. +.sp +For \fBbash\fP, move \fBrg.bash\fP to \fB$XDG_CONFIG_HOME/bash_completion\fP or +\fB/etc/bash_completion.d/\fP. +.sp +For \fBfish\fP, move \fBrg.fish\fP to \fB$HOME/.config/fish/completions\fP. +.sp +For \fBzsh\fP, move \fB_rg\fP to one of your \fB$fpath\fP directories. +. +. +.SH CAVEATS +ripgrep may abort unexpectedly when using default settings if it searches a +file that is simultaneously truncated. This behavior can be avoided by passing +the \fB\-\-no\-mmap\fP flag which will forcefully disable the use of memory +maps in all cases. +.sp +ripgrep may use a large amount of memory depending on a few factors. Firstly, +if ripgrep uses parallelism for search (the default), then the entire +output for each individual file is buffered into memory in order to prevent +interleaving matches in the output. To avoid this, you can disable parallelism +with the \fB\-j1\fP flag. Secondly, ripgrep always needs to have at least a +single line in memory in order to execute a search. A file with a very long +line can thus cause ripgrep to use a lot of memory. Generally, this only occurs +when searching binary data with the \fB\-a/\-\-text\fP flag enabled. (When the +\fB\-a/\-\-text\fP flag isn't enabled, ripgrep will replace all NUL bytes with +line terminators, which typically prevents exorbitant memory usage.) Thirdly, +when ripgrep searches a large file using a memory map, the process will likely +report its resident memory usage as the size of the file. However, this does +not mean ripgrep actually needed to use that much heap memory; the operating +system will generally handle this for you. +. +. +.SH VERSION +!!VERSION!! +. +. +.SH HOMEPAGE +\fIhttps://github.com/BurntSushi/ripgrep\fP +.sp +Please report bugs and feature requests to the issue tracker. Please do your +best to provide a reproducible test case for bugs. This should include the +corpus being searched, the \fBrg\fP command, the actual output and the expected +output. Please also include the output of running the same \fBrg\fP command but +with the \fB\-\-debug\fP flag. +.sp +If you have questions that don't obviously fall into the "bug" or "feature +request" category, then they are welcome in the Discussions section of the +issue tracker: \fIhttps://github.com/BurntSushi/ripgrep/discussions\fP. +. +. +.SH AUTHORS +Andrew Gallant <\fIjamslam@gmail.com\fP> diff --git a/crates/core/flags/doc/template.short.help b/crates/core/flags/doc/template.short.help new file mode 100644 index 0000000000..ad26bf165f --- /dev/null +++ b/crates/core/flags/doc/template.short.help @@ -0,0 +1,38 @@ +ripgrep !!VERSION!! +Andrew Gallant + +ripgrep (rg) recursively searches the current directory for a regex pattern. +By default, ripgrep will respect gitignore rules and automatically skip hidden +files/directories and binary files. + +Use -h for short descriptions and --help for more details. + +Project home page: https://github.com/BurntSushi/ripgrep + +USAGE: + rg [OPTIONS] PATTERN [PATH ...] + +POSITIONAL ARGUMENTS: + A regular expression used for searching. + ... A file or directory to search. + +INPUT OPTIONS: +!!input!! + +SEARCH OPTIONS: +!!search!! + +FILTER OPTIONS: +!!filter!! + +OUTPUT OPTIONS: +!!output!! + +OUTPUT MODES: +!!output-modes!! + +LOGGING OPTIONS: +!!logging!! + +OTHER BEHAVIORS: +!!other-behaviors!! diff --git a/crates/core/flags/doc/version.rs b/crates/core/flags/doc/version.rs new file mode 100644 index 0000000000..5604da1976 --- /dev/null +++ b/crates/core/flags/doc/version.rs @@ -0,0 +1,148 @@ +/*! +Provides routines for generating version strings. + +Version strings can be just the digits, an overall short one-line description +or something more verbose that includes things like CPU target feature support. +*/ + +use std::fmt::Write; + +/// Generates just the numerical part of the version of ripgrep. +/// +/// This includes the git revision hash. +pub(crate) fn generate_digits() -> String { + let semver = option_env!("CARGO_PKG_VERSION").unwrap_or("N/A"); + match option_env!("RIPGREP_BUILD_GIT_HASH") { + None => semver.to_string(), + Some(hash) => format!("{semver} (rev {hash})"), + } +} + +/// Generates a short version string of the form `ripgrep x.y.z`. +pub(crate) fn generate_short() -> String { + let digits = generate_digits(); + format!("ripgrep {digits}") +} + +/// Generates a longer multi-line version string. +/// +/// This includes not only the version of ripgrep but some other information +/// about its build. For example, SIMD support and PCRE2 support. +pub(crate) fn generate_long() -> String { + let (compile, runtime) = (compile_cpu_features(), runtime_cpu_features()); + + let mut out = String::new(); + writeln!(out, "{}", generate_short()).unwrap(); + writeln!(out, "features:{}", features().join(",")).unwrap(); + if !compile.is_empty() { + writeln!(out, "compile-simd:{}", compile.join(",")).unwrap(); + } + if !runtime.is_empty() { + writeln!(out, "runtime-simd:{}", runtime.join(",")).unwrap(); + } + out +} + +/// Returns the relevant SIMD features supported by the CPU at runtime. +/// +/// This is kind of a dirty violation of abstraction, since it assumes +/// knowledge about what specific SIMD features are being used by various +/// components. +fn runtime_cpu_features() -> Vec { + #[cfg(target_arch = "x86_64")] + { + let mut features = vec![]; + + let sse2 = is_x86_feature_detected!("sse2"); + features.push(format!("{sign}SSE2", sign = sign(sse2))); + + let ssse3 = is_x86_feature_detected!("ssse3"); + features.push(format!("{sign}SSSE3", sign = sign(ssse3))); + + let avx2 = is_x86_feature_detected!("avx2"); + features.push(format!("{sign}AVX2", sign = sign(avx2))); + + features + } + #[cfg(target_arch = "aarch64")] + { + let mut features = vec![]; + + // memchr and aho-corasick only use NEON when it is available at + // compile time. This isn't strictly necessary, but NEON is supposed + // to be available for all aarch64 targets. If this isn't true, please + // file an issue at https://github.com/BurntSushi/memchr. + let neon = cfg!(target_feature = "neon"); + features.push(format!("{sign}NEON", sign = sign(neon))); + + features + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + vec![] + } +} + +/// Returns the SIMD features supported while compiling ripgrep. +/// +/// In essence, any features listed here are required to run ripgrep correctly. +/// +/// This is kind of a dirty violation of abstraction, since it assumes +/// knowledge about what specific SIMD features are being used by various +/// components. +/// +/// An easy way to enable everything available on your current CPU is to +/// compile ripgrep with `RUSTFLAGS="-C target-cpu=native"`. But note that +/// the binary produced by this will not be portable. +fn compile_cpu_features() -> Vec { + #[cfg(target_arch = "x86_64")] + { + let mut features = vec![]; + + let sse2 = cfg!(target_feature = "sse2"); + features.push(format!("{sign}SSE2", sign = sign(sse2))); + + let ssse3 = cfg!(target_feature = "ssse3"); + features.push(format!("{sign}SSSE3", sign = sign(ssse3))); + + let avx2 = cfg!(target_feature = "avx2"); + features.push(format!("{sign}AVX2", sign = sign(avx2))); + + features + } + #[cfg(target_arch = "aarch64")] + { + let mut features = vec![]; + + let neon = cfg!(target_feature = "neon"); + features.push(format!("{sign}NEON", sign = sign(neon))); + + features + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + vec![] + } +} + +/// Returns a list of "features" supported (or not) by this build of ripgrpe. +fn features() -> Vec { + let mut features = vec![]; + + let simd_accel = cfg!(feature = "simd-accel"); + features.push(format!("{sign}simd-accel", sign = sign(simd_accel))); + + let pcre2 = cfg!(feature = "pcre2"); + features.push(format!("{sign}pcre2", sign = sign(pcre2))); + + features +} + +/// Returns `+` when `enabled` is `true` and `-` otherwise. +fn sign(enabled: bool) -> &'static str { + if enabled { + "+" + } else { + "-" + } +} diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs new file mode 100644 index 0000000000..4f3b3f391a --- /dev/null +++ b/crates/core/flags/hiargs.rs @@ -0,0 +1,1409 @@ +/*! +Provides the definition of high level arguments from CLI flags. +*/ + +use std::{ + collections::HashSet, + path::{Path, PathBuf}, +}; + +use { + bstr::BString, + grep::printer::{ColorSpecs, SummaryKind}, +}; + +use crate::{ + flags::lowargs::{ + BinaryMode, BoundaryMode, BufferMode, CaseMode, ColorChoice, + ContextMode, ContextSeparator, EncodingMode, EngineChoice, + FieldContextSeparator, FieldMatchSeparator, LowArgs, MmapMode, Mode, + PatternSource, SearchMode, SortMode, SortModeKind, TypeChange, + }, + haystack::{Haystack, HaystackBuilder}, + search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder}, +}; + +/// A high level representation of CLI arguments. +/// +/// The distinction between low and high level arguments is somewhat arbitrary +/// and wishy washy. The main idea here is that high level arguments generally +/// require all of CLI parsing to be finished. For example, one cannot +/// construct a glob matcher until all of the glob patterns are known. +/// +/// So while low level arguments are collected during parsing itself, high +/// level arguments aren't created until parsing has completely finished. +#[derive(Debug)] +pub(crate) struct HiArgs { + binary: BinaryDetection, + boundary: Option, + buffer: BufferMode, + byte_offset: bool, + case: CaseMode, + color: ColorChoice, + colors: grep::printer::ColorSpecs, + column: bool, + context: ContextMode, + context_separator: ContextSeparator, + crlf: bool, + dfa_size_limit: Option, + encoding: EncodingMode, + engine: EngineChoice, + field_context_separator: FieldContextSeparator, + field_match_separator: FieldMatchSeparator, + file_separator: Option>, + fixed_strings: bool, + follow: bool, + globs: ignore::overrides::Override, + heading: bool, + hidden: bool, + hyperlink_config: grep::printer::HyperlinkConfig, + ignore_file_case_insensitive: bool, + ignore_file: Vec, + include_zero: bool, + invert_match: bool, + is_terminal_stdout: bool, + line_number: bool, + max_columns: Option, + max_columns_preview: bool, + max_count: Option, + max_depth: Option, + max_filesize: Option, + mmap_choice: grep::searcher::MmapChoice, + mode: Mode, + multiline: bool, + multiline_dotall: bool, + no_ignore_dot: bool, + no_ignore_exclude: bool, + no_ignore_files: bool, + no_ignore_global: bool, + no_ignore_parent: bool, + no_ignore_vcs: bool, + no_require_git: bool, + no_unicode: bool, + null_data: bool, + one_file_system: bool, + only_matching: bool, + path_separator: Option, + paths: Paths, + path_terminator: Option, + patterns: Patterns, + pre: Option, + pre_globs: ignore::overrides::Override, + quiet: bool, + quit_after_match: bool, + regex_size_limit: Option, + replace: Option, + search_zip: bool, + sort: Option, + stats: Option, + stop_on_nonmatch: bool, + threads: usize, + trim: bool, + types: ignore::types::Types, + vimgrep: bool, + with_filename: bool, +} + +impl HiArgs { + /// Convert low level arguments into high level arguments. + /// + /// This process can fail for a variety of reasons. For example, invalid + /// globs or some kind of environment issue. + pub(crate) fn from_low_args(mut low: LowArgs) -> anyhow::Result { + // Callers should not be trying to convert low-level arguments when + // a short-circuiting special mode is present. + assert_eq!(None, low.special, "special mode demands short-circuiting"); + // If the sorting mode isn't supported, then we bail loudly. I'm not + // sure if this is the right thing to do. We could silently "not sort" + // as well. If we wanted to go that route, then we could just set + // `low.sort = None` if `supported()` returns an error. + if let Some(ref sort) = low.sort { + sort.supported()?; + } + + // We modify the mode in-place on `low` so that subsequent conversions + // see the correct mode. + match low.mode { + Mode::Search(ref mut mode) => match *mode { + // treat `-v --count-matches` as `-v --count` + SearchMode::CountMatches if low.invert_match => { + *mode = SearchMode::Count; + } + // treat `-o --count` as `--count-matches` + SearchMode::Count if low.only_matching => { + *mode = SearchMode::CountMatches; + } + _ => {} + }, + _ => {} + } + + let mut state = State::new()?; + let patterns = Patterns::from_low_args(&mut state, &mut low)?; + let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?; + + let binary = BinaryDetection::from_low_args(&state, &low); + let colors = take_color_specs(&mut state, &mut low); + let hyperlink_config = take_hyperlink_config(&mut state, &mut low)?; + let stats = stats(&low); + let types = types(&low)?; + let globs = globs(&state, &low)?; + let pre_globs = preprocessor_globs(&state, &low)?; + + let color = match low.color { + ColorChoice::Auto if !state.is_terminal_stdout => { + ColorChoice::Never + } + _ => low.color, + }; + let column = low.column.unwrap_or(low.vimgrep); + let heading = match low.heading { + None => !low.vimgrep && state.is_terminal_stdout, + Some(false) => false, + Some(true) => !low.vimgrep, + }; + let path_terminator = if low.null { Some(b'\x00') } else { None }; + let quit_after_match = stats.is_none() && low.quiet; + let threads = if low.sort.is_some() || paths.is_one_file { + 1 + } else if let Some(threads) = low.threads { + threads + } else { + std::thread::available_parallelism().map_or(1, |n| n.get()).min(12) + }; + let with_filename = low + .with_filename + .unwrap_or_else(|| low.vimgrep || !paths.is_one_file); + + let file_separator = match low.mode { + Mode::Search(SearchMode::Standard) => { + if heading { + Some(b"".to_vec()) + } else if let ContextMode::Limited(ref limited) = low.context { + let (before, after) = limited.get(); + if before > 0 || after > 0 { + low.context_separator.clone().into_bytes() + } else { + None + } + } else { + None + } + } + _ => None, + }; + + let line_number = low.line_number.unwrap_or_else(|| { + if low.quiet { + return false; + } + let Mode::Search(ref search_mode) = low.mode else { return false }; + match *search_mode { + SearchMode::FilesWithMatches + | SearchMode::FilesWithoutMatch + | SearchMode::Count + | SearchMode::CountMatches => return false, + SearchMode::JSON => return true, + SearchMode::Standard => { + // A few things can imply counting line numbers. In + // particular, we generally want to show line numbers by + // default when printing to a tty for human consumption, + // except for one interesting case: when we're only + // searching stdin. This makes pipelines work as expected. + (state.is_terminal_stdout && !paths.is_only_stdin()) + || column + || low.vimgrep + } + } + }); + + let mmap_choice = { + // SAFETY: Memory maps are difficult to impossible to encapsulate + // safely in a portable way that doesn't simultaneously negate some + // of the benfits of using memory maps. For ripgrep's use, we never + // mutate a memory map and generally never store the contents of + // memory map in a data structure that depends on immutability. + // Generally speaking, the worst thing that can happen is a SIGBUS + // (if the underlying file is truncated while reading it), which + // will cause ripgrep to abort. This reasoning should be treated as + // suspect. + let maybe = unsafe { grep::searcher::MmapChoice::auto() }; + let never = grep::searcher::MmapChoice::never(); + match low.mmap { + MmapMode::Auto => { + if paths.paths.len() <= 10 + && paths.paths.iter().all(|p| p.is_file()) + { + // If we're only searching a few paths and all of them + // are files, then memory maps are probably faster. + maybe + } else { + never + } + } + MmapMode::AlwaysTryMmap => maybe, + MmapMode::Never => never, + } + }; + + Ok(HiArgs { + mode: low.mode, + patterns, + paths, + binary, + boundary: low.boundary, + buffer: low.buffer, + byte_offset: low.byte_offset, + case: low.case, + color, + colors, + column, + context: low.context, + context_separator: low.context_separator, + crlf: low.crlf, + dfa_size_limit: low.dfa_size_limit, + encoding: low.encoding, + engine: low.engine, + field_context_separator: low.field_context_separator, + field_match_separator: low.field_match_separator, + file_separator, + fixed_strings: low.fixed_strings, + follow: low.follow, + heading, + hidden: low.hidden, + hyperlink_config, + ignore_file: low.ignore_file, + ignore_file_case_insensitive: low.ignore_file_case_insensitive, + include_zero: low.include_zero, + invert_match: low.invert_match, + is_terminal_stdout: state.is_terminal_stdout, + line_number, + max_columns: low.max_columns, + max_columns_preview: low.max_columns_preview, + max_count: low.max_count, + max_depth: low.max_depth, + max_filesize: low.max_filesize, + mmap_choice, + multiline: low.multiline, + multiline_dotall: low.multiline_dotall, + no_ignore_dot: low.no_ignore_dot, + no_ignore_exclude: low.no_ignore_exclude, + no_ignore_files: low.no_ignore_files, + no_ignore_global: low.no_ignore_global, + no_ignore_parent: low.no_ignore_parent, + no_ignore_vcs: low.no_ignore_vcs, + no_require_git: low.no_require_git, + no_unicode: low.no_unicode, + null_data: low.null_data, + one_file_system: low.one_file_system, + only_matching: low.only_matching, + globs, + path_separator: low.path_separator, + path_terminator, + pre: low.pre, + pre_globs, + quiet: low.quiet, + quit_after_match, + regex_size_limit: low.regex_size_limit, + replace: low.replace, + search_zip: low.search_zip, + sort: low.sort, + stats, + stop_on_nonmatch: low.stop_on_nonmatch, + threads, + trim: low.trim, + types, + vimgrep: low.vimgrep, + with_filename, + }) + } + + /// Returns a writer for printing buffers to stdout. + /// + /// This is intended to be used from multiple threads. Namely, a buffer + /// writer can create new buffers that are sent to threads. Threads can + /// then independently write to the buffers. Once a unit of work is + /// complete, a buffer can be given to the buffer writer to write to + /// stdout. + pub(crate) fn buffer_writer(&self) -> termcolor::BufferWriter { + let mut wtr = + termcolor::BufferWriter::stdout(self.color.to_termcolor()); + wtr.separator(self.file_separator.clone()); + wtr + } + + /// Returns true when ripgrep had to guess to search the current working + /// directory. That is, it's true when ripgrep is called without any file + /// paths or directories to search. + /// + /// Other than changing how file paths are printed (i.e., without the + /// leading `./`), it's also useful to know for diagnostic reasons. For + /// example, ripgrep will print an error message when nothing is searched + /// since it's possible the ignore rules in play are too aggressive. But + /// this warning is only emitted when ripgrep was called without any + /// explicit file paths since otherwise the warning would likely be too + /// aggressive. + pub(crate) fn has_implicit_path(&self) -> bool { + self.paths.has_implicit_path + } + + /// Return a properly configured builder for constructing haystacks. + /// + /// The builder can be used to turn a directory entry (from the `ignore` + /// crate) into something that can be searched. + pub(crate) fn haystack_builder(&self) -> HaystackBuilder { + let mut builder = HaystackBuilder::new(); + builder.strip_dot_prefix(self.paths.has_implicit_path); + builder + } + + /// Return the matcher that should be used for searching using the engine + /// choice made by the user. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + pub(crate) fn matcher(&self) -> anyhow::Result { + match self.engine { + EngineChoice::Default => match self.matcher_rust() { + Ok(m) => Ok(m), + Err(err) => { + anyhow::bail!(suggest_other_engine(err.to_string())); + } + }, + EngineChoice::PCRE2 => Ok(self.matcher_pcre2()?), + EngineChoice::Auto => { + let rust_err = match self.matcher_rust() { + Ok(m) => return Ok(m), + Err(err) => err, + }; + log::debug!( + "error building Rust regex in hybrid mode:\n{rust_err}", + ); + + let pcre_err = match self.matcher_pcre2() { + Ok(m) => return Ok(m), + Err(err) => err, + }; + let divider = "~".repeat(79); + anyhow::bail!( + "regex could not be compiled with either the default \ + regex engine or with PCRE2.\n\n\ + default regex engine error:\n\ + {divider}\n\ + {rust_err}\n\ + {divider}\n\n\ + PCRE2 regex engine error:\n{pcre_err}", + ); + } + } + } + + /// Build a matcher using PCRE2. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + /// + /// If the `pcre2` feature is not enabled then this always returns an + /// error. + fn matcher_pcre2(&self) -> anyhow::Result { + #[cfg(feature = "pcre2")] + { + let mut builder = grep::pcre2::RegexMatcherBuilder::new(); + builder.multi_line(true).fixed_strings(self.fixed_strings); + match self.case { + CaseMode::Sensitive => builder.caseless(false), + CaseMode::Insensitive => builder.caseless(true), + CaseMode::Smart => builder.case_smart(true), + }; + if let Some(ref boundary) = self.boundary { + match *boundary { + BoundaryMode::Line => builder.whole_line(true), + BoundaryMode::Word => builder.word(true), + }; + } + // For whatever reason, the JIT craps out during regex compilation with + // a "no more memory" error on 32 bit systems. So don't use it there. + if cfg!(target_pointer_width = "64") { + builder + .jit_if_available(true) + // The PCRE2 docs say that 32KB is the default, and that 1MB + // should be big enough for anything. But let's crank it to + // 10MB. + .max_jit_stack_size(Some(10 * (1 << 20))); + } + if !self.no_unicode { + builder.utf(true).ucp(true); + } + if self.multiline { + builder.dotall(self.multiline_dotall); + } + if self.crlf { + builder.crlf(true); + } + let m = builder.build_many(&self.patterns.patterns)?; + Ok(PatternMatcher::PCRE2(m)) + } + #[cfg(not(feature = "pcre2"))] + { + Err(anyhow::anyhow!( + "PCRE2 is not available in this build of ripgrep" + )) + } + } + + /// Build a matcher using Rust's regex engine. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + fn matcher_rust(&self) -> anyhow::Result { + let mut builder = grep::regex::RegexMatcherBuilder::new(); + builder + .multi_line(true) + .unicode(!self.no_unicode) + .octal(false) + .fixed_strings(self.fixed_strings); + match self.case { + CaseMode::Sensitive => builder.case_insensitive(false), + CaseMode::Insensitive => builder.case_insensitive(true), + CaseMode::Smart => builder.case_smart(true), + }; + if let Some(ref boundary) = self.boundary { + match *boundary { + BoundaryMode::Line => builder.whole_line(true), + BoundaryMode::Word => builder.word(true), + }; + } + if self.multiline { + builder.dot_matches_new_line(self.multiline_dotall); + if self.crlf { + builder.crlf(true).line_terminator(None); + } + } else { + builder.line_terminator(Some(b'\n')).dot_matches_new_line(false); + if self.crlf { + builder.crlf(true); + } + // We don't need to set this in multiline mode since mulitline + // matchers don't use optimizations related to line terminators. + // Moreover, a mulitline regex used with --null-data should + // be allowed to match NUL bytes explicitly, which this would + // otherwise forbid. + if self.null_data { + builder.line_terminator(Some(b'\x00')); + } + } + if let Some(limit) = self.regex_size_limit { + builder.size_limit(limit); + } + if let Some(limit) = self.dfa_size_limit { + builder.dfa_size_limit(limit); + } + let m = match builder.build_many(&self.patterns.patterns) { + Ok(m) => m, + Err(err) => anyhow::bail!(suggest_multiline(err.to_string())), + }; + Ok(PatternMatcher::RustRegex(m)) + } + + /// Returns true if some non-zero number of matches is believed to be + /// possible. + /// + /// When this returns false, it is impossible for ripgrep to ever report + /// a match. + pub(crate) fn matches_possible(&self) -> bool { + if self.patterns.patterns.is_empty() { + return false; + } + if self.max_count == Some(0) { + return false; + } + true + } + + /// Returns the "mode" that ripgrep should operate in. + /// + /// This is generally useful for determining what action ripgrep should + /// take. The main mode is of course to "search," but there are other + /// non-search modes such as `--type-list` and `--files`. + pub(crate) fn mode(&self) -> Mode { + self.mode + } + + /// Returns a builder for constructing a "path printer." + /// + /// This is useful for the `--files` mode in ripgrep, where the printer + /// just needs to emit paths and not need to worry about the functionality + /// of searching. + pub(crate) fn path_printer_builder( + &self, + ) -> grep::printer::PathPrinterBuilder { + let mut builder = grep::printer::PathPrinterBuilder::new(); + builder + .color_specs(self.colors.clone()) + .hyperlink(self.hyperlink_config.clone()) + .separator(self.path_separator.clone()) + .terminator(self.path_terminator.unwrap_or(b'\n')); + builder + } + + /// Returns a printer for the given search mode. + /// + /// This chooses which printer to build (JSON, summary or standard) based + /// on the search mode given. + pub(crate) fn printer( + &self, + search_mode: SearchMode, + wtr: W, + ) -> Printer { + let summary_kind = if self.quiet { + SummaryKind::Quiet + } else { + match search_mode { + SearchMode::FilesWithMatches => SummaryKind::PathWithMatch, + SearchMode::FilesWithoutMatch => SummaryKind::PathWithoutMatch, + SearchMode::Count => SummaryKind::Count, + SearchMode::CountMatches => SummaryKind::CountMatches, + SearchMode::JSON => { + return Printer::JSON(self.printer_json(wtr)) + } + SearchMode::Standard => { + return Printer::Standard(self.printer_standard(wtr)) + } + } + }; + Printer::Summary(self.printer_summary(wtr, summary_kind)) + } + + /// Builds a JSON printer. + fn printer_json( + &self, + wtr: W, + ) -> grep::printer::JSON { + grep::printer::JSONBuilder::new() + .pretty(false) + .max_matches(self.max_count) + .always_begin_end(false) + .build(wtr) + } + + /// Builds a "standard" grep printer where matches are printed as plain + /// text lines. + fn printer_standard( + &self, + wtr: W, + ) -> grep::printer::Standard { + grep::printer::StandardBuilder::new() + .byte_offset(self.byte_offset) + .color_specs(self.colors.clone()) + .column(self.column) + .heading(self.heading) + .hyperlink(self.hyperlink_config.clone()) + .max_columns_preview(self.max_columns_preview) + .max_columns(self.max_columns) + .max_matches(self.max_count) + .only_matching(self.only_matching) + .path(self.with_filename) + .path_terminator(self.path_terminator.clone()) + .per_match_one_line(true) + .per_match(self.vimgrep) + .replacement(self.replace.clone().map(|r| r.into())) + .separator_context(self.context_separator.clone().into_bytes()) + .separator_field_context( + self.field_context_separator.clone().into_bytes(), + ) + .separator_field_match( + self.field_match_separator.clone().into_bytes(), + ) + .separator_path(self.path_separator.clone()) + .separator_search(self.file_separator.clone()) + .stats(self.stats.is_some()) + .trim_ascii(self.trim) + .build(wtr) + } + + /// Builds a "summary" printer where search results are aggregated on a + /// file-by-file basis. + fn printer_summary( + &self, + wtr: W, + kind: SummaryKind, + ) -> grep::printer::Summary { + grep::printer::SummaryBuilder::new() + .color_specs(self.colors.clone()) + .exclude_zero(!self.include_zero) + .hyperlink(self.hyperlink_config.clone()) + .kind(kind) + .max_matches(self.max_count) + .path(self.with_filename) + .path_terminator(self.path_terminator.clone()) + .separator_field(b":".to_vec()) + .separator_path(self.path_separator.clone()) + .stats(self.stats.is_some()) + .build(wtr) + } + + /// Returns true if ripgrep should operate in "quiet" mode. + /// + /// Generally speaking, quiet mode means that ripgrep should not print + /// anything to stdout. There are some exceptions. For example, when the + /// user has provided `--stats`, then ripgrep will print statistics to + /// stdout. + pub(crate) fn quiet(&self) -> bool { + self.quiet + } + + /// Returns true when ripgrep should stop searching after a single match is + /// found. + /// + /// This is useful for example when quiet mode is enabled. In that case, + /// users generally can't tell the difference in behavior between a search + /// that finds all matches and a search that only finds one of them. (An + /// exception here is if `--stats` is given, then `quit_after_match` will + /// always return false since the user expects ripgrep to find everything.) + pub(crate) fn quit_after_match(&self) -> bool { + self.quit_after_match + } + + /// Build a worker for executing searches. + /// + /// Search results are found using the given matcher and written to the + /// given printer. + pub(crate) fn search_worker( + &self, + matcher: PatternMatcher, + searcher: grep::searcher::Searcher, + printer: Printer, + ) -> anyhow::Result> { + let mut builder = SearchWorkerBuilder::new(); + builder + .preprocessor(self.pre.clone())? + .preprocessor_globs(self.pre_globs.clone()) + .search_zip(self.search_zip) + .binary_detection_explicit(self.binary.explicit.clone()) + .binary_detection_implicit(self.binary.implicit.clone()); + Ok(builder.build(matcher, searcher, printer)) + } + + /// Build a searcher from the command line parameters. + pub(crate) fn searcher(&self) -> anyhow::Result { + let line_term = if self.crlf { + grep::matcher::LineTerminator::crlf() + } else if self.null_data { + grep::matcher::LineTerminator::byte(b'\x00') + } else { + grep::matcher::LineTerminator::byte(b'\n') + }; + let mut builder = grep::searcher::SearcherBuilder::new(); + builder + .line_terminator(line_term) + .invert_match(self.invert_match) + .line_number(self.line_number) + .multi_line(self.multiline) + .memory_map(self.mmap_choice.clone()) + .stop_on_nonmatch(self.stop_on_nonmatch); + match self.context { + ContextMode::Passthru => { + builder.passthru(true); + } + ContextMode::Limited(ref limited) => { + let (before, after) = limited.get(); + builder.before_context(before); + builder.after_context(after); + } + } + match self.encoding { + EncodingMode::Auto => {} // default for the searcher + EncodingMode::Some(ref enc) => { + builder.encoding(Some(enc.clone())); + } + EncodingMode::Disabled => { + builder.bom_sniffing(false); + } + } + Ok(builder.build()) + } + + /// Given an iterator of haystacks, sort them if necessary. + /// + /// When sorting is necessary, this will collect the entire iterator into + /// memory, sort them and then return a new iterator. When sorting is not + /// necessary, then the iterator given is returned as is without collecting + /// it into memory. + /// + /// Once special case is when sorting by path in ascending order has been + /// requested. In this case, the iterator given is returned as is without + /// any additional sorting. This is done because `walk_builder()` will sort + /// the iterator it yields during directory traversal, so no additional + /// sorting is needed. + pub(crate) fn sort<'a, I>( + &self, + haystacks: I, + ) -> Box + 'a> + where + I: Iterator + 'a, + { + use std::{cmp::Ordering, fs::Metadata, io, time::SystemTime}; + + fn attach_timestamps( + haystacks: impl Iterator, + get: impl Fn(&Metadata) -> io::Result, + ) -> impl Iterator)> { + haystacks.map(move |s| { + let time = s.path().metadata().and_then(|m| get(&m)).ok(); + (s, time) + }) + } + + let Some(ref sort) = self.sort else { return Box::new(haystacks) }; + let mut with_timestamps: Vec<_> = match sort.kind { + SortModeKind::Path if !sort.reverse => return Box::new(haystacks), + SortModeKind::Path => todo!(), + SortModeKind::LastModified => { + attach_timestamps(haystacks, |md| md.modified()).collect() + } + SortModeKind::LastAccessed => { + attach_timestamps(haystacks, |md| md.accessed()).collect() + } + SortModeKind::Created => { + attach_timestamps(haystacks, |md| md.created()).collect() + } + }; + with_timestamps.sort_by(|(_, ref t1), (_, ref t2)| { + let ordering = match (*t1, *t2) { + // Both have metadata, do the obvious thing. + (Some(t1), Some(t2)) => t1.cmp(&t2), + // Things that error should appear later (when ascending). + (Some(_), None) => Ordering::Less, + // Things that error should appear later (when ascending). + (None, Some(_)) => Ordering::Greater, + // When both error, we can't distinguish, so treat as equal. + (None, None) => Ordering::Equal, + }; + if sort.reverse { + ordering.reverse() + } else { + ordering + } + }); + Box::new(with_timestamps.into_iter().map(|(s, _)| s)) + } + + /// Returns a stats object if the user requested that ripgrep keep track + /// of various metrics during a search. + /// + /// When this returns `None`, then callers may assume that the user did + /// not request statistics. + pub(crate) fn stats(&self) -> Option { + self.stats.clone() + } + + /// Returns a color-enabled writer for stdout. + /// + /// The writer returned is also configured to do either line or block + /// buffering, based on either explicit configuration from the user via CLI + /// flags, or automatically based on whether stdout is connected to a tty. + pub(crate) fn stdout(&self) -> grep::cli::StandardStream { + let color = self.color.to_termcolor(); + match self.buffer { + BufferMode::Auto => { + if self.is_terminal_stdout { + grep::cli::stdout_buffered_line(color) + } else { + grep::cli::stdout_buffered_block(color) + } + } + BufferMode::Line => grep::cli::stdout_buffered_line(color), + BufferMode::Block => grep::cli::stdout_buffered_block(color), + } + } + + /// Returns the total number of threads ripgrep should use to execute a + /// search. + /// + /// This number is the result of reasoning about both heuristics (like + /// the available number of cores) and whether ripgrep's mode supports + /// parallelism. It is intended that this number be used to directly + /// determine how many threads to spawn. + pub(crate) fn threads(&self) -> usize { + self.threads + } + + /// Returns the file type matcher that was built. + /// + /// The matcher includes both the default rules and any rules added by the + /// user for this specific invocation. + pub(crate) fn types(&self) -> &ignore::types::Types { + &self.types + } + + /// Create a new builder for recursive directory traversal. + /// + /// The builder returned can be used to start a single threaded or multi + /// threaded directory traversal. For multi threaded traversal, the number + /// of threads configured is equivalent to `HiArgs::threads`. + /// + /// If `HiArgs::threads` is equal to `1`, then callers should generally + /// choose to explicitly use single threaded traversal since it won't have + /// the unnecessary overhead of synchronization. + pub(crate) fn walk_builder(&self) -> anyhow::Result { + let mut builder = ignore::WalkBuilder::new(&self.paths.paths[0]); + for path in self.paths.paths.iter().skip(1) { + builder.add(path); + } + if !self.no_ignore_files { + for path in self.ignore_file.iter() { + if let Some(err) = builder.add_ignore(path) { + ignore_message!("{err}"); + } + } + } + builder + .max_depth(self.max_depth) + .follow_links(self.follow) + .max_filesize(self.max_filesize) + .threads(self.threads) + .same_file_system(self.one_file_system) + .skip_stdout(matches!(self.mode, Mode::Search(_))) + .overrides(self.globs.clone()) + .types(self.types.clone()) + .hidden(!self.hidden) + .parents(!self.no_ignore_parent) + .ignore(!self.no_ignore_dot) + .git_global(!self.no_ignore_vcs && !self.no_ignore_global) + .git_ignore(!self.no_ignore_vcs) + .git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude) + .require_git(!self.no_require_git) + .ignore_case_insensitive(self.ignore_file_case_insensitive); + if !self.no_ignore_dot { + builder.add_custom_ignore_filename(".rgignore"); + } + // When we want to sort paths lexicographically in ascending order, + // then we can actually do this during directory traversal itself. + // Otherwise, sorting is done by collecting all paths, sorting them and + // then searching them. + if let Some(ref sort) = self.sort { + assert_eq!(1, self.threads, "sorting implies single threaded"); + if !sort.reverse && matches!(sort.kind, SortModeKind::Path) { + builder.sort_by_file_name(|a, b| a.cmp(b)); + } + } + Ok(builder) + } +} + +/// State that only needs to be computed once during argument parsing. +/// +/// This state is meant to be somewhat generic and shared across multiple +/// low->high argument conversions. The state can even be mutated by various +/// conversions as a way to communicate changes to other conversions. For +/// example, reading patterns might consume from stdin. If we know stdin +/// has been consumed and no other file paths have been given, then we know +/// for sure that we should search the CWD. In this way, a state change +/// when reading the patterns can impact how the file paths are ultimately +/// generated. +#[derive(Debug)] +struct State { + /// Whether it's believed that tty is connected to stdout. Note that on + /// unix systems, this is always correct. On Windows, heuristics are used + /// by Rust's standard library, particularly for cygwin/MSYS environments. + is_terminal_stdout: bool, + /// Whether stdin has already been consumed. This is useful to know and for + /// providing good error messages when the user has tried to read from stdin + /// in two different places. For example, `rg -f - -`. + stdin_consumed: bool, + /// The current working directory. + cwd: PathBuf, +} + +impl State { + /// Initialize state to some sensible defaults. + /// + /// Note that the state values may change throughout the lifetime of + /// argument parsing. + fn new() -> anyhow::Result { + use std::io::IsTerminal; + + Ok(State { + is_terminal_stdout: std::io::stdout().is_terminal(), + stdin_consumed: false, + cwd: current_dir()?, + }) + } +} + +/// The disjunction of patterns to search for. +/// +/// The number of patterns can be empty, e.g., via `-f /dev/null`. +#[derive(Debug)] +struct Patterns { + /// The actual patterns to match. + patterns: Vec, +} + +impl Patterns { + /// Pulls the patterns out of the low arguments. + /// + /// This includes collecting patterns from -e/--regexp and -f/--file. + /// + /// If the invocation implies that the first positional argument is a + /// pattern (the common case), then the first positional argument is + /// extracted as well. + fn from_low_args( + state: &mut State, + low: &mut LowArgs, + ) -> anyhow::Result { + // The first positional is only a pattern when ripgrep is instructed to + // search and neither -e/--regexp nor -f/--file is given. Basically, + // the first positional is a pattern only when a pattern hasn't been + // given in some other way. + + // No search means no patterns. Even if -e/--regexp or -f/--file is + // given, we know we won't use them so don't bother collecting them. + if !matches!(low.mode, Mode::Search(_)) { + return Ok(Patterns { patterns: vec![] }); + } + // If we got nothing from -e/--regexp and -f/--file, then the first + // positional is a pattern. + if low.patterns.is_empty() { + anyhow::ensure!( + !low.positional.is_empty(), + "ripgrep requires at least one pattern to execute a search" + ); + let ospat = low.positional.remove(0); + let Ok(pat) = ospat.into_string() else { + anyhow::bail!("pattern given is not valid UTF-8") + }; + return Ok(Patterns { patterns: vec![pat] }); + } + // Otherwise, we need to slurp up our patterns from -e/--regexp and + // -f/--file. We de-duplicate as we go. If we don't de-duplicate, + // then it can actually lead to major slow downs for sloppy inputs. + // This might be surprising, and the regex engine will eventually + // de-duplicate duplicative branches in a single regex (maybe), but + // not until after it has gone through parsing and some other layers. + // If there are a lot of duplicates, then that can lead to a sizeable + // extra cost. It is lamentable that we pay the extra cost here to + // de-duplicate for a likely uncommon case, but I've seen this have a + // big impact on real world data. + let mut seen = HashSet::new(); + let mut patterns = Vec::with_capacity(low.patterns.len()); + let mut add = |pat: String| { + if !seen.contains(&pat) { + seen.insert(pat.clone()); + patterns.push(pat); + } + }; + for source in low.patterns.drain(..) { + match source { + PatternSource::Regexp(pat) => add(pat), + PatternSource::File(path) => { + if path == Path::new("-") { + anyhow::ensure!( + !state.stdin_consumed, + "error reading -f/--file from stdin: stdin \ + has already been consumed" + ); + for pat in grep::cli::patterns_from_stdin()? { + add(pat); + } + state.stdin_consumed = true; + } else { + for pat in grep::cli::patterns_from_path(&path)? { + add(pat); + } + } + } + } + } + Ok(Patterns { patterns }) + } +} + +/// The collection of paths we want to search for. +/// +/// This guarantees that there is always at least one path. +#[derive(Debug)] +struct Paths { + /// The actual paths. + paths: Vec, + /// This is true when ripgrep had to guess to search the current working + /// directory. e.g., When the user just runs `rg foo`. It is odd to need + /// this, but it subtly changes how the paths are printed. When no explicit + /// path is given, then ripgrep doesn't prefix each path with `./`. But + /// otherwise it does! This curious behavior matches what GNU grep does. + has_implicit_path: bool, + /// Set to true if it is known that only a single file descriptor will + /// be searched. + is_one_file: bool, +} + +impl Paths { + /// Drain the search paths out of the given low arguments. + fn from_low_args( + state: &mut State, + _: &Patterns, + low: &mut LowArgs, + ) -> anyhow::Result { + // We require a `&Patterns` even though we don't use it to ensure that + // patterns have already been read from LowArgs. This let's us safely + // assume that all remaining positional arguments are intended to be + // file paths. + + let mut paths = Vec::with_capacity(low.positional.len()); + for osarg in low.positional.drain(..) { + let path = PathBuf::from(osarg); + if state.stdin_consumed && path == Path::new("-") { + anyhow::bail!( + "error: attempted to read patterns from stdin \ + while also searching stdin", + ); + } + paths.push(path); + } + if !paths.is_empty() { + let is_one_file = paths.len() == 1 + && (paths[0] == Path::new("-") || paths[0].is_file()); + return Ok(Paths { paths, has_implicit_path: false, is_one_file }); + } + // N.B. is_readable_stdin is a heuristic! Part of the issue is that a + // lot of "exec process" APIs will open a stdin pipe even though stdin + // isn't really being used. ripgrep then thinks it should search stdin + // and one gets the appearance of it hanging. It's a terrible failure + // mode, but there really is no good way to mitigate it. It's just a + // consequence of letting the user type 'rg foo' and "guessing" that + // they meant to search the CWD. + let use_cwd = !grep::cli::is_readable_stdin() + || state.stdin_consumed + || !matches!(low.mode, Mode::Search(_)); + let (path, is_one_file) = if use_cwd { + (PathBuf::from("./"), false) + } else { + (PathBuf::from("-"), true) + }; + Ok(Paths { paths: vec![path], has_implicit_path: true, is_one_file }) + } + + /// Returns true if ripgrep will only search stdin and nothing else. + fn is_only_stdin(&self) -> bool { + self.paths.len() == 1 && self.paths[0] == Path::new("-") + } +} + +/// The "binary detection" configuration that ripgrep should use. +/// +/// ripgrep actually uses two different binary detection heuristics depending +/// on whether a file is explicitly being searched (e.g., via a CLI argument) +/// or implicitly searched (e.g., via directory traversal). In general, the +/// former can never use a heuristic that lets it "quit" seaching before +/// either getting EOF or finding a match. (Because doing otherwise would be +/// considered a filter, and ripgrep follows the rule that an explicitly given +/// file is always searched.) +#[derive(Debug)] +struct BinaryDetection { + explicit: grep::searcher::BinaryDetection, + implicit: grep::searcher::BinaryDetection, +} + +impl BinaryDetection { + /// Determines the correct binary detection mode from low-level arguments. + fn from_low_args(_: &State, low: &LowArgs) -> BinaryDetection { + let none = matches!(low.binary, BinaryMode::AsText) || low.null_data; + let convert = matches!(low.binary, BinaryMode::SearchAndSuppress); + let explicit = if none { + grep::searcher::BinaryDetection::none() + } else { + grep::searcher::BinaryDetection::convert(b'\x00') + }; + let implicit = if none { + grep::searcher::BinaryDetection::none() + } else if convert { + grep::searcher::BinaryDetection::convert(b'\x00') + } else { + grep::searcher::BinaryDetection::quit(b'\x00') + }; + BinaryDetection { explicit, implicit } + } +} + +/// Builds the file type matcher from low level arguments. +fn types(low: &LowArgs) -> anyhow::Result { + let mut builder = ignore::types::TypesBuilder::new(); + builder.add_defaults(); + for tychange in low.type_changes.iter() { + match tychange { + TypeChange::Clear { ref name } => { + builder.clear(name); + } + TypeChange::Add { ref def } => { + builder.add_def(def)?; + } + TypeChange::Select { ref name } => { + builder.select(name); + } + TypeChange::Negate { ref name } => { + builder.negate(name); + } + } + } + Ok(builder.build()?) +} + +/// Builds the glob "override" matcher from the CLI `-g/--glob` and `--iglob` +/// flags. +fn globs( + state: &State, + low: &LowArgs, +) -> anyhow::Result { + if low.globs.is_empty() && low.iglobs.is_empty() { + return Ok(ignore::overrides::Override::empty()); + } + let mut builder = ignore::overrides::OverrideBuilder::new(&state.cwd); + // Make all globs case insensitive with --glob-case-insensitive. + if low.glob_case_insensitive { + builder.case_insensitive(true).unwrap(); + } + for glob in low.globs.iter() { + builder.add(glob)?; + } + // This only enables case insensitivity for subsequent globs. + builder.case_insensitive(true).unwrap(); + for glob in low.iglobs.iter() { + builder.add(&glob)?; + } + Ok(builder.build()?) +} + +/// Builds a glob matcher for all of the preprocessor globs (via `--pre-glob`). +fn preprocessor_globs( + state: &State, + low: &LowArgs, +) -> anyhow::Result { + if low.pre_glob.is_empty() { + return Ok(ignore::overrides::Override::empty()); + } + let mut builder = ignore::overrides::OverrideBuilder::new(&state.cwd); + for glob in low.pre_glob.iter() { + builder.add(glob)?; + } + Ok(builder.build()?) +} + +/// Determines whether stats should be tracked for this search. If so, a stats +/// object is returned. +fn stats(low: &LowArgs) -> Option { + if !matches!(low.mode, Mode::Search(_)) { + return None; + } + if low.stats || matches!(low.mode, Mode::Search(SearchMode::JSON)) { + return Some(grep::printer::Stats::new()); + } + None +} + +/// Pulls out any color specs provided by the user and assembles them into one +/// single configuration. +fn take_color_specs(_: &mut State, low: &mut LowArgs) -> ColorSpecs { + let mut specs = grep::printer::default_color_specs(); + for spec in low.colors.drain(..) { + specs.push(spec); + } + ColorSpecs::new(&specs) +} + +/// Pulls out the necessary info from the low arguments to build a full +/// hyperlink configuration. +fn take_hyperlink_config( + _: &mut State, + low: &mut LowArgs, +) -> anyhow::Result { + let mut env = grep::printer::HyperlinkEnvironment::new(); + if let Some(hostname) = hostname(low.hostname_bin.as_deref()) { + log::debug!("found hostname for hyperlink configuration: {hostname}"); + env.host(Some(hostname)); + } + if let Some(wsl_prefix) = wsl_prefix() { + log::debug!( + "found wsl_prefix for hyperlink configuration: {wsl_prefix}" + ); + env.wsl_prefix(Some(wsl_prefix)); + } + let fmt = std::mem::take(&mut low.hyperlink_format); + log::debug!("hyperlink format: {:?}", fmt.to_string()); + Ok(grep::printer::HyperlinkConfig::new(env, fmt)) +} + +/// Attempts to discover the current working directory. +/// +/// This mostly just defers to the standard library, however, such things will +/// fail if ripgrep is in a directory that no longer exists. We attempt some +/// fallback mechanisms, such as querying the PWD environment variable, but +/// otherwise return an error. +fn current_dir() -> anyhow::Result { + let err = match std::env::current_dir() { + Err(err) => err, + Ok(cwd) => return Ok(cwd), + }; + if let Some(cwd) = std::env::var_os("PWD") { + if !cwd.is_empty() { + return Ok(PathBuf::from(cwd)); + } + } + anyhow::bail!( + "failed to get current working directory: {err}\n\ + did your CWD get deleted?", + ) +} + +/// Retrieves the hostname that should be used wherever a hostname is required. +/// +/// Currently, this is only used in the hyperlink format. +/// +/// This works by first running the given binary program (if present and with +/// no arguments) to get the hostname after trimming leading and trailing +/// whitespace. If that fails for any reason, then it falls back to getting +/// the hostname via platform specific means (e.g., `gethostname` on Unix). +/// +/// The purpose of `bin` is to make it possible for end users to override how +/// ripgrep determines the hostname. +fn hostname(bin: Option<&Path>) -> Option { + let Some(bin) = bin else { return platform_hostname() }; + let bin = match grep::cli::resolve_binary(bin) { + Ok(bin) => bin, + Err(err) => { + log::debug!( + "failed to run command '{bin:?}' to get hostname \ + (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let mut cmd = std::process::Command::new(&bin); + cmd.stdin(std::process::Stdio::null()); + let rdr = match grep::cli::CommandReader::new(&mut cmd) { + Ok(rdr) => rdr, + Err(err) => { + log::debug!( + "failed to spawn command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let out = match std::io::read_to_string(rdr) { + Ok(out) => out, + Err(err) => { + log::debug!( + "failed to read output from command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let hostname = out.trim(); + if hostname.is_empty() { + log::debug!( + "output from command '{bin:?}' is empty after trimming \ + leading and trailing whitespace (falling back to \ + platform hostname)", + ); + return platform_hostname(); + } + Some(hostname.to_string()) +} + +/// Attempts to get the hostname by using platform specific routines. +/// +/// For example, this will do `gethostname` on Unix and `GetComputerNameExW` on +/// Windows. +fn platform_hostname() -> Option { + let hostname_os = match grep::cli::hostname() { + Ok(x) => x, + Err(err) => { + log::debug!("could not get hostname: {}", err); + return None; + } + }; + let Some(hostname) = hostname_os.to_str() else { + log::debug!( + "got hostname {:?}, but it's not valid UTF-8", + hostname_os + ); + return None; + }; + Some(hostname.to_string()) +} + +/// Returns the value for the `{wslprefix}` variable in a hyperlink format. +/// +/// A WSL prefix is a share/network like thing that is meant to permit Windows +/// applications to open files stored within a WSL drive. +/// +/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running +/// in a Unix environment, then this returns None. +/// +/// See: +fn wsl_prefix() -> Option { + if !cfg!(unix) { + return None; + } + let distro_os = std::env::var_os("WSL_DISTRO_NAME")?; + let Some(distro) = distro_os.to_str() else { + log::debug!( + "found WSL_DISTRO_NAME={:?}, but value is not UTF-8", + distro_os + ); + return None; + }; + Some(format!("wsl$/{distro}")) +} + +/// Possibly suggest another regex engine based on the error message given. +/// +/// This inspects an error resulting from building a Rust regex matcher, and +/// if it's believed to correspond to a syntax error that another engine could +/// handle, then add a message to suggest the use of the engine flag. +fn suggest_other_engine(msg: String) -> String { + if let Some(pcre_msg) = suggest_pcre2(&msg) { + return pcre_msg; + } + msg +} + +/// Possibly suggest PCRE2 based on the error message given. +/// +/// Inspect an error resulting from building a Rust regex matcher, and if it's +/// believed to correspond to a syntax error that PCRE2 could handle, then +/// add a message to suggest the use of -P/--pcre2. +fn suggest_pcre2(msg: &str) -> Option { + if !cfg!(feature = "pcre2") { + return None; + } + if !msg.contains("backreferences") && !msg.contains("look-around") { + None + } else { + Some(format!( + "{msg} + +Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences +and look-around.", + )) + } +} + +/// Possibly suggest multiline mode based on the error message given. +/// +/// Does a bit of a hacky inspection of the given error message, and if it +/// looks like the user tried to type a literal line terminator then it will +/// return a new error message suggesting the use of -U/--multiline. +fn suggest_multiline(msg: String) -> String { + if msg.contains("the literal") && msg.contains("not allowed") { + format!( + "{msg} + +Consider enabling multiline mode with the --multiline flag (or -U for short). +When multiline mode is enabled, new line characters can be matched.", + ) + } else { + msg + } +} diff --git a/crates/core/flags/lowargs.rs b/crates/core/flags/lowargs.rs new file mode 100644 index 0000000000..184c96ae81 --- /dev/null +++ b/crates/core/flags/lowargs.rs @@ -0,0 +1,758 @@ +/*! +Provides the definition of low level arguments from CLI flags. +*/ + +use std::{ + ffi::{OsStr, OsString}, + path::PathBuf, +}; + +use { + bstr::{BString, ByteVec}, + grep::printer::{HyperlinkFormat, UserColorSpec}, +}; + +/// A collection of "low level" arguments. +/// +/// The "low level" here is meant to constrain this type to be as close to the +/// actual CLI flags and arguments as possible. Namely, other than some +/// convenience types to help validate flag values and deal with overrides +/// between flags, these low level arguments do not contain any higher level +/// abstractions. +/// +/// Another self-imposed constraint is that populating low level arguments +/// should not require anything other than validating what the user has +/// provided. For example, low level arguments should not contain a +/// `HyperlinkConfig`, since in order to get a full configuration, one needs to +/// discover the hostname of the current system (which might require running a +/// binary or a syscall). +/// +/// Low level arguments are populated by the parser directly via the `update` +/// method on the corresponding implementation of the `Flag` trait. +#[derive(Debug, Default)] +pub(crate) struct LowArgs { + // Essential arguments. + pub(crate) special: Option, + pub(crate) mode: Mode, + pub(crate) positional: Vec, + pub(crate) patterns: Vec, + // Everything else, sorted lexicographically. + pub(crate) binary: BinaryMode, + pub(crate) boundary: Option, + pub(crate) buffer: BufferMode, + pub(crate) byte_offset: bool, + pub(crate) case: CaseMode, + pub(crate) color: ColorChoice, + pub(crate) colors: Vec, + pub(crate) column: Option, + pub(crate) context: ContextMode, + pub(crate) context_separator: ContextSeparator, + pub(crate) crlf: bool, + pub(crate) dfa_size_limit: Option, + pub(crate) encoding: EncodingMode, + pub(crate) engine: EngineChoice, + pub(crate) field_context_separator: FieldContextSeparator, + pub(crate) field_match_separator: FieldMatchSeparator, + pub(crate) fixed_strings: bool, + pub(crate) follow: bool, + pub(crate) glob_case_insensitive: bool, + pub(crate) globs: Vec, + pub(crate) heading: Option, + pub(crate) hidden: bool, + pub(crate) hostname_bin: Option, + pub(crate) hyperlink_format: HyperlinkFormat, + pub(crate) iglobs: Vec, + pub(crate) ignore_file: Vec, + pub(crate) ignore_file_case_insensitive: bool, + pub(crate) include_zero: bool, + pub(crate) invert_match: bool, + pub(crate) line_number: Option, + pub(crate) logging: Option, + pub(crate) max_columns: Option, + pub(crate) max_columns_preview: bool, + pub(crate) max_count: Option, + pub(crate) max_depth: Option, + pub(crate) max_filesize: Option, + pub(crate) mmap: MmapMode, + pub(crate) multiline: bool, + pub(crate) multiline_dotall: bool, + pub(crate) no_config: bool, + pub(crate) no_ignore_dot: bool, + pub(crate) no_ignore_exclude: bool, + pub(crate) no_ignore_files: bool, + pub(crate) no_ignore_global: bool, + pub(crate) no_ignore_messages: bool, + pub(crate) no_ignore_parent: bool, + pub(crate) no_ignore_vcs: bool, + pub(crate) no_messages: bool, + pub(crate) no_require_git: bool, + pub(crate) no_unicode: bool, + pub(crate) null: bool, + pub(crate) null_data: bool, + pub(crate) one_file_system: bool, + pub(crate) only_matching: bool, + pub(crate) path_separator: Option, + pub(crate) pre: Option, + pub(crate) pre_glob: Vec, + pub(crate) quiet: bool, + pub(crate) regex_size_limit: Option, + pub(crate) replace: Option, + pub(crate) search_zip: bool, + pub(crate) sort: Option, + pub(crate) stats: bool, + pub(crate) stop_on_nonmatch: bool, + pub(crate) threads: Option, + pub(crate) trim: bool, + pub(crate) type_changes: Vec, + pub(crate) unrestricted: usize, + pub(crate) vimgrep: bool, + pub(crate) with_filename: Option, +} + +/// A "special" mode that supercedes everything else. +/// +/// When one of these modes is present, it overrides everything else and causes +/// ripgrep to short-circuit. In particular, we avoid converting low-level +/// argument types into higher level arguments types that can fail for various +/// reasons related to the environment. (Parsing the low-level arguments can +/// fail too, but usually not in a way that can't be worked around by removing +/// the corresponding arguments from the CLI command.) This is overall a hedge +/// to ensure that version and help information are basically always available. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum SpecialMode { + /// Show a condensed version of "help" output. Generally speaking, this + /// shows each flag and an extremely terse description of that flag on + /// a single line. This corresponds to the `-h` flag. + HelpShort, + /// Shows a very verbose version of the "help" output. The docs for some + /// flags will be paragraphs long. This corresponds to the `--help` flag. + HelpLong, + /// Show condensed version information. e.g., `ripgrep x.y.z`. + VersionShort, + /// Show verbose version information. Includes "short" information as well + /// as features included in the build. + VersionLong, + /// Show PCRE2's version information, or an error if this version of + /// ripgrep wasn't compiled with PCRE2 support. + VersionPCRE2, +} + +/// The overall mode that ripgrep should operate in. +/// +/// If ripgrep were designed without the legacy of grep, these would probably +/// be sub-commands? Perhaps not, since they aren't as frequently used. +/// +/// The point of putting these in one enum is that they are all mutually +/// exclusive and override one another. +/// +/// Note that -h/--help and -V/--version are not included in this because +/// they always overrides everything else, regardless of where it appears +/// in the command line. They are treated as "special" modes that short-circuit +/// ripgrep's usual flow. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum Mode { + /// ripgrep will execute a search of some kind. + Search(SearchMode), + /// Show the files that *would* be searched, but don't actually search + /// them. + Files, + /// List all file type definitions configured, including the default file + /// types and any additional file types added to the command line. + Types, + /// Generate various things like the man page and completion files. + Generate(GenerateMode), +} + +impl Default for Mode { + fn default() -> Mode { + Mode::Search(SearchMode::Standard) + } +} + +impl Mode { + /// Update this mode to the new mode while implementing various override + /// semantics. For example, a search mode cannot override a non-search + /// mode. + pub(crate) fn update(&mut self, new: Mode) { + match *self { + // If we're in a search mode, then anything can override it. + Mode::Search(_) => *self = new, + _ => { + // Once we're in a non-search mode, other non-search modes + // can override it. But search modes cannot. So for example, + // `--files -l` will still be Mode::Files. + if !matches!(*self, Mode::Search(_)) { + *self = new; + } + } + } + } +} + +/// The kind of search that ripgrep is going to perform. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum SearchMode { + /// The default standard mode of operation. ripgrep looks for matches and + /// prints them when found. + /// + /// There is no specific flag for this mode since it's the default. But + /// some of the modes below, like JSON, have negation flags like --no-json + /// that let you revert back to this default mode. + Standard, + /// Show files containing at least one match. + FilesWithMatches, + /// Show files that don't contain any matches. + FilesWithoutMatch, + /// Show files containing at least one match and the number of matching + /// lines. + Count, + /// Show files containing at least one match and the total number of + /// matches. + CountMatches, + /// Print matches in a JSON lines format. + JSON, +} + +/// The thing to generate via the --generate flag. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum GenerateMode { + /// Generate the raw roff used for the man page. + Man, + /// Completions for bash. + CompleteBash, + /// Completions for zsh. + CompleteZsh, + /// Completions for fish. + CompleteFish, + /// Completions for PowerShell. + CompletePowerShell, +} + +/// Indicates how ripgrep should treat binary data. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum BinaryMode { + /// Automatically determine the binary mode to use. Essentially, when + /// a file is searched explicitly, then it will be searched using the + /// `SearchAndSuppress` strategy. Otherwise, it will be searched in a way + /// that attempts to skip binary files as much as possible. That is, once + /// a file is classified as binary, searching will immediately stop. + Auto, + /// Search files even when they have binary data, but if a match is found, + /// suppress it and emit a warning. + /// + /// In this mode, `NUL` bytes are replaced with line terminators. This is + /// a heuristic meant to reduce heap memory usage, since true binary data + /// isn't line oriented. If one attempts to treat such data as line + /// oriented, then one may wind up with impractically large lines. For + /// example, many binary files contain very long runs of NUL bytes. + SearchAndSuppress, + /// Treat all files as if they were plain text. There's no skipping and no + /// replacement of `NUL` bytes with line terminators. + AsText, +} + +impl Default for BinaryMode { + fn default() -> BinaryMode { + BinaryMode::Auto + } +} + +/// Indicates what kind of boundary mode to use (line or word). +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum BoundaryMode { + /// Only allow matches when surrounded by line bounaries. + Line, + /// Only allow matches when surrounded by word bounaries. + Word, +} + +/// Indicates the buffer mode that ripgrep should use when printing output. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum BufferMode { + /// Select the buffer mode, 'line' or 'block', automatically based on + /// whether stdout is connected to a tty. + Auto, + /// Flush the output buffer whenever a line terminator is seen. + /// + /// This is useful when wants to see search results more immediately, + /// for example, with `tail -f`. + Line, + /// Flush the output buffer whenever it reaches some fixed size. The size + /// is usually big enough to hold many lines. + /// + /// This is useful for maximum performance, particularly when printing + /// lots of results. + Block, +} + +impl Default for BufferMode { + fn default() -> BufferMode { + BufferMode::Auto + } +} + +/// Indicates the case mode for how to interpret all patterns given to ripgrep. +/// +/// The default is `Sensitive`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum CaseMode { + /// Patterns are matched case sensitively. i.e., `a` does not match `A`. + Sensitive, + /// Patterns are matched case insensitively. i.e., `a` does match `A`. + Insensitive, + /// Patterns are automatically matched case insensitively only when they + /// consist of all lowercase literal characters. For example, the pattern + /// `a` will match `A` but `A` will not match `a`. + Smart, +} + +impl Default for CaseMode { + fn default() -> CaseMode { + CaseMode::Sensitive + } +} + +/// Indicates whether ripgrep should include color/hyperlinks in its output. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum ColorChoice { + /// Color and hyperlinks will never be used. + Never, + /// Color and hyperlinks will be used only when stdout is connected to a + /// tty. + Auto, + /// Color will always be used. + Always, + /// Color will always be used and only ANSI escapes will be used. + /// + /// This only makes sense in the context of legacy Windows console APIs. + /// At time of writing, ripgrep will try to use the legacy console APIs + /// if ANSI coloring isn't believed to be possible. This option will force + /// ripgrep to use ANSI coloring. + Ansi, +} + +impl Default for ColorChoice { + fn default() -> ColorChoice { + ColorChoice::Auto + } +} + +impl ColorChoice { + /// Convert this color choice to the corresponding termcolor type. + pub(crate) fn to_termcolor(&self) -> termcolor::ColorChoice { + match *self { + ColorChoice::Never => termcolor::ColorChoice::Never, + ColorChoice::Auto => termcolor::ColorChoice::Auto, + ColorChoice::Always => termcolor::ColorChoice::Always, + ColorChoice::Ansi => termcolor::ColorChoice::AlwaysAnsi, + } + } +} + +/// Indicates the line context options ripgrep should use for output. +/// +/// The default is no context at all. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum ContextMode { + /// All lines will be printed. That is, the context is unbounded. + Passthru, + /// Only show a certain number of lines before and after each match. + Limited(ContextModeLimited), +} + +impl Default for ContextMode { + fn default() -> ContextMode { + ContextMode::Limited(ContextModeLimited::default()) + } +} + +impl ContextMode { + /// Set the "before" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "before" and `0` for + /// "after." + pub(crate) fn set_before(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: Some(lines), + after: None, + both: None, + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut before, + .. + }) => *before = Some(lines), + } + } + + /// Set the "after" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "after" and `0` for + /// "before." + pub(crate) fn set_after(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: None, + after: Some(lines), + both: None, + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut after, .. + }) => *after = Some(lines), + } + } + + /// Set the "both" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "both" and `None` for + /// "before" and "after". + pub(crate) fn set_both(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: None, + after: None, + both: Some(lines), + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut both, .. + }) => *both = Some(lines), + } + } + + /// A convenience function for use in tests that returns the limited + /// context. If this mode isn't limited, then it panics. + #[cfg(test)] + pub(crate) fn get_limited(&self) -> (usize, usize) { + match *self { + ContextMode::Passthru => unreachable!("context mode is passthru"), + ContextMode::Limited(ref limited) => limited.get(), + } + } +} + +/// A context mode for a finite number of lines. +/// +/// Namely, this indicates that a specific number of lines (possibly zero) +/// should be shown before and/or after each matching line. +/// +/// Note that there is a subtle difference between `Some(0)` and `None`. In the +/// former case, it happens when `0` is given explicitly, where as `None` is +/// the default value and occurs when no value is specified. +/// +/// `both` is only set by the -C/--context flag. The reason why we don't just +/// set before = after = --context is because the before and after context +/// settings always take precedent over the -C/--context setting, regardless of +/// order. Thus, we need to keep track of them separately. +#[derive(Debug, Default, Eq, PartialEq)] +pub(crate) struct ContextModeLimited { + before: Option, + after: Option, + both: Option, +} + +impl ContextModeLimited { + /// Returns the specific number of contextual lines that should be shown + /// around each match. This takes proper precedent into account, i.e., + /// that `before` and `after` both partially override `both` in all cases. + /// + /// By default, this returns `(0, 0)`. + pub(crate) fn get(&self) -> (usize, usize) { + let (mut before, mut after) = + self.both.map(|lines| (lines, lines)).unwrap_or((0, 0)); + // --before and --after always override --context, regardless + // of where they appear relative to each other. + if let Some(lines) = self.before { + before = lines; + } + if let Some(lines) = self.after { + after = lines; + } + (before, after) + } +} + +/// Represents the separator to use between non-contiguous sections of +/// contextual lines. +/// +/// The default is `--`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct ContextSeparator(Option); + +impl Default for ContextSeparator { + fn default() -> ContextSeparator { + ContextSeparator(Some(BString::from("--"))) + } +} + +impl ContextSeparator { + /// Create a new context separator from the user provided argument. This + /// handles unescaping. + pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(ContextSeparator(Some(Vec::unescape_bytes(string).into()))) + } + + /// Creates a new separator that intructs the printer to disable contextual + /// separators entirely. + pub(crate) fn disabled() -> ContextSeparator { + ContextSeparator(None) + } + + /// Return the raw bytes of this separator. + /// + /// If context separators were disabled, then this returns `None`. + /// + /// Note that this may return a `Some` variant with zero bytes. + pub(crate) fn into_bytes(self) -> Option> { + self.0.map(|sep| sep.into()) + } +} + +/// The encoding mode the searcher will use. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum EncodingMode { + /// Use only BOM sniffing to auto-detect an encoding. + Auto, + /// Use an explicit encoding forcefully, but let BOM sniffing override it. + Some(grep::searcher::Encoding), + /// Use no explicit encoding and disable all BOM sniffing. This will + /// always result in searching the raw bytes, regardless of their + /// true encoding. + Disabled, +} + +impl Default for EncodingMode { + fn default() -> EncodingMode { + EncodingMode::Auto + } +} + +/// The regex engine to use. +/// +/// The default is `Default`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum EngineChoice { + /// Uses the default regex engine: Rust's `regex` crate. + /// + /// (Well, technically it uses `regex-automata`, but `regex-automata` is + /// the implementation of the `regex` crate.) + Default, + /// Dynamically select the right engine to use. + /// + /// This works by trying to use the default engine, and if the pattern does + /// not compile, it switches over to the PCRE2 engine if it's available. + Auto, + /// Uses the PCRE2 regex engine if it's available. + PCRE2, +} + +impl Default for EngineChoice { + fn default() -> EngineChoice { + EngineChoice::Default + } +} + +/// The field context separator to use to between metadata for each contextual +/// line. +/// +/// The default is `-`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct FieldContextSeparator(BString); + +impl Default for FieldContextSeparator { + fn default() -> FieldContextSeparator { + FieldContextSeparator(BString::from("-")) + } +} + +impl FieldContextSeparator { + /// Create a new separator from the given argument value provided by the + /// user. Unescaping it automatically handled. + pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(FieldContextSeparator(Vec::unescape_bytes(string).into())) + } + + /// Return the raw bytes of this separator. + /// + /// Note that this may return an empty `Vec`. + pub(crate) fn into_bytes(self) -> Vec { + self.0.into() + } +} + +/// The field match separator to use to between metadata for each matching +/// line. +/// +/// The default is `:`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct FieldMatchSeparator(BString); + +impl Default for FieldMatchSeparator { + fn default() -> FieldMatchSeparator { + FieldMatchSeparator(BString::from(":")) + } +} + +impl FieldMatchSeparator { + /// Create a new separator from the given argument value provided by the + /// user. Unescaping it automatically handled. + pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(FieldMatchSeparator(Vec::unescape_bytes(string).into())) + } + + /// Return the raw bytes of this separator. + /// + /// Note that this may return an empty `Vec`. + pub(crate) fn into_bytes(self) -> Vec { + self.0.into() + } +} + +/// The type of logging to do. `Debug` emits some details while `Trace` emits +/// much more. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum LoggingMode { + Debug, + Trace, +} + +/// Indicates when to use memory maps. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum MmapMode { + /// This instructs ripgrep to use heuristics for selecting when to and not + /// to use memory maps for searching. + Auto, + /// This instructs ripgrep to always try memory maps when possible. (Memory + /// maps are not possible to use in all circumstances, for example, for + /// virtual files.) + AlwaysTryMmap, + /// Never use memory maps under any circumstances. This includes even + /// when multi-line search is enabled where ripgrep will read the entire + /// contents of a file on to the heap before searching it. + Never, +} + +impl Default for MmapMode { + fn default() -> MmapMode { + MmapMode::Auto + } +} + +/// Represents a source of patterns that ripgrep should search for. +/// +/// The reason to unify these is so that we can retain the order of `-f/--flag` +/// and `-e/--regexp` flags relative to one another. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum PatternSource { + /// Comes from the `-e/--regexp` flag. + Regexp(String), + /// Comes from the `-f/--file` flag. + File(PathBuf), +} + +/// The sort criteria, if present. +#[derive(Debug, Eq, PartialEq)] +pub(crate) struct SortMode { + /// Whether to reverse the sort criteria (i.e., descending order). + pub(crate) reverse: bool, + /// The actual sorting criteria. + pub(crate) kind: SortModeKind, +} + +/// The criteria to use for sorting. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum SortModeKind { + /// Sort by path. + Path, + /// Sort by last modified time. + LastModified, + /// Sort by last accessed time. + LastAccessed, + /// Sort by creation time. + Created, +} + +impl SortMode { + /// Checks whether the selected sort mode is supported. If it isn't, an + /// error (hopefully explaining why) is returned. + pub(crate) fn supported(&self) -> anyhow::Result<()> { + match self.kind { + SortModeKind::Path => Ok(()), + SortModeKind::LastModified => { + let md = std::env::current_exe() + .and_then(|p| p.metadata()) + .and_then(|md| md.modified()); + let Err(err) = md else { return Ok(()) }; + anyhow::bail!( + "sorting by last modified isn't supported: {err}" + ); + } + SortModeKind::LastAccessed => { + let md = std::env::current_exe() + .and_then(|p| p.metadata()) + .and_then(|md| md.accessed()); + let Err(err) = md else { return Ok(()) }; + anyhow::bail!( + "sorting by last accessed isn't supported: {err}" + ); + } + SortModeKind::Created => { + let md = std::env::current_exe() + .and_then(|p| p.metadata()) + .and_then(|md| md.created()); + let Err(err) = md else { return Ok(()) }; + anyhow::bail!( + "sorting by creation time isn't supported: {err}" + ); + } + } + } +} + +/// A single instance of either a change or a selection of one ripgrep's +/// file types. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum TypeChange { + /// Clear the given type from ripgrep. + Clear { name: String }, + /// Add the given type definition (name and glob) to ripgrep. + Add { def: String }, + /// Select the given type for filtering. + Select { name: String }, + /// Select the given type for filtering but negate it. + Negate { name: String }, +} diff --git a/crates/core/flags/mod.rs b/crates/core/flags/mod.rs new file mode 100644 index 0000000000..aefe1638bd --- /dev/null +++ b/crates/core/flags/mod.rs @@ -0,0 +1,282 @@ +/*! +Defines ripgrep's command line interface. + +This modules deals with everything involving ripgrep's flags and positional +arguments. This includes generating shell completions, `--help` output and even +ripgrep's man page. It's also responsible for parsing and validating every +flag (including reading ripgrep's config file), and manages the contact points +between these flags and ripgrep's cast of supporting libraries. For example, +once [`HiArgs`] has been created, it knows how to create a multi threaded +recursive directory traverser. +*/ +use std::{ + ffi::OsString, + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +pub(crate) use crate::flags::{ + complete::{ + bash::generate as generate_complete_bash, + fish::generate as generate_complete_fish, + powershell::generate as generate_complete_powershell, + zsh::generate as generate_complete_zsh, + }, + doc::{ + help::{ + generate_long as generate_help_long, + generate_short as generate_help_short, + }, + man::generate as generate_man_page, + version::{ + generate_long as generate_version_long, + generate_short as generate_version_short, + }, + }, + hiargs::HiArgs, + lowargs::{GenerateMode, Mode, SearchMode, SpecialMode}, + parse::{parse, ParseResult}, +}; + +mod complete; +mod config; +mod defs; +mod doc; +mod hiargs; +mod lowargs; +mod parse; + +/// A trait that encapsulates the definition of an optional flag for ripgrep. +/// +/// This trait is meant to be used via dynamic dispatch. Namely, the `defs` +/// module provides a single global slice of `&dyn Flag` values correspondings +/// to all of the flags in ripgrep. +/// +/// ripgrep's required positional arguments are handled by the parser and by +/// the conversion from low-level arguments to high level arguments. Namely, +/// all of ripgrep's positional arguments are treated as file paths, except +/// in certain circumstances where the first argument is treated as a regex +/// pattern. +/// +/// Note that each implementation of this trait requires a long flag name, +/// but can also optionally have a short version and even a negation flag. +/// For example, the `-E/--encoding` flag accepts a value, but it also has a +/// `--no-encoding` negation flag for reverting back to "automatic" encoding +/// detection. All three of `-E`, `--encoding` and `--no-encoding` are provided +/// by a single implementation of this trait. +/// +/// ripgrep only supports flags that are switches or flags that accept a single +/// value. Flags that accept multiple values are an unsupported abberation. +trait Flag: Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static { + /// Returns true if this flag is a switch. When a flag is a switch, the + /// CLI parser will look for a value after the flag is seen. + fn is_switch(&self) -> bool; + + /// A short single byte name for this flag. This returns `None` by default, + /// which signifies that the flag has no short name. + /// + /// The byte returned must be an ASCII codepoint that is a `.` or is + /// alpha-numeric. + fn name_short(&self) -> Option { + None + } + + /// Returns the long name of this flag. All flags must have a "long" name. + /// + /// The long name must be at least 2 bytes, and all of its bytes must be + /// ASCII codepoints that are either `-` or alpha-numeric. + fn name_long(&self) -> &'static str; + + /// Returns a list of aliases for this flag. + /// + /// The aliases must follow the same rules as `Flag::name_long`. + /// + /// By default, an empty slice is returned. + fn aliases(&self) -> &'static [&'static str] { + &[] + } + + /// Returns a negated name for this flag. The negation of a flag is + /// intended to have the opposite meaning of a flag or to otherwise turn + /// something "off" or revert it to its default behavior. + /// + /// Negated flags are not listed in their own section in the `-h/--help` + /// output or man page. Instead, they are automatically mentioned at the + /// end of the documentation section of the flag they negated. + /// + /// The aliases must follow the same rules as `Flag::name_long`. + /// + /// By default, a flag has no negation and this returns `None`. + fn name_negated(&self) -> Option<&'static str> { + None + } + + /// Returns the variable name describing the type of value this flag + /// accepts. This should always be set for non-switch flags and never set + /// for switch flags. + /// + /// For example, the `--max-count` flag has its variable name set to `NUM`. + /// + /// The convention is to capitalize variable names. + /// + /// By default this returns `None`. + fn doc_variable(&self) -> Option<&'static str> { + None + } + + /// Returns the category of this flag. + /// + /// Every flag must have a single category. Categories are used to organize + /// flags in the generated documentation. + fn doc_category(&self) -> Category; + + /// A (very) short documentation string describing what this flag does. + /// + /// This may sacrifice "proper English" in order to be as terse as + /// possible. Generally, we try to ensure that `rg -h` doesn't have any + /// lines that exceed 79 columns. + fn doc_short(&self) -> &'static str; + + /// A (possibly very) longer documentation string describing in full + /// detail what this flag does. This should be in mandoc/mdoc format. + fn doc_long(&self) -> &'static str; + + /// If this is a non-switch flag that accepts a small set of specific + /// values, then this should list them. + /// + /// This returns an empty slice by default. + fn doc_choices(&self) -> &'static [&'static str] { + &[] + } + + /// Given the parsed value (which might just be a switch), this should + /// update the state in `args` based on the value given for this flag. + /// + /// This may update state for other flags as appropriate. + /// + /// The `-V/--version` and `-h/--help` flags are treated specially in the + /// parser and should do nothing here. + /// + /// By convention, implementations should generally not try to "do" + /// anything other than validate the value given. For example, the + /// implementation for `--hostname-bin` should not try to resolve the + /// hostname to use by running the binary provided. That should be saved + /// for a later step. This convention is used to ensure that getting the + /// low-level arguments is as reliable and quick as possible. It also + /// ensures that "doing something" occurs a minimal number of times. For + /// example, by avoiding trying to find the hostname here, we can do it + /// once later no matter how many times `--hostname-bin` is provided. + /// + /// Implementations should not include the flag name in the error message + /// returned. The flag name is included automatically by the parser. + fn update( + &self, + value: FlagValue, + args: &mut crate::flags::lowargs::LowArgs, + ) -> anyhow::Result<()>; +} + +/// The category that a flag belongs to. +/// +/// Categories are used to organize flags into "logical" groups in the +/// generated documentation. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +enum Category { + /// Flags related to how ripgrep reads its input. Its "input" generally + /// consists of the patterns it is trying to match and the haystacks it is + /// trying to search. + Input, + /// Flags related to the operation of the search itself. For example, + /// whether case insensitive matching is enabled. + Search, + /// Flags related to how ripgrep filters haystacks. For example, whether + /// to respect gitignore files or not. + Filter, + /// Flags related to how ripgrep shows its search results. For example, + /// whether to show line numbers or not. + Output, + /// Flags related to changing ripgrep's output at a more fundamental level. + /// For example, flags like `--count` suppress printing of individual + /// lines, and instead just print the total count of matches for each file + /// searched. + OutputModes, + /// Flags related to logging behavior such as emitting non-fatal error + /// messages or printing search statistics. + Logging, + /// Other behaviors not related to ripgrep's core functionality. For + /// example, printing the file type globbing rules, or printing the list + /// of files ripgrep would search without actually searching them. + OtherBehaviors, +} + +impl Category { + /// Returns a string representation of this category. + /// + /// This string is the name of the variable used in various templates for + /// generated documentation. This name can be used for interpolation. + fn as_str(&self) -> &'static str { + match *self { + Category::Input => "input", + Category::Search => "search", + Category::Filter => "filter", + Category::Output => "output", + Category::OutputModes => "output-modes", + Category::Logging => "logging", + Category::OtherBehaviors => "other-behaviors", + } + } +} + +/// Represents a value parsed from the command line. +/// +/// This doesn't include the corresponding flag, but values come in one of +/// two forms: a switch (on or off) or an arbitrary value. +/// +/// Note that the CLI doesn't directly support negated switches. For example, +/// you can'd do anything like `-n=false` or any of that nonsense. Instead, +/// the CLI parser knows about which flag names are negations and which aren't +/// (courtesy of the `Flag` trait). If a flag given is known as a negation, +/// then a `FlagValue::Switch(false)` value is passed into `Flag::update`. +#[derive(Debug)] +enum FlagValue { + /// A flag that is either on or off. + Switch(bool), + /// A flag that comes with an arbitrary user value. + Value(OsString), +} + +impl FlagValue { + /// Return the yes or no value of this switch. + /// + /// If this flag value is not a switch, then this panics. + /// + /// This is useful when writing the implementation of `Flag::update`. + /// namely, callers usually know whether a switch or a value is expected. + /// If a flag is something different, then it indicates a bug, and thus a + /// panic is acceptable. + fn unwrap_switch(self) -> bool { + match self { + FlagValue::Switch(yes) => yes, + FlagValue::Value(_) => { + unreachable!("got flag value but expected switch") + } + } + } + + /// Return the user provided value of this flag. + /// + /// If this flag is a switch, then this panics. + /// + /// This is useful when writing the implementation of `Flag::update`. + /// namely, callers usually know whether a switch or a value is expected. + /// If a flag is something different, then it indicates a bug, and thus a + /// panic is acceptable. + fn unwrap_value(self) -> OsString { + match self { + FlagValue::Switch(_) => { + unreachable!("got switch but expected flag value") + } + FlagValue::Value(v) => v, + } + } +} diff --git a/crates/core/flags/parse.rs b/crates/core/flags/parse.rs new file mode 100644 index 0000000000..8151110daf --- /dev/null +++ b/crates/core/flags/parse.rs @@ -0,0 +1,392 @@ +/*! +Parses command line arguments into a structured and typed representation. +*/ + +use std::ffi::OsString; + +use anyhow::Context; + +use crate::flags::{ + defs::FLAGS, + hiargs::HiArgs, + lowargs::{LoggingMode, LowArgs, SpecialMode}, + Flag, FlagValue, +}; + +/// The result of parsing CLI arguments. +/// +/// This is basically a `anyhow::Result`, but with one extra variant that is +/// inhabited whenever ripgrep should execute a "special" mode. That is, when a +/// user provides the `-h/--help` or `-V/--version` flags. +/// +/// This special variant exists to allow CLI parsing to short circuit as +/// quickly as is reasonable. For example, it lets CLI parsing avoid reading +/// ripgrep's configuration and converting low level arguments into a higher +/// level representation. +#[derive(Debug)] +pub(crate) enum ParseResult { + Special(SpecialMode), + Ok(T), + Err(anyhow::Error), +} + +impl ParseResult { + /// If this result is `Ok`, then apply `then` to it. Otherwise, return this + /// result unchanged. + fn and_then( + self, + mut then: impl FnMut(T) -> ParseResult, + ) -> ParseResult { + match self { + ParseResult::Special(mode) => ParseResult::Special(mode), + ParseResult::Ok(t) => then(t), + ParseResult::Err(err) => ParseResult::Err(err), + } + } +} + +/// Parse CLI arguments and convert then to their high level representation. +pub(crate) fn parse() -> ParseResult { + parse_low().and_then(|low| match HiArgs::from_low_args(low) { + Ok(hi) => ParseResult::Ok(hi), + Err(err) => ParseResult::Err(err), + }) +} + +/// Parse CLI arguments only into their low level representation. +/// +/// This takes configuration into account. That is, it will try to read +/// `RIPGREP_CONFIG_PATH` and prepend any arguments found there to the +/// arguments passed to this process. +/// +/// This will also set one-time global state flags, such as the log level and +/// whether messages should be printed. +fn parse_low() -> ParseResult { + if let Err(err) = crate::logger::Logger::init() { + let err = anyhow::anyhow!("failed to initialize logger: {err}"); + return ParseResult::Err(err); + } + + let parser = Parser::new(); + let mut low = LowArgs::default(); + if let Err(err) = parser.parse(std::env::args_os().skip(1), &mut low) { + return ParseResult::Err(err); + } + // Even though we haven't parsed the config file yet (assuming it exists), + // we can still use the arguments given on the CLI to setup ripgrep's + // logging preferences. Even if the config file changes them in some way, + // it's really the best we can do. This way, for example, folks can pass + // `--trace` and see any messages logged during config file parsing. + set_log_levels(&low); + // Before we try to take configuration into account, we can bail early + // if a special mode was enabled. This is basically only for version and + // help output which shouldn't be impacted by extra configuration. + if let Some(special) = low.special.take() { + return ParseResult::Special(special); + } + // If the end user says no config, then respect it. + if low.no_config { + log::debug!("not reading config files because --no-config is present"); + return ParseResult::Ok(low); + } + // Look for arguments from a config file. If we got nothing (whether the + // file is empty or RIPGREP_CONFIG_PATH wasn't set), then we don't need + // to re-parse. + let config_args = crate::flags::config::args(); + if config_args.is_empty() { + log::debug!("no extra arguments found from configuration file"); + return ParseResult::Ok(low); + } + // The final arguments are just the arguments from the CLI appending to + // the end of the config arguments. + let mut final_args = config_args; + final_args.extend(std::env::args_os().skip(1)); + + // Now do the CLI parsing dance again. + let mut low = LowArgs::default(); + if let Err(err) = parser.parse(final_args.into_iter(), &mut low) { + return ParseResult::Err(err); + } + // Reset the message and logging levels, since they could have changed. + set_log_levels(&low); + ParseResult::Ok(low) +} + +/// Sets global state flags that control logging based on low-level arguments. +fn set_log_levels(low: &LowArgs) { + crate::messages::set_messages(!low.no_messages); + crate::messages::set_ignore_messages(!low.no_ignore_messages); + match low.logging { + Some(LoggingMode::Trace) => { + log::set_max_level(log::LevelFilter::Trace) + } + Some(LoggingMode::Debug) => { + log::set_max_level(log::LevelFilter::Debug) + } + None => log::set_max_level(log::LevelFilter::Warn), + } +} + +/// Parse the sequence of CLI arguments given a low level typed set of +/// arguments. +/// +/// This is exposed for testing that the correct low-level arguments are parsed +/// from a CLI. It just runs the parser once over the CLI arguments. It doesn't +/// setup logging or read from a config file. +/// +/// This assumes the iterator given does *not* begin with the binary name. +#[cfg(test)] +pub(crate) fn parse_low_raw( + rawargs: impl IntoIterator>, +) -> anyhow::Result { + let mut args = LowArgs::default(); + Parser::new().parse(rawargs, &mut args)?; + Ok(args) +} + +/// Return the metadata for the flag of the given name. +pub(super) fn lookup(name: &str) -> Option<&'static dyn Flag> { + // N.B. Creating a new parser might look expensive, but it only builds + // the lookup trie exactly once. That is, we get a `&'static Parser` from + // `Parser::new()`. + match Parser::new().find_long(name) { + FlagLookup::Match(&FlagInfo { flag, .. }) => Some(flag), + _ => None, + } +} + +/// A parser for turning a sequence of command line arguments into a more +/// strictly typed set of arguments. +#[derive(Debug)] +struct Parser { + /// A single map that contains all possible flag names. This includes + /// short and long names, aliases and negations. This maps those names to + /// indices into `info`. + map: FlagMap, + /// A map from IDs returned by the `map` to the corresponding flag + /// information. + info: Vec, +} + +impl Parser { + /// Create a new parser. + /// + /// This always creates the same parser and only does it once. Callers may + /// call this repeatedly, and the parser will only be built once. + fn new() -> &'static Parser { + use std::sync::OnceLock; + + // Since a parser's state is immutable and completely determined by + // FLAGS, and since FLAGS is a constant, we can initialize it exactly + // once. + static P: OnceLock = OnceLock::new(); + P.get_or_init(|| { + let mut infos = vec![]; + for &flag in FLAGS.iter() { + infos.push(FlagInfo { + flag, + name: Ok(flag.name_long()), + kind: FlagInfoKind::Standard, + }); + for alias in flag.aliases() { + infos.push(FlagInfo { + flag, + name: Ok(alias), + kind: FlagInfoKind::Alias, + }); + } + if let Some(byte) = flag.name_short() { + infos.push(FlagInfo { + flag, + name: Err(byte), + kind: FlagInfoKind::Standard, + }); + } + if let Some(name) = flag.name_negated() { + infos.push(FlagInfo { + flag, + name: Ok(name), + kind: FlagInfoKind::Negated, + }); + } + } + let map = FlagMap::new(&infos); + Parser { map, info: infos } + }) + } + + /// Parse the given CLI arguments into a low level representation. + /// + /// The iterator given should *not* start with the binary name. + fn parse(&self, rawargs: I, args: &mut LowArgs) -> anyhow::Result<()> + where + I: IntoIterator, + O: Into, + { + let mut p = lexopt::Parser::from_args(rawargs); + while let Some(arg) = p.next().context("invalid CLI arguments")? { + let lookup = match arg { + lexopt::Arg::Value(value) => { + args.positional.push(value); + continue; + } + lexopt::Arg::Short(ch) if ch == 'h' => { + // Special case -h/--help since behavior is different + // based on whether short or long flag is given. + args.special = Some(SpecialMode::HelpShort); + continue; + } + lexopt::Arg::Short(ch) if ch == 'V' => { + // Special case -V/--version since behavior is different + // based on whether short or long flag is given. + args.special = Some(SpecialMode::VersionShort); + continue; + } + lexopt::Arg::Short(ch) => self.find_short(ch), + lexopt::Arg::Long(name) if name == "help" => { + // Special case -h/--help since behavior is different + // based on whether short or long flag is given. + args.special = Some(SpecialMode::HelpLong); + continue; + } + lexopt::Arg::Long(name) if name == "version" => { + // Special case -V/--version since behavior is different + // based on whether short or long flag is given. + args.special = Some(SpecialMode::VersionLong); + continue; + } + lexopt::Arg::Long(name) => self.find_long(name), + }; + let mat = match lookup { + FlagLookup::Match(mat) => mat, + FlagLookup::UnrecognizedShort(name) => { + anyhow::bail!("unrecognized flag -{name}") + } + FlagLookup::UnrecognizedLong(name) => { + anyhow::bail!("unrecognized flag --{name}") + } + }; + let value = if matches!(mat.kind, FlagInfoKind::Negated) { + // Negated flags are always switches, even if the non-negated + // flag is not. For example, --context-separator accepts a + // value, but --no-context-separator does not. + FlagValue::Switch(false) + } else if mat.flag.is_switch() { + FlagValue::Switch(true) + } else { + FlagValue::Value(p.value().with_context(|| { + format!("missing value for flag {mat}") + })?) + }; + mat.flag + .update(value, args) + .with_context(|| format!("error parsing flag {mat}"))?; + } + Ok(()) + } + + /// Look for a flag by its short name. + fn find_short(&self, ch: char) -> FlagLookup<'_> { + if !ch.is_ascii() { + return FlagLookup::UnrecognizedShort(ch); + } + let byte = u8::try_from(ch).unwrap(); + let Some(index) = self.map.find(&[byte]) else { + return FlagLookup::UnrecognizedShort(ch); + }; + FlagLookup::Match(&self.info[index]) + } + + /// Look for a flag by its long name. + /// + /// This also works for aliases and negated names. + fn find_long(&self, name: &str) -> FlagLookup<'_> { + let Some(index) = self.map.find(name.as_bytes()) else { + return FlagLookup::UnrecognizedLong(name.to_string()); + }; + FlagLookup::Match(&self.info[index]) + } +} + +/// The result of looking up a flag name. +#[derive(Debug)] +enum FlagLookup<'a> { + /// Lookup found a match and the metadata for the flag is attached. + Match(&'a FlagInfo), + /// The given short name is unrecognized. + UnrecognizedShort(char), + /// The given long name is unrecognized. + UnrecognizedLong(String), +} + +/// The info about a flag associated with a flag's ID in the the flag map. +#[derive(Debug)] +struct FlagInfo { + /// The flag object and its associated metadata. + flag: &'static dyn Flag, + /// The actual name that is stored in the Aho-Corasick automaton. When this + /// is a byte, it corresponds to a short single character ASCII flag. The + /// actual pattern that's in the Aho-Corasick automaton is just the single + /// byte. + name: Result<&'static str, u8>, + /// The type of flag that is stored for the corresponding Aho-Corasick + /// pattern. + kind: FlagInfoKind, +} + +/// The kind of flag that is being matched. +#[derive(Debug)] +enum FlagInfoKind { + /// A standard flag, e.g., --passthru. + Standard, + /// A negation of a standard flag, e.g., --no-multiline. + Negated, + /// An alias for a standard flag, e.g., --passthrough. + Alias, +} + +impl std::fmt::Display for FlagInfo { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self.name { + Ok(long) => write!(f, "--{long}"), + Err(short) => write!(f, "-{short}", short = char::from(short)), + } + } +} + +/// A map from flag names (short, long, negated and aliases) to their ID. +/// +/// Once an ID is known, it can be used to look up a flag's metadata in the +/// parser's internal state. +#[derive(Debug)] +struct FlagMap { + map: std::collections::HashMap, usize>, +} + +impl FlagMap { + /// Create a new map of flags for the given flag information. + /// + /// The index of each flag info corresponds to its ID. + fn new(infos: &[FlagInfo]) -> FlagMap { + let mut map = std::collections::HashMap::with_capacity(infos.len()); + for (i, info) in infos.iter().enumerate() { + match info.name { + Ok(name) => { + assert_eq!(None, map.insert(name.as_bytes().to_vec(), i)); + } + Err(byte) => { + assert_eq!(None, map.insert(vec![byte], i)); + } + } + } + FlagMap { map } + } + + /// Look for a match of `name` in the given Aho-Corasick automaton. + /// + /// This only returns a match if the one found has a length equivalent to + /// the length of the name given. + fn find(&self, name: &[u8]) -> Option { + self.map.get(name).copied() + } +} diff --git a/crates/core/subject.rs b/crates/core/haystack.rs similarity index 56% rename from crates/core/subject.rs rename to crates/core/haystack.rs index 077155219c..f88b6dedd3 100644 --- a/crates/core/subject.rs +++ b/crates/core/haystack.rs @@ -1,108 +1,111 @@ -use std::path::Path; +/*! +Defines a builder for haystacks. -/// A configuration for describing how subjects should be built. -#[derive(Clone, Debug)] -struct Config { - strip_dot_prefix: bool, -} +A "haystack" represents something we want to search. It encapsulates the logic +for whether a haystack ought to be searched or not, separate from the standard +ignore rules and other filtering logic. -impl Default for Config { - fn default() -> Config { - Config { strip_dot_prefix: false } - } -} +Effectively, a haystack wraps a directory entry and adds some light application +level logic around it. +*/ + +use std::path::Path; /// A builder for constructing things to search over. #[derive(Clone, Debug)] -pub struct SubjectBuilder { - config: Config, +pub(crate) struct HaystackBuilder { + strip_dot_prefix: bool, } -impl SubjectBuilder { - /// Return a new subject builder with a default configuration. - pub fn new() -> SubjectBuilder { - SubjectBuilder { config: Config::default() } +impl HaystackBuilder { + /// Return a new haystack builder with a default configuration. + pub(crate) fn new() -> HaystackBuilder { + HaystackBuilder { strip_dot_prefix: false } } - /// Create a new subject from a possibly missing directory entry. + /// Create a new haystack from a possibly missing directory entry. /// /// If the directory entry isn't present, then the corresponding error is - /// logged if messages have been configured. Otherwise, if the subject is - /// deemed searchable, then it is returned. - pub fn build_from_result( + /// logged if messages have been configured. Otherwise, if the directory + /// entry is deemed searchable, then it is returned as a haystack. + pub(crate) fn build_from_result( &self, result: Result, - ) -> Option { + ) -> Option { match result { Ok(dent) => self.build(dent), Err(err) => { - err_message!("{}", err); + err_message!("{err}"); None } } } - /// Create a new subject using this builder's configuration. + /// Create a new haystack using this builder's configuration. /// - /// If a subject could not be created or should otherwise not be searched, - /// then this returns `None` after emitting any relevant log messages. - pub fn build(&self, dent: ignore::DirEntry) -> Option { - let subj = - Subject { dent, strip_dot_prefix: self.config.strip_dot_prefix }; - if let Some(ignore_err) = subj.dent.error() { - ignore_message!("{}", ignore_err); + /// If a directory entry could not be created or should otherwise not be + /// searched, then this returns `None` after emitting any relevant log + /// messages. + fn build(&self, dent: ignore::DirEntry) -> Option { + let hay = Haystack { dent, strip_dot_prefix: self.strip_dot_prefix }; + if let Some(err) = hay.dent.error() { + ignore_message!("{err}"); } // If this entry was explicitly provided by an end user, then we always // want to search it. - if subj.is_explicit() { - return Some(subj); + if hay.is_explicit() { + return Some(hay); } // At this point, we only want to search something if it's explicitly a // file. This omits symlinks. (If ripgrep was configured to follow // symlinks, then they have already been followed by the directory // traversal.) - if subj.is_file() { - return Some(subj); + if hay.is_file() { + return Some(hay); } // We got nothing. Emit a debug message, but only if this isn't a // directory. Otherwise, emitting messages for directories is just // noisy. - if !subj.is_dir() { + if !hay.is_dir() { log::debug!( - "ignoring {}: failed to pass subject filter: \ + "ignoring {}: failed to pass haystack filter: \ file type: {:?}, metadata: {:?}", - subj.dent.path().display(), - subj.dent.file_type(), - subj.dent.metadata() + hay.dent.path().display(), + hay.dent.file_type(), + hay.dent.metadata() ); } None } - /// When enabled, if the subject's file path starts with `./` then it is + /// When enabled, if the haystack's file path starts with `./` then it is /// stripped. /// /// This is useful when implicitly searching the current working directory. - pub fn strip_dot_prefix(&mut self, yes: bool) -> &mut SubjectBuilder { - self.config.strip_dot_prefix = yes; + pub(crate) fn strip_dot_prefix( + &mut self, + yes: bool, + ) -> &mut HaystackBuilder { + self.strip_dot_prefix = yes; self } } -/// A subject is a thing we want to search. Generally, a subject is either a -/// file or stdin. +/// A haystack is a thing we want to search. +/// +/// Generally, a haystack is either a file or stdin. #[derive(Clone, Debug)] -pub struct Subject { +pub(crate) struct Haystack { dent: ignore::DirEntry, strip_dot_prefix: bool, } -impl Subject { - /// Return the file path corresponding to this subject. +impl Haystack { + /// Return the file path corresponding to this haystack. /// - /// If this subject corresponds to stdin, then a special `` path + /// If this haystack corresponds to stdin, then a special `` path /// is returned instead. - pub fn path(&self) -> &Path { + pub(crate) fn path(&self) -> &Path { if self.strip_dot_prefix && self.dent.path().starts_with("./") { self.dent.path().strip_prefix("./").unwrap() } else { @@ -111,21 +114,21 @@ impl Subject { } /// Returns true if and only if this entry corresponds to stdin. - pub fn is_stdin(&self) -> bool { + pub(crate) fn is_stdin(&self) -> bool { self.dent.is_stdin() } - /// Returns true if and only if this entry corresponds to a subject to + /// Returns true if and only if this entry corresponds to a haystack to /// search that was explicitly supplied by an end user. /// /// Generally, this corresponds to either stdin or an explicit file path /// argument. e.g., in `rg foo some-file ./some-dir/`, `some-file` is - /// an explicit subject, but, e.g., `./some-dir/some-other-file` is not. + /// an explicit haystack, but, e.g., `./some-dir/some-other-file` is not. /// /// However, note that ripgrep does not see through shell globbing. e.g., /// in `rg foo ./some-dir/*`, `./some-dir/some-other-file` will be treated - /// as an explicit subject. - pub fn is_explicit(&self) -> bool { + /// as an explicit haystack. + pub(crate) fn is_explicit(&self) -> bool { // stdin is obvious. When an entry has a depth of 0, that means it // was explicitly provided to our directory iterator, which means it // was in turn explicitly provided by the end user. The !is_dir check @@ -135,7 +138,7 @@ impl Subject { self.is_stdin() || (self.dent.depth() == 0 && !self.is_dir()) } - /// Returns true if and only if this subject points to a directory after + /// Returns true if and only if this haystack points to a directory after /// following symbolic links. fn is_dir(&self) -> bool { let ft = match self.dent.file_type() { @@ -150,7 +153,7 @@ impl Subject { self.dent.path_is_symlink() && self.dent.path().is_dir() } - /// Returns true if and only if this subject points to a file. + /// Returns true if and only if this haystack points to a file. fn is_file(&self) -> bool { self.dent.file_type().map_or(false, |ft| ft.is_file()) } diff --git a/crates/core/logger.rs b/crates/core/logger.rs index 0c5414c75f..0b58d1d6d3 100644 --- a/crates/core/logger.rs +++ b/crates/core/logger.rs @@ -1,7 +1,10 @@ -// This module defines a super simple logger that works with the `log` crate. -// We don't need anything fancy; just basic log levels and the ability to -// print to stderr. We therefore avoid bringing in extra dependencies just -// for this functionality. +/*! +Defines a super simple logger that works with the `log` crate. + +We don't do anything fancy. We just need basic log levels and the ability to +print to stderr. We therefore avoid bringing in extra dependencies just for +this functionality. +*/ use log::{self, Log}; @@ -10,15 +13,16 @@ use log::{self, Log}; /// This logger does no filtering. Instead, it relies on the `log` crates /// filtering via its global max_level setting. #[derive(Debug)] -pub struct Logger(()); +pub(crate) struct Logger(()); +/// A singleton used as the target for an implementation of the `Log` trait. const LOGGER: &'static Logger = &Logger(()); impl Logger { /// Create a new logger that logs to stderr and initialize it as the /// global logger. If there was a problem setting the logger, then an /// error is returned. - pub fn init() -> Result<(), log::SetLoggerError> { + pub(crate) fn init() -> Result<(), log::SetLoggerError> { log::set_logger(LOGGER) } } diff --git a/crates/core/main.rs b/crates/core/main.rs index 7cc59dd977..ab2230def6 100644 --- a/crates/core/main.rs +++ b/crates/core/main.rs @@ -1,21 +1,20 @@ -use std::{ - io::{self, Write}, - time::Instant, -}; +/*! +The main entry point into ripgrep. +*/ + +use std::{io::Write, process::ExitCode}; use ignore::WalkState; -use crate::{args::Args, subject::Subject}; +use crate::flags::{HiArgs, SearchMode}; #[macro_use] mod messages; -mod app; -mod args; -mod config; +mod flags; +mod haystack; mod logger; mod search; -mod subject; // Since Rust no longer uses jemalloc by default, ripgrep will, by default, // use the system allocator. On Linux, this would normally be glibc's @@ -40,143 +39,163 @@ mod subject; #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; -fn main() { - if let Err(err) = Args::parse().and_then(try_main) { - eprintln_locked!("{:#}", err); - std::process::exit(2); +/// Then, as it was, then again it will be. +fn main() -> ExitCode { + match run(flags::parse()) { + Ok(code) => code, + Err(err) => { + // Look for a broken pipe error. In this case, we generally want + // to exit "gracefully" with a success exit code. This matches + // existing Unix convention. We need to handle this explicitly + // since the Rust runtime doesn't ask for PIPE signals, and thus + // we get an I/O error instead. Traditional C Unix applications + // quit by getting a PIPE signal that they don't handle, and thus + // the unhandled signal causes the process to unceremoniously + // terminate. + for cause in err.chain() { + if let Some(ioerr) = cause.downcast_ref::() { + if ioerr.kind() == std::io::ErrorKind::BrokenPipe { + return ExitCode::from(0); + } + } + } + eprintln_locked!("{:#}", err); + ExitCode::from(2) + } } } -fn try_main(args: Args) -> anyhow::Result<()> { - use args::Command::*; - - let matched = match args.command() { - Search => search(&args), - SearchParallel => search_parallel(&args), - SearchNever => Ok(false), - Files => files(&args), - FilesParallel => files_parallel(&args), - Types => types(&args), - PCRE2Version => pcre2_version(&args), - }?; - if matched && (args.quiet() || !messages::errored()) { - std::process::exit(0) +/// The main entry point for ripgrep. +/// +/// The given parse result determines ripgrep's behavior. The parse +/// result should be the result of parsing CLI arguments in a low level +/// representation, and then followed by an attempt to convert them into a +/// higher level representation. The higher level representation has some nicer +/// abstractions, for example, instead of representing the `-g/--glob` flag +/// as a `Vec` (as in the low level representation), the globs are +/// converted into a single matcher. +fn run(result: crate::flags::ParseResult) -> anyhow::Result { + use crate::flags::{Mode, ParseResult}; + + let args = match result { + ParseResult::Err(err) => return Err(err), + ParseResult::Special(mode) => return special(mode), + ParseResult::Ok(args) => args, + }; + let matched = match args.mode() { + Mode::Search(_) if !args.matches_possible() => false, + Mode::Search(mode) if args.threads() == 1 => search(&args, mode)?, + Mode::Search(mode) => search_parallel(&args, mode)?, + Mode::Files if args.threads() == 1 => files(&args)?, + Mode::Files => files_parallel(&args)?, + Mode::Types => return types(&args), + Mode::Generate(mode) => return generate(mode), + }; + Ok(if matched && (args.quiet() || !messages::errored()) { + ExitCode::from(0) } else if messages::errored() { - std::process::exit(2) + ExitCode::from(2) } else { - std::process::exit(1) - } + ExitCode::from(1) + }) } -/// The top-level entry point for single-threaded search. This recursively -/// steps through the file list (current directory by default) and searches -/// each file sequentially. -fn search(args: &Args) -> anyhow::Result { - /// The meat of the routine is here. This lets us call the same iteration - /// code over each file regardless of whether we stream over the files - /// as they're produced by the underlying directory traversal or whether - /// they've been collected and sorted (for example) first. - fn iter( - args: &Args, - subjects: impl Iterator, - started_at: std::time::Instant, - ) -> anyhow::Result { - let quit_after_match = args.quit_after_match()?; - let mut stats = args.stats()?; - let mut searcher = args.search_worker(args.stdout())?; - let mut matched = false; - let mut searched = false; - - for subject in subjects { - searched = true; - let search_result = match searcher.search(&subject) { - Ok(search_result) => search_result, - // A broken pipe means graceful termination. - Err(err) if err.kind() == io::ErrorKind::BrokenPipe => break, - Err(err) => { - err_message!("{}: {}", subject.path().display(), err); - continue; - } - }; - matched |= search_result.has_match(); - if let Some(ref mut stats) = stats { - *stats += search_result.stats().unwrap(); - } - if matched && quit_after_match { - break; +/// The top-level entry point for single-threaded search. +/// +/// This recursively steps through the file list (current directory by default) +/// and searches each file sequentially. +fn search(args: &HiArgs, mode: SearchMode) -> anyhow::Result { + let started_at = std::time::Instant::now(); + let haystack_builder = args.haystack_builder(); + let unsorted = args + .walk_builder()? + .build() + .filter_map(|result| haystack_builder.build_from_result(result)); + let haystacks = args.sort(unsorted); + + let mut matched = false; + let mut searched = false; + let mut stats = args.stats(); + let mut searcher = args.search_worker( + args.matcher()?, + args.searcher()?, + args.printer(mode, args.stdout()), + )?; + for haystack in haystacks { + searched = true; + let search_result = match searcher.search(&haystack) { + Ok(search_result) => search_result, + // A broken pipe means graceful termination. + Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => break, + Err(err) => { + err_message!("{}: {}", haystack.path().display(), err); + continue; } + }; + matched = matched || search_result.has_match(); + if let Some(ref mut stats) = stats { + *stats += search_result.stats().unwrap(); } - if args.using_default_path() && !searched { - eprint_nothing_searched(); - } - if let Some(ref stats) = stats { - let elapsed = Instant::now().duration_since(started_at); - // We don't care if we couldn't print this successfully. - let _ = searcher.print_stats(elapsed, stats); + if matched && args.quit_after_match() { + break; } - Ok(matched) } - - let started_at = Instant::now(); - let subject_builder = args.subject_builder(); - let subjects = args - .walker()? - .filter_map(|result| subject_builder.build_from_result(result)); - if args.needs_stat_sort() { - let subjects = args.sort_by_stat(subjects).into_iter(); - iter(args, subjects, started_at) - } else { - iter(args, subjects, started_at) + if args.has_implicit_path() && !searched { + eprint_nothing_searched(); } + if let Some(ref stats) = stats { + let wtr = searcher.printer().get_mut(); + let _ = print_stats(mode, stats, started_at, wtr); + } + Ok(matched) } -/// The top-level entry point for multi-threaded search. The parallelism is -/// itself achieved by the recursive directory traversal. All we need to do is -/// feed it a worker for performing a search on each file. +/// The top-level entry point for multi-threaded search. +/// +/// The parallelism is itself achieved by the recursive directory traversal. +/// All we need to do is feed it a worker for performing a search on each file. /// /// Requesting a sorted output from ripgrep (such as with `--sort path`) will /// automatically disable parallelism and hence sorting is not handled here. -fn search_parallel(args: &Args) -> anyhow::Result { - use std::sync::atomic::{AtomicBool, Ordering::SeqCst}; - - let quit_after_match = args.quit_after_match()?; - let started_at = Instant::now(); - let subject_builder = args.subject_builder(); - let bufwtr = args.buffer_writer()?; - let stats = args.stats()?.map(std::sync::Mutex::new); +fn search_parallel(args: &HiArgs, mode: SearchMode) -> anyhow::Result { + use std::sync::atomic::{AtomicBool, Ordering}; + + let started_at = std::time::Instant::now(); + let haystack_builder = args.haystack_builder(); + let bufwtr = args.buffer_writer(); + let stats = args.stats().map(std::sync::Mutex::new); let matched = AtomicBool::new(false); let searched = AtomicBool::new(false); - let mut searcher_err = None; - args.walker_parallel()?.run(|| { + + let mut searcher = args.search_worker( + args.matcher()?, + args.searcher()?, + args.printer(mode, bufwtr.buffer()), + )?; + args.walk_builder()?.build_parallel().run(|| { let bufwtr = &bufwtr; let stats = &stats; let matched = &matched; let searched = &searched; - let subject_builder = &subject_builder; - let mut searcher = match args.search_worker(bufwtr.buffer()) { - Ok(searcher) => searcher, - Err(err) => { - searcher_err = Some(err); - return Box::new(move |_| WalkState::Quit); - } - }; + let haystack_builder = &haystack_builder; + let mut searcher = searcher.clone(); Box::new(move |result| { - let subject = match subject_builder.build_from_result(result) { - Some(subject) => subject, + let haystack = match haystack_builder.build_from_result(result) { + Some(haystack) => haystack, None => return WalkState::Continue, }; - searched.store(true, SeqCst); + searched.store(true, Ordering::SeqCst); searcher.printer().get_mut().clear(); - let search_result = match searcher.search(&subject) { + let search_result = match searcher.search(&haystack) { Ok(search_result) => search_result, Err(err) => { - err_message!("{}: {}", subject.path().display(), err); + err_message!("{}: {}", haystack.path().display(), err); return WalkState::Continue; } }; if search_result.has_match() { - matched.store(true, SeqCst); + matched.store(true, Ordering::SeqCst); } if let Some(ref locked_stats) = *stats { let mut stats = locked_stats.lock().unwrap(); @@ -184,128 +203,110 @@ fn search_parallel(args: &Args) -> anyhow::Result { } if let Err(err) = bufwtr.print(searcher.printer().get_mut()) { // A broken pipe means graceful termination. - if err.kind() == io::ErrorKind::BrokenPipe { + if err.kind() == std::io::ErrorKind::BrokenPipe { return WalkState::Quit; } // Otherwise, we continue on our merry way. - err_message!("{}: {}", subject.path().display(), err); + err_message!("{}: {}", haystack.path().display(), err); } - if matched.load(SeqCst) && quit_after_match { + if matched.load(Ordering::SeqCst) && args.quit_after_match() { WalkState::Quit } else { WalkState::Continue } }) }); - if let Some(err) = searcher_err.take() { - return Err(err); - } - if args.using_default_path() && !searched.load(SeqCst) { + if args.has_implicit_path() && !searched.load(Ordering::SeqCst) { eprint_nothing_searched(); } if let Some(ref locked_stats) = stats { - let elapsed = Instant::now().duration_since(started_at); let stats = locked_stats.lock().unwrap(); - let mut searcher = args.search_worker(args.stdout())?; - // We don't care if we couldn't print this successfully. - let _ = searcher.print_stats(elapsed, &stats); + let mut wtr = searcher.printer().get_mut(); + let _ = print_stats(mode, &stats, started_at, &mut wtr); + let _ = bufwtr.print(&mut wtr); } - Ok(matched.load(SeqCst)) + Ok(matched.load(Ordering::SeqCst)) } -fn eprint_nothing_searched() { - err_message!( - "No files were searched, which means ripgrep probably \ - applied a filter you didn't expect.\n\ - Running with --debug will show why files are being skipped." - ); -} +/// The top-level entry point for file listing without searching. +/// +/// This recursively steps through the file list (current directory by default) +/// and prints each path sequentially using a single thread. +fn files(args: &HiArgs) -> anyhow::Result { + let haystack_builder = args.haystack_builder(); + let unsorted = args + .walk_builder()? + .build() + .filter_map(|result| haystack_builder.build_from_result(result)); + let haystacks = args.sort(unsorted); -/// The top-level entry point for listing files without searching them. This -/// recursively steps through the file list (current directory by default) and -/// prints each path sequentially using a single thread. -fn files(args: &Args) -> anyhow::Result { - /// The meat of the routine is here. This lets us call the same iteration - /// code over each file regardless of whether we stream over the files - /// as they're produced by the underlying directory traversal or whether - /// they've been collected and sorted (for example) first. - fn iter( - args: &Args, - subjects: impl Iterator, - ) -> anyhow::Result { - let quit_after_match = args.quit_after_match()?; - let mut matched = false; - let mut path_printer = args.path_printer(args.stdout())?; - - for subject in subjects { - matched = true; - if quit_after_match { + let mut matched = false; + let mut path_printer = args.path_printer_builder().build(args.stdout()); + for haystack in haystacks { + matched = true; + if args.quit_after_match() { + break; + } + if let Err(err) = path_printer.write(haystack.path()) { + // A broken pipe means graceful termination. + if err.kind() == std::io::ErrorKind::BrokenPipe { break; } - if let Err(err) = path_printer.write(subject.path()) { - // A broken pipe means graceful termination. - if err.kind() == io::ErrorKind::BrokenPipe { - break; - } - // Otherwise, we have some other error that's preventing us from - // writing to stdout, so we should bubble it up. - return Err(err.into()); - } + // Otherwise, we have some other error that's preventing us from + // writing to stdout, so we should bubble it up. + return Err(err.into()); } - Ok(matched) - } - - let subject_builder = args.subject_builder(); - let subjects = args - .walker()? - .filter_map(|result| subject_builder.build_from_result(result)); - if args.needs_stat_sort() { - let subjects = args.sort_by_stat(subjects).into_iter(); - iter(args, subjects) - } else { - iter(args, subjects) } + Ok(matched) } -/// The top-level entry point for listing files without searching them. This -/// recursively steps through the file list (current directory by default) and -/// prints each path sequentially using multiple threads. +/// The top-level entry point for multi-threaded file listing without +/// searching. +/// +/// This recursively steps through the file list (current directory by default) +/// and prints each path sequentially using multiple threads. /// /// Requesting a sorted output from ripgrep (such as with `--sort path`) will /// automatically disable parallelism and hence sorting is not handled here. -fn files_parallel(args: &Args) -> anyhow::Result { - use std::sync::atomic::AtomicBool; - use std::sync::atomic::Ordering::SeqCst; - use std::sync::mpsc; - use std::thread; - - let quit_after_match = args.quit_after_match()?; - let subject_builder = args.subject_builder(); - let mut path_printer = args.path_printer(args.stdout())?; +fn files_parallel(args: &HiArgs) -> anyhow::Result { + use std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + mpsc, + }, + thread, + }; + + let haystack_builder = args.haystack_builder(); + let mut path_printer = args.path_printer_builder().build(args.stdout()); let matched = AtomicBool::new(false); - let (tx, rx) = mpsc::channel::(); + let (tx, rx) = mpsc::channel::(); - let print_thread = thread::spawn(move || -> io::Result<()> { - for subject in rx.iter() { - path_printer.write(subject.path())?; + // We spawn a single printing thread to make sure we don't tear writes. + // We use a channel here under the presumption that it's probably faster + // than using a mutex in the worker threads below, but this has never been + // seriously litigated. + let print_thread = thread::spawn(move || -> std::io::Result<()> { + for haystack in rx.iter() { + path_printer.write(haystack.path())?; } Ok(()) }); - args.walker_parallel()?.run(|| { - let subject_builder = &subject_builder; + args.walk_builder()?.build_parallel().run(|| { + let haystack_builder = &haystack_builder; let matched = &matched; let tx = tx.clone(); Box::new(move |result| { - let subject = match subject_builder.build_from_result(result) { - Some(subject) => subject, + let haystack = match haystack_builder.build_from_result(result) { + Some(haystack) => haystack, None => return WalkState::Continue, }; - matched.store(true, SeqCst); - if quit_after_match { + matched.store(true, Ordering::SeqCst); + if args.quit_after_match() { WalkState::Quit } else { - match tx.send(subject) { + match tx.send(haystack) { Ok(_) => WalkState::Continue, Err(_) => WalkState::Quit, } @@ -317,18 +318,18 @@ fn files_parallel(args: &Args) -> anyhow::Result { // A broken pipe means graceful termination, so fall through. // Otherwise, something bad happened while writing to stdout, so bubble // it up. - if err.kind() != io::ErrorKind::BrokenPipe { + if err.kind() != std::io::ErrorKind::BrokenPipe { return Err(err.into()); } } - Ok(matched.load(SeqCst)) + Ok(matched.load(Ordering::SeqCst)) } -/// The top-level entry point for --type-list. -fn types(args: &Args) -> anyhow::Result { +/// The top-level entry point for `--type-list`. +fn types(args: &HiArgs) -> anyhow::Result { let mut count = 0; let mut stdout = args.stdout(); - for def in args.type_defs()? { + for def in args.types().definitions() { count += 1; stdout.write_all(def.name().as_bytes())?; stdout.write_all(b": ")?; @@ -343,32 +344,156 @@ fn types(args: &Args) -> anyhow::Result { } stdout.write_all(b"\n")?; } - Ok(count > 0) + Ok(ExitCode::from(if count == 0 { 1 } else { 0 })) +} + +/// Implements ripgrep's "generate" modes. +/// +/// These modes correspond to generating some kind of ancillary data related +/// to ripgrep. At present, this includes ripgrep's man page (in roff format) +/// and supported shell completions. +fn generate(mode: crate::flags::GenerateMode) -> anyhow::Result { + use crate::flags::GenerateMode; + + let output = match mode { + GenerateMode::Man => flags::generate_man_page(), + GenerateMode::CompleteBash => flags::generate_complete_bash(), + GenerateMode::CompleteZsh => flags::generate_complete_zsh(), + GenerateMode::CompleteFish => flags::generate_complete_fish(), + GenerateMode::CompletePowerShell => { + flags::generate_complete_powershell() + } + }; + writeln!(std::io::stdout(), "{}", output.trim_end())?; + Ok(ExitCode::from(0)) +} + +/// Implements ripgrep's "special" modes. +/// +/// A special mode is one that generally short-circuits most (not all) of +/// ripgrep's initialization logic and skips right to this routine. The +/// special modes essentially consist of printing help and version output. The +/// idea behind the short circuiting is to ensure there is as little as possible +/// (within reason) that would prevent ripgrep from emitting help output. +/// +/// For example, part of the initialization logic that is skipped (among +/// other things) is accessing the current working directory. If that fails, +/// ripgrep emits an error. We don't want to emit an error if it fails and +/// the user requested version or help information. +fn special(mode: crate::flags::SpecialMode) -> anyhow::Result { + use crate::flags::SpecialMode; + + let output = match mode { + SpecialMode::HelpShort => flags::generate_help_short(), + SpecialMode::HelpLong => flags::generate_help_long(), + SpecialMode::VersionShort => flags::generate_version_short(), + SpecialMode::VersionLong => flags::generate_version_long(), + // --pcre2-version is a little special because it emits an error + // exit code if this build of ripgrep doesn't support PCRE2. + SpecialMode::VersionPCRE2 => return version_pcre2(), + }; + writeln!(std::io::stdout(), "{}", output.trim_end())?; + Ok(ExitCode::from(0)) } -/// The top-level entry point for --pcre2-version. -fn pcre2_version(args: &Args) -> anyhow::Result { +/// The top-level entry point for `--pcre2-version`. +fn version_pcre2() -> anyhow::Result { + let mut stdout = std::io::stdout().lock(); + #[cfg(feature = "pcre2")] - fn imp(args: &Args) -> anyhow::Result { + { use grep::pcre2; - let mut stdout = args.stdout(); - let (major, minor) = pcre2::version(); writeln!(stdout, "PCRE2 {}.{} is available", major, minor)?; - if cfg!(target_pointer_width = "64") && pcre2::is_jit_available() { writeln!(stdout, "JIT is available")?; } - Ok(true) + Ok(ExitCode::from(0)) } #[cfg(not(feature = "pcre2"))] - fn imp(args: &Args) -> anyhow::Result { - let mut stdout = args.stdout(); + { writeln!(stdout, "PCRE2 is not available in this build of ripgrep.")?; - Ok(false) + Ok(ExitCode::from(1)) } +} - imp(args) +/// Prints a heuristic error messages when nothing is searched. +/// +/// This can happen if an applicable ignore file has one or more rules that +/// are too broad and cause ripgrep to ignore everything. +/// +/// We only show this error message when the user does *not* provide an +/// explicit path to search. This is because the message can otherwise be +/// noisy, e.g., when it is intended that there is nothing to search. +fn eprint_nothing_searched() { + err_message!( + "No files were searched, which means ripgrep probably \ + applied a filter you didn't expect.\n\ + Running with --debug will show why files are being skipped." + ); +} + +/// Prints the statistics given to the writer given. +/// +/// The search mode given determines whether the stats should be printed in +/// a plain text format or in a JSON format. +/// +/// The `started` time should be the time at which ripgrep started working. +/// +/// If an error occurs while writing, then writing stops and the error is +/// returned. Note that callers should probably ignore this errror, since +/// whether stats fail to print or not generally shouldn't cause ripgrep to +/// enter into an "error" state. And usually the only way for this to fail is +/// if writing to stdout itself fails. +fn print_stats( + mode: SearchMode, + stats: &grep::printer::Stats, + started: std::time::Instant, + mut wtr: W, +) -> std::io::Result<()> { + let elapsed = std::time::Instant::now().duration_since(started); + if matches!(mode, SearchMode::JSON) { + // We specifically match the format laid out by the JSON printer in + // the grep-printer crate. We simply "extend" it with the 'summary' + // message type. + serde_json::to_writer( + &mut wtr, + &serde_json::json!({ + "type": "summary", + "data": { + "stats": stats, + "elapsed_total": { + "secs": elapsed.as_secs(), + "nanos": elapsed.subsec_nanos(), + "human": format!("{:0.6}s", elapsed.as_secs_f64()), + }, + } + }), + )?; + write!(wtr, "\n") + } else { + write!( + wtr, + " +{matches} matches +{lines} matched lines +{searches_with_match} files contained matches +{searches} files searched +{bytes_printed} bytes printed +{bytes_searched} bytes searched +{search_time:0.6} seconds spent searching +{process_time:0.6} seconds +", + matches = stats.matches(), + lines = stats.matched_lines(), + searches_with_match = stats.searches_with_match(), + searches = stats.searches(), + bytes_printed = stats.bytes_printed(), + bytes_searched = stats.bytes_searched(), + search_time = stats.elapsed().as_secs_f64(), + process_time = elapsed.as_secs_f64(), + ) + } } diff --git a/crates/core/messages.rs b/crates/core/messages.rs index be9e10dcab..ea514c1715 100644 --- a/crates/core/messages.rs +++ b/crates/core/messages.rs @@ -1,21 +1,59 @@ +/*! +This module defines some macros and some light shared mutable state. + +This state is responsible for keeping track of whether we should emit certain +kinds of messages to the user (such as errors) that are distinct from the +standard "debug" or "trace" log messages. This state is specifically set at +startup time when CLI arguments are parsed and then never changed. + +The other state tracked here is whether ripgrep experienced an error +condition. Aside from errors associated with invalid CLI arguments, ripgrep +generally does not abort when an error occurs (e.g., if reading a file failed). +But when an error does occur, it will alter ripgrep's exit status. Thus, when +an error message is emitted via `err_message`, then a global flag is toggled +indicating that at least one error occurred. When ripgrep exits, this flag is +consulted to determine what the exit status ought to be. +*/ + use std::sync::atomic::{AtomicBool, Ordering}; +/// When false, "messages" will not be printed. static MESSAGES: AtomicBool = AtomicBool::new(false); +/// When false, "messages" related to ignore rules will not be printed. static IGNORE_MESSAGES: AtomicBool = AtomicBool::new(false); +/// Flipped to true when an error message is printed. static ERRORED: AtomicBool = AtomicBool::new(false); -/// Like eprintln, but locks STDOUT to prevent interleaving lines. +/// Like eprintln, but locks stdout to prevent interleaving lines. +/// +/// This locks stdout, not stderr, even though this prints to stderr. This +/// avoids the appearance of interleaving output when stdout and stderr both +/// correspond to a tty.) #[macro_export] macro_rules! eprintln_locked { ($($tt:tt)*) => {{ { + use std::io::Write; + // This is a bit of an abstraction violation because we explicitly - // lock STDOUT before printing to STDERR. This avoids interleaving + // lock stdout before printing to stderr. This avoids interleaving // lines within ripgrep because `search_parallel` uses `termcolor`, - // which accesses the same STDOUT lock when writing lines. + // which accesses the same stdout lock when writing lines. let stdout = std::io::stdout(); let _handle = stdout.lock(); - eprintln!($($tt)*); + // We specifically ignore any errors here. One plausible error we + // can get in some cases is a broken pipe error. And when that + // occurs, we should exit gracefully. Otherwise, just abort with + // an error code because there isn't much else we can do. + // + // See: https://github.com/BurntSushi/ripgrep/issues/1966 + if let Err(err) = writeln!(std::io::stderr(), $($tt)*) { + if err.kind() == std::io::ErrorKind::BrokenPipe { + std::process::exit(0); + } else { + std::process::exit(2); + } + } } }} } @@ -52,19 +90,19 @@ macro_rules! ignore_message { } /// Returns true if and only if messages should be shown. -pub fn messages() -> bool { +pub(crate) fn messages() -> bool { MESSAGES.load(Ordering::SeqCst) } /// Set whether messages should be shown or not. /// /// By default, they are not shown. -pub fn set_messages(yes: bool) { +pub(crate) fn set_messages(yes: bool) { MESSAGES.store(yes, Ordering::SeqCst) } /// Returns true if and only if "ignore" related messages should be shown. -pub fn ignore_messages() -> bool { +pub(crate) fn ignore_messages() -> bool { IGNORE_MESSAGES.load(Ordering::SeqCst) } @@ -75,16 +113,19 @@ pub fn ignore_messages() -> bool { /// Note that this is overridden if `messages` is disabled. Namely, if /// `messages` is disabled, then "ignore" messages are never shown, regardless /// of this setting. -pub fn set_ignore_messages(yes: bool) { +pub(crate) fn set_ignore_messages(yes: bool) { IGNORE_MESSAGES.store(yes, Ordering::SeqCst) } /// Returns true if and only if ripgrep came across a non-fatal error. -pub fn errored() -> bool { +pub(crate) fn errored() -> bool { ERRORED.load(Ordering::SeqCst) } /// Indicate that ripgrep has come across a non-fatal error. -pub fn set_errored() { +/// +/// Callers should not use this directly. Instead, it is called automatically +/// via the `err_message` macro. +pub(crate) fn set_errored() { ERRORED.store(true, Ordering::SeqCst); } diff --git a/crates/core/search.rs b/crates/core/search.rs index a6ceaeaaab..672734254d 100644 --- a/crates/core/search.rs +++ b/crates/core/search.rs @@ -1,59 +1,47 @@ -use std::{ - io, - path::{Path, PathBuf}, - time::Duration, -}; - -use { - grep::{ - cli, - matcher::Matcher, - printer::{Standard, Stats, Summary, JSON}, - regex::RegexMatcher as RustRegexMatcher, - searcher::{BinaryDetection, Searcher}, - }, - ignore::overrides::Override, - serde_json::{self as json, json}, - termcolor::WriteColor, -}; - -#[cfg(feature = "pcre2")] -use grep::pcre2::RegexMatcher as PCRE2RegexMatcher; - -use crate::subject::Subject; - -/// The configuration for the search worker. Among a few other things, the -/// configuration primarily controls the way we show search results to users -/// at a very high level. +/*! +Defines a very high level "search worker" abstraction. + +A search worker manages the high level interaction points between the matcher +(i.e., which regex engine is used), the searcher (i.e., how data is actually +read and matched using the regex engine) and the printer. For example, the +search worker is where things like preprocessors or decompression happens. +*/ + +use std::{io, path::Path}; + +use {grep::matcher::Matcher, termcolor::WriteColor}; + +/// The configuration for the search worker. +/// +/// Among a few other things, the configuration primarily controls the way we +/// show search results to users at a very high level. #[derive(Clone, Debug)] struct Config { - json_stats: bool, - preprocessor: Option, - preprocessor_globs: Override, + preprocessor: Option, + preprocessor_globs: ignore::overrides::Override, search_zip: bool, - binary_implicit: BinaryDetection, - binary_explicit: BinaryDetection, + binary_implicit: grep::searcher::BinaryDetection, + binary_explicit: grep::searcher::BinaryDetection, } impl Default for Config { fn default() -> Config { Config { - json_stats: false, preprocessor: None, - preprocessor_globs: Override::empty(), + preprocessor_globs: ignore::overrides::Override::empty(), search_zip: false, - binary_implicit: BinaryDetection::none(), - binary_explicit: BinaryDetection::none(), + binary_implicit: grep::searcher::BinaryDetection::none(), + binary_explicit: grep::searcher::BinaryDetection::none(), } } } /// A builder for configuring and constructing a search worker. #[derive(Clone, Debug)] -pub struct SearchWorkerBuilder { +pub(crate) struct SearchWorkerBuilder { config: Config, - command_builder: cli::CommandReaderBuilder, - decomp_builder: cli::DecompressionReaderBuilder, + command_builder: grep::cli::CommandReaderBuilder, + decomp_builder: grep::cli::DecompressionReaderBuilder, } impl Default for SearchWorkerBuilder { @@ -64,11 +52,11 @@ impl Default for SearchWorkerBuilder { impl SearchWorkerBuilder { /// Create a new builder for configuring and constructing a search worker. - pub fn new() -> SearchWorkerBuilder { - let mut cmd_builder = cli::CommandReaderBuilder::new(); + pub(crate) fn new() -> SearchWorkerBuilder { + let mut cmd_builder = grep::cli::CommandReaderBuilder::new(); cmd_builder.async_stderr(true); - let mut decomp_builder = cli::DecompressionReaderBuilder::new(); + let mut decomp_builder = grep::cli::DecompressionReaderBuilder::new(); decomp_builder.async_stderr(true); SearchWorkerBuilder { @@ -80,10 +68,10 @@ impl SearchWorkerBuilder { /// Create a new search worker using the given searcher, matcher and /// printer. - pub fn build( + pub(crate) fn build( &self, matcher: PatternMatcher, - searcher: Searcher, + searcher: grep::searcher::Searcher, printer: Printer, ) -> SearchWorker { let config = self.config.clone(); @@ -99,29 +87,17 @@ impl SearchWorkerBuilder { } } - /// Forcefully use JSON to emit statistics, even if the underlying printer - /// is not the JSON printer. - /// - /// This is useful for implementing flag combinations like - /// `--json --quiet`, which uses the summary printer for implementing - /// `--quiet` but still wants to emit summary statistics, which should - /// be JSON formatted because of the `--json` flag. - pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder { - self.config.json_stats = yes; - self - } - /// Set the path to a preprocessor command. /// /// When this is set, instead of searching files directly, the given /// command will be run with the file path as the first argument, and the /// output of that command will be searched instead. - pub fn preprocessor( + pub(crate) fn preprocessor( &mut self, - cmd: Option, + cmd: Option, ) -> anyhow::Result<&mut SearchWorkerBuilder> { if let Some(ref prog) = cmd { - let bin = cli::resolve_binary(prog)?; + let bin = grep::cli::resolve_binary(prog)?; self.config.preprocessor = Some(bin); } else { self.config.preprocessor = None; @@ -132,9 +108,9 @@ impl SearchWorkerBuilder { /// Set the globs for determining which files should be run through the /// preprocessor. By default, with no globs and a preprocessor specified, /// every file is run through the preprocessor. - pub fn preprocessor_globs( + pub(crate) fn preprocessor_globs( &mut self, - globs: Override, + globs: ignore::overrides::Override, ) -> &mut SearchWorkerBuilder { self.config.preprocessor_globs = globs; self @@ -147,7 +123,10 @@ impl SearchWorkerBuilder { /// /// Note that if a preprocessor command is set, then it overrides this /// setting. - pub fn search_zip(&mut self, yes: bool) -> &mut SearchWorkerBuilder { + pub(crate) fn search_zip( + &mut self, + yes: bool, + ) -> &mut SearchWorkerBuilder { self.config.search_zip = yes; self } @@ -155,13 +134,14 @@ impl SearchWorkerBuilder { /// Set the binary detection that should be used when searching files /// found via a recursive directory search. /// - /// Generally, this binary detection may be `BinaryDetection::quit` if - /// we want to skip binary files completely. + /// Generally, this binary detection may be + /// `grep::searcher::BinaryDetection::quit` if we want to skip binary files + /// completely. /// /// By default, no binary detection is performed. - pub fn binary_detection_implicit( + pub(crate) fn binary_detection_implicit( &mut self, - detection: BinaryDetection, + detection: grep::searcher::BinaryDetection, ) -> &mut SearchWorkerBuilder { self.config.binary_implicit = detection; self @@ -170,14 +150,14 @@ impl SearchWorkerBuilder { /// Set the binary detection that should be used when searching files /// explicitly supplied by an end user. /// - /// Generally, this binary detection should NOT be `BinaryDetection::quit`, - /// since we never want to automatically filter files supplied by the end - /// user. + /// Generally, this binary detection should NOT be + /// `grep::searcher::BinaryDetection::quit`, since we never want to + /// automatically filter files supplied by the end user. /// /// By default, no binary detection is performed. - pub fn binary_detection_explicit( + pub(crate) fn binary_detection_explicit( &mut self, - detection: BinaryDetection, + detection: grep::searcher::BinaryDetection, ) -> &mut SearchWorkerBuilder { self.config.binary_explicit = detection; self @@ -191,14 +171,14 @@ impl SearchWorkerBuilder { /// every search also has some aggregate statistics or meta data that may be /// useful to higher level routines. #[derive(Clone, Debug, Default)] -pub struct SearchResult { +pub(crate) struct SearchResult { has_match: bool, - stats: Option, + stats: Option, } impl SearchResult { /// Whether the search found a match or not. - pub fn has_match(&self) -> bool { + pub(crate) fn has_match(&self) -> bool { self.has_match } @@ -206,103 +186,36 @@ impl SearchResult { /// /// It can be expensive to compute statistics, so these are only present /// if explicitly enabled in the printer provided by the caller. - pub fn stats(&self) -> Option<&Stats> { + pub(crate) fn stats(&self) -> Option<&grep::printer::Stats> { self.stats.as_ref() } } /// The pattern matcher used by a search worker. #[derive(Clone, Debug)] -pub enum PatternMatcher { - RustRegex(RustRegexMatcher), +pub(crate) enum PatternMatcher { + RustRegex(grep::regex::RegexMatcher), #[cfg(feature = "pcre2")] - PCRE2(PCRE2RegexMatcher), + PCRE2(grep::pcre2::RegexMatcher), } /// The printer used by a search worker. /// /// The `W` type parameter refers to the type of the underlying writer. -#[derive(Debug)] -pub enum Printer { +#[derive(Clone, Debug)] +pub(crate) enum Printer { /// Use the standard printer, which supports the classic grep-like format. - Standard(Standard), + Standard(grep::printer::Standard), /// Use the summary printer, which supports aggregate displays of search /// results. - Summary(Summary), + Summary(grep::printer::Summary), /// A JSON printer, which emits results in the JSON Lines format. - JSON(JSON), + JSON(grep::printer::JSON), } impl Printer { - fn print_stats( - &mut self, - total_duration: Duration, - stats: &Stats, - ) -> io::Result<()> { - match *self { - Printer::JSON(_) => self.print_stats_json(total_duration, stats), - Printer::Standard(_) | Printer::Summary(_) => { - self.print_stats_human(total_duration, stats) - } - } - } - - fn print_stats_human( - &mut self, - total_duration: Duration, - stats: &Stats, - ) -> io::Result<()> { - write!( - self.get_mut(), - " -{matches} matches -{lines} matched lines -{searches_with_match} files contained matches -{searches} files searched -{bytes_printed} bytes printed -{bytes_searched} bytes searched -{search_time:0.6} seconds spent searching -{process_time:0.6} seconds -", - matches = stats.matches(), - lines = stats.matched_lines(), - searches_with_match = stats.searches_with_match(), - searches = stats.searches(), - bytes_printed = stats.bytes_printed(), - bytes_searched = stats.bytes_searched(), - search_time = fractional_seconds(stats.elapsed()), - process_time = fractional_seconds(total_duration) - ) - } - - fn print_stats_json( - &mut self, - total_duration: Duration, - stats: &Stats, - ) -> io::Result<()> { - // We specifically match the format laid out by the JSON printer in - // the grep-printer crate. We simply "extend" it with the 'summary' - // message type. - let fractional = fractional_seconds(total_duration); - json::to_writer( - self.get_mut(), - &json!({ - "type": "summary", - "data": { - "stats": stats, - "elapsed_total": { - "secs": total_duration.as_secs(), - "nanos": total_duration.subsec_nanos(), - "human": format!("{:0.6}s", fractional), - }, - } - }), - )?; - write!(self.get_mut(), "\n") - } - /// Return a mutable reference to the underlying printer's writer. - pub fn get_mut(&mut self) -> &mut W { + pub(crate) fn get_mut(&mut self) -> &mut W { match *self { Printer::Standard(ref mut p) => p.get_mut(), Printer::Summary(ref mut p) => p.get_mut(), @@ -316,29 +229,32 @@ impl Printer { /// It is intended for a single worker to execute many searches, and is /// generally intended to be used from a single thread. When searching using /// multiple threads, it is better to create a new worker for each thread. -#[derive(Debug)] -pub struct SearchWorker { +#[derive(Clone, Debug)] +pub(crate) struct SearchWorker { config: Config, - command_builder: cli::CommandReaderBuilder, - decomp_builder: cli::DecompressionReaderBuilder, + command_builder: grep::cli::CommandReaderBuilder, + decomp_builder: grep::cli::DecompressionReaderBuilder, matcher: PatternMatcher, - searcher: Searcher, + searcher: grep::searcher::Searcher, printer: Printer, } impl SearchWorker { - /// Execute a search over the given subject. - pub fn search(&mut self, subject: &Subject) -> io::Result { - let bin = if subject.is_explicit() { + /// Execute a search over the given haystack. + pub(crate) fn search( + &mut self, + haystack: &crate::haystack::Haystack, + ) -> io::Result { + let bin = if haystack.is_explicit() { self.config.binary_explicit.clone() } else { self.config.binary_implicit.clone() }; - let path = subject.path(); + let path = haystack.path(); log::trace!("{}: binary detection: {:?}", path.display(), bin); self.searcher.set_binary_detection(bin); - if subject.is_stdin() { + if haystack.is_stdin() { self.search_reader(path, &mut io::stdin().lock()) } else if self.should_preprocess(path) { self.search_preprocessor(path) @@ -350,28 +266,10 @@ impl SearchWorker { } /// Return a mutable reference to the underlying printer. - pub fn printer(&mut self) -> &mut Printer { + pub(crate) fn printer(&mut self) -> &mut Printer { &mut self.printer } - /// Print the given statistics to the underlying writer in a way that is - /// consistent with this searcher's printer's format. - /// - /// While `Stats` contains a duration itself, this only corresponds to the - /// time spent searching, where as `total_duration` should roughly - /// approximate the lifespan of the ripgrep process itself. - pub fn print_stats( - &mut self, - total_duration: Duration, - stats: &Stats, - ) -> io::Result<()> { - if self.config.json_stats { - self.printer().print_stats_json(total_duration, stats) - } else { - self.printer().print_stats(total_duration, stats) - } - } - /// Returns true if and only if the given file path should be /// decompressed before searching. fn should_decompress(&self, path: &Path) -> bool { @@ -399,10 +297,11 @@ impl SearchWorker { &mut self, path: &Path, ) -> io::Result { + use std::{fs::File, process::Stdio}; + let bin = self.config.preprocessor.as_ref().unwrap(); let mut cmd = std::process::Command::new(bin); - cmd.arg(path) - .stdin(std::process::Stdio::from(std::fs::File::open(path)?)); + cmd.arg(path).stdin(Stdio::from(File::open(path)?)); let mut rdr = self.command_builder.build(&mut cmd).map_err(|err| { io::Error::new( @@ -478,7 +377,7 @@ impl SearchWorker { /// searcher and printer. fn search_path( matcher: M, - searcher: &mut Searcher, + searcher: &mut grep::searcher::Searcher, printer: &mut Printer, path: &Path, ) -> io::Result { @@ -514,7 +413,7 @@ fn search_path( /// and printer. fn search_reader( matcher: M, - searcher: &mut Searcher, + searcher: &mut grep::searcher::Searcher, printer: &mut Printer, path: &Path, mut rdr: R, @@ -546,8 +445,3 @@ fn search_reader( } } } - -/// Return the given duration as fractional seconds. -fn fractional_seconds(duration: Duration) -> f64 { - (duration.as_secs() as f64) + (duration.subsec_nanos() as f64 * 1e-9) -} diff --git a/crates/printer/src/json.rs b/crates/printer/src/json.rs index 1a0b6183f4..de4da9cc5a 100644 --- a/crates/printer/src/json.rs +++ b/crates/printer/src/json.rs @@ -447,7 +447,7 @@ impl JSONBuilder { /// } /// } /// ``` -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct JSON { config: Config, wtr: CounterWriter, diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index 487299744b..2287b5d7cf 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -489,7 +489,7 @@ impl StandardBuilder { /// then the `new_no_color` constructor can be used, or, alternatively, /// the `termcolor::NoColor` adapter can be used to wrap any `io::Write` /// implementation without enabling any colors. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Standard { config: Config, wtr: RefCell>, diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs index 7c16223cba..e69703fe39 100644 --- a/crates/printer/src/summary.rs +++ b/crates/printer/src/summary.rs @@ -350,7 +350,7 @@ impl SummaryBuilder { /// /// This type is generic over `W`, which represents any implementation of /// the `termcolor::WriteColor` trait. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Summary { config: Config, wtr: RefCell>, diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index 2d20a7dfb3..db19504ca9 100644 --- a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -46,7 +46,7 @@ impl Replacer { Replacer { space: None } } - /// Executes a replacement on the given subject string by replacing all + /// Executes a replacement on the given haystack string by replacing all /// matches with the given replacement. To access the result of the /// replacement, use the `replacement` method. /// @@ -55,7 +55,7 @@ impl Replacer { &'a mut self, searcher: &Searcher, matcher: &M, - mut subject: &[u8], + mut haystack: &[u8], range: std::ops::Range, replacement: &[u8], ) -> io::Result<()> { @@ -63,8 +63,8 @@ impl Replacer { // do this dance. let is_multi_line = searcher.multi_line_with_matcher(&matcher); if is_multi_line { - if subject[range.end..].len() >= MAX_LOOK_AHEAD { - subject = &subject[..range.end + MAX_LOOK_AHEAD]; + if haystack[range.end..].len() >= MAX_LOOK_AHEAD { + haystack = &haystack[..range.end + MAX_LOOK_AHEAD]; } } else { // When searching a single line, we should remove the line @@ -72,8 +72,8 @@ impl Replacer { // look-around) to observe the line terminator and not match // because of it. let mut m = Match::new(0, range.end); - trim_line_terminator(searcher, subject, &mut m); - subject = &subject[..m.end()]; + trim_line_terminator(searcher, haystack, &mut m); + haystack = &haystack[..m.end()]; } { let &mut Space { ref mut dst, ref mut caps, ref mut matches } = @@ -83,7 +83,7 @@ impl Replacer { replace_with_captures_in_context( matcher, - subject, + haystack, range.clone(), caps, dst, @@ -91,7 +91,7 @@ impl Replacer { let start = dst.len(); caps.interpolate( |name| matcher.capture_index(name), - subject, + haystack, replacement, dst, ); diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs index abbc0209ea..ff1bea5931 100644 --- a/crates/searcher/src/searcher/mod.rs +++ b/crates/searcher/src/searcher/mod.rs @@ -124,7 +124,7 @@ impl BinaryDetection { /// source data from an encoding to UTF-8 before searching. /// /// An `Encoding` will always be cheap to clone. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Encoding(&'static encoding_rs::Encoding); impl Encoding { diff --git a/tests/feature.rs b/tests/feature.rs index 8021043eac..5321d11005 100644 --- a/tests/feature.rs +++ b/tests/feature.rs @@ -411,7 +411,8 @@ rgtest!( |dir: Dir, mut cmd: TestCommand| { dir.create("sherlock", SHERLOCK); - let lines = cmd.arg("--stats").arg("Sherlock").stdout(); + let lines = cmd.arg("-j1").arg("--stats").arg("Sherlock").stdout(); + assert!(lines.contains("Sherlock")); assert!(lines.contains("2 matched lines")); assert!(lines.contains("1 files contained matches")); assert!(lines.contains("1 files searched")); @@ -423,7 +424,40 @@ rgtest!(f411_parallel_search_stats, |dir: Dir, mut cmd: TestCommand| { dir.create("sherlock_1", SHERLOCK); dir.create("sherlock_2", SHERLOCK); - let lines = cmd.arg("--stats").arg("Sherlock").stdout(); + let lines = cmd.arg("-j2").arg("--stats").arg("Sherlock").stdout(); + dbg!(&lines); + assert!(lines.contains("4 matched lines")); + assert!(lines.contains("2 files contained matches")); + assert!(lines.contains("2 files searched")); + assert!(lines.contains("seconds")); +}); + +rgtest!( + f411_single_threaded_quiet_search_stats, + |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let lines = cmd + .arg("--quiet") + .arg("-j1") + .arg("--stats") + .arg("Sherlock") + .stdout(); + assert!(!lines.contains("Sherlock")); + assert!(lines.contains("2 matched lines")); + assert!(lines.contains("1 files contained matches")); + assert!(lines.contains("1 files searched")); + assert!(lines.contains("seconds")); + } +); + +rgtest!(f411_parallel_quiet_search_stats, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock_1", SHERLOCK); + dir.create("sherlock_2", SHERLOCK); + + let lines = + cmd.arg("-j2").arg("--quiet").arg("--stats").arg("Sherlock").stdout(); + assert!(!lines.contains("Sherlock")); assert!(lines.contains("4 matched lines")); assert!(lines.contains("2 files contained matches")); assert!(lines.contains("2 files searched")); diff --git a/tests/json.rs b/tests/json.rs index ff0b5aae68..86d8518a4e 100644 --- a/tests/json.rs +++ b/tests/json.rs @@ -189,6 +189,19 @@ rgtest!(basic, |dir: Dir, mut cmd: TestCommand| { assert_eq!(msgs[4].unwrap_summary().stats.bytes_printed, 494); }); +rgtest!(quiet_stats, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--json") + .arg("--quiet") + .arg("--stats") + .arg("Sherlock Holmes") + .arg("sherlock"); + + let msgs = json_decode(&cmd.stdout()); + assert_eq!(msgs[0].unwrap_summary().stats.searches_with_match, 1); + assert_eq!(msgs[0].unwrap_summary().stats.bytes_searched, 367); +}); + #[cfg(unix)] rgtest!(notutf8, |dir: Dir, mut cmd: TestCommand| { use std::ffi::OsStr; diff --git a/tests/regression.rs b/tests/regression.rs index 994006a74a..04138fdf70 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -402,6 +402,7 @@ rgtest!(r428_unrecognized_style, |dir: Dir, mut cmd: TestCommand| { let output = cmd.cmd().output().unwrap(); let stderr = String::from_utf8_lossy(&output.stderr); let expected = "\ +error parsing flag --colors: \ unrecognized style attribute ''. Choose from: nobold, bold, nointense, \ intense, nounderline, underline. "; diff --git a/tests/util.rs b/tests/util.rs index 4f958eb80c..b7fc9f789c 100644 --- a/tests/util.rs +++ b/tests/util.rs @@ -78,7 +78,7 @@ impl Dir { nice_err(&dir, fs::remove_dir_all(&dir)); } nice_err(&dir, repeat(|| fs::create_dir_all(&dir))); - Dir { root: root, dir: dir, pcre2: false } + Dir { root, dir, pcre2: false } } /// Use PCRE2 for this test. @@ -167,7 +167,7 @@ impl Dir { if self.is_pcre2() { cmd.arg("--pcre2"); } - TestCommand { dir: self.clone(), cmd: cmd } + TestCommand { dir: self.clone(), cmd } } /// Returns the path to the ripgrep executable.