Skip to content

Commit

Permalink
Misc tab refactoring (#1424)
Browse files Browse the repository at this point in the history
* Move tabs logic into utils

* Re-use buffer returned by tabs::expand

* Add TabCfg to configure tabs

Use the String from this config for the tab replacement. This avoids
creating a new String for each processed line.

* Avoid unicode segmentation for each line just to remove a prefix

In some code paths no prefix is removed, and in almost all other
cases the prefix is just ascii.

This simplifies a lot of calls.

* Set default tab with to 8

Editors like vim, emacs, nano and most terminal emulators set
this value as the default tab display width.
  • Loading branch information
th1000s authored May 31, 2023
1 parent 65418aa commit 139cdb9
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 52 deletions.
2 changes: 1 addition & 1 deletion src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -935,7 +935,7 @@ pub struct Opt {
/// syntax highlighting.
pub syntax_theme: Option<String>,

#[arg(long = "tabs", default_value = "4", value_name = "N")]
#[arg(long = "tabs", default_value = "8", value_name = "N")]
/// The number of spaces to replace tab characters with.
///
/// Use --tabs=0 to pass tab characters through directly, but note that in that case delta will
Expand Down
4 changes: 2 additions & 2 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ pub struct Config {
pub syntax_dummy_theme: SyntaxTheme,
pub syntax_set: SyntaxSet,
pub syntax_theme: Option<SyntaxTheme>,
pub tab_width: usize,
pub tab_cfg: utils::tabs::TabCfg,
pub tokenization_regex: Regex,
pub true_color: bool,
pub truncation_symbol: String,
Expand Down Expand Up @@ -357,7 +357,7 @@ impl From<cli::Opt> for Config {
syntax_dummy_theme: SyntaxTheme::default(),
syntax_set: opt.computed.syntax_set,
syntax_theme: opt.computed.syntax_theme,
tab_width: opt.tab_width,
tab_cfg: utils::tabs::TabCfg::new(opt.tab_width),
tokenization_regex,
true_color: opt.computed.true_color,
truncation_symbol: format!("{}→{}", ansi::ANSI_SGR_REVERSE, ansi::ANSI_SGR_RESET),
Expand Down
18 changes: 6 additions & 12 deletions src/handlers/grep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@ use std::borrow::Cow;
use lazy_static::lazy_static;
use regex::Regex;
use serde::Deserialize;
use unicode_segmentation::UnicodeSegmentation;

use crate::ansi;
use crate::delta::{State, StateMachine};
use crate::handlers::{self, ripgrep_json};
use crate::paint::{self, expand_tabs, BgShouldFill, StyleSectionSpecifier};
use crate::paint::{self, BgShouldFill, StyleSectionSpecifier};
use crate::style::Style;
use crate::utils::process;
use crate::utils::{process, tabs};

#[derive(Debug, PartialEq, Eq)]
pub struct GrepLine<'b> {
Expand Down Expand Up @@ -139,11 +138,8 @@ impl<'a> StateMachine<'a> {
// (At the time of writing, we are in this
// arm iff we are handling `ripgrep --json`
// output.)
grep_line.code = paint::expand_tabs(
grep_line.code.graphemes(true),
self.config.tab_width,
)
.into();
grep_line.code =
tabs::expand(&grep_line.code, &self.config.tab_cfg).into();
make_style_sections(
&grep_line.code,
submatches,
Expand All @@ -158,10 +154,8 @@ impl<'a> StateMachine<'a> {
// enough. But at the point it is guaranteed
// that this handler is going to handle this
// line, so mutating it is acceptable.
self.raw_line = expand_tabs(
self.raw_line.graphemes(true),
self.config.tab_width,
);
self.raw_line =
tabs::expand(&self.raw_line, &self.config.tab_cfg);
get_code_style_sections(
&self.raw_line,
self.config.grep_match_word_style,
Expand Down
11 changes: 5 additions & 6 deletions src/handlers/hunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use lazy_static::lazy_static;
use crate::cli;
use crate::config::{delta_unreachable, Config};
use crate::delta::{DiffType, InMergeConflict, MergeParents, State, StateMachine};
use crate::paint::{expand_tabs, prepare, prepare_raw_line};
use crate::paint::{prepare, prepare_raw_line};
use crate::style;
use crate::utils::process::{self, CallingProcess};
use unicode_segmentation::UnicodeSegmentation;
use crate::utils::tabs;

// HACK: WordDiff should probably be a distinct top-level line state
pub fn is_word_diff() -> bool {
Expand Down Expand Up @@ -118,10 +118,9 @@ impl<'a> StateMachine<'a> {
// is not a hunk line, but the parser does not have a more accurate state corresponding
// to this.
self.painter.paint_buffered_minus_and_plus_lines();
self.painter.output_buffer.push_str(&expand_tabs(
self.raw_line.graphemes(true),
self.config.tab_width,
));
self.painter
.output_buffer
.push_str(&tabs::expand(&self.raw_line, &self.config.tab_cfg));
self.painter.output_buffer.push('\n');
State::HunkZero(Unified, None)
}
Expand Down
38 changes: 8 additions & 30 deletions src/paint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use itertools::Itertools;
use syntect::easy::HighlightLines;
use syntect::highlighting::Style as SyntectStyle;
use syntect::parsing::{SyntaxReference, SyntaxSet};
use unicode_segmentation::UnicodeSegmentation;

use crate::config::{self, delta_unreachable, Config};
use crate::delta::{DiffType, InMergeConflict, MergeParents, State};
Expand All @@ -20,7 +19,7 @@ use crate::minusplus::*;
use crate::paint::superimpose_style_sections::superimpose_style_sections;
use crate::style::Style;
use crate::{ansi, style};
use crate::{edits, utils};
use crate::{edits, utils, utils::tabs};

pub type LineSections<'a, S> = Vec<(S, &'a str)>;

Expand Down Expand Up @@ -261,10 +260,7 @@ impl<'p> Painter<'p> {
state: State,
background_color_extends_to_terminal_width: BgShouldFill,
) {
let lines = vec![(
expand_tabs(line.graphemes(true), self.config.tab_width),
state,
)];
let lines = vec![(tabs::expand(line, &self.config.tab_cfg), state)];
let syntax_style_sections =
get_syntax_style_sections_for_lines(&lines, self.highlighter.as_mut(), self.config);
let diff_style_sections = match style_sections {
Expand Down Expand Up @@ -561,8 +557,9 @@ pub fn prepare(line: &str, prefix_length: usize, config: &config::Config) -> Str
// The prefix contains -/+/space characters, added by git. We removes them now so they
// are not present during syntax highlighting or wrapping. If --keep-plus-minus-markers
// is in effect the prefix is re-inserted in Painter::paint_line.
let line = line.graphemes(true).skip(prefix_length);
format!("{}\n", expand_tabs(line, config.tab_width))
let mut line = tabs::remove_prefix_and_expand(prefix_length, line, &config.tab_cfg);
line.push('\n');
line
} else {
"\n".to_string()
}
Expand All @@ -571,28 +568,9 @@ pub fn prepare(line: &str, prefix_length: usize, config: &config::Config) -> Str
// Remove initial -/+ characters, expand tabs as spaces, retaining ANSI sequences. Terminate with
// newline character.
pub fn prepare_raw_line(raw_line: &str, prefix_length: usize, config: &config::Config) -> String {
format!(
"{}\n",
ansi::ansi_preserving_slice(
&expand_tabs(raw_line.graphemes(true), config.tab_width),
prefix_length
),
)
}

/// Expand tabs as spaces.
/// tab_width = 0 is documented to mean do not replace tabs.
pub fn expand_tabs<'a, I>(line: I, tab_width: usize) -> String
where
I: Iterator<Item = &'a str>,
{
if tab_width > 0 {
let tab_replacement = " ".repeat(tab_width);
line.map(|s| if s == "\t" { &tab_replacement } else { s })
.collect::<String>()
} else {
line.collect::<String>()
}
let mut line = tabs::expand(raw_line, &config.tab_cfg);
line.push('\n');
ansi::ansi_preserving_slice(&line, prefix_length)
}

pub fn paint_minus_and_plus_lines(
Expand Down
2 changes: 1 addition & 1 deletion src/subcommands/show_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ pub fn show_config(config: &config::Config, writer: &mut dyn Write) -> std::io::
cli::Width::Fixed(width) => width.to_string(),
cli::Width::Variable => "variable".to_string(),
},
tab_width = config.tab_width,
tab_width = config.tab_cfg.width(),
tokenization_regex = format_option_value(config.tokenization_regex.to_string()),
)?;
Ok(())
Expand Down
1 change: 1 addition & 0 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ pub mod process;
pub mod regex_replacement;
pub mod round_char_boundary;
pub mod syntect;
pub mod tabs;
pub mod workarounds;
64 changes: 64 additions & 0 deletions src/utils/tabs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use unicode_segmentation::UnicodeSegmentation;

#[derive(Debug, Clone)]
pub struct TabCfg {
replacement: String,
}

impl TabCfg {
pub fn new(width: usize) -> Self {
TabCfg {
replacement: " ".repeat(width),
}
}
pub fn width(&self) -> usize {
self.replacement.len()
}
pub fn replace(&self) -> bool {
!self.replacement.is_empty()
}
}

/// Expand tabs as spaces.
pub fn expand(line: &str, tab_cfg: &TabCfg) -> String {
if tab_cfg.replace() && line.as_bytes().iter().any(|c| *c == b'\t') {
itertools::join(line.split('\t'), &tab_cfg.replacement)
} else {
line.to_string()
}
}

/// Remove `prefix` chars from `line`, then call `tabs::expand()`.
pub fn remove_prefix_and_expand(prefix: usize, line: &str, tab_cfg: &TabCfg) -> String {
let line_bytes = line.as_bytes();
// The to-be-removed prefixes are almost always ascii +/- (or ++/ +/.. for merges) for
// which grapheme clusters are not required.
if line_bytes.len() >= prefix && line_bytes[..prefix].is_ascii() {
// Safety: slicing into the utf-8 line-str is ok, upto `prefix` only ascii was present.
expand(&line[prefix..], tab_cfg)
} else {
let cut_line = line.graphemes(true).skip(prefix).collect::<String>();
expand(&cut_line, tab_cfg)
}
}

#[cfg(test)]
pub mod tests {
use super::*;

#[test]
fn test_remove_prefix_and_expand() {
let line = "+-foo\tbar";
let result = remove_prefix_and_expand(2, line, &TabCfg::new(3));
assert_eq!(result, "foo bar");
let result = remove_prefix_and_expand(2, line, &TabCfg::new(0));
assert_eq!(result, "foo\tbar");

let utf8_prefix = "-│-foo\tbar";
let n = 3;
let result = remove_prefix_and_expand(n, utf8_prefix, &TabCfg::new(1));
assert_eq!(result, "foo bar");
// ensure non-ascii chars were removed:
assert!(utf8_prefix.len() - result.len() > n);
}
}

0 comments on commit 139cdb9

Please sign in to comment.