From 0387c220bb6992100e50caa09b7b5ec9b201a5a6 Mon Sep 17 00:00:00 2001 From: Rolo Date: Fri, 10 Jan 2025 09:57:20 -0800 Subject: [PATCH] refactor: change default `ParseMode` to `Raw` --- helix-core/src/args.rs | 67 ++++++++++++++++++++----------- helix-core/src/shellwords.rs | 68 +++++++++++++++++--------------- helix-term/src/commands/typed.rs | 9 ++++- 3 files changed, 87 insertions(+), 57 deletions(-) diff --git a/helix-core/src/args.rs b/helix-core/src/args.rs index 8a56f71578d54..86ae3a1004020 100644 --- a/helix-core/src/args.rs +++ b/helix-core/src/args.rs @@ -220,6 +220,7 @@ pub struct ArgsParser<'a> { idx: usize, start: usize, mode: ParseMode, + is_finished: bool, } impl<'a> ArgsParser<'a> { @@ -229,7 +230,8 @@ impl<'a> ArgsParser<'a> { input, idx: 0, start: 0, - mode: ParseMode::RawParams, + mode: ParseMode::Raw, + is_finished: false, } } @@ -271,8 +273,8 @@ impl<'a> ArgsParser<'a> { /// /// # Examples /// ``` - /// # use helix_core::args::ArgsParser; - /// let mut args = ArgsParser::from(r#"sed -n "s/test t/not /p""#); + /// # use helix_core::args::{ArgsParser, ParseMode}; + /// let mut args = ArgsParser::from(r#"sed -n "s/test t/not /p""#).with_mode(ParseMode::RawParams); /// assert_eq!("sed", args.next().unwrap()); /// assert_eq!(r#"-n "s/test t/not /p""#, args.rest()); /// ``` @@ -290,28 +292,38 @@ impl<'a> Iterator for ArgsParser<'a> { #[inline] #[allow(clippy::too_many_lines)] fn next(&mut self) -> Option { + // Special case so that `ArgsParser::new("")` and `Args::from("")` result in no iterations + // being done, and `ArgsParser::new("").count == 0` and `Args::from("").is_empty` is `true`. + if self.input.is_empty() { + return None; + } + match self.mode { - ParseMode::Raw => { + ParseMode::Raw if !self.is_finished => { self.start = self.input.len(); self.idx = self.input.len(); + self.is_finished = true; return Some(Cow::from(self.input)); } - ParseMode::Literal => { + ParseMode::Literal if !self.is_finished => { self.start = self.input.len(); self.idx = self.input.len(); + self.is_finished = true; return Some(unescape(self.input, true, false)); } - ParseMode::LiteralUnescapeBackslash => { + ParseMode::LiteralUnescapeBackslash if !self.is_finished => { self.start = self.input.len(); self.idx = self.input.len(); + self.is_finished = true; return Some(unescape(self.input, true, true)); } - ParseMode::UnescapeBackslash => { + ParseMode::UnescapeBackslash if !self.is_finished => { self.start = self.input.len(); self.idx = self.input.len(); + self.is_finished = true; return Some(unescape(self.input, false, true)); } @@ -544,12 +556,16 @@ mod test { #[test] fn should_parse_arguments_with_no_unescaping() { - let parser = Args::from(r#"single_word twó wörds \\three\ \"with\ escaping\\"#); + let mut parser = ArgsParser::from(r#"single_word twó wörds \\three\ \"with\ escaping\\"#) + .with_mode(ParseMode::RawParams); - assert_eq!(Cow::from("single_word"), parser[0]); - assert_eq!(Cow::from("twó"), parser[1]); - assert_eq!(Cow::from("wörds"), parser[2]); - assert_eq!(Cow::from(r#"\\three\ \"with\ escaping\\"#), parser[3]); + assert_eq!(Cow::from("single_word"), parser.next().unwrap()); + assert_eq!(Cow::from("twó"), parser.next().unwrap()); + assert_eq!(Cow::from("wörds"), parser.next().unwrap()); + assert_eq!( + Cow::from(r#"\\three\ \"with\ escaping\\"#), + parser.next().unwrap() + ); } #[test] @@ -574,7 +590,9 @@ mod test { #[test] fn should_split_args_no_slash_unescaping() { let args: Vec> = - ArgsParser::from(r#"single_word twó wörds \\three\ \"with\ escaping\\"#).collect(); + ArgsParser::from(r#"single_word twó wörds \\three\ \"with\ escaping\\"#) + .with_mode(ParseMode::RawParams) + .collect(); assert_eq!( vec![ @@ -589,7 +607,13 @@ mod test { #[test] fn should_have_empty_args() { - assert!(Args::from("").is_empty(),); + let args = Args::from(""); + let mut parser = ArgsParser::new(""); + + assert!(args.first().is_none()); + assert!(args.is_empty()); + assert!(parser.next().is_none()); + assert!(parser.is_empty()); } #[test] @@ -632,8 +656,9 @@ mod test { #[test] fn should_parse_args_even_with_leading_whitespace() { + let mut parser = ArgsParser::new(" a").with_mode(ParseMode::RawParams); // Three spaces - assert_eq!(Cow::from("a"), Args::from(" a")[0]); + assert_eq!(Cow::from("a"), parser.next().unwrap()); } #[test] @@ -702,19 +727,13 @@ mod test { #[test] fn should_return_rest_from_parser() { - let mut parser = ArgsParser::from(r#"statusline.center ["file-type","file-encoding"]"#); + let mut parser = ArgsParser::from(r#"statusline.center ["file-type","file-encoding"]"#) + .with_mode(ParseMode::RawParams); assert_eq!(Some("statusline.center"), parser.next().as_deref()); assert_eq!(r#"["file-type","file-encoding"]"#, parser.rest()); } - #[test] - fn should_return_no_args() { - let mut args = ArgsParser::new(""); - assert!(args.next().is_none()); - assert!(args.is_empty()); - } - #[test] fn should_leave_escaped_quotes() { let mut args = @@ -770,7 +789,7 @@ mod test { #[test] fn should_end_in_unterminated_quotes() { - let mut args = ArgsParser::new(r#"a.txt "b "#); + let mut args = ArgsParser::new(r#"a.txt "b "#).with_mode(ParseMode::RawParams); let last = args.by_ref().last(); assert_eq!(Some(Cow::from(r#""b "#)), last); diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs index ae62314049759..cc9f66f92446e 100644 --- a/helix-core/src/shellwords.rs +++ b/helix-core/src/shellwords.rs @@ -1,7 +1,7 @@ use smartstring::{LazyCompact, SmartString}; use std::borrow::Cow; -use crate::args::ArgsParser; +use crate::args::{ArgsParser, ParseMode}; /// A utility for parsing shell-like command lines. /// @@ -30,14 +30,9 @@ use crate::args::ArgsParser; /// /// ``` /// # use helix_core::shellwords::Shellwords; -/// # use helix_core::args::Args; /// /// let shellwords = Shellwords::from(":o a b c"); -/// let args = Args::from(shellwords.args()); -/// -/// assert_eq!("a", &args[0]); -/// assert_eq!("b", &args[1]); -/// assert_eq!("c", &args[2]); +/// assert_eq!("a b c", shellwords.args()); /// ``` #[derive(Clone, Copy)] pub struct Shellwords<'a> { @@ -92,30 +87,35 @@ impl<'a> Shellwords<'a> { #[inline] #[must_use] pub fn ends_with_whitespace(&self) -> bool { - ArgsParser::from(self.args()).last().map_or( - self.input.ends_with(' ') || self.input.ends_with('\t'), - |last| { - if cfg!(windows) { - let ends_with_whitespace = - self.input.ends_with(' ') || self.input.ends_with('\t'); - let last_starts_with_quote = - last.starts_with('"') && !last.starts_with('\'') && !last.starts_with('`'); - - ends_with_whitespace && !last_starts_with_quote - } else { - let ends_with_escaped_whitespace = - last.ends_with("\\ ") || last.ends_with("\\\t"); - let end_with_whitespace = - self.input.ends_with(' ') || self.input.ends_with('\t'); - let last_starts_with_quotes = - last.starts_with('"') && !last.starts_with('\'') && !last.starts_with('`'); - let ends_in_true_whitespace = - !ends_with_escaped_whitespace && end_with_whitespace; - - ends_in_true_whitespace && !last_starts_with_quotes - } - }, - ) + ArgsParser::from(dbg!(self.args())) + .with_mode(ParseMode::RawParams) + .last() + .map_or( + self.input.ends_with(' ') || self.input.ends_with('\t'), + |last| { + if cfg!(windows) { + let ends_with_whitespace = + self.input.ends_with(' ') || self.input.ends_with('\t'); + let last_starts_with_quote = last.starts_with('"') + || last.starts_with('\'') + || last.starts_with('`'); + + ends_with_whitespace && !last_starts_with_quote + } else { + let ends_with_escaped_whitespace = + last.ends_with("\\ ") || last.ends_with("\\\t"); + let end_with_whitespace = + self.input.ends_with(' ') || self.input.ends_with('\t'); + let last_starts_with_quote = last.starts_with('"') + || last.starts_with('\'') + || last.starts_with('`'); + let ends_in_true_whitespace = + !ends_with_escaped_whitespace && end_with_whitespace; + + ends_in_true_whitespace && !last_starts_with_quote + } + }, + ) } } @@ -395,4 +395,10 @@ mod test { unescape(r"helix-term\\", false, true) ); } + + #[test] + fn should_end_in_whitespace() { + assert!(!Shellwords::from(r#":option "abc "#).ends_with_whitespace()); + assert!(!Shellwords::from(":option abc").ends_with_whitespace()); + } } diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index 2744c6fb42950..3925c68f76e75 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -3511,11 +3511,16 @@ mod tests { for case in cases { let shellwords = Shellwords::from(case.0); + let mut parser = ArgsParser::from(shellwords.args()).with_mode(ParseMode::RawParams); + let args: Vec<_> = parser.by_ref().collect(); + assert_eq!( case.1, - argument_number_of(&shellwords, Args::from(shellwords.args()).len()), - "`{}`", + argument_number_of(&shellwords, args.len()), + "`{}`: {:?}\n{:#?}", case.0, + parser, + args ); } }