Skip to content

Commit 90348b8

Browse files
pascalkuthe archseer
authored and committed
revamped snippet text element parsing
Snippet text elements can contain escape sequences that must be treated properly. Furthermore, snippets must always escape certain characters (like `}` or `\`). The function has been updated to account for that. `text` is now also included in `anything` to match the grammar and can also match empty text. To avoid infinite loops, the `non_empty` combinator has been added, which is automatically used in the `one_or_more` and `zero_or_more` combinators, where the problem would occur.
1 parent bbf4800 commit 90348b8

File tree

2 files changed

+85
-45
lines changed

2 files changed

+85
-45
lines changed

helix-lsp/src/snippet.rs

+72-45
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,17 @@ pub enum CaseChange {
1212

1313
#[derive(Debug, PartialEq, Eq)]
1414
pub enum FormatItem<'a> {
15-
Text(&'a str),
15+
Text(Tendril),
1616
Capture(usize),
1717
CaseChange(usize, CaseChange),
1818
Conditional(usize, Option<&'a str>, Option<&'a str>),
1919
}
2020

2121
#[derive(Debug, PartialEq, Eq)]
2222
pub struct Regex<'a> {
23-
value: &'a str,
23+
value: Tendril,
2424
replacement: Vec<FormatItem<'a>>,
25-
options: Option<&'a str>,
25+
options: Tendril,
2626
}
2727

2828
#[derive(Debug, PartialEq, Eq)]
@@ -36,14 +36,14 @@ pub enum SnippetElement<'a> {
3636
},
3737
Choice {
3838
tabstop: usize,
39-
choices: Vec<&'a str>,
39+
choices: Vec<Tendril>,
4040
},
4141
Variable {
4242
name: &'a str,
4343
default: Option<&'a str>,
4444
regex: Option<Regex<'a>>,
4545
},
46-
Text(&'a str),
46+
Text(Tendril),
4747
}
4848

4949
#[derive(Debug, PartialEq, Eq)]
@@ -67,12 +67,12 @@ fn render_elements(
6767

6868
for element in snippet_elements {
6969
match element {
70-
&Text(text) => {
70+
Text(text) => {
7171
// small optimization to avoid calling replace when it's unnecessary
7272
let text = if text.contains('\n') {
7373
Cow::Owned(text.replace('\n', newline_with_offset))
7474
} else {
75-
Cow::Borrowed(text)
75+
Cow::Borrowed(text.as_str())
7676
};
7777
*offset += text.chars().count();
7878
insert.push_str(&text);
@@ -160,6 +160,7 @@ pub fn render(
160160
}
161161

162162
mod parser {
163+
use helix_core::Tendril;
163164
use helix_parsec::*;
164165

165166
use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
@@ -210,8 +211,32 @@ mod parser {
210211
}
211212
}
212213

213-
fn text<'a, const SIZE: usize>(cs: [char; SIZE]) -> impl Parser<'a, Output = &'a str> {
214-
take_while(move |c| cs.into_iter().all(|c1| c != c1))
214+
const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$'];
215+
const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/'];
216+
const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ','];
217+
218+
fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> {
219+
move |input: &'a str| {
220+
let mut chars = input.char_indices();
221+
let mut res = Tendril::new();
222+
while let Some((i, c)) = chars.next() {
223+
match c {
224+
'\\' => {
225+
if let Some((_, c)) = chars.next() {
226+
if escape_chars.contains(&c) {
227+
res.push(c);
228+
continue;
229+
}
230+
}
231+
return Ok((&input[i..], res));
232+
}
233+
c if escape_chars.contains(&c) => return Ok((&input[i..], res)),
234+
c => res.push(c),
235+
}
236+
}
237+
238+
Ok(("", res))
239+
}
215240
}
216241

217242
fn digit<'a>() -> impl Parser<'a, Output = usize> {
@@ -274,20 +299,18 @@ mod parser {
274299
}
275300

276301
fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> {
277-
let text = map(text(['$', '/']), FormatItem::Text);
278-
let replacement = reparse_as(
279-
take_until(|c| c == '/'),
280-
one_or_more(choice!(format(), text)),
281-
);
282-
283302
map(
284303
seq!(
285304
"/",
286-
take_until(|c| c == '/'),
305+
// TODO parse as ECMAScript and convert to rust regex
306+
non_empty(text(&['/', '\\'])),
287307
"/",
288-
replacement,
308+
one_or_more(choice!(
309+
format(),
310+
map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text)
311+
)),
289312
"/",
290-
optional(take_until(|c| c == '}')),
313+
text(&['}', '\\',]),
291314
),
292315
|(_, value, _, replacement, _, options)| Regex {
293316
value,
@@ -308,13 +331,12 @@ mod parser {
308331
}
309332

310333
fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
311-
let text = map(text(['$', '}']), SnippetElement::Text);
312334
map(
313335
seq!(
314336
"${",
315337
digit(),
316338
":",
317-
one_or_more(choice!(anything(), text)),
339+
one_or_more(anything(TEXT_ESCAPE_CHARS)),
318340
"}"
319341
),
320342
|seq| SnippetElement::Placeholder {
@@ -330,7 +352,7 @@ mod parser {
330352
"${",
331353
digit(),
332354
"|",
333-
sep(take_until(|c| c == ',' || c == '|'), ","),
355+
sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","),
334356
"|}",
335357
),
336358
|seq| SnippetElement::Choice {
@@ -368,17 +390,21 @@ mod parser {
368390
)
369391
}
370392

371-
fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
372-
// The parser has to be constructed lazily to avoid infinite opaque type recursion
373-
|input: &'a str| {
374-
let parser = choice!(tabstop(), placeholder(), choice(), variable());
393+
fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> {
394+
move |input: &'a str| {
395+
let parser = choice!(
396+
tabstop(),
397+
placeholder(),
398+
choice(),
399+
variable(),
400+
map(text(escape_chars), SnippetElement::Text)
401+
);
375402
parser.parse(input)
376403
}
377404
}
378405

379406
fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
380-
let text = map(text(['$']), SnippetElement::Text);
381-
map(one_or_more(choice!(anything(), text)), |parts| Snippet {
407+
map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet {
382408
elements: parts,
383409
})
384410
}
@@ -392,6 +418,7 @@ mod parser {
392418
}
393419
})
394420
}
421+
395422
#[cfg(test)]
396423
mod test {
397424
use super::SnippetElement::*;
@@ -407,12 +434,12 @@ mod parser {
407434
assert_eq!(
408435
Ok(Snippet {
409436
elements: vec![
410-
Text("match("),
437+
Text("match(".into()),
411438
Placeholder {
412439
tabstop: 1,
413-
value: vec!(Text("Arg1")),
440+
value: vec!(Text("Arg1".into())),
414441
},
415-
Text(")")
442+
Text(")".into())
416443
]
417444
}),
418445
parse("match(${1:Arg1})")
@@ -446,15 +473,15 @@ mod parser {
446473
assert_eq!(
447474
Ok(Snippet {
448475
elements: vec![
449-
Text("local "),
476+
Text("local ".into()),
450477
Placeholder {
451478
tabstop: 1,
452-
value: vec!(Text("var")),
479+
value: vec!(Text("var".into())),
453480
},
454-
Text(" = "),
481+
Text(" = ".into()),
455482
Placeholder {
456483
tabstop: 1,
457-
value: vec!(Text("value")),
484+
value: vec!(Text("value".into())),
458485
},
459486
]
460487
}),
@@ -468,7 +495,7 @@ mod parser {
468495
Ok(Snippet {
469496
elements: vec![Placeholder {
470497
tabstop: 1,
471-
value: vec!(Text("var, "), Tabstop { tabstop: 2 },),
498+
value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },),
472499
},]
473500
}),
474501
parse("${1:var, $2}")
@@ -482,10 +509,10 @@ mod parser {
482509
elements: vec![Placeholder {
483510
tabstop: 1,
484511
value: vec!(
485-
Text("foo "),
512+
Text("foo ".into()),
486513
Placeholder {
487514
tabstop: 2,
488-
value: vec!(Text("bar")),
515+
value: vec!(Text("bar".into())),
489516
},
490517
),
491518
},]
@@ -499,27 +526,27 @@ mod parser {
499526
assert_eq!(
500527
Ok(Snippet {
501528
elements: vec![
502-
Text("hello "),
529+
Text("hello ".into()),
503530
Tabstop { tabstop: 1 },
504531
Tabstop { tabstop: 2 },
505-
Text(" "),
532+
Text(" ".into()),
506533
Choice {
507534
tabstop: 1,
508-
choices: vec!["one", "two", "three"]
535+
choices: vec!["one".into(), "two".into(), "three".into()]
509536
},
510-
Text(" "),
537+
Text(" ".into()),
511538
Variable {
512539
name: "name",
513540
default: Some("foo"),
514541
regex: None
515542
},
516-
Text(" "),
543+
Text(" ".into()),
517544
Variable {
518545
name: "var",
519546
default: None,
520547
regex: None
521548
},
522-
Text(" "),
549+
Text(" ".into()),
523550
Variable {
524551
name: "TM",
525552
default: None,
@@ -539,9 +566,9 @@ mod parser {
539566
name: "TM_FILENAME",
540567
default: None,
541568
regex: Some(Regex {
542-
value: "(.*).+$",
569+
value: "(.*).+$".into(),
543570
replacement: vec![FormatItem::Capture(1)],
544-
options: None,
571+
options: Tendril::new(),
545572
}),
546573
}]
547574
}),

helix-parsec/src/lib.rs

+13
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,7 @@ pub fn zero_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
459459
where
460460
P: Parser<'a, Output = T>,
461461
{
462+
let parser = non_empty(parser);
462463
move |mut input| {
463464
let mut values = Vec::new();
464465

@@ -491,6 +492,7 @@ pub fn one_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
491492
where
492493
P: Parser<'a, Output = T>,
493494
{
495+
let parser = non_empty(parser);
494496
move |mut input| {
495497
let mut values = Vec::new();
496498

@@ -559,3 +561,14 @@ where
559561
Ok((input, values))
560562
}
561563
}
564+
565+
pub fn non_empty<'a, T>(p: impl Parser<'a, Output = T>) -> impl Parser<'a, Output = T> {
566+
move |input| {
567+
let (new_input, res) = p.parse(input)?;
568+
if new_input.len() == input.len() {
569+
Err(input)
570+
} else {
571+
Ok((new_input, res))
572+
}
573+
}
574+
}

0 commit comments

Comments
 (0)