Skip to content

Commit 6736475

Browse files
pascalkuthe authored and mtoohey31 committed
switch to regex-cursor (helix-editor#9422)
1 parent 1a6cd5e commit 6736475

File tree

7 files changed

+175
-86
lines changed

7 files changed

+175
-86
lines changed

Cargo.lock

+16-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helix-core/src/selection.rs

+60-36
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,11 @@ use crate::{
77
ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary,
88
prev_grapheme_boundary,
99
},
10+
line_ending::get_line_ending,
1011
movement::Direction,
1112
Assoc, ChangeSet, RopeGraphemes, RopeSlice,
1213
};
14+
use helix_stdx::rope::{self, RopeSliceExt};
1315
use smallvec::{smallvec, SmallVec};
1416
use std::borrow::Cow;
1517

@@ -708,12 +710,12 @@ impl IntoIterator for Selection {
708710
pub fn keep_or_remove_matches(
709711
text: RopeSlice,
710712
selection: &Selection,
711-
regex: &crate::regex::Regex,
713+
regex: &rope::Regex,
712714
remove: bool,
713715
) -> Option<Selection> {
714716
let result: SmallVec<_> = selection
715717
.iter()
716-
.filter(|range| regex.is_match(&range.fragment(text)) ^ remove)
718+
.filter(|range| regex.is_match(text.regex_input_at(range.from()..range.to())) ^ remove)
717719
.copied()
718720
.collect();
719721

@@ -724,25 +726,20 @@ pub fn keep_or_remove_matches(
724726
None
725727
}
726728

729+
// TODO: support to split on capture #N instead of whole match
727730
pub fn select_on_matches(
728731
text: RopeSlice,
729732
selection: &Selection,
730-
regex: &crate::regex::Regex,
733+
regex: &rope::Regex,
731734
) -> Option<Selection> {
732735
let mut result = SmallVec::with_capacity(selection.len());
733736

734737
for sel in selection {
735-
// TODO: can't avoid occasional allocations since Regex can't operate on chunks yet
736-
let fragment = sel.fragment(text);
737-
738-
let sel_start = sel.from();
739-
let start_byte = text.char_to_byte(sel_start);
740-
741-
for mat in regex.find_iter(&fragment) {
738+
for mat in regex.find_iter(text.regex_input_at(sel.from()..sel.to())) {
742739
// TODO: retain range direction
743740

744-
let start = text.byte_to_char(start_byte + mat.start());
745-
let end = text.byte_to_char(start_byte + mat.end());
741+
let start = text.byte_to_char(mat.start());
742+
let end = text.byte_to_char(mat.end());
746743

747744
let range = Range::new(start, end);
748745
// Make sure the match is not right outside of the selection.
@@ -761,12 +758,7 @@ pub fn select_on_matches(
761758
None
762759
}
763760

764-
// TODO: support to split on capture #N instead of whole match
765-
pub fn split_on_matches(
766-
text: RopeSlice,
767-
selection: &Selection,
768-
regex: &crate::regex::Regex,
769-
) -> Selection {
761+
pub fn split_on_newline(text: RopeSlice, selection: &Selection) -> Selection {
770762
let mut result = SmallVec::with_capacity(selection.len());
771763

772764
for sel in selection {
@@ -776,21 +768,47 @@ pub fn split_on_matches(
776768
continue;
777769
}
778770

779-
// TODO: can't avoid occasional allocations since Regex can't operate on chunks yet
780-
let fragment = sel.fragment(text);
781-
782771
let sel_start = sel.from();
783772
let sel_end = sel.to();
784773

785-
let start_byte = text.char_to_byte(sel_start);
774+
let mut start = sel_start;
786775

776+
for mat in sel.slice(text).lines() {
777+
let len = mat.len_chars();
778+
let line_end_len = get_line_ending(&mat).map(|le| le.len_chars()).unwrap_or(0);
779+
// TODO: retain range direction
780+
result.push(Range::new(start, start + len - line_end_len));
781+
start += len;
782+
}
783+
784+
if start < sel_end {
785+
result.push(Range::new(start, sel_end));
786+
}
787+
}
788+
789+
// TODO: figure out a new primary index
790+
Selection::new(result, 0)
791+
}
792+
793+
pub fn split_on_matches(text: RopeSlice, selection: &Selection, regex: &rope::Regex) -> Selection {
794+
let mut result = SmallVec::with_capacity(selection.len());
795+
796+
for sel in selection {
797+
// Special case: zero-width selection.
798+
if sel.from() == sel.to() {
799+
result.push(*sel);
800+
continue;
801+
}
802+
803+
let sel_start = sel.from();
804+
let sel_end = sel.to();
787805
let mut start = sel_start;
788806

789-
for mat in regex.find_iter(&fragment) {
807+
for mat in regex.find_iter(text.regex_input_at(sel_start..sel_end)) {
790808
// TODO: retain range direction
791-
let end = text.byte_to_char(start_byte + mat.start());
809+
let end = text.byte_to_char(mat.start());
792810
result.push(Range::new(start, end));
793-
start = text.byte_to_char(start_byte + mat.end());
811+
start = text.byte_to_char(mat.end());
794812
}
795813

796814
if start < sel_end {
@@ -1021,14 +1039,12 @@ mod test {
10211039

10221040
#[test]
10231041
fn test_select_on_matches() {
1024-
use crate::regex::{Regex, RegexBuilder};
1025-
10261042
let r = Rope::from_str("Nobody expects the Spanish inquisition");
10271043
let s = r.slice(..);
10281044

10291045
let selection = Selection::single(0, r.len_chars());
10301046
assert_eq!(
1031-
select_on_matches(s, &selection, &Regex::new(r"[A-Z][a-z]*").unwrap()),
1047+
select_on_matches(s, &selection, &rope::Regex::new(r"[A-Z][a-z]*").unwrap()),
10321048
Some(Selection::new(
10331049
smallvec![Range::new(0, 6), Range::new(19, 26)],
10341050
0
@@ -1038,8 +1054,14 @@ mod test {
10381054
let r = Rope::from_str("This\nString\n\ncontains multiple\nlines");
10391055
let s = r.slice(..);
10401056

1041-
let start_of_line = RegexBuilder::new(r"^").multi_line(true).build().unwrap();
1042-
let end_of_line = RegexBuilder::new(r"$").multi_line(true).build().unwrap();
1057+
let start_of_line = rope::RegexBuilder::new()
1058+
.syntax(rope::Config::new().multi_line(true))
1059+
.build(r"^")
1060+
.unwrap();
1061+
let end_of_line = rope::RegexBuilder::new()
1062+
.syntax(rope::Config::new().multi_line(true))
1063+
.build(r"$")
1064+
.unwrap();
10431065

10441066
// line without ending
10451067
assert_eq!(
@@ -1077,9 +1099,9 @@ mod test {
10771099
select_on_matches(
10781100
s,
10791101
&Selection::single(0, s.len_chars()),
1080-
&RegexBuilder::new(r"^[a-z ]*$")
1081-
.multi_line(true)
1082-
.build()
1102+
&rope::RegexBuilder::new()
1103+
.syntax(rope::Config::new().multi_line(true))
1104+
.build(r"^[a-z ]*$")
10831105
.unwrap()
10841106
),
10851107
Some(Selection::new(
@@ -1171,13 +1193,15 @@ mod test {
11711193

11721194
#[test]
11731195
fn test_split_on_matches() {
1174-
use crate::regex::Regex;
1175-
11761196
let text = Rope::from(" abcd efg wrs xyz 123 456");
11771197

11781198
let selection = Selection::new(smallvec![Range::new(0, 9), Range::new(11, 20),], 0);
11791199

1180-
let result = split_on_matches(text.slice(..), &selection, &Regex::new(r"\s+").unwrap());
1200+
let result = split_on_matches(
1201+
text.slice(..),
1202+
&selection,
1203+
&rope::Regex::new(r"\s+").unwrap(),
1204+
);
11811205

11821206
assert_eq!(
11831207
result.ranges(),

helix-core/src/syntax.rs

+9-3
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ use arc_swap::{ArcSwap, Guard};
1212
use bitflags::bitflags;
1313
use globset::GlobSet;
1414
use hashbrown::raw::RawTable;
15+
use helix_stdx::rope::{self, RopeSliceExt};
1516
use slotmap::{DefaultKey as LayerId, HopSlotMap};
1617

1718
use std::{
@@ -1961,11 +1962,16 @@ impl HighlightConfiguration {
19611962
node_slice
19621963
};
19631964

1964-
static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
1965+
static SHEBANG_REGEX: Lazy<rope::Regex> =
1966+
Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
19651967

19661968
injection_capture = SHEBANG_REGEX
1967-
.captures(&Cow::from(lines))
1968-
.map(|cap| InjectionLanguageMarker::Shebang(cap[1].to_owned()))
1969+
.captures_iter(lines.regex_input())
1970+
.map(|cap| {
1971+
let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
1972+
InjectionLanguageMarker::Shebang(cap.into())
1973+
})
1974+
.next()
19691975
} else if index == self.injection_content_capture_index {
19701976
content_node = Some(capture.node);
19711977
}

helix-stdx/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ dunce = "1.0"
1616
etcetera = "0.8"
1717
ropey = { version = "1.6.1", default-features = false }
1818
which = "6.0"
19+
regex-cursor = "0.1.3"
1920

2021
[dev-dependencies]
2122
tempfile = "3.10"

helix-stdx/src/rope.rs

+43-2
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,22 @@
1+
use std::ops::{Bound, RangeBounds};
2+
3+
pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex};
4+
pub use regex_cursor::regex_automata::util::syntax::Config;
5+
use regex_cursor::{Input as RegexInput, RopeyCursor};
16
use ropey::RopeSlice;
27

3-
pub trait RopeSliceExt: Sized {
8+
pub trait RopeSliceExt<'a>: Sized {
49
fn ends_with(self, text: &str) -> bool;
510
fn starts_with(self, text: &str) -> bool;
11+
fn regex_input(self) -> RegexInput<RopeyCursor<'a>>;
12+
fn regex_input_at_bytes<R: RangeBounds<usize>>(
13+
self,
14+
byte_range: R,
15+
) -> RegexInput<RopeyCursor<'a>>;
16+
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>;
617
}
718

8-
impl RopeSliceExt for RopeSlice<'_> {
19+
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
920
fn ends_with(self, text: &str) -> bool {
1021
let len = self.len_bytes();
1122
if len < text.len() {
@@ -23,4 +34,34 @@ impl RopeSliceExt for RopeSlice<'_> {
2334
self.get_byte_slice(..len - text.len())
2435
.map_or(false, |start| start == text)
2536
}
37+
38+
fn regex_input(self) -> RegexInput<RopeyCursor<'a>> {
39+
RegexInput::new(self)
40+
}
41+
42+
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>> {
43+
let start_bound = match char_range.start_bound() {
44+
Bound::Included(&val) => Bound::Included(self.char_to_byte(val)),
45+
Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte(val)),
46+
Bound::Unbounded => Bound::Unbounded,
47+
};
48+
let end_bound = match char_range.end_bound() {
49+
Bound::Included(&val) => Bound::Included(self.char_to_byte(val)),
50+
Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte(val)),
51+
Bound::Unbounded => Bound::Unbounded,
52+
};
53+
self.regex_input_at_bytes((start_bound, end_bound))
54+
}
55+
fn regex_input_at_bytes<R: RangeBounds<usize>>(
56+
self,
57+
byte_range: R,
58+
) -> RegexInput<RopeyCursor<'a>> {
59+
let input = match byte_range.start_bound() {
60+
Bound::Included(&pos) | Bound::Excluded(&pos) => {
61+
RegexInput::new(RopeyCursor::at(self, pos))
62+
}
63+
Bound::Unbounded => RegexInput::new(self),
64+
};
65+
input.range(byte_range)
66+
}
2667
}

0 commit comments

Comments
 (0)