@@ -7,9 +7,11 @@ use crate::{
7
7
ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary,
8
8
prev_grapheme_boundary,
9
9
} ,
10
+ line_ending:: get_line_ending,
10
11
movement:: Direction ,
11
12
Assoc , ChangeSet , RopeGraphemes , RopeSlice ,
12
13
} ;
14
+ use helix_stdx:: rope:: { self , RopeSliceExt } ;
13
15
use smallvec:: { smallvec, SmallVec } ;
14
16
use std:: borrow:: Cow ;
15
17
@@ -708,12 +710,12 @@ impl IntoIterator for Selection {
708
710
pub fn keep_or_remove_matches (
709
711
text : RopeSlice ,
710
712
selection : & Selection ,
711
- regex : & crate :: regex :: Regex ,
713
+ regex : & rope :: Regex ,
712
714
remove : bool ,
713
715
) -> Option < Selection > {
714
716
let result: SmallVec < _ > = selection
715
717
. iter ( )
716
- . filter ( |range| regex. is_match ( & range. fragment ( text ) ) ^ remove)
718
+ . filter ( |range| regex. is_match ( text . regex_input_at ( range. from ( ) ..range . to ( ) ) ) ^ remove)
717
719
. copied ( )
718
720
. collect ( ) ;
719
721
@@ -724,25 +726,20 @@ pub fn keep_or_remove_matches(
724
726
None
725
727
}
726
728
729
+ // TODO: support splitting on capture group #N instead of the whole match
727
730
pub fn select_on_matches (
728
731
text : RopeSlice ,
729
732
selection : & Selection ,
730
- regex : & crate :: regex :: Regex ,
733
+ regex : & rope :: Regex ,
731
734
) -> Option < Selection > {
732
735
let mut result = SmallVec :: with_capacity ( selection. len ( ) ) ;
733
736
734
737
for sel in selection {
735
- // TODO: can't avoid occasional allocations since Regex can't operate on chunks yet
736
- let fragment = sel. fragment ( text) ;
737
-
738
- let sel_start = sel. from ( ) ;
739
- let start_byte = text. char_to_byte ( sel_start) ;
740
-
741
- for mat in regex. find_iter ( & fragment) {
738
+ for mat in regex. find_iter ( text. regex_input_at ( sel. from ( ) ..sel. to ( ) ) ) {
742
739
// TODO: retain range direction
743
740
744
- let start = text. byte_to_char ( start_byte + mat. start ( ) ) ;
745
- let end = text. byte_to_char ( start_byte + mat. end ( ) ) ;
741
+ let start = text. byte_to_char ( mat. start ( ) ) ;
742
+ let end = text. byte_to_char ( mat. end ( ) ) ;
746
743
747
744
let range = Range :: new ( start, end) ;
748
745
// Make sure the match is not right outside of the selection.
@@ -761,12 +758,7 @@ pub fn select_on_matches(
761
758
None
762
759
}
763
760
764
- // TODO: support to split on capture #N instead of whole match
765
- pub fn split_on_matches (
766
- text : RopeSlice ,
767
- selection : & Selection ,
768
- regex : & crate :: regex:: Regex ,
769
- ) -> Selection {
761
+ pub fn split_on_newline ( text : RopeSlice , selection : & Selection ) -> Selection {
770
762
let mut result = SmallVec :: with_capacity ( selection. len ( ) ) ;
771
763
772
764
for sel in selection {
@@ -776,21 +768,47 @@ pub fn split_on_matches(
776
768
continue ;
777
769
}
778
770
779
- // TODO: can't avoid occasional allocations since Regex can't operate on chunks yet
780
- let fragment = sel. fragment ( text) ;
781
-
782
771
let sel_start = sel. from ( ) ;
783
772
let sel_end = sel. to ( ) ;
784
773
785
- let start_byte = text . char_to_byte ( sel_start) ;
774
+ let mut start = sel_start;
786
775
776
+ for mat in sel. slice ( text) . lines ( ) {
777
+ let len = mat. len_chars ( ) ;
778
+ let line_end_len = get_line_ending ( & mat) . map ( |le| le. len_chars ( ) ) . unwrap_or ( 0 ) ;
779
+ // TODO: retain range direction
780
+ result. push ( Range :: new ( start, start + len - line_end_len) ) ;
781
+ start += len;
782
+ }
783
+
784
+ if start < sel_end {
785
+ result. push ( Range :: new ( start, sel_end) ) ;
786
+ }
787
+ }
788
+
789
+ // TODO: figure out a new primary index
790
+ Selection :: new ( result, 0 )
791
+ }
792
+
793
+ pub fn split_on_matches ( text : RopeSlice , selection : & Selection , regex : & rope:: Regex ) -> Selection {
794
+ let mut result = SmallVec :: with_capacity ( selection. len ( ) ) ;
795
+
796
+ for sel in selection {
797
+ // Special case: zero-width selection.
798
+ if sel. from ( ) == sel. to ( ) {
799
+ result. push ( * sel) ;
800
+ continue ;
801
+ }
802
+
803
+ let sel_start = sel. from ( ) ;
804
+ let sel_end = sel. to ( ) ;
787
805
let mut start = sel_start;
788
806
789
- for mat in regex. find_iter ( & fragment ) {
807
+ for mat in regex. find_iter ( text . regex_input_at ( sel_start..sel_end ) ) {
790
808
// TODO: retain range direction
791
- let end = text. byte_to_char ( start_byte + mat. start ( ) ) ;
809
+ let end = text. byte_to_char ( mat. start ( ) ) ;
792
810
result. push ( Range :: new ( start, end) ) ;
793
- start = text. byte_to_char ( start_byte + mat. end ( ) ) ;
811
+ start = text. byte_to_char ( mat. end ( ) ) ;
794
812
}
795
813
796
814
if start < sel_end {
@@ -1021,14 +1039,12 @@ mod test {
1021
1039
1022
1040
#[ test]
1023
1041
fn test_select_on_matches ( ) {
1024
- use crate :: regex:: { Regex , RegexBuilder } ;
1025
-
1026
1042
let r = Rope :: from_str ( "Nobody expects the Spanish inquisition" ) ;
1027
1043
let s = r. slice ( ..) ;
1028
1044
1029
1045
let selection = Selection :: single ( 0 , r. len_chars ( ) ) ;
1030
1046
assert_eq ! (
1031
- select_on_matches( s, & selection, & Regex :: new( r"[A-Z][a-z]*" ) . unwrap( ) ) ,
1047
+ select_on_matches( s, & selection, & rope :: Regex :: new( r"[A-Z][a-z]*" ) . unwrap( ) ) ,
1032
1048
Some ( Selection :: new(
1033
1049
smallvec![ Range :: new( 0 , 6 ) , Range :: new( 19 , 26 ) ] ,
1034
1050
0
@@ -1038,8 +1054,14 @@ mod test {
1038
1054
let r = Rope :: from_str ( "This\n String\n \n contains multiple\n lines" ) ;
1039
1055
let s = r. slice ( ..) ;
1040
1056
1041
- let start_of_line = RegexBuilder :: new ( r"^" ) . multi_line ( true ) . build ( ) . unwrap ( ) ;
1042
- let end_of_line = RegexBuilder :: new ( r"$" ) . multi_line ( true ) . build ( ) . unwrap ( ) ;
1057
+ let start_of_line = rope:: RegexBuilder :: new ( )
1058
+ . syntax ( rope:: Config :: new ( ) . multi_line ( true ) )
1059
+ . build ( r"^" )
1060
+ . unwrap ( ) ;
1061
+ let end_of_line = rope:: RegexBuilder :: new ( )
1062
+ . syntax ( rope:: Config :: new ( ) . multi_line ( true ) )
1063
+ . build ( r"$" )
1064
+ . unwrap ( ) ;
1043
1065
1044
1066
// line without ending
1045
1067
assert_eq ! (
@@ -1077,9 +1099,9 @@ mod test {
1077
1099
select_on_matches(
1078
1100
s,
1079
1101
& Selection :: single( 0 , s. len_chars( ) ) ,
1080
- & RegexBuilder :: new( r"^[a-z ]*$" )
1081
- . multi_line( true )
1082
- . build( )
1102
+ & rope :: RegexBuilder :: new( )
1103
+ . syntax ( rope :: Config :: new ( ) . multi_line( true ) )
1104
+ . build( r"^[a-z ]*$" )
1083
1105
. unwrap( )
1084
1106
) ,
1085
1107
Some ( Selection :: new(
@@ -1171,13 +1193,15 @@ mod test {
1171
1193
1172
1194
#[ test]
1173
1195
fn test_split_on_matches ( ) {
1174
- use crate :: regex:: Regex ;
1175
-
1176
1196
let text = Rope :: from ( " abcd efg wrs xyz 123 456" ) ;
1177
1197
1178
1198
let selection = Selection :: new ( smallvec ! [ Range :: new( 0 , 9 ) , Range :: new( 11 , 20 ) , ] , 0 ) ;
1179
1199
1180
- let result = split_on_matches ( text. slice ( ..) , & selection, & Regex :: new ( r"\s+" ) . unwrap ( ) ) ;
1200
+ let result = split_on_matches (
1201
+ text. slice ( ..) ,
1202
+ & selection,
1203
+ & rope:: Regex :: new ( r"\s+" ) . unwrap ( ) ,
1204
+ ) ;
1181
1205
1182
1206
assert_eq ! (
1183
1207
result. ranges( ) ,
0 commit comments