@@ -100,24 +100,30 @@ pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
100
100
// - the first remaining bytes, < 2 word size
101
101
let len = text. len ( ) ;
102
102
let ptr = text. as_ptr ( ) ;
103
- let usize_bytes = mem :: size_of :: < usize > ( ) ;
103
+ type Chunk = usize ;
104
104
105
- let mut offset = {
106
- // We call this just to obtain the length of the suffix
107
- let ( _, _, suffix) = unsafe { text. align_to :: < usize > ( ) } ;
108
- len - suffix. len ( )
105
+ let ( min_aligned_offset, max_aligned_offset) = {
106
+ // We call this just to obtain the length of the prefix and suffix.
107
+ // In the middle we always process two chunks at once.
108
+ let ( prefix, _, suffix) = unsafe { text. align_to :: < ( Chunk , Chunk ) > ( ) } ;
109
+ ( prefix. len ( ) , len - suffix. len ( ) )
109
110
} ;
111
+
112
+ let mut offset = max_aligned_offset;
110
113
if let Some ( index) = text[ offset..] . iter ( ) . rposition ( |elt| * elt == x) {
111
114
return Some ( offset + index) ;
112
115
}
113
116
114
- // search the body of the text
117
+ // search the body of the text, make sure we don't cross min_aligned_offset.
118
+ // offset is always aligned, so just testing `>` is sufficient and avoids possible
119
+ // overflow.
115
120
let repeated_x = repeat_byte ( x) ;
121
+ let chunk_bytes = mem:: size_of :: < Chunk > ( ) ;
116
122
117
- while offset >= 2 * usize_bytes {
123
+ while offset > min_aligned_offset {
118
124
unsafe {
119
- let u = * ( ptr. offset ( offset as isize - 2 * usize_bytes as isize ) as * const usize ) ;
120
- let v = * ( ptr. offset ( offset as isize - usize_bytes as isize ) as * const usize ) ;
125
+ let u = * ( ptr. offset ( offset as isize - 2 * chunk_bytes as isize ) as * const Chunk ) ;
126
+ let v = * ( ptr. offset ( offset as isize - chunk_bytes as isize ) as * const Chunk ) ;
121
127
122
128
// break if there is a matching byte
123
129
let zu = contains_zero_byte ( u ^ repeated_x) ;
@@ -126,7 +132,7 @@ pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
126
132
break ;
127
133
}
128
134
}
129
- offset -= 2 * usize_bytes ;
135
+ offset -= 2 * chunk_bytes ;
130
136
}
131
137
132
138
// find the byte before the point the body loop stopped
0 commit comments