
Commit af6a161

Auto merge of rust-lang#123175 - Nilstrieb:debug-strict-overflow, r=wesleywiser
Add add/sub methods to rustc that only panic with debug assertions. This mitigates the perf impact of enabling overflow checks on rustc. The change to actually use overflow checks will be done in a later PR. For rust-lang/compiler-team#724, based on data gathered in rust-lang#119440.
2 parents: 618c300 + 5039160
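In short, the new helpers use checked arithmetic only in builds with debug assertions and wrapping arithmetic otherwise, sidestepping `-Coverflow-checks` in the hottest spots. A minimal sketch of the pattern as a free function (illustrative only; the commit itself adds `debug_strict_add`/`debug_strict_sub` trait methods in `rustc_serialize`, shown below):

    // Sketch of the debug-strict idea, not the commit's actual API.
    fn debug_strict_add(a: usize, b: usize) -> usize {
        if cfg!(debug_assertions) {
            // Builds with debug assertions keep plain `+`, which panics on
            // overflow since overflow checks default to on in such builds.
            a + b
        } else {
            // Dist builds skip the check, even when compiled with -Coverflow-checks=on.
            a.wrapping_add(b)
        }
    }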

File tree (6 files changed: +109 -27 lines)

  compiler/rustc_data_structures/src/sip128.rs   +21 -17
  compiler/rustc_serialize/src/int_overflow.rs   +65 (new file)
  compiler/rustc_serialize/src/leb128.rs         +9 -5
  compiler/rustc_serialize/src/lib.rs            +1
  compiler/rustc_serialize/src/opaque.rs         +7 -3
  compiler/rustc_span/src/span_encoding.rs       +6 -2

compiler/rustc_data_structures/src/sip128.rs (+21 -17)

@@ -1,5 +1,8 @@
 //! This is a copy of `core::hash::sip` adapted to providing 128 bit hashes.
 
+// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use rustc_serialize::int_overflow::{DebugStrictAdd, DebugStrictSub};
 use std::hash::Hasher;
 use std::mem::{self, MaybeUninit};
 use std::ptr;
@@ -103,19 +106,19 @@ unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize)
     }
 
     let mut i = 0;
-    if i + 3 < count {
+    if i.debug_strict_add(3) < count {
         ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4);
-        i += 4;
+        i = i.debug_strict_add(4);
     }
 
-    if i + 1 < count {
+    if i.debug_strict_add(1) < count {
         ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2);
-        i += 2
+        i = i.debug_strict_add(2)
     }
 
     if i < count {
         *dst.add(i) = *src.add(i);
-        i += 1;
+        i = i.debug_strict_add(1);
     }
 
     debug_assert_eq!(i, count);
@@ -211,14 +214,14 @@ impl SipHasher128 {
         debug_assert!(nbuf < BUFFER_SIZE);
         debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
 
-        if nbuf + LEN < BUFFER_SIZE {
+        if nbuf.debug_strict_add(LEN) < BUFFER_SIZE {
             unsafe {
                 // The memcpy call is optimized away because the size is known.
                 let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
                 ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
             }
 
-            self.nbuf = nbuf + LEN;
+            self.nbuf = nbuf.debug_strict_add(LEN);
 
             return;
         }
@@ -265,8 +268,9 @@ impl SipHasher128 {
         // This function should only be called when the write fills the buffer.
         // Therefore, when LEN == 1, the new `self.nbuf` must be zero.
         // LEN is statically known, so the branch is optimized away.
-        self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE };
-        self.processed += BUFFER_SIZE;
+        self.nbuf =
+            if LEN == 1 { 0 } else { nbuf.debug_strict_add(LEN).debug_strict_sub(BUFFER_SIZE) };
+        self.processed = self.processed.debug_strict_add(BUFFER_SIZE);
     }
 }
 
@@ -277,7 +281,7 @@ impl SipHasher128 {
         let nbuf = self.nbuf;
         debug_assert!(nbuf < BUFFER_SIZE);
 
-        if nbuf + length < BUFFER_SIZE {
+        if nbuf.debug_strict_add(length) < BUFFER_SIZE {
             unsafe {
                 let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
 
@@ -289,7 +293,7 @@ impl SipHasher128 {
                 }
             }
 
-            self.nbuf = nbuf + length;
+            self.nbuf = nbuf.debug_strict_add(length);
 
             return;
         }
@@ -315,7 +319,7 @@ impl SipHasher128 {
         // This function should only be called when the write fills the buffer,
        // so we know that there is enough input to fill the current element.
         let valid_in_elem = nbuf % ELEM_SIZE;
-        let needed_in_elem = ELEM_SIZE - valid_in_elem;
+        let needed_in_elem = ELEM_SIZE.debug_strict_sub(valid_in_elem);
 
         let src = msg.as_ptr();
         let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
@@ -327,7 +331,7 @@ impl SipHasher128 {
         // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0.
         // We know that is true, because last step ensured we have a full
         // element in the buffer.
-        let last = nbuf / ELEM_SIZE + 1;
+        let last = (nbuf / ELEM_SIZE).debug_strict_add(1);
 
         for i in 0..last {
             let elem = self.buf.get_unchecked(i).assume_init().to_le();
@@ -338,7 +342,7 @@ impl SipHasher128 {
 
         // Process the remaining element-sized chunks of input.
         let mut processed = needed_in_elem;
-        let input_left = length - processed;
+        let input_left = length.debug_strict_sub(processed);
         let elems_left = input_left / ELEM_SIZE;
         let extra_bytes_left = input_left % ELEM_SIZE;
 
@@ -347,7 +351,7 @@ impl SipHasher128 {
             self.state.v3 ^= elem;
             Sip13Rounds::c_rounds(&mut self.state);
             self.state.v0 ^= elem;
-            processed += ELEM_SIZE;
+            processed = processed.debug_strict_add(ELEM_SIZE);
         }
 
         // Copy remaining input into start of buffer.
@@ -356,7 +360,7 @@ impl SipHasher128 {
         copy_nonoverlapping_small(src, dst, extra_bytes_left);
 
         self.nbuf = extra_bytes_left;
-        self.processed += nbuf + processed;
+        self.processed = self.processed.debug_strict_add(nbuf.debug_strict_add(processed));
     }
 }
 
@@ -394,7 +398,7 @@ impl SipHasher128 {
         };
 
         // Finalize the hash.
-        let length = self.processed + self.nbuf;
+        let length = self.processed.debug_strict_add(self.nbuf);
         let b: u64 = ((length as u64 & 0xff) << 56) | elem;
 
         state.v3 ^= b;
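As an aside, the `copy_nonoverlapping_small` helper touched above copies fewer than 8 bytes in a 4-byte, then 2-byte, then 1-byte step, presumably so each copy has a statically known size. A safe-Rust sketch of that chunking strategy (hypothetical helper, not the unsafe pointer-based original):

    // Copies `count` bytes (count < 8) in 4/2/1-byte chunks.
    // Both slices are assumed to be at least `count` bytes long.
    fn copy_small(src: &[u8], dst: &mut [u8], count: usize) {
        debug_assert!(count < 8);
        let mut i = 0;
        if i + 3 < count {
            dst[i..i + 4].copy_from_slice(&src[i..i + 4]);
            i += 4;
        }
        if i + 1 < count {
            dst[i..i + 2].copy_from_slice(&src[i..i + 2]);
            i += 2;
        }
        if i < count {
            dst[i] = src[i];
            i += 1;
        }
        debug_assert_eq!(i, count);
    }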
compiler/rustc_serialize/src/int_overflow.rs (new file, +65)

@@ -0,0 +1,65 @@
+// This would belong to `rustc_data_structures`, but `rustc_serialize` needs it too.
+
+/// Addition, but only overflow checked when `cfg(debug_assertions)` is set
+/// instead of respecting `-Coverflow-checks`.
+///
+/// This exists for performance reasons, as we ship rustc with overflow checks.
+/// While overflow checks are perf neutral in almost all of the compiler, there
+/// are a few particularly hot areas where we don't want overflow checks in our
+/// dist builds. Overflow is still a bug there, so we want overflow check for
+/// builds with debug assertions.
+///
+/// That's a long way to say that this should be used in areas where overflow
+/// is a bug but overflow checking is too slow.
+pub trait DebugStrictAdd {
+    /// See [`DebugStrictAdd`].
+    fn debug_strict_add(self, other: Self) -> Self;
+}
+
+macro_rules! impl_debug_strict_add {
+    ($( $ty:ty )*) => {
+        $(
+            impl DebugStrictAdd for $ty {
+                fn debug_strict_add(self, other: Self) -> Self {
+                    if cfg!(debug_assertions) {
+                        self + other
+                    } else {
+                        self.wrapping_add(other)
+                    }
+                }
+            }
+        )*
+    };
+}
+
+/// See [`DebugStrictAdd`].
+pub trait DebugStrictSub {
+    /// See [`DebugStrictAdd`].
+    fn debug_strict_sub(self, other: Self) -> Self;
+}
+
+macro_rules! impl_debug_strict_sub {
+    ($( $ty:ty )*) => {
+        $(
+            impl DebugStrictSub for $ty {
+                fn debug_strict_sub(self, other: Self) -> Self {
+                    if cfg!(debug_assertions) {
+                        self - other
+                    } else {
+                        self.wrapping_sub(other)
+                    }
+                }
+            }
+        )*
+    };
+}
+
+impl_debug_strict_add! {
+    u8 u16 u32 u64 u128 usize
+    i8 i16 i32 i64 i128 isize
+}
+
+impl_debug_strict_sub! {
+    u8 u16 u32 u64 u128 usize
+    i8 i16 i32 i64 i128 isize
+}
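A small usage sketch of the new traits from a caller's perspective (hypothetical code, not part of the commit): call sites read like ordinary arithmetic, while the overflow behavior follows `cfg(debug_assertions)` rather than `-Coverflow-checks`.

    use rustc_serialize::int_overflow::{DebugStrictAdd, DebugStrictSub};

    fn demo(nbuf: usize, len: usize) -> usize {
        // Panics on overflow only in builds with debug assertions;
        // wraps silently in dist builds, even with -Coverflow-checks=on.
        let total = nbuf.debug_strict_add(len);
        total.debug_strict_sub(1)
    }

    // e.g. demo(3, 10) == 12, while demo(usize::MAX, 1) either panics
    // (debug assertions) or wraps around (release).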

compiler/rustc_serialize/src/leb128.rs (+9 -5)

@@ -1,6 +1,10 @@
 use crate::opaque::MemDecoder;
 use crate::serialize::Decoder;
 
+// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use crate::int_overflow::DebugStrictAdd;
+
 /// Returns the length of the longest LEB128 encoding for `T`, assuming `T` is an integer type
 pub const fn max_leb128_len<T>() -> usize {
     // The longest LEB128 encoding for an integer uses 7 bits per byte.
@@ -24,15 +28,15 @@ macro_rules! impl_write_unsigned_leb128 {
                         *out.get_unchecked_mut(i) = value as u8;
                     }
 
-                    i += 1;
+                    i = i.debug_strict_add(1);
                     break;
                 } else {
                     unsafe {
                         *out.get_unchecked_mut(i) = ((value & 0x7f) | 0x80) as u8;
                     }
 
                     value >>= 7;
-                    i += 1;
+                    i = i.debug_strict_add(1);
                 }
             }
 
@@ -69,7 +73,7 @@ macro_rules! impl_read_unsigned_leb128 {
                 } else {
                     result |= ((byte & 0x7F) as $int_ty) << shift;
                 }
-                shift += 7;
+                shift = shift.debug_strict_add(7);
             }
         }
     };
@@ -101,7 +105,7 @@ macro_rules! impl_write_signed_leb128 {
                     *out.get_unchecked_mut(i) = byte;
                 }
 
-                i += 1;
+                i = i.debug_strict_add(1);
 
                 if !more {
                     break;
@@ -130,7 +134,7 @@ macro_rules! impl_read_signed_leb128 {
             loop {
                 byte = decoder.read_u8();
                 result |= <$int_ty>::from(byte & 0x7F) << shift;
-                shift += 7;
+                shift = shift.debug_strict_add(7);
 
                 if (byte & 0x80) == 0 {
                     break;
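For reference, the write loops above implement standard unsigned LEB128: emit the low seven bits per byte, with the high bit marking that more bytes follow. A standalone sketch of that loop (simplified: a plain function pushing into a Vec rather than the macro's uninitialized fixed buffer):

    // Simplified sketch of what impl_write_unsigned_leb128! expands to.
    fn write_u32_leb128(out: &mut Vec<u8>, mut value: u32) {
        loop {
            if value < 0x80 {
                // Last byte: high bit clear.
                out.push(value as u8);
                break;
            } else {
                // Low 7 bits, high bit set to signal continuation.
                out.push(((value & 0x7f) | 0x80) as u8);
                value >>= 7;
            }
        }
    }

    // Example: 300 (0b1_0010_1100) encodes as [0xAC, 0x02].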

compiler/rustc_serialize/src/lib.rs (+1)

@@ -23,5 +23,6 @@ pub use self::serialize::{Decodable, Decoder, Encodable, Encoder};
 
 mod serialize;
 
+pub mod int_overflow;
 pub mod leb128;
 pub mod opaque;

compiler/rustc_serialize/src/opaque.rs (+7 -3)

@@ -7,6 +7,10 @@ use std::ops::Range;
 use std::path::Path;
 use std::path::PathBuf;
 
+// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use crate::int_overflow::DebugStrictAdd;
+
 // -----------------------------------------------------------------------------
 // Encoder
 // -----------------------------------------------------------------------------
@@ -65,7 +69,7 @@ impl FileEncoder {
         // Tracking position this way instead of having a `self.position` field
         // means that we only need to update `self.buffered` on a write call,
         // as opposed to updating `self.position` and `self.buffered`.
-        self.flushed + self.buffered
+        self.flushed.debug_strict_add(self.buffered)
     }
 
     #[cold]
@@ -119,7 +123,7 @@ impl FileEncoder {
         }
         if let Some(dest) = self.buffer_empty().get_mut(..buf.len()) {
             dest.copy_from_slice(buf);
-            self.buffered += buf.len();
+            self.buffered = self.buffered.debug_strict_add(buf.len());
         } else {
             self.write_all_cold_path(buf);
         }
@@ -158,7 +162,7 @@ impl FileEncoder {
         if written > N {
             Self::panic_invalid_write::<N>(written);
         }
-        self.buffered += written;
+        self.buffered = self.buffered.debug_strict_add(written);
     }
 
     #[cold]
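The `position()` change above keeps the existing design: `FileEncoder` derives its position from `flushed + buffered` instead of maintaining a separate field, so only `buffered` has to be updated on each write. A toy sketch of that bookkeeping (hypothetical type, just to illustrate the invariant):

    struct Counters {
        flushed: usize,  // bytes already written to the underlying file
        buffered: usize, // bytes currently sitting in the in-memory buffer
    }

    impl Counters {
        fn record_write(&mut self, n: usize) {
            self.buffered += n; // the only counter a write touches
        }

        fn record_flush(&mut self) {
            self.flushed += self.buffered;
            self.buffered = 0;
        }

        fn position(&self) -> usize {
            // No separate `position` field to keep in sync.
            self.flushed + self.buffered
        }
    }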

compiler/rustc_span/src/span_encoding.rs (+6 -2)

@@ -5,6 +5,10 @@ use crate::{BytePos, SpanData};
 
 use rustc_data_structures::fx::FxIndexSet;
 
+// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use rustc_serialize::int_overflow::DebugStrictAdd;
+
 /// A compressed span.
 ///
 /// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
@@ -166,7 +170,7 @@ impl Span {
             debug_assert!(len <= MAX_LEN);
             SpanData {
                 lo: BytePos(self.lo_or_index),
-                hi: BytePos(self.lo_or_index + len),
+                hi: BytePos(self.lo_or_index.debug_strict_add(len)),
                 ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32),
                 parent: None,
             }
@@ -179,7 +183,7 @@ impl Span {
             };
             SpanData {
                 lo: BytePos(self.lo_or_index),
-                hi: BytePos(self.lo_or_index + len),
+                hi: BytePos(self.lo_or_index.debug_strict_add(len)),
                 ctxt: SyntaxContext::root(),
                 parent: Some(parent),
             }
