Skip to content

Commit

Permalink
Rollup merge of rust-lang#37427 - nnethercote:opt-IchHasher, r=michae…
Browse files Browse the repository at this point in the history
…lwoerister

Reduce the number of bytes hashed by IchHasher.

IchHasher uses blake2b hashing, which is expensive, so the fewer bytes hashed
the better. There are two big ways to reduce the number of bytes hashed.
- Filenames in spans account for ~66% of all bytes (for builds with debuginfo).
  The vast majority of spans have the same filename for the start of the span
  and the end of the span, so hashing the filename just once in those cases is
  a big win.
- u32 and u64 and usize values account for ~25%--33% of all bytes (for builds
  with debuginfo). The vast majority of these are small, i.e. fit in a u8, so
  shrinking them down before hashing is also a big win.

This PR implements these two optimizations. I'm certain the first one is safe.
I'm about 90% sure that the second one is safe.

Here are measurements of the number of bytes hashed when doing
debuginfo-enabled builds of stdlib and
rustc-benchmarks/syntex-0.42.2-incr-clean.

```
                    stdlib   syntex-incr
                    ------   -----------
original       156,781,386   255,095,596
half-SawSpan   106,744,403   176,345,419
short-ints      45,890,534   118,014,227
no-SawSpan[*]    6,831,874    45,875,714

[*] don't hash the SawSpan at all. Not part of this PR, just implemented for
    comparison's sake.
```

For debug builds of syntex-0.42.2-incr-clean, the two changes give a 1--2%
speed-up.
  • Loading branch information
Jonathan Turner committed Nov 4, 2016
2 parents a267214 + d73c68c commit e5ebb60
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 12 deletions.
38 changes: 37 additions & 1 deletion src/librustc_incremental/calculate_svh/hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
// except according to those terms.

use std::mem;
use std::hash::Hasher;
use rustc_data_structures::blake2b::Blake2bHasher;
use rustc::ty::util::ArchIndependentHasher;
use ich::Fingerprint;
use rustc_serialize::leb128::write_unsigned_leb128;

#[derive(Debug)]
pub struct IchHasher {
state: ArchIndependentHasher<Blake2bHasher>,
leb128_helper: Vec<u8>,
bytes_hashed: u64,
}

Expand All @@ -24,6 +27,7 @@ impl IchHasher {
let hash_size = mem::size_of::<Fingerprint>();
IchHasher {
state: ArchIndependentHasher::new(Blake2bHasher::new(hash_size, &[])),
leb128_helper: vec![],
bytes_hashed: 0
}
}
Expand All @@ -37,9 +41,19 @@ impl IchHasher {
fingerprint.0.copy_from_slice(self.state.into_inner().finalize());
fingerprint
}

#[inline]
fn write_uleb128(&mut self, value: u64) {
let len = write_unsigned_leb128(&mut self.leb128_helper, 0, value);
self.state.write(&self.leb128_helper[0..len]);
self.bytes_hashed += len as u64;
}
}

impl ::std::hash::Hasher for IchHasher {
// For the non-u8 integer cases we leb128 encode them first. Because small
// integers dominate, this significantly and cheaply reduces the number of
// bytes hashed, which is good because blake2b is expensive.
impl Hasher for IchHasher {
fn finish(&self) -> u64 {
bug!("Use other finish() implementation to get the full 128-bit hash.");
}
Expand All @@ -49,4 +63,26 @@ impl ::std::hash::Hasher for IchHasher {
self.state.write(bytes);
self.bytes_hashed += bytes.len() as u64;
}

// There is no need to leb128-encode u8 values.

#[inline]
fn write_u16(&mut self, i: u16) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_u32(&mut self, i: u32) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_u64(&mut self, i: u64) {
self.write_uleb128(i);
}

#[inline]
fn write_usize(&mut self, i: usize) {
self.write_uleb128(i as u64);
}
}
42 changes: 31 additions & 11 deletions src/librustc_incremental/calculate_svh/svh_visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ impl<'a, 'hash, 'tcx> StrictVersionHashVisitor<'a, 'hash, 'tcx> {
// within the CodeMap.
// Also note that we are hashing byte offsets for the column, not unicode
// codepoint offsets. For the purpose of the hash that's sufficient.
// Also, hashing filenames is expensive so we avoid doing it twice when the
// span starts and ends in the same file, which is almost always the case.
fn hash_span(&mut self, span: Span) {
debug!("hash_span: st={:?}", self.st);

Expand All @@ -103,21 +105,35 @@ impl<'a, 'hash, 'tcx> StrictVersionHashVisitor<'a, 'hash, 'tcx> {
span.hi
};

let loc1 = self.codemap.byte_pos_to_line_and_col(span.lo);
let loc2 = self.codemap.byte_pos_to_line_and_col(span_hi);

let expansion_kind = match span.expn_id {
let expn_kind = match span.expn_id {
NO_EXPANSION => SawSpanExpnKind::NoExpansion,
COMMAND_LINE_EXPN => SawSpanExpnKind::CommandLine,
_ => SawSpanExpnKind::SomeExpansion,
};

SawSpan(loc1.as_ref().map(|&(ref fm, line, col)| (&fm.name[..], line, col)),
loc2.as_ref().map(|&(ref fm, line, col)| (&fm.name[..], line, col)),
expansion_kind)
.hash(self.st);
let loc1 = self.codemap.byte_pos_to_line_and_col(span.lo);
let loc1 = loc1.as_ref()
.map(|&(ref fm, line, col)| (&fm.name[..], line, col))
.unwrap_or(("???", 0, BytePos(0)));

let loc2 = self.codemap.byte_pos_to_line_and_col(span_hi);
let loc2 = loc2.as_ref()
.map(|&(ref fm, line, col)| (&fm.name[..], line, col))
.unwrap_or(("???", 0, BytePos(0)));

let saw = if loc1.0 == loc2.0 {
SawSpan(loc1.0,
loc1.1, loc1.2,
loc2.1, loc2.2,
expn_kind)
} else {
SawSpanTwoFiles(loc1.0, loc1.1, loc1.2,
loc2.0, loc2.1, loc2.2,
expn_kind)
};
saw.hash(self.st);

if expansion_kind == SawSpanExpnKind::SomeExpansion {
if expn_kind == SawSpanExpnKind::SomeExpansion {
let call_site = self.codemap.codemap().source_callsite(span);
self.hash_span(call_site);
}
Expand Down Expand Up @@ -189,9 +205,13 @@ enum SawAbiComponent<'a> {
SawAssocTypeBinding,
SawAttribute(ast::AttrStyle),
SawMacroDef,
SawSpan(Option<(&'a str, usize, BytePos)>,
Option<(&'a str, usize, BytePos)>,
SawSpan(&'a str,
usize, BytePos,
usize, BytePos,
SawSpanExpnKind),
SawSpanTwoFiles(&'a str, usize, BytePos,
&'a str, usize, BytePos,
SawSpanExpnKind),
}

/// SawExprComponent carries all of the information that we want
Expand Down

0 comments on commit e5ebb60

Please sign in to comment.