Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the number of bytes hashed by IchHasher. #37427

Merged
merged 2 commits into from
Nov 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion src/librustc_incremental/calculate_svh/hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
// except according to those terms.

use std::mem;
use std::hash::Hasher;
use rustc_data_structures::blake2b::Blake2bHasher;
use rustc::ty::util::ArchIndependentHasher;
use ich::Fingerprint;
use rustc_serialize::leb128::write_unsigned_leb128;

#[derive(Debug)]
pub struct IchHasher {
state: ArchIndependentHasher<Blake2bHasher>,
leb128_helper: Vec<u8>,
bytes_hashed: u64,
}

Expand All @@ -24,6 +27,7 @@ impl IchHasher {
let hash_size = mem::size_of::<Fingerprint>();
IchHasher {
state: ArchIndependentHasher::new(Blake2bHasher::new(hash_size, &[])),
leb128_helper: vec![],
bytes_hashed: 0
}
}
Expand All @@ -37,9 +41,19 @@ impl IchHasher {
fingerprint.0.copy_from_slice(self.state.into_inner().finalize());
fingerprint
}

#[inline]
fn write_uleb128(&mut self, value: u64) {
let len = write_unsigned_leb128(&mut self.leb128_helper, 0, value);
self.state.write(&self.leb128_helper[0..len]);
self.bytes_hashed += len as u64;
}
}

impl ::std::hash::Hasher for IchHasher {
// For the non-u8 integer cases we leb128 encode them first. Because small
// integers dominate, this significantly and cheaply reduces the number of
// bytes hashed, which is good because blake2b is expensive.
impl Hasher for IchHasher {
fn finish(&self) -> u64 {
bug!("Use other finish() implementation to get the full 128-bit hash.");
}
Expand All @@ -49,4 +63,26 @@ impl ::std::hash::Hasher for IchHasher {
self.state.write(bytes);
self.bytes_hashed += bytes.len() as u64;
}

// There is no need to leb128-encode u8 values.

#[inline]
fn write_u16(&mut self, i: u16) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_u32(&mut self, i: u32) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_u64(&mut self, i: u64) {
self.write_uleb128(i);
}

#[inline]
fn write_usize(&mut self, i: usize) {
self.write_uleb128(i as u64);
}
}
42 changes: 31 additions & 11 deletions src/librustc_incremental/calculate_svh/svh_visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ impl<'a, 'hash, 'tcx> StrictVersionHashVisitor<'a, 'hash, 'tcx> {
// within the CodeMap.
// Also note that we are hashing byte offsets for the column, not unicode
// codepoint offsets. For the purpose of the hash that's sufficient.
// Also, hashing filenames is expensive so we avoid doing it twice when the
// span starts and ends in the same file, which is almost always the case.
fn hash_span(&mut self, span: Span) {
debug_assert!(self.hash_spans);
debug!("hash_span: st={:?}", self.st);
Expand All @@ -98,21 +100,35 @@ impl<'a, 'hash, 'tcx> StrictVersionHashVisitor<'a, 'hash, 'tcx> {
span.hi
};

let loc1 = self.codemap.byte_pos_to_line_and_col(span.lo);
let loc2 = self.codemap.byte_pos_to_line_and_col(span_hi);

let expansion_kind = match span.expn_id {
let expn_kind = match span.expn_id {
NO_EXPANSION => SawSpanExpnKind::NoExpansion,
COMMAND_LINE_EXPN => SawSpanExpnKind::CommandLine,
_ => SawSpanExpnKind::SomeExpansion,
};

SawSpan(loc1.as_ref().map(|&(ref fm, line, col)| (&fm.name[..], line, col)),
loc2.as_ref().map(|&(ref fm, line, col)| (&fm.name[..], line, col)),
expansion_kind)
.hash(self.st);
let loc1 = self.codemap.byte_pos_to_line_and_col(span.lo);
let loc1 = loc1.as_ref()
.map(|&(ref fm, line, col)| (&fm.name[..], line, col))
.unwrap_or(("???", 0, BytePos(0)));

let loc2 = self.codemap.byte_pos_to_line_and_col(span_hi);
let loc2 = loc2.as_ref()
.map(|&(ref fm, line, col)| (&fm.name[..], line, col))
.unwrap_or(("???", 0, BytePos(0)));

let saw = if loc1.0 == loc2.0 {
SawSpan(loc1.0,
loc1.1, loc1.2,
loc2.1, loc2.2,
expn_kind)
} else {
SawSpanTwoFiles(loc1.0, loc1.1, loc1.2,
loc2.0, loc2.1, loc2.2,
expn_kind)
};
saw.hash(self.st);

if expansion_kind == SawSpanExpnKind::SomeExpansion {
if expn_kind == SawSpanExpnKind::SomeExpansion {
let call_site = self.codemap.codemap().source_callsite(span);
self.hash_span(call_site);
}
Expand Down Expand Up @@ -184,9 +200,13 @@ enum SawAbiComponent<'a> {
SawAssocTypeBinding,
SawAttribute(ast::AttrStyle),
SawMacroDef,
SawSpan(Option<(&'a str, usize, BytePos)>,
Option<(&'a str, usize, BytePos)>,
SawSpan(&'a str,
usize, BytePos,
usize, BytePos,
SawSpanExpnKind),
SawSpanTwoFiles(&'a str, usize, BytePos,
&'a str, usize, BytePos,
SawSpanExpnKind),
}

/// SawExprComponent carries all of the information that we want
Expand Down