rust-lang · bors · Nov 5, 2016 · Oct 26, 2016 · Oct 26, 2016
diff --git a/src/librustc_incremental/calculate_svh/hasher.rs b/src/librustc_incremental/calculate_svh/hasher.rs
@@ -9,13 +9,16 @@
 // except according to those terms.
 
 use std::mem;
+use std::hash::Hasher;
 use rustc_data_structures::blake2b::Blake2bHasher;
 use rustc::ty::util::ArchIndependentHasher;
 use ich::Fingerprint;
+use rustc_serialize::leb128::write_unsigned_leb128;
 
 #[derive(Debug)]
 pub struct IchHasher {
     state: ArchIndependentHasher<Blake2bHasher>,
+    leb128_helper: Vec<u8>,
     bytes_hashed: u64,
 }
 
@@ -24,6 +27,7 @@ impl IchHasher {
         let hash_size = mem::size_of::<Fingerprint>();
         IchHasher {
             state: ArchIndependentHasher::new(Blake2bHasher::new(hash_size, &[])),
+            leb128_helper: vec![],
             bytes_hashed: 0
         }
     }
@@ -37,9 +41,19 @@ impl IchHasher {
         fingerprint.0.copy_from_slice(self.state.into_inner().finalize());
         fingerprint
     }
+
+    #[inline]
+    fn write_uleb128(&mut self, value: u64) {
+        let len = write_unsigned_leb128(&mut self.leb128_helper, 0, value);
+        self.state.write(&self.leb128_helper[0..len]);
+        self.bytes_hashed += len as u64;
+    }
 }
 
-impl ::std::hash::Hasher for IchHasher {
+// For the non-u8 integer cases we leb128 encode them first. Because small
+// integers dominate, this significantly and cheaply reduces the number of
+// bytes hashed, which is good because blake2b is expensive.
+impl Hasher for IchHasher {
     fn finish(&self) -> u64 {
         bug!("Use other finish() implementation to get the full 128-bit hash.");
     }
@@ -49,4 +63,26 @@ impl ::std::hash::Hasher for IchHasher {
         self.state.write(bytes);
         self.bytes_hashed += bytes.len() as u64;
     }
+
+    // There is no need to leb128-encode u8 values.
+
+    #[inline]
+    fn write_u16(&mut self, i: u16) {
+        self.write_uleb128(i as u64);
+    }
+
+    #[inline]
+    fn write_u32(&mut self, i: u32) {
+        self.write_uleb128(i as u64);
+    }
+
+    #[inline]
+    fn write_u64(&mut self, i: u64) {
+        self.write_uleb128(i);
+    }
+
+    #[inline]
+    fn write_usize(&mut self, i: usize) {
+        self.write_uleb128(i as u64);
+    }
 }
diff --git a/src/librustc_incremental/calculate_svh/svh_visitor.rs b/src/librustc_incremental/calculate_svh/svh_visitor.rs
@@ -82,6 +82,8 @@ impl<'a, 'hash, 'tcx> StrictVersionHashVisitor<'a, 'hash, 'tcx> {
     // within the CodeMap.
     // Also note that we are hashing byte offsets for the column, not unicode
     // codepoint offsets. For the purpose of the hash that's sufficient.
+    // Also, hashing filenames is expensive so we avoid doing it twice when the
+    // span starts and ends in the same file, which is almost always the case.
     fn hash_span(&mut self, span: Span) {
         debug_assert!(self.hash_spans);
         debug!("hash_span: st={:?}", self.st);
@@ -98,21 +100,35 @@ impl<'a, 'hash, 'tcx> StrictVersionHashVisitor<'a, 'hash, 'tcx> {
             span.hi
         };
 
-        let loc1 = self.codemap.byte_pos_to_line_and_col(span.lo);
-        let loc2 = self.codemap.byte_pos_to_line_and_col(span_hi);
-
-        let expansion_kind = match span.expn_id {
+        let expn_kind = match span.expn_id {
             NO_EXPANSION => SawSpanExpnKind::NoExpansion,
             COMMAND_LINE_EXPN => SawSpanExpnKind::CommandLine,
             _ => SawSpanExpnKind::SomeExpansion,
         };
 
-        SawSpan(loc1.as_ref().map(|&(ref fm, line, col)| (&fm.name[..], line, col)),
-                loc2.as_ref().map(|&(ref fm, line, col)| (&fm.name[..], line, col)),
-                expansion_kind)
-            .hash(self.st);
+        let loc1 = self.codemap.byte_pos_to_line_and_col(span.lo);
+        let loc1 = loc1.as_ref()
+                       .map(|&(ref fm, line, col)| (&fm.name[..], line, col))
+                       .unwrap_or(("???", 0, BytePos(0)));
+
+        let loc2 = self.codemap.byte_pos_to_line_and_col(span_hi);
+        let loc2 = loc2.as_ref()
+                       .map(|&(ref fm, line, col)| (&fm.name[..], line, col))
+                       .unwrap_or(("???", 0, BytePos(0)));
+
+        let saw = if loc1.0 == loc2.0 {
+            SawSpan(loc1.0,
+                    loc1.1, loc1.2,
+                    loc2.1, loc2.2,
+                    expn_kind)
+        } else {
+            SawSpanTwoFiles(loc1.0, loc1.1, loc1.2,
+                            loc2.0, loc2.1, loc2.2,
+                            expn_kind)
+        };
+        saw.hash(self.st);
 
-        if expansion_kind == SawSpanExpnKind::SomeExpansion {
+        if expn_kind == SawSpanExpnKind::SomeExpansion {
             let call_site = self.codemap.codemap().source_callsite(span);
             self.hash_span(call_site);
         }
@@ -184,9 +200,13 @@ enum SawAbiComponent<'a> {
     SawAssocTypeBinding,
     SawAttribute(ast::AttrStyle),
     SawMacroDef,
-    SawSpan(Option<(&'a str, usize, BytePos)>,
-            Option<(&'a str, usize, BytePos)>,
+    SawSpan(&'a str,
+            usize, BytePos,
+            usize, BytePos,
             SawSpanExpnKind),
+    SawSpanTwoFiles(&'a str, usize, BytePos,
+                    &'a str, usize, BytePos,
+                    SawSpanExpnKind),
 }
 
 /// SawExprComponent carries all of the information that we want