Skip to content

Commit

Permalink
Auto merge of #50997 - michaelwoerister:pre-analyze-filemaps, r=<try>
Browse files Browse the repository at this point in the history
 Make FileMap::{lines, multibyte_chars, non_narrow_chars} non-mutable.

This PR removes most of the interior mutability from `FileMap`, which should be beneficial, especially in a multithreaded setting. This is achieved by initializing the state in question when the filemap is constructed instead of during lexing. Hopefully this doesn't degrade performance.

cc @wesleywiser
  • Loading branch information
bors committed May 25, 2018
2 parents 910e29a + 2874a87 commit 23299ed
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 214 deletions.
30 changes: 12 additions & 18 deletions src/librustc/ich/impls_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,27 +457,21 @@ impl<'a> HashStable<StableHashingContext<'a>> for FileMap {
src_hash.hash_stable(hcx, hasher);

// We only hash the relative position within this filemap
lines.with_lock(|lines| {
lines.len().hash_stable(hcx, hasher);
for &line in lines.iter() {
stable_byte_pos(line, start_pos).hash_stable(hcx, hasher);
}
});
lines.len().hash_stable(hcx, hasher);
for &line in lines.iter() {
stable_byte_pos(line, start_pos).hash_stable(hcx, hasher);
}

// We only hash the relative position within this filemap
multibyte_chars.with_lock(|multibyte_chars| {
multibyte_chars.len().hash_stable(hcx, hasher);
for &char_pos in multibyte_chars.iter() {
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
}
});
multibyte_chars.len().hash_stable(hcx, hasher);
for &char_pos in multibyte_chars.iter() {
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
}

non_narrow_chars.with_lock(|non_narrow_chars| {
non_narrow_chars.len().hash_stable(hcx, hasher);
for &char_pos in non_narrow_chars.iter() {
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
}
});
non_narrow_chars.len().hash_stable(hcx, hasher);
for &char_pos in non_narrow_chars.iter() {
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/librustc/ty/maps/on_disk_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ impl<'a, 'tcx, 'x> SpecializedDecoder<Span> for CacheDecoder<'a, 'tcx, 'x> {
let len = BytePos::decode(self)?;

let file_lo = self.file_index_to_file(file_lo_index);
let lo = file_lo.lines.borrow()[line_lo - 1] + col_lo;
let lo = file_lo.lines[line_lo - 1] + col_lo;
let hi = lo + len;

let expn_info_tag = u8::decode(self)?;
Expand Down
9 changes: 3 additions & 6 deletions src/librustc_metadata/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1164,9 +1164,9 @@ impl<'a, 'tcx> CrateMetadata {
src_hash,
start_pos,
end_pos,
lines,
multibyte_chars,
non_narrow_chars,
mut lines,
mut multibyte_chars,
mut non_narrow_chars,
name_hash,
.. } = filemap_to_import;

Expand All @@ -1177,15 +1177,12 @@ impl<'a, 'tcx> CrateMetadata {
// `CodeMap::new_imported_filemap()` will then translate those
// coordinates to their new global frame of reference when the
// offset of the FileMap is known.
let mut lines = lines.into_inner();
for pos in &mut lines {
*pos = *pos - start_pos;
}
let mut multibyte_chars = multibyte_chars.into_inner();
for mbc in &mut multibyte_chars {
mbc.pos = mbc.pos - start_pos;
}
let mut non_narrow_chars = non_narrow_chars.into_inner();
for swc in &mut non_narrow_chars {
*swc = *swc - start_pos;
}
Expand Down
123 changes: 28 additions & 95 deletions src/libsyntax/codemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,7 @@ impl CodeMap {
}
}

/// Creates a new filemap without setting its line information. If you don't
/// intend to set the line information yourself, you should use new_filemap_and_lines.
/// Creates a new filemap.
/// This does not ensure that only one FileMap exists per file name.
pub fn new_filemap(&self, filename: FileName, src: String) -> Lrc<FileMap> {
let start_pos = self.next_start_pos();
Expand Down Expand Up @@ -247,22 +246,6 @@ impl CodeMap {
filemap
}

/// Creates a new filemap and sets its line information.
/// This does not ensure that only one FileMap exists per file name.
pub fn new_filemap_and_lines(&self, filename: &Path, src: &str) -> Lrc<FileMap> {
let fm = self.new_filemap(filename.to_owned().into(), src.to_owned());
let mut byte_pos: u32 = fm.start_pos.0;
for line in src.lines() {
// register the start of this line
fm.next_line(BytePos(byte_pos));

// update byte_pos to include this line and the \n at the end
byte_pos += line.len() as u32 + 1;
}
fm
}


/// Allocates a new FileMap representing a source file from an external
/// crate. The source code of such an "imported filemap" is not available,
/// but we still know enough to generate accurate debuginfo location
Expand Down Expand Up @@ -305,9 +288,9 @@ impl CodeMap {
external_src: Lock::new(ExternalSource::AbsentOk),
start_pos,
end_pos,
lines: Lock::new(file_local_lines),
multibyte_chars: Lock::new(file_local_multibyte_chars),
non_narrow_chars: Lock::new(file_local_non_narrow_chars),
lines: file_local_lines,
multibyte_chars: file_local_multibyte_chars,
non_narrow_chars: file_local_non_narrow_chars,
name_hash,
});

Expand Down Expand Up @@ -345,21 +328,22 @@ impl CodeMap {
match self.lookup_line(pos) {
Ok(FileMapAndLine { fm: f, line: a }) => {
let line = a + 1; // Line numbers start at 1
let linebpos = (*f.lines.borrow())[a];
let linebpos = f.lines[a];
let linechpos = self.bytepos_to_file_charpos(linebpos);
let col = chpos - linechpos;

let col_display = {
let non_narrow_chars = f.non_narrow_chars.borrow();
let start_width_idx = non_narrow_chars
let start_width_idx = f
.non_narrow_chars
.binary_search_by_key(&linebpos, |x| x.pos())
.unwrap_or_else(|x| x);
let end_width_idx = non_narrow_chars
let end_width_idx = f
.non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let special_chars = end_width_idx - start_width_idx;
let non_narrow: usize =
non_narrow_chars[start_width_idx..end_width_idx]
let non_narrow: usize = f
.non_narrow_chars[start_width_idx..end_width_idx]
.into_iter()
.map(|x| x.width())
.sum();
Expand All @@ -380,12 +364,12 @@ impl CodeMap {
}
Err(f) => {
let col_display = {
let non_narrow_chars = f.non_narrow_chars.borrow();
let end_width_idx = non_narrow_chars
let end_width_idx = f
.non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let non_narrow: usize =
non_narrow_chars[0..end_width_idx]
let non_narrow: usize = f
.non_narrow_chars[0..end_width_idx]
.into_iter()
.map(|x| x.width())
.sum();
Expand Down Expand Up @@ -830,7 +814,7 @@ impl CodeMap {
// The number of extra bytes due to multibyte chars in the FileMap
let mut total_extra_bytes = 0;

for mbc in map.multibyte_chars.borrow().iter() {
for mbc in map.multibyte_chars.iter() {
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
// every character is at least one byte, so we only
Expand Down Expand Up @@ -1028,51 +1012,16 @@ impl FilePathMapping {
#[cfg(test)]
mod tests {
use super::*;
use std::borrow::Cow;
use rustc_data_structures::sync::Lrc;

#[test]
fn t1 () {
let cm = CodeMap::new(FilePathMapping::empty());
let fm = cm.new_filemap(PathBuf::from("blork.rs").into(),
"first line.\nsecond line".to_string());
fm.next_line(BytePos(0));
// Test we can get lines with partial line info.
assert_eq!(fm.get_line(0), Some(Cow::from("first line.")));
// TESTING BROKEN BEHAVIOR: line break declared before actual line break.
fm.next_line(BytePos(10));
assert_eq!(fm.get_line(1), Some(Cow::from(".")));
fm.next_line(BytePos(12));
assert_eq!(fm.get_line(2), Some(Cow::from("second line")));
}

#[test]
#[should_panic]
fn t2 () {
let cm = CodeMap::new(FilePathMapping::empty());
let fm = cm.new_filemap(PathBuf::from("blork.rs").into(),
"first line.\nsecond line".to_string());
// TESTING *REALLY* BROKEN BEHAVIOR:
fm.next_line(BytePos(0));
fm.next_line(BytePos(10));
fm.next_line(BytePos(2));
}

fn init_code_map() -> CodeMap {
let cm = CodeMap::new(FilePathMapping::empty());
let fm1 = cm.new_filemap(PathBuf::from("blork.rs").into(),
"first line.\nsecond line".to_string());
let fm2 = cm.new_filemap(PathBuf::from("empty.rs").into(),
"".to_string());
let fm3 = cm.new_filemap(PathBuf::from("blork2.rs").into(),
"first line.\nsecond line".to_string());

fm1.next_line(BytePos(0));
fm1.next_line(BytePos(12));
fm2.next_line(fm2.start_pos);
fm3.next_line(fm3.start_pos);
fm3.next_line(fm3.start_pos + BytePos(12));

cm.new_filemap(PathBuf::from("blork.rs").into(),
"first line.\nsecond line".to_string());
cm.new_filemap(PathBuf::from("empty.rs").into(),
"".to_string());
cm.new_filemap(PathBuf::from("blork2.rs").into(),
"first line.\nsecond line".to_string());
cm
}

Expand Down Expand Up @@ -1125,26 +1074,10 @@ mod tests {
fn init_code_map_mbc() -> CodeMap {
let cm = CodeMap::new(FilePathMapping::empty());
// € is a three byte utf8 char.
let fm1 =
cm.new_filemap(PathBuf::from("blork.rs").into(),
"fir€st €€€€ line.\nsecond line".to_string());
let fm2 = cm.new_filemap(PathBuf::from("blork2.rs").into(),
"first line€€.\n€ second line".to_string());

fm1.next_line(BytePos(0));
fm1.next_line(BytePos(28));
fm2.next_line(fm2.start_pos);
fm2.next_line(fm2.start_pos + BytePos(20));

fm1.record_multibyte_char(BytePos(3), 3);
fm1.record_multibyte_char(BytePos(9), 3);
fm1.record_multibyte_char(BytePos(12), 3);
fm1.record_multibyte_char(BytePos(15), 3);
fm1.record_multibyte_char(BytePos(18), 3);
fm2.record_multibyte_char(fm2.start_pos + BytePos(10), 3);
fm2.record_multibyte_char(fm2.start_pos + BytePos(13), 3);
fm2.record_multibyte_char(fm2.start_pos + BytePos(18), 3);

cm.new_filemap(PathBuf::from("blork.rs").into(),
"fir€st €€€€ line.\nsecond line".to_string());
cm.new_filemap(PathBuf::from("blork2.rs").into(),
"first line€€.\n€ second line".to_string());
cm
}

Expand Down Expand Up @@ -1196,7 +1129,7 @@ mod tests {
let cm = CodeMap::new(FilePathMapping::empty());
let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
let selection = " \n ~~\n~~~\n~~~~~ \n \n";
cm.new_filemap_and_lines(Path::new("blork.rs"), inputtext);
cm.new_filemap(Path::new("blork.rs").to_owned().into(), inputtext.to_string());
let span = span_from_selection(inputtext, selection);

// check that we are extracting the text we thought we were extracting
Expand Down Expand Up @@ -1239,7 +1172,7 @@ mod tests {
let inputtext = "bbbb BB\ncc CCC\n";
let selection1 = " ~~\n \n";
let selection2 = " \n ~~~\n";
cm.new_filemap_and_lines(Path::new("blork.rs"), inputtext);
cm.new_filemap(Path::new("blork.rs").to_owned().into(), inputtext.to_owned());
let span1 = span_from_selection(inputtext, selection1);
let span2 = span_from_selection(inputtext, selection2);

Expand Down
6 changes: 4 additions & 2 deletions src/libsyntax/ext/expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1451,17 +1451,19 @@ impl<'a, 'b> Folder for InvocationCollector<'a, 'b> {

match String::from_utf8(buf) {
Ok(src) => {
let src_interned = Symbol::intern(&src);

// Add this input file to the code map to make it available as
// dependency information
self.cx.codemap().new_filemap_and_lines(&filename, &src);
self.cx.codemap().new_filemap(filename.into(), src);

let include_info = vec![
dummy_spanned(ast::NestedMetaItemKind::MetaItem(
attr::mk_name_value_item_str(Ident::from_str("file"),
dummy_spanned(file)))),
dummy_spanned(ast::NestedMetaItemKind::MetaItem(
attr::mk_name_value_item_str(Ident::from_str("contents"),
dummy_spanned(Symbol::intern(&src))))),
dummy_spanned(src_interned)))),
];

let include_ident = Ident::from_str("include");
Expand Down
8 changes: 5 additions & 3 deletions src/libsyntax/ext/source_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,13 @@ pub fn expand_include_str(cx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::TokenT
};
match String::from_utf8(bytes) {
Ok(src) => {
let interned_src = Symbol::intern(&src);

// Add this input file to the code map to make it available as
// dependency information
cx.codemap().new_filemap_and_lines(&file, &src);
cx.codemap().new_filemap(file.into(), src);

base::MacEager::expr(cx.expr_str(sp, Symbol::intern(&src)))
base::MacEager::expr(cx.expr_str(sp, interned_src))
}
Err(_) => {
cx.span_err(sp,
Expand Down Expand Up @@ -182,7 +184,7 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt, sp: Span, tts: &[tokenstream::Toke
Ok(..) => {
// Add this input file to the code map to make it available as
// dependency information, but don't enter it's contents
cx.codemap().new_filemap_and_lines(&file, "");
cx.codemap().new_filemap(file.into(), "".to_string());

base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes))))
}
Expand Down
18 changes: 0 additions & 18 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,6 @@ pub struct StringReader<'a> {
pub filemap: Lrc<syntax_pos::FileMap>,
/// Stop reading src at this index.
pub end_src_index: usize,
/// Whether to record new-lines and multibyte chars in filemap.
/// This is only necessary the first time a filemap is lexed.
/// If part of a filemap is being re-lexed, this should be set to false.
pub save_new_lines_and_multibyte: bool,
// cached:
peek_tok: token::Token,
peek_span: Span,
Expand Down Expand Up @@ -190,7 +186,6 @@ impl<'a> StringReader<'a> {
ch: Some('\n'),
filemap,
end_src_index: src.len(),
save_new_lines_and_multibyte: true,
// dummy values; not read
peek_tok: token::Eof,
peek_span: syntax_pos::DUMMY_SP,
Expand Down Expand Up @@ -227,7 +222,6 @@ impl<'a> StringReader<'a> {
let mut sr = StringReader::new_raw_internal(sess, begin.fm, None);

// Seek the lexer to the right byte range.
sr.save_new_lines_and_multibyte = false;
sr.next_pos = span.lo();
sr.end_src_index = sr.src_index(span.hi());

Expand Down Expand Up @@ -460,18 +454,6 @@ impl<'a> StringReader<'a> {
let next_ch = char_at(&self.src, next_src_index);
let next_ch_len = next_ch.len_utf8();

if self.ch.unwrap() == '\n' {
if self.save_new_lines_and_multibyte {
self.filemap.next_line(self.next_pos);
}
}
if next_ch_len > 1 {
if self.save_new_lines_and_multibyte {
self.filemap.record_multibyte_char(self.next_pos, next_ch_len);
}
}
self.filemap.record_width(self.next_pos, next_ch);

self.ch = Some(next_ch);
self.pos = self.next_pos;
self.next_pos = self.next_pos + Pos::from_usize(next_ch_len);
Expand Down
2 changes: 1 addition & 1 deletion src/libsyntax/test_snippet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ fn test_harness(file_text: &str, span_labels: Vec<SpanLabel>, expected_output: &
let output = Arc::new(Mutex::new(Vec::new()));

let code_map = Lrc::new(CodeMap::new(FilePathMapping::empty()));
code_map.new_filemap_and_lines(Path::new("test.rs"), &file_text);
code_map.new_filemap(Path::new("test.rs").to_owned().into(), file_text.to_owned());

let primary_span = make_span(&file_text, &span_labels[0].start, &span_labels[0].end);
let mut msp = MultiSpan::from_span(primary_span);
Expand Down
Loading

0 comments on commit 23299ed

Please sign in to comment.