Skip to content

Commit

Permalink
Rollup merge of rust-lang#63525 - matklad:centraliza-file-loading, r=…
Browse files Browse the repository at this point in the history
…petrochenkov

Make sure that all file loading happens via SourceMap

That way, callers don't need to repeat "let's add this to sm manually
for tracking dependencies" trick.

It should make it easier to switch to using `FileLoader` for binary
files in the future as well

cc rust-lang#62948

r? @petrochenkov
  • Loading branch information
Centril committed Aug 15, 2019
2 parents 7ff5b38 + 14bc998 commit ce4a45d
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 34 deletions.
13 changes: 5 additions & 8 deletions src/libsyntax/ext/expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ use syntax_pos::{Span, DUMMY_SP, FileName};

use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use std::fs;
use std::io::ErrorKind;
use std::{iter, mem};
use std::ops::DerefMut;
Expand Down Expand Up @@ -1239,13 +1238,11 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> {
}

let filename = self.cx.resolve_path(&*file.as_str(), it.span());
match fs::read_to_string(&filename) {
Ok(src) => {
let src_interned = Symbol::intern(&src);

// Add this input file to the code map to make it available as
// dependency information
self.cx.source_map().new_source_file(filename.into(), src);
match self.cx.source_map().load_file(&filename) {
Ok(source_file) => {
let src = source_file.src.as_ref()
.expect("freshly loaded file should have a source");
let src_interned = Symbol::intern(src.as_str());

let include_info = vec![
ast::NestedMetaItem::MetaItem(
Expand Down
20 changes: 20 additions & 0 deletions src/libsyntax/source_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,26 @@ impl SourceMap {
Ok(self.new_source_file(filename, src))
}

/// Loads source file as a binary blob.
///
/// Unlike `load_file`, guarantees that no normalization like BOM-removal
/// takes place.
pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> {
// Ideally, this should use `self.file_loader`, but it can't
// deal with binary files yet.
let bytes = fs::read(path)?;

// We need to add file to the `SourceMap`, so that it is present
// in dep-info. There's also an edge case that file might be both
// loaded as a binary via `include_bytes!` and as proper `SourceFile`
// via `mod`, so we try to use real file contents and not just an
// empty string.
let text = std::str::from_utf8(&bytes).unwrap_or("")
.to_string();
self.new_source_file(path.to_owned().into(), text);
Ok(bytes)
}

pub fn files(&self) -> MappedLockGuard<'_, Vec<Lrc<SourceFile>>> {
LockGuard::map(self.files.borrow(), |files| &mut files.source_files)
}
Expand Down
37 changes: 11 additions & 26 deletions src/libsyntax_ext/source_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ use syntax::tokenstream;
use smallvec::SmallVec;
use syntax_pos::{self, Pos, Span};

use std::fs;
use std::io::ErrorKind;
use rustc_data_structures::sync::Lrc;

// These macros all relate to the file system; they either return
Expand Down Expand Up @@ -114,20 +112,17 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To
None => return DummyResult::any(sp)
};
let file = cx.resolve_path(file, sp);
match fs::read_to_string(&file) {
Ok(src) => {
let interned_src = Symbol::intern(&src);

// Add this input file to the code map to make it available as
// dependency information
cx.source_map().new_source_file(file.into(), src);

base::MacEager::expr(cx.expr_str(sp, interned_src))
match cx.source_map().load_binary_file(&file) {
Ok(bytes) => match std::str::from_utf8(&bytes) {
Ok(src) => {
let interned_src = Symbol::intern(&src);
base::MacEager::expr(cx.expr_str(sp, interned_src))
}
Err(_) => {
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
DummyResult::any(sp)
}
},
Err(ref e) if e.kind() == ErrorKind::InvalidData => {
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
DummyResult::any(sp)
}
Err(e) => {
cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
DummyResult::any(sp)
Expand All @@ -142,18 +137,8 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::
None => return DummyResult::any(sp)
};
let file = cx.resolve_path(file, sp);
match fs::read(&file) {
match cx.source_map().load_binary_file(&file) {
Ok(bytes) => {
// Add the contents to the source map if it contains UTF-8.
let (contents, bytes) = match String::from_utf8(bytes) {
Ok(s) => {
let bytes = s.as_bytes().to_owned();
(s, bytes)
},
Err(e) => (String::new(), e.into_bytes()),
};
cx.source_map().new_source_file(file.into(), contents);

base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes))))
},
Err(e) => {
Expand Down
1 change: 1 addition & 0 deletions src/test/ui/.gitattributes
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
trailing-carriage-return-in-string.rs -text
*.bin -text
2 changes: 2 additions & 0 deletions src/test/ui/include-macros/data.bin
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
This file starts with BOM.
Lines are separated by \r\n.
12 changes: 12 additions & 0 deletions src/test/ui/include-macros/normalization.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// run-pass

fn main() {
assert_eq!(
&include_bytes!("data.bin")[..],
&b"\xEF\xBB\xBFThis file starts with BOM.\r\nLines are separated by \\r\\n.\r\n"[..],
);
assert_eq!(
include_str!("data.bin"),
"\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n",
);
}

0 comments on commit ce4a45d

Please sign in to comment.