From 1623f4554a849e9f45e3dd9777eb08504f4acae2 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Thu, 3 Dec 2020 11:56:36 +0000 Subject: [PATCH 1/3] Faster label loading. A significant cost of starting the JIT is loading our mapping labels from DWARF. This change extracts the labels (at compile time) into a faster to load format. Rough benchmarking: before: ``` Mean Std.Dev. Min Median Max real 22.323 0.305 21.924 22.303 22.858 user 127.221 0.266 126.793 127.324 127.580 sys 8.258 0.734 7.281 8.002 9.156 ``` after: ``` Mean Std.Dev. Min Median Max real 9.082 0.544 8.398 9.157 9.896 user 98.287 0.393 97.876 98.134 98.995 sys 7.054 0.595 6.009 7.202 7.789 ``` --- Cargo.lock | 7 +- compiler/rustc_codegen_ssa/Cargo.toml | 9 +- compiler/rustc_codegen_ssa/src/back/link.rs | 9 ++ compiler/rustc_codegen_ssa/src/sir.rs | 135 +++++++++++++++++++- 4 files changed, 157 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0d7e1f8f2d2..45c6f8853fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1322,8 +1322,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce" dependencies = [ "compiler_builtins", + "fallible-iterator", + "indexmap", "rustc-std-workspace-alloc", "rustc-std-workspace-core", + "stable_deref_trait", ] [[package]] @@ -3568,13 +3571,16 @@ dependencies = [ name = "rustc_codegen_ssa" version = "0.0.0" dependencies = [ + "bincode", "bitflags", "cc", + "gimli", "indexmap", "jobserver", "libc", "memmap", "num_cpus", + "object", "pathdiff", "rustc_apfloat", "rustc_ast", @@ -5638,7 +5644,6 @@ dependencies = [ [[package]] name = "ykpack" version = "0.1.0" -source = "git+https://github.com/softdevteam/yk#bf9f42278be72f072a34e95d3f671bdcf19ed048" dependencies = [ "bincode", "bitflags", diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml index 0fb805c7b9c..356d5e6ed81 100644 --- a/compiler/rustc_codegen_ssa/Cargo.toml +++ 
b/compiler/rustc_codegen_ssa/Cargo.toml @@ -36,5 +36,12 @@ rustc_target = { path = "../rustc_target" } rustc_session = { path = "../rustc_session" } # Yorick -ykpack = { git = "https://github.com/softdevteam/yk" } +bincode = "1.3.1" +gimli = "0.23.0" indexmap = "1.5.2" +ykpack = { git = "https://github.com/softdevteam/yk" } + +[dependencies.object] +version = "0.22.0" +default-features = false +features = ["read_core", "elf"] diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 85f4d997a44..4bfe1e11389 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -96,6 +96,15 @@ pub fn link_binary<'a, B: ArchiveBuilder<'a>>( path.as_ref(), target_cpu, ); + + // If we have emitted SIR labels into DWARF then we now extract them into a + // faster (to load at runtime) ELF section. + if sess.opts.cg.tracer.sir_labels() + && crate_name != crate::sir::BUILD_SCRIPT_CRATE + && crate_type == CrateType::Executable + { + crate::sir::labels::add_yk_label_section(&out_filename); + } } } if sess.opts.json_artifact_notifications { diff --git a/compiler/rustc_codegen_ssa/src/sir.rs b/compiler/rustc_codegen_ssa/src/sir.rs index 3a08e523d51..71b61f1955a 100644 --- a/compiler/rustc_codegen_ssa/src/sir.rs +++ b/compiler/rustc_codegen_ssa/src/sir.rs @@ -25,7 +25,7 @@ use std::hash::{BuildHasherDefault, Hash, Hasher}; use std::io; use ykpack; -const BUILD_SCRIPT_CRATE: &str = "build_script_build"; +pub const BUILD_SCRIPT_CRATE: &str = "build_script_build"; const CHECKABLE_BINOPS: [ykpack::BinOp; 5] = [ ykpack::BinOp::Add, ykpack::BinOp::Sub, @@ -890,3 +890,136 @@ impl SirTypes { }) } } + +pub mod labels { + use object::{Object, ObjectSection}; + use std::{ + convert::TryFrom, + fs, + path::{Path, PathBuf}, + process::Command, + }; + + fn split_symbol(s: &str) -> (String, u32) { + let data: Vec<&str> = s.split(':').collect(); + debug_assert!(data.len() == 3); + let sym = 
data[1].to_owned(); + let bb_idx = data[2].parse::().unwrap(); + (sym, bb_idx) + } + + pub fn add_yk_label_section(exe_path: &Path) { + let labels = extract_dwarf_labels(exe_path).unwrap(); + let mut tempf = tempfile::NamedTempFile::new().unwrap(); + bincode::serialize_into(&mut tempf, &labels).unwrap(); + add_section(exe_path, tempf.path()); + } + + fn add_section(exe_path: &Path, sec_data_path: &Path) { + let mut out_path = PathBuf::from(exe_path); + out_path.set_extension("with_labels"); + Command::new("objcopy") + .args(&[ + "--add-section", + &format!("{}={}", ykpack::YKLABELS_SECTION, sec_data_path.to_str().unwrap()), + "--set-section-flags", + &format!("{}=contents,alloc,readonly", ykpack::YKLABELS_SECTION), + exe_path.to_str().unwrap(), + out_path.to_str().unwrap(), + ]) + .output() + .expect("failed to insert labels section"); + std::fs::rename(out_path, exe_path).unwrap(); + } + + fn extract_dwarf_labels(exe_filename: &Path) -> Result, gimli::Error> { + let file = fs::File::open(exe_filename).unwrap(); + let mmap = unsafe { memmap::Mmap::map(&file).unwrap() }; + let object = object::File::parse(&*mmap).unwrap(); + let endian = if object.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + let mut labels = Vec::new(); + + let loader = |id: gimli::SectionId| -> Result<&[u8], gimli::Error> { + Ok(object + .section_by_name(id.name()) + .map(|sec| sec.data().expect("failed to decompress section")) + .unwrap_or(&[] as &[u8])) + }; + let sup_loader = |_| Ok(&[] as &[u8]); + let dwarf_cow = gimli::Dwarf::load(&loader, &sup_loader)?; + let borrow_section: &dyn for<'a> Fn( + &&'a [u8], + ) + -> gimli::EndianSlice<'a, gimli::RunTimeEndian> = + &|section| gimli::EndianSlice::new(section, endian); + let dwarf = dwarf_cow.borrow(&borrow_section); + let mut iter = dwarf.units(); + let mut subaddr = None; + while let Some(header) = iter.next()? 
{ + let unit = dwarf.unit(header)?; + let mut entries = unit.entries(); + while let Some((_, entry)) = entries.next_dfs()? { + if entry.tag() == gimli::DW_TAG_subprogram { + if let Some(_name) = entry.attr_value(gimli::DW_AT_linkage_name)? { + if let Some(lowpc) = entry.attr_value(gimli::DW_AT_low_pc)? { + let addr = match lowpc { + gimli::AttributeValue::Addr(v) => v as u64, + _ => { + panic!("Error reading dwarf information. Expected type 'Addr'.") + } + }; + // We can not accurately insert labels at the beginning of functions, + // because the label is offset by the function headers. We thus simply + // remember the subprogram's address so we can later assign it to the first + // block (ending with '_0') of this subprogram. + subaddr = Some(addr); + } + } + } else if entry.tag() == gimli::DW_TAG_label { + if let Some(name) = entry.attr_value(gimli::DW_AT_name)? { + if let Some(es) = name.string_value(&dwarf.debug_str) { + let s = es.to_string()?; + if s.starts_with("__YK_") { + if let Some(lowpc) = entry.attr_value(gimli::DW_AT_low_pc)? { + let addr = match lowpc { + gimli::AttributeValue::Addr(v) => v as u64, + _ => panic!( + "Error reading dwarf information. Expected type 'Addr'." + ), + }; + if subaddr.is_some() && s.ends_with("_0") { + // This is the first block of the subprogram. Assign its label + // to the subprogram's address. + let (fsym, bb) = split_symbol(s); + labels.push(ykpack::SirLabel { + off: usize::try_from(subaddr.unwrap()).unwrap(), + symbol_name: fsym, + bb, + }); + subaddr = None; + } else { + let (fsym, bb) = split_symbol(s); + labels.push(ykpack::SirLabel { + off: usize::try_from(addr).unwrap(), + symbol_name: fsym, + bb, + }); + } + } else { + // Ignore labels that have no address. + } + } + } + } + } + } + } + + labels.sort_by_key(|l| l.off); + Ok(labels) + } +} From 8cc1d23d70e82ec02a51881e6fc4cb615c1fba97 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Mon, 7 Dec 2020 15:02:59 +0000 Subject: [PATCH 2/3] Address comments. 
--- compiler/rustc_codegen_ssa/src/sir.rs | 61 ++++++++++++++------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/sir.rs b/compiler/rustc_codegen_ssa/src/sir.rs index 71b61f1955a..e20bf960632 100644 --- a/compiler/rustc_codegen_ssa/src/sir.rs +++ b/compiler/rustc_codegen_ssa/src/sir.rs @@ -900,7 +900,8 @@ pub mod labels { process::Command, }; - fn split_symbol(s: &str) -> (String, u32) { + /// Splits a Yorick mapping label name into its constituent fields. + fn split_label_name(s: &str) -> (String, u32) { let data: Vec<&str> = s.split(':').collect(); debug_assert!(data.len() == 3); let sym = data[1].to_owned(); @@ -908,6 +909,7 @@ pub mod labels { (sym, bb_idx) } + /// Add a Yorick label section to the specified executable. pub fn add_yk_label_section(exe_path: &Path) { let labels = extract_dwarf_labels(exe_path).unwrap(); let mut tempf = tempfile::NamedTempFile::new().unwrap(); @@ -915,6 +917,7 @@ pub mod labels { add_section(exe_path, tempf.path()); } + /// Copies the bytes in `sec_data_path` into a new Yorick label section of an executable. fn add_section(exe_path: &Path, sec_data_path: &Path) { let mut out_path = PathBuf::from(exe_path); out_path.set_extension("with_labels"); @@ -932,6 +935,8 @@ pub mod labels { std::fs::rename(out_path, exe_path).unwrap(); } + /// Walks the DWARF tree of the specified executable and extracts Yorick location mapping + /// labels. The labels are returned in a vector sorted by the file offset in the executable. 
fn extract_dwarf_labels(exe_filename: &Path) -> Result, gimli::Error> { let file = fs::File::open(exe_filename).unwrap(); let mmap = unsafe { memmap::Mmap::map(&file).unwrap() }; @@ -941,8 +946,6 @@ pub mod labels { } else { gimli::RunTimeEndian::Big }; - let mut labels = Vec::new(); - let loader = |id: gimli::SectionId| -> Result<&[u8], gimli::Error> { Ok(object .section_by_name(id.name()) @@ -959,6 +962,7 @@ pub mod labels { let dwarf = dwarf_cow.borrow(&borrow_section); let mut iter = dwarf.units(); let mut subaddr = None; + let mut labels = Vec::new(); while let Some(header) = iter.next()? { let unit = dwarf.unit(header)?; let mut entries = unit.entries(); @@ -966,17 +970,16 @@ pub mod labels { if entry.tag() == gimli::DW_TAG_subprogram { if let Some(_name) = entry.attr_value(gimli::DW_AT_linkage_name)? { if let Some(lowpc) = entry.attr_value(gimli::DW_AT_low_pc)? { - let addr = match lowpc { - gimli::AttributeValue::Addr(v) => v as u64, - _ => { - panic!("Error reading dwarf information. Expected type 'Addr'.") - } - }; - // We can not accurately insert labels at the beginning of functions, - // because the label is offset by the function headers. We thus simply - // remember the subprogram's address so we can later assign it to the first - // block (ending with '_0') of this subprogram. - subaddr = Some(addr); + if let gimli::AttributeValue::Addr(v) = lowpc { + // We can not accurately insert labels at the beginning of + // functions, because the label is offset by the function headers. + // We thus simply remember the subprogram's address so we can later + // assign it to the first block (ending with '_0') of this + // subprogram. + subaddr = Some(v as u64); + } else { + panic!("Error reading dwarf information. Expected type 'Addr'.") + } } } } else if entry.tag() == gimli::DW_TAG_label { @@ -985,16 +988,10 @@ pub mod labels { let s = es.to_string()?; if s.starts_with("__YK_") { if let Some(lowpc) = entry.attr_value(gimli::DW_AT_low_pc)? 
{ - let addr = match lowpc { - gimli::AttributeValue::Addr(v) => v as u64, - _ => panic!( - "Error reading dwarf information. Expected type 'Addr'." - ), - }; if subaddr.is_some() && s.ends_with("_0") { - // This is the first block of the subprogram. Assign its label - // to the subprogram's address. - let (fsym, bb) = split_symbol(s); + // This is the first block of the subprogram. Assign its + // label to the subprogram's address. + let (fsym, bb) = split_label_name(s); labels.push(ykpack::SirLabel { off: usize::try_from(subaddr.unwrap()).unwrap(), symbol_name: fsym, @@ -1002,12 +999,18 @@ pub mod labels { }); subaddr = None; } else { - let (fsym, bb) = split_symbol(s); - labels.push(ykpack::SirLabel { - off: usize::try_from(addr).unwrap(), - symbol_name: fsym, - bb, - }); + let (fsym, bb) = split_label_name(s); + if let gimli::AttributeValue::Addr(v) = lowpc { + labels.push(ykpack::SirLabel { + off: usize::try_from(v as u64).unwrap(), + symbol_name: fsym, + bb, + }); + } else { + panic!( + "Error reading dwarf information. Expected type 'Addr'." + ); + } } } else { // Ignore labels that have no address. From 01d328fe942177780bd16da71e7bb826942b7c00 Mon Sep 17 00:00:00 2001 From: Edd Barrett Date: Mon, 7 Dec 2020 16:52:26 +0000 Subject: [PATCH 3/3] Address more comments. --- compiler/rustc_codegen_ssa/src/sir.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/sir.rs b/compiler/rustc_codegen_ssa/src/sir.rs index e20bf960632..3394a327dbf 100644 --- a/compiler/rustc_codegen_ssa/src/sir.rs +++ b/compiler/rustc_codegen_ssa/src/sir.rs @@ -936,7 +936,7 @@ pub mod labels { } /// Walks the DWARF tree of the specified executable and extracts Yorick location mapping - /// labels. The labels are returned in a vector sorted by the file offset in the executable. + /// labels. Returns a list of labels ordered by file offset (ascending).
fn extract_dwarf_labels(exe_filename: &Path) -> Result, gimli::Error> { let file = fs::File::open(exe_filename).unwrap(); let mmap = unsafe { memmap::Mmap::map(&file).unwrap() }; @@ -976,7 +976,7 @@ pub mod labels { // We thus simply remember the subprogram's address so we can later // assign it to the first block (ending with '_0') of this // subprogram. - subaddr = Some(v as u64); + subaddr = Some(u64::try_from(v).unwrap()); } else { panic!("Error reading dwarf information. Expected type 'Addr'.") } @@ -1002,7 +1002,8 @@ pub mod labels { let (fsym, bb) = split_label_name(s); if let gimli::AttributeValue::Addr(v) = lowpc { labels.push(ykpack::SirLabel { - off: usize::try_from(v as u64).unwrap(), + off: usize::try_from(u64::try_from(v).unwrap()) + .unwrap(), symbol_name: fsym, bb, });