Auto merge of #7381 - alexcrichton:cpu-usage-graph, r=ehuss

Update `-Ztimings` with CPU usage information This commit updates the graph generated by `-Ztimings` to include CPU usage information, ideally showing how Cargo/rustc used the CPU throughout the build, ideally seeing nice periods of parallelism and also periods of missed parallelism.
rust-lang · Sep 19, 2019 · b385d51 · b385d51
2 parents d96c050 + 1d5d19e
commit b385d51
Show file tree

Hide file tree

Showing 5 changed files with 303 additions and 4 deletions.
diff --git a/src/cargo/core/compiler/job_queue.rs b/src/cargo/core/compiler/job_queue.rs
@@ -5,6 +5,7 @@ use std::marker;
 use std::mem;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::sync::Arc;
+use std::time::Duration;
 
 use crossbeam_utils::thread::Scope;
 use failure::format_err;
@@ -321,15 +322,26 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> {
             // to the jobserver itself.
             tokens.truncate(self.active.len() - 1);
 
+            // Record some timing information if `-Ztimings` is enabled, and
+            // this'll end up being a noop if we're not recording this
+            // information.
             self.timings
                 .mark_concurrency(self.active.len(), queue.len(), self.queue.len());
+            self.timings.record_cpu();
 
             // Drain all events at once to avoid displaying the progress bar
-            // unnecessarily.
+            // unnecessarily. If there's no events we actually block waiting for
+            // an event, but we keep a "heartbeat" going to allow `record_cpu`
+            // to run above to calculate CPU usage over time. To do this we
+            // listen for a message with a timeout, and on timeout we run the
+            // previous parts of the loop again.
             let events: Vec<_> = self.rx.try_iter().collect();
             let events = if events.is_empty() {
                 self.show_progress(finished, total);
-                vec![self.rx.recv().unwrap()]
+                match self.rx.recv_timeout(Duration::from_millis(500)) {
+                    Ok(message) => vec![message],
+                    Err(_) => continue,
+                }
             } else {
                 events
             };

diff --git a/src/cargo/core/compiler/timings.js b/src/cargo/core/compiler/timings.js
@@ -193,6 +193,21 @@ function render_timing_graph() {
       y: TOP_MARGIN + GRAPH_HEIGHT * (1.0 - (v / max_v))
     };
   }
+
+  const cpuFillStyle = 'rgba(250, 119, 0, 0.2)';
+  ctx.beginPath();
+  ctx.fillStyle = cpuFillStyle;
+  let bottomLeft = coord(CPU_USAGE[0][0], 0);
+  ctx.moveTo(bottomLeft.x, bottomLeft.y);
+  for (let i=0; i < CPU_USAGE.length; i++) {
+    let [time, usage] = CPU_USAGE[i];
+    let {x, y} = coord(time, usage / 100.0 * max_v);
+    ctx.lineTo(x, y);
+  }
+  let bottomRight = coord(CPU_USAGE[CPU_USAGE.length - 1][0], 0);
+  ctx.lineTo(bottomRight.x, bottomRight.y);
+  ctx.fill();
+
   function draw_line(style, key) {
     let first = CONCURRENCY_DATA[0];
     let last = coord(first.t, key(first));
@@ -216,15 +231,15 @@ function render_timing_graph() {
   // Draw a legend.
   ctx.restore();
   ctx.save();
-  ctx.translate(graph_width-120, MARGIN);
+  ctx.translate(graph_width-150, MARGIN);
   // background
   ctx.fillStyle = '#fff';
   ctx.strokeStyle = '#000';
   ctx.lineWidth = 1;
   ctx.textBaseline = 'middle'
   ctx.textAlign = 'start';
   ctx.beginPath();
-  ctx.rect(0, 0, 120, 62);
+  ctx.rect(0, 0, 150, 82);
   ctx.stroke();
   ctx.fill();
 
@@ -251,6 +266,13 @@ function render_timing_graph() {
   ctx.stroke();
   ctx.fillText('Active', 54, 51);
 
+  ctx.beginPath();
+  ctx.fillStyle = cpuFillStyle
+  ctx.fillRect(15, 60, 30, 15);
+  ctx.fill();
+  ctx.fillStyle = 'black';
+  ctx.fillText('CPU Usage', 54, 71);
+
   ctx.restore();
 }
 

diff --git a/src/cargo/core/compiler/timings.rs b/src/cargo/core/compiler/timings.rs
@@ -5,6 +5,7 @@
 use super::{CompileMode, Unit};
 use crate::core::compiler::BuildContext;
 use crate::core::PackageId;
+use crate::util::cpu::State;
 use crate::util::machine_message::{self, Message};
 use crate::util::{paths, CargoResult, Config};
 use std::collections::HashMap;
@@ -46,6 +47,13 @@ pub struct Timings<'a, 'cfg> {
     /// Concurrency-tracking information. This is periodically updated while
     /// compilation progresses.
     concurrency: Vec<Concurrency>,
+    /// Last recorded state of the system's CPUs and when it happened
+    last_cpu_state: Option<State>,
+    last_cpu_recording: Instant,
+    /// Recorded CPU states, stored as tuples. First element is when the
+    /// recording was taken and second element is percentage usage of the
+    /// system.
+    cpu_usage: Vec<(f64, f64)>,
 }
 
 /// Tracking information for an individual unit.
@@ -134,6 +142,9 @@ impl<'a, 'cfg> Timings<'a, 'cfg> {
             unit_times: Vec::new(),
             active: HashMap::new(),
             concurrency: Vec::new(),
+            last_cpu_state: State::current().ok(),
+            last_cpu_recording: Instant::now(),
+            cpu_usage: Vec::new(),
         }
     }
 
@@ -253,6 +264,31 @@ impl<'a, 'cfg> Timings<'a, 'cfg> {
         self.total_dirty += 1;
     }
 
+    /// Take a sample of CPU usage
+    pub fn record_cpu(&mut self) {
+        if !self.enabled {
+            return;
+        }
+        let prev = match &mut self.last_cpu_state {
+            Some(state) => state,
+            None => return,
+        };
+        // Don't take samples too too frequently, even if requested.
+        let now = Instant::now();
+        if self.last_cpu_recording.elapsed() < Duration::from_millis(100) {
+            return;
+        }
+        let current = match State::current() {
+            Ok(s) => s,
+            Err(_) => return,
+        };
+        let pct_idle = current.idle_since(prev);
+        *prev = current;
+        self.last_cpu_recording = now;
+        let dur = d_as_f64(now.duration_since(self.start));
+        self.cpu_usage.push((dur, 100.0 - pct_idle));
+    }
+
     /// Call this when all units are finished.
     pub fn finished(&mut self) -> CargoResult<()> {
         if !self.enabled {
@@ -442,6 +478,11 @@ impl<'a, 'cfg> Timings<'a, 'cfg> {
             "const CONCURRENCY_DATA = {};",
             serde_json::to_string_pretty(&self.concurrency)?
         )?;
+        writeln!(
+            f,
+            "const CPU_USAGE = {};",
+            serde_json::to_string_pretty(&self.cpu_usage)?
+        )?;
         Ok(())
     }
 

diff --git a/src/cargo/util/cpu.rs b/src/cargo/util/cpu.rs
@@ -0,0 +1,223 @@
+use std::io;
+
+pub struct State(imp::State);
+
+impl State {
+    /// Captures the current state of all CPUs on the system.
+    ///
+    /// The `State` returned here isn't too meaningful in terms of
+    /// interpretation across platforms, but it can be compared to previous
+    /// states to get a meaningful cross-platform number.
+    pub fn current() -> io::Result<State> {
+        imp::current().map(State)
+    }
+
+    /// Returns the percentage of time CPUs were idle from the current state
+    /// relative to the previous state, as a percentage from 0.0 to 100.0.
+    ///
+    /// This function will return, as a percentage, the amount of time that the
+    /// entire system was idle between the `previous` state and this own state.
+    /// This can be useful to compare two snapshots in time of CPU usage to see
+    /// how the CPU usage compares between the two.
+    pub fn idle_since(&self, previous: &State) -> f64 {
+        imp::pct_idle(&previous.0, &self.0)
+    }
+}
+
+#[cfg(target_os = "linux")]
+mod imp {
+    use std::fs::File;
+    use std::io::{self, Read};
+
+    pub struct State {
+        user: u64,
+        nice: u64,
+        system: u64,
+        idle: u64,
+        iowait: u64,
+        irq: u64,
+        softirq: u64,
+        steal: u64,
+        guest: u64,
+        guest_nice: u64,
+    }
+
+    pub fn current() -> io::Result<State> {
+        let mut state = String::new();
+        File::open("/proc/stat")?.read_to_string(&mut state)?;
+        let mut parts = state.lines().next().unwrap().split_whitespace();
+        if parts.next() != Some("cpu") {
+            return Err(io::Error::new(
+                io::ErrorKind::Other,
+                "cannot parse /proc/stat",
+            ));
+        }
+
+        Ok(State {
+            user: parts.next().unwrap().parse::<u64>().unwrap(),
+            nice: parts.next().unwrap().parse::<u64>().unwrap(),
+            system: parts.next().unwrap().parse::<u64>().unwrap(),
+            idle: parts.next().unwrap().parse::<u64>().unwrap(),
+            iowait: parts.next().unwrap().parse::<u64>().unwrap(),
+            irq: parts.next().unwrap().parse::<u64>().unwrap(),
+            softirq: parts.next().unwrap().parse::<u64>().unwrap(),
+            steal: parts.next().unwrap().parse::<u64>().unwrap(),
+            guest: parts.next().unwrap().parse::<u64>().unwrap(),
+            guest_nice: parts.next().unwrap().parse::<u64>().unwrap(),
+        })
+    }
+
+    pub fn pct_idle(prev: &State, next: &State) -> f64 {
+        let user = next.user - prev.user;
+        let nice = next.nice - prev.nice;
+        let system = next.system - prev.system;
+        let idle = next.idle - prev.idle;
+        let iowait = next.iowait - prev.iowait;
+        let irq = next.irq - prev.irq;
+        let softirq = next.softirq - prev.softirq;
+        let steal = next.steal - prev.steal;
+        let guest = next.guest - prev.guest;
+        let guest_nice = next.guest_nice - prev.guest_nice;
+        let total =
+            user + nice + system + idle + iowait + irq + softirq + steal + guest + guest_nice;
+
+        (idle as f64) / (total as f64) * 100.0
+    }
+}
+
+#[cfg(target_os = "macos")]
+#[allow(bad_style)]
+mod imp {
+    use std::io;
+    use std::ptr;
+    use std::slice;
+
+    type host_t = u32;
+    type mach_port_t = u32;
+    type processor_flavor_t = i32;
+    type natural_t = u32;
+    type processor_info_array_t = *mut i32;
+    type mach_msg_type_number_t = i32;
+    type kern_return_t = i32;
+
+    const PROESSOR_CPU_LOAD_INFO: processor_flavor_t = 2;
+    const CPU_STATE_USER: usize = 0;
+    const CPU_STATE_SYSTEM: usize = 1;
+    const CPU_STATE_IDLE: usize = 2;
+    const CPU_STATE_NICE: usize = 3;
+
+    extern "C" {
+        fn mach_host_self() -> mach_port_t;
+        fn host_processor_info(
+            host: host_t,
+            flavor: processor_flavor_t,
+            out_processor_count: *mut natural_t,
+            out_processor_info: *mut processor_info_array_t,
+            out_processor_infoCnt: *mut mach_msg_type_number_t,
+        ) -> kern_return_t;
+    }
+
+    pub struct State {
+        user: u64,
+        system: u64,
+        idle: u64,
+        nice: u64,
+    }
+
+    pub fn current() -> io::Result<State> {
+        unsafe {
+            let mut num_cpus_u = 0;
+            let mut cpu_info = ptr::null_mut();
+            let mut cpu_info_cnt = 0;
+            let err = host_processor_info(
+                mach_host_self(),
+                PROESSOR_CPU_LOAD_INFO,
+                &mut num_cpus_u,
+                &mut cpu_info,
+                &mut cpu_info_cnt,
+            );
+            if err != 0 {
+                return Err(io::Error::last_os_error());
+            }
+            let cpu_info = slice::from_raw_parts(cpu_info, cpu_info_cnt as usize);
+            let mut ret = State {
+                user: 0,
+                system: 0,
+                idle: 0,
+                nice: 0,
+            };
+            for chunk in cpu_info.chunks(num_cpus_u as usize) {
+                ret.user += chunk[CPU_STATE_USER] as u64;
+                ret.system += chunk[CPU_STATE_SYSTEM] as u64;
+                ret.idle += chunk[CPU_STATE_IDLE] as u64;
+                ret.nice += chunk[CPU_STATE_NICE] as u64;
+            }
+            Ok(ret)
+        }
+    }
+
+    pub fn pct_idle(prev: &State, next: &State) -> f64 {
+        let user = next.user - prev.user;
+        let system = next.system - prev.system;
+        let idle = next.idle - prev.idle;
+        let nice = next.nice - prev.nice;
+        let total = user + system + idle + nice;
+        (idle as f64) / (total as f64) * 100.0
+    }
+}
+
+#[cfg(windows)]
+mod imp {
+    use std::io;
+    use std::mem;
+    use winapi::shared::minwindef::*;
+    use winapi::um::processthreadsapi::*;
+
+    pub struct State {
+        idle: FILETIME,
+        kernel: FILETIME,
+        user: FILETIME,
+    }
+
+    pub fn current() -> io::Result<State> {
+        unsafe {
+            let mut ret = mem::zeroed::<State>();
+            let r = GetSystemTimes(&mut ret.idle, &mut ret.kernel, &mut ret.user);
+            if r != 0 {
+                Ok(ret)
+            } else {
+                Err(io::Error::last_os_error())
+            }
+        }
+    }
+
+    pub fn pct_idle(prev: &State, next: &State) -> f64 {
+        fn to_u64(a: &FILETIME) -> u64 {
+            ((a.dwHighDateTime as u64) << 32) | (a.dwLowDateTime as u64)
+        }
+
+        let idle = to_u64(&next.idle) - to_u64(&prev.idle);
+        let kernel = to_u64(&next.kernel) - to_u64(&prev.kernel);
+        let user = to_u64(&next.user) - to_u64(&prev.user);
+        let total = user + kernel;
+        (idle as f64) / (total as f64) * 100.0
+    }
+}
+
+#[cfg(not(any(target_os = "linux", target_os = "macos", windows)))]
+mod imp {
+    use std::io;
+
+    pub struct State;
+
+    pub fn current() -> io::Result<State> {
+        Err(io::Error::new(
+            io::ErrorKind::Other,
+            "unsupported platform to learn CPU state",
+        ))
+    }
+
+    pub fn pct_idle(_prev: &State, _next: &State) -> f64 {
+        unimplemented!()
+    }
+}
diff --git a/src/cargo/util/mod.rs b/src/cargo/util/mod.rs
@@ -33,6 +33,7 @@ mod canonical_url;
 mod cfg;
 pub mod command_prelude;
 pub mod config;
+pub mod cpu;
 mod dependency_queue;
 pub mod diagnostic_server;
 pub mod errors;