From 8407c482354e7befb516501d8b1d0658fd5c5939 Mon Sep 17 00:00:00 2001 From: Remi Bernotavicius Date: Sat, 11 Jan 2020 19:42:32 -0500 Subject: [PATCH 1/7] Run rustfmt on all the source code --- examples/group.rs | 39 ++++++++++++------ examples/insns-for-pid.rs | 2 +- examples/println-cpi.rs | 22 ++++++---- src/events.rs | 6 +-- src/lib.rs | 85 ++++++++++++++++++++++----------------- src/syscalls.rs | 43 +++++++++++--------- 6 files changed, 114 insertions(+), 83 deletions(-) diff --git a/examples/group.rs b/examples/group.rs index dd7e7fc..9cf034a 100644 --- a/examples/group.rs +++ b/examples/group.rs @@ -1,5 +1,5 @@ -use perf_event::{Builder, Group}; use perf_event::events::{Cache, CacheOp, CacheResult, Hardware, WhichCache}; +use perf_event::{Builder, Group}; fn main() -> std::io::Result<()> { const ACCESS: Cache = Cache { @@ -7,13 +7,22 @@ fn main() -> std::io::Result<()> { operation: CacheOp::READ, result: CacheResult::ACCESS, }; - const MISS: Cache = Cache { result: CacheResult::MISS, ..ACCESS }; + const MISS: Cache = Cache { + result: CacheResult::MISS, + ..ACCESS + }; let mut group = Group::new()?; let access_counter = Builder::new().group(&group).kind(ACCESS).build()?; let miss_counter = Builder::new().group(&group).kind(MISS).build()?; - let branches = Builder::new().group(&group).kind(Hardware::BRANCH_INSTRUCTIONS).build()?; - let missed_branches = Builder::new().group(&group).kind(Hardware::BRANCH_MISSES).build()?; + let branches = Builder::new() + .group(&group) + .kind(Hardware::BRANCH_INSTRUCTIONS) + .build()?; + let missed_branches = Builder::new() + .group(&group) + .kind(Hardware::BRANCH_MISSES) + .build()?; let vec = (0..=51).collect::>(); @@ -22,15 +31,19 @@ fn main() -> std::io::Result<()> { group.disable()?; let counts = group.read()?; - println!("L1D cache misses/references: {} / {} ({:.0}%)", - counts[&miss_counter], - counts[&access_counter], - (counts[&miss_counter] as f64 / counts[&access_counter] as f64) * 100.0); - - println!("branch 
prediction misses/total: {} / {} ({:.0}%)", - counts[&missed_branches], - counts[&branches], - (counts[&missed_branches] as f64 / counts[&branches] as f64) * 100.0); + println!( + "L1D cache misses/references: {} / {} ({:.0}%)", + counts[&miss_counter], + counts[&access_counter], + (counts[&miss_counter] as f64 / counts[&access_counter] as f64) * 100.0 + ); + + println!( + "branch prediction misses/total: {} / {} ({:.0}%)", + counts[&missed_branches], + counts[&branches], + (counts[&missed_branches] as f64 / counts[&branches] as f64) * 100.0 + ); // You can iterate over a `Counts` value: for (id, value) in &counts { diff --git a/examples/insns-for-pid.rs b/examples/insns-for-pid.rs index 09d4ef3..3fc73ae 100644 --- a/examples/insns-for-pid.rs +++ b/examples/insns-for-pid.rs @@ -1,6 +1,6 @@ use libc::pid_t; -use perf_event::Builder; use perf_event::events::Hardware; +use perf_event::Builder; use std::thread::sleep; use std::time::Duration; diff --git a/examples/println-cpi.rs b/examples/println-cpi.rs index ffc381d..36d66b3 100644 --- a/examples/println-cpi.rs +++ b/examples/println-cpi.rs @@ -1,10 +1,16 @@ fn main() -> std::io::Result<()> { - use perf_event::{Builder, Group}; use perf_event::events::Hardware; + use perf_event::{Builder, Group}; let mut group = Group::new()?; - let cycles = Builder::new().group(&group).kind(Hardware::CPU_CYCLES).build()?; - let insns = Builder::new().group(&group).kind(Hardware::INSTRUCTIONS).build()?; + let cycles = Builder::new() + .group(&group) + .kind(Hardware::CPU_CYCLES) + .build()?; + let insns = Builder::new() + .group(&group) + .kind(Hardware::INSTRUCTIONS) + .build()?; let vec = (0..=51).collect::>(); @@ -13,10 +19,12 @@ fn main() -> std::io::Result<()> { group.disable()?; let counts = group.read()?; - println!("cycles / instructions: {} / {} ({:.2} cpi)", - counts[&cycles], - counts[&insns], - (counts[&cycles] as f64 / counts[&insns] as f64)); + println!( + "cycles / instructions: {} / {} ({:.2} cpi)", + counts[&cycles], 
+ counts[&insns], + (counts[&cycles] as f64 / counts[&insns] as f64) + ); Ok(()) } diff --git a/src/events.rs b/src/events.rs index 72f40b1..e36450a 100644 --- a/src/events.rs +++ b/src/events.rs @@ -27,7 +27,7 @@ //! [`Cache`]: struct.Cache.html #![allow(non_camel_case_types)] -use perf_event_open_sys::bindings as bindings; +use perf_event_open_sys::bindings; /// Any sort of event. This is a sum of the [`Hardware`], /// [`Software`], and [`Cache`] types, which all implement @@ -238,9 +238,7 @@ impl From for Event { impl Cache { fn as_config(&self) -> u64 { - self.which as u64 | - ((self.operation as u64) << 8) | - ((self.result as u64) << 16) + self.which as u64 | ((self.operation as u64) << 8) | ((self.result as u64) << 16) } } diff --git a/src/lib.rs b/src/lib.rs index cf0703b..ed66fb4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -318,7 +318,7 @@ pub struct Group { /// [`Counter::id`]: struct.Counter.html#method.id pub struct Counts { // Raw results from the `read`. - data: Vec + data: Vec, } impl<'a> EventPid<'a> { @@ -327,8 +327,7 @@ impl<'a> EventPid<'a> { match self { EventPid::ThisProcess => (0, 0), EventPid::Other(pid) => (*pid, 0), - EventPid::CGroup(file) => - (file.as_raw_fd(), sys::bindings::PERF_FLAG_PID_CGROUP), + EventPid::CGroup(file) => (file.as_raw_fd(), sys::bindings::PERF_FLAG_PID_CGROUP), } } } @@ -476,11 +475,9 @@ impl<'a> Builder<'a> { // assigned us, so we can find our results in a Counts structure. Even // if we're not part of a group, we'll use it in `Debug` output. 
let mut id = 0_64; - check_syscall(|| unsafe { - sys::ioctls::ID(file.as_raw_fd(), &mut id) - })?; + check_syscall(|| unsafe { sys::ioctls::ID(file.as_raw_fd(), &mut id) })?; - Ok(Counter { file, id, }) + Ok(Counter { file, id }) } } @@ -506,9 +503,7 @@ impl Counter { /// [`reset`]: #method.reset /// [`enable`]: struct.Group.html#method.enable pub fn enable(&mut self) -> io::Result<()> { - check_syscall(|| unsafe { - sys::ioctls::ENABLE(self.file.as_raw_fd(), 0) - }).map(|_| ()) + check_syscall(|| unsafe { sys::ioctls::ENABLE(self.file.as_raw_fd(), 0) }).map(|_| ()) } /// Make this `Counter` stop counting its designated event. Its count is @@ -519,9 +514,7 @@ impl Counter { /// /// [`disable`]: struct.Group.html#method.disable pub fn disable(&mut self) -> io::Result<()> { - check_syscall(|| unsafe { - sys::ioctls::DISABLE(self.file.as_raw_fd(), 0) - }).map(|_| ()) + check_syscall(|| unsafe { sys::ioctls::DISABLE(self.file.as_raw_fd(), 0) }).map(|_| ()) } /// Reset the value of this `Counter` to zero. @@ -531,9 +524,7 @@ impl Counter { /// /// [`reset`]: struct.Group.html#method.reset pub fn reset(&mut self) -> io::Result<()> { - check_syscall(|| unsafe { - sys::ioctls::RESET(self.file.as_raw_fd(), 0) - }).map(|_| ()) + check_syscall(|| unsafe { sys::ioctls::RESET(self.file.as_raw_fd(), 0) }).map(|_| ()) } /// Return this `Counter`'s current value as a `u64`. @@ -551,8 +542,12 @@ impl Counter { impl std::fmt::Debug for Counter { fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(fmt, "Counter {{ fd: {}, id: {} }}", - self.file.as_raw_fd(), self.id) + write!( + fmt, + "Counter {{ fd: {}, id: {} }}", + self.file.as_raw_fd(), + self.id + ) } } @@ -570,8 +565,9 @@ impl Group { attrs.set_exclude_hv(1); // Arrange to be able to identify the counters we read back. 
- attrs.read_format = (sys::bindings::perf_event_read_format_PERF_FORMAT_ID | - sys::bindings::perf_event_read_format_PERF_FORMAT_GROUP) as u64; + attrs.read_format = (sys::bindings::perf_event_read_format_PERF_FORMAT_ID + | sys::bindings::perf_event_read_format_PERF_FORMAT_GROUP) + as u64; let file = unsafe { File::from_raw_fd(check_syscall(|| { @@ -581,13 +577,15 @@ impl Group { // Retrieve the ID the kernel assigned us. let mut id = 0_64; - check_syscall(|| unsafe { - sys::ioctls::ID(file.as_raw_fd(), &mut id) - })?; + check_syscall(|| unsafe { sys::ioctls::ID(file.as_raw_fd(), &mut id) })?; let max_members = AtomicUsize::new(0); - Ok(Group { file, id, max_members }) + Ok(Group { + file, + id, + max_members, + }) } /// Allow all `Counter`s in this `Group` to begin counting their designated @@ -615,9 +613,12 @@ impl Group { fn generic_ioctl(&mut self, f: unsafe fn(c_int, c_uint) -> c_int) -> io::Result<()> { check_syscall(|| unsafe { - f(self.file.as_raw_fd(), - sys::bindings::perf_event_ioc_flags_PERF_IOC_FLAG_GROUP) - }).map(|_| ()) + f( + self.file.as_raw_fd(), + sys::bindings::perf_event_ioc_flags_PERF_IOC_FLAG_GROUP, + ) + }) + .map(|_| ()) } /// Return the values of all the `Counter`s in this `Group` as a [`Counts`] @@ -664,8 +665,12 @@ impl Group { impl std::fmt::Debug for Group { fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(fmt, "Group {{ fd: {}, id: {} }}", - self.file.as_raw_fd(), self.id) + write!( + fmt, + "Group {{ fd: {}, id: {} }}", + self.file.as_raw_fd(), + self.id + ) } } @@ -677,8 +682,7 @@ impl Counts { fn nth_ref(&self, n: usize) -> (u64, &u64) { assert!(n < self.len()); // (id, &value) - (self.data[1 + 2 * n + 1], - &self.data[1 + 2 * n]) + (self.data[1 + 2 * n + 1], &self.data[1 + 2 * n]) } } @@ -694,7 +698,7 @@ impl Counts { /// [`Group::read`]: struct.Group.html#method.read pub struct CountsIter<'c> { counts: &'c Counts, - next: usize + next: usize, } impl<'c> Iterator for CountsIter<'c> { @@ -773,17 +777,20 
@@ impl std::fmt::Debug for Counts { unsafe trait SliceAsBytesMut: Sized { fn slice_as_bytes_mut(slice: &mut [Self]) -> &mut [u8] { unsafe { - std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut u8, - std::mem::size_of_val(slice)) + std::slice::from_raw_parts_mut( + slice.as_mut_ptr() as *mut u8, + std::mem::size_of_val(slice), + ) } } } -unsafe impl SliceAsBytesMut for u64 { } +unsafe impl SliceAsBytesMut for u64 {} fn check_syscall(f: F) -> io::Result -where F: FnOnce() -> R, - R: PartialOrd + Default +where + F: FnOnce() -> R, + R: PartialOrd + Default, { let result = f(); if result < R::default() { @@ -795,5 +802,7 @@ where F: FnOnce() -> R, #[test] fn simple_build() { - Builder::new().build().expect("Couldn't build default Counter"); + Builder::new() + .build() + .expect("Couldn't build default Counter"); } diff --git a/src/syscalls.rs b/src/syscalls.rs index 9da324e..ba6e9c5 100644 --- a/src/syscalls.rs +++ b/src/syscalls.rs @@ -4,28 +4,29 @@ use std::io; use std::os::raw::{c_int, c_ulong}; use std::os::unix::io::{FromRawFd, RawFd}; -pub fn perf_event_open(attrs: &bindings::perf_event_attr, - pid: bindings::__kernel_pid_t, - cpu: c_int, - group_fd: c_int, - flags: c_ulong) -> io::Result -{ +pub fn perf_event_open( + attrs: &bindings::perf_event_attr, + pid: bindings::__kernel_pid_t, + cpu: c_int, + group_fd: c_int, + flags: c_ulong, +) -> io::Result { let result = unsafe { - libc::syscall(bindings::__NR_perf_event_open as libc::c_long, - attrs as *const bindings::perf_event_attr, - pid, - cpu, - group_fd, - flags) + libc::syscall( + bindings::__NR_perf_event_open as libc::c_long, + attrs as *const bindings::perf_event_attr, + pid, + cpu, + group_fd, + flags, + ) }; if result < 0 { return Err(io::Error::last_os_error()); } - let file = unsafe { - File::from_raw_fd(result as RawFd) - }; + let file = unsafe { File::from_raw_fd(result as RawFd) }; Ok(file) } @@ -51,7 +52,7 @@ pub mod ioctls { pub unsafe fn $name(file: &File, arg: $arg_type) -> io::Result { 
untyped_ioctl(file, bindings::$ioctl, arg) } - } + }; } define_ioctls! { @@ -69,10 +70,12 @@ pub mod ioctls { { MODIFY_ATTRIBUTES, perf_event_ioctls_MODIFY_ATTRIBUTES, *mut perf_event_attr } } - unsafe fn untyped_ioctl(file: &File, ioctl: bindings::perf_event_ioctls, arg: A) -> io::Result { - let result = libc::ioctl(file.as_raw_fd() as c_int, - ioctl as c_ulong, - arg); + unsafe fn untyped_ioctl( + file: &File, + ioctl: bindings::perf_event_ioctls, + arg: A, + ) -> io::Result { + let result = libc::ioctl(file.as_raw_fd() as c_int, ioctl as c_ulong, arg); if result < 0 { return Err(io::Error::last_os_error()); From aa19267e9db04b41a14be272622ed02896bf32b1 Mon Sep 17 00:00:00 2001 From: Remi Bernotavicius Date: Mon, 13 Jan 2020 15:35:59 -0500 Subject: [PATCH 2/7] Rename build to counter --- examples/group.rs | 8 ++++---- examples/insns-for-pid.rs | 2 +- examples/println-cpi.rs | 4 ++-- examples/println.rs | 2 +- src/events.rs | 4 ++-- src/lib.rs | 36 ++++++++++++++++++------------------ 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/examples/group.rs b/examples/group.rs index 9cf034a..fbc1676 100644 --- a/examples/group.rs +++ b/examples/group.rs @@ -13,16 +13,16 @@ fn main() -> std::io::Result<()> { }; let mut group = Group::new()?; - let access_counter = Builder::new().group(&group).kind(ACCESS).build()?; - let miss_counter = Builder::new().group(&group).kind(MISS).build()?; + let access_counter = Builder::new().group(&group).kind(ACCESS).counter()?; + let miss_counter = Builder::new().group(&group).kind(MISS).counter()?; let branches = Builder::new() .group(&group) .kind(Hardware::BRANCH_INSTRUCTIONS) - .build()?; + .counter()?; let missed_branches = Builder::new() .group(&group) .kind(Hardware::BRANCH_MISSES) - .build()?; + .counter()?; let vec = (0..=51).collect::>(); diff --git a/examples/insns-for-pid.rs b/examples/insns-for-pid.rs index 3fc73ae..5f72a4e 100644 --- a/examples/insns-for-pid.rs +++ b/examples/insns-for-pid.rs @@ -14,7 +14,7 @@ 
fn main() -> std::io::Result<()> { let mut insns = Builder::new() .observe_pid(pid) .kind(Hardware::BRANCH_INSTRUCTIONS) - .build()?; + .counter()?; // Count instructions in PID for five seconds. insns.enable()?; diff --git a/examples/println-cpi.rs b/examples/println-cpi.rs index 36d66b3..f7017f1 100644 --- a/examples/println-cpi.rs +++ b/examples/println-cpi.rs @@ -6,11 +6,11 @@ fn main() -> std::io::Result<()> { let cycles = Builder::new() .group(&group) .kind(Hardware::CPU_CYCLES) - .build()?; + .counter()?; let insns = Builder::new() .group(&group) .kind(Hardware::INSTRUCTIONS) - .build()?; + .counter()?; let vec = (0..=51).collect::>(); diff --git a/examples/println.rs b/examples/println.rs index 8760e3d..1c88c95 100644 --- a/examples/println.rs +++ b/examples/println.rs @@ -1,7 +1,7 @@ use perf_event::Builder; fn main() -> std::io::Result<()> { - let mut counter = Builder::new().build()?; + let mut counter = Builder::new().counter()?; let vec = (0..=51).collect::>(); diff --git a/src/events.rs b/src/events.rs index e36450a..29d05f0 100644 --- a/src/events.rs +++ b/src/events.rs @@ -211,8 +211,8 @@ impl From for Event { /// // Construct a `Group` containing the two new counters, from which we /// // can get counts over matching periods of time. /// let mut group = Group::new()?; -/// let access_counter = Builder::new().group(&group).kind(ACCESS).build()?; -/// let miss_counter = Builder::new().group(&group).kind(MISS).build()?; +/// let access_counter = Builder::new().group(&group).kind(ACCESS).counter()?; +/// let miss_counter = Builder::new().group(&group).kind(MISS).counter()?; /// # Ok(()) } /// /// [`which`]: enum.WhichCache.html diff --git a/src/lib.rs b/src/lib.rs index ed66fb4..c38e5c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,8 +13,8 @@ //! fn main() -> std::io::Result<()> { //! // A `Group` lets us enable and disable several counters atomically. //! let mut group = Group::new()?; -//! 
let cycles = Builder::new().group(&group).kind(Hardware::CPU_CYCLES).build()?; -//! let insns = Builder::new().group(&group).kind(Hardware::INSTRUCTIONS).build()?; +//! let cycles = Builder::new().group(&group).kind(Hardware::CPU_CYCLES).counter()?; +//! let insns = Builder::new().group(&group).kind(Hardware::INSTRUCTIONS).counter()?; //! //! let vec = (0..=51).collect::>(); //! @@ -93,7 +93,7 @@ pub mod events; /// use perf_event::Builder; /// /// fn main() -> std::io::Result<()> { -/// let mut counter = Builder::new().build()?; +/// let mut counter = Builder::new().counter()?; /// /// let vec = (0..=51).collect::>(); /// @@ -142,7 +142,7 @@ pub struct Counter { /// /// # use perf_event::Builder; /// # fn main() -> std::io::Result<()> { -/// let mut insns = Builder::new().build()?; +/// let mut insns = Builder::new().counter()?; /// # Ok(()) } /// /// The [`kind`] method lets you specify what sort of event you want to @@ -153,7 +153,7 @@ pub struct Counter { /// # fn main() -> std::io::Result<()> { /// let mut insns = Builder::new() /// .kind(Hardware::BRANCH_INSTRUCTIONS) -/// .build()?; +/// .counter()?; /// # Ok(()) } /// /// The [`group`] method lets you gather individual counters into `Group` @@ -163,8 +163,8 @@ pub struct Counter { /// # use perf_event::events::Hardware; /// # fn main() -> std::io::Result<()> { /// let mut group = Group::new()?; -/// let cycles = Builder::new().group(&group).kind(Hardware::CPU_CYCLES).build()?; -/// let insns = Builder::new().group(&group).kind(Hardware::INSTRUCTIONS).build()?; +/// let cycles = Builder::new().group(&group).kind(Hardware::CPU_CYCLES).counter()?; +/// let insns = Builder::new().group(&group).kind(Hardware::INSTRUCTIONS).counter()?; /// # Ok(()) } /// /// Other methods let you select: @@ -220,8 +220,8 @@ enum EventPid<'a> { /// use perf_event::events::Hardware; /// /// let mut group = Group::new()?; -/// let cycles = Builder::new().group(&group).kind(Hardware::CPU_CYCLES).build()?; -/// let insns = 
Builder::new().group(&group).kind(Hardware::INSTRUCTIONS).build()?; +/// let cycles = Builder::new().group(&group).kind(Hardware::CPU_CYCLES).counter()?; +/// let insns = Builder::new().group(&group).kind(Hardware::INSTRUCTIONS).counter()?; /// /// let vec = (0..=51).collect::>(); /// @@ -290,8 +290,8 @@ pub struct Group { /// # fn main() -> std::io::Result<()> { /// # use perf_event::{Builder, Group}; /// # let mut group = Group::new()?; -/// # let cycles = Builder::new().group(&group).build()?; -/// # let insns = Builder::new().group(&group).build()?; +/// # let cycles = Builder::new().group(&group).counter()?; +/// # let insns = Builder::new().group(&group).counter()?; /// let counts = group.read()?; /// println!("cycles / instructions: {} / {} ({:.2} cpi)", /// counts[&cycles], @@ -408,8 +408,8 @@ impl<'a> Builder<'a> { /// const MISS: Cache = Cache { result: CacheResult::MISS, ..ACCESS }; /// /// let mut group = Group::new()?; - /// let access_counter = Builder::new().group(&group).kind(ACCESS).build()?; - /// let miss_counter = Builder::new().group(&group).kind(MISS).build()?; + /// let access_counter = Builder::new().group(&group).kind(ACCESS).counter()?; + /// let miss_counter = Builder::new().group(&group).kind(MISS).counter()?; /// # Ok(()) } /// /// [`Event`]: events/enum.Event.html @@ -443,7 +443,7 @@ impl<'a> Builder<'a> { /// /// [`Counter`]: struct.Counter.html /// [`enable`]: struct.Counter.html#method.enable - pub fn build(self) -> std::io::Result { + pub fn counter(self) -> std::io::Result { let cpu = match self.cpu { Some(cpu) => cpu as c_int, None => -1, @@ -629,8 +629,8 @@ impl Group { /// /// ```ignore /// let mut group = Group::new()?; - /// let counter1 = Builder::new().group(&group).kind(...).build()?; - /// let counter2 = Builder::new().group(&group).kind(...).build()?; + /// let counter1 = Builder::new().group(&group).kind(...).counter()?; + /// let counter2 = Builder::new().group(&group).kind(...).counter()?; /// ... 
/// let counts = group.read()?; /// println!("Rhombus inclinations per taxi medallion: {} / {} ({:.0}%)", @@ -733,7 +733,7 @@ impl Counts { /// # fn main() -> std::io::Result<()> { /// # use perf_event::{Builder, Group}; /// # let mut group = Group::new()?; - /// # let cycle_counter = Builder::new().group(&group).build()?; + /// # let cycle_counter = Builder::new().group(&group).counter()?; /// # let counts = group.read()?; /// let cycles = counts[&cycle_counter]; /// # Ok(()) } @@ -803,6 +803,6 @@ where #[test] fn simple_build() { Builder::new() - .build() + .counter() .expect("Couldn't build default Counter"); } From 8c2ffb59cff71e55c581de81d3348eab25de66fe Mon Sep 17 00:00:00 2001 From: Remi Bernotavicius Date: Tue, 14 Jan 2020 00:28:18 -0500 Subject: [PATCH 3/7] Add very simple sampling support. This patch only supports the PERF_RECORD_SAMPLE events, and is missing support for certain things contained within the sample. It is also missing support for samples other than PERF_RECORD_SAMPLE. 
--- Cargo.toml | 1 + examples/sample.rs | 43 +++++ src/lib.rs | 400 +++++++++++++++++++++++++++++++++++++++-- src/sample.rs | 436 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 863 insertions(+), 17 deletions(-) create mode 100644 examples/sample.rs create mode 100644 src/sample.rs diff --git a/Cargo.toml b/Cargo.toml index bc6e100..827deeb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,4 @@ description = "A Rust interface to Linux performance monitoring" [dependencies] perf-event-open-sys = "0.3" libc = "0.2" +byte = "0.2.4" diff --git a/examples/sample.rs b/examples/sample.rs new file mode 100644 index 0000000..a1eeb5d --- /dev/null +++ b/examples/sample.rs @@ -0,0 +1,43 @@ +use perf_event::{events, Builder}; +use std::time::{Duration, Instant}; + +fn main() -> std::io::Result<()> { + let mut handles: Vec>> = vec![]; + + let end = Instant::now() + Duration::from_secs(10); + + for cpu in 0..8 { + let handle = std::thread::spawn(move || { + let sample_stream = Builder::new() + .kind(events::Hardware::CPU_CYCLES) + .one_cpu(cpu) + .observe_all() + .sample_callchain() + .sample_frequency(4000) + .sample_ip() + .sample_tid() + .sample_time() + .sample_cpu() + .sample_period() + .sample_stream()?; + + sample_stream.enable()?; + + let mut now = Instant::now(); + while now < end { + if let Some(sample) = sample_stream.read(Some(end - now))? { + println!("{:#?}", sample); + } + now = Instant::now(); + } + + Ok(()) + }); + handles.push(handle); + } + + for handle in handles { + handle.join().unwrap()?; + } + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index c38e5c2..4c7534b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,7 +35,7 @@ //! call; that documentation has the authoritative explanations of exactly what //! all the counters mean. //! -//! There are two main types for measurement: +//! There are three main types for measurement: //! //! - A [`Counter`] is an individual counter. Use [`Builder`] to //! construct one. @@ -44,6 +44,9 @@ //! 
disabled atomically, so that they cover exactly the same period of //! execution, allowing meaningful comparisons of the individual values. //! +//! - A [`SampleStream`] is a stream of information from the kernel containing instantaneous +//! information and events about what is being profiled. +//! //! ### Call for PRs //! //! Linux's `perf_event_open` API can report all sorts of things this crate @@ -61,6 +64,7 @@ //! inclusion, so be forewarned.) //! //! [`Counter`]: struct.Counter.html +//! [`SampleStream`]: struct.SampleStream.html //! [`Builder`]: struct.Builder.html //! [`Group`]: struct.Group.html //! [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html @@ -68,15 +72,18 @@ #![deny(missing_docs)] use events::Event; -use libc::pid_t; +use libc::{mmap, munmap, pid_t, poll, pollfd, MAP_SHARED, POLLIN, PROT_READ, PROT_WRITE}; use perf_event_open_sys as sys; +use sample::{PerfRecord, PerfSampleType, PerfSampleTypeSet}; +use std::convert::TryInto; use std::fs::File; use std::io::{self, Read}; -use std::os::raw::{c_int, c_uint, c_ulong}; +use std::os::raw::{c_int, c_uint, c_ulong, c_void}; use std::os::unix::io::{AsRawFd, FromRawFd}; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; pub mod events; +pub mod sample; /// A counter for one kind of kernel or hardware event. /// @@ -185,6 +192,8 @@ pub struct Builder<'a> { cpu: Option, kind: Event, group: Option<&'a Group>, + sample_type_set: PerfSampleTypeSet, + sample_frequency: u64, } #[derive(Debug)] @@ -197,6 +206,9 @@ enum EventPid<'a> { /// Monitor members of the given cgroup. CGroup(&'a File), + + /// Monitor all other processes. + All, } /// A group of counters that can be managed as a unit.
@@ -328,6 +340,7 @@ impl<'a> EventPid<'a> { EventPid::ThisProcess => (0, 0), EventPid::Other(pid) => (*pid, 0), EventPid::CGroup(file) => (file.as_raw_fd(), sys::bindings::PERF_FLAG_PID_CGROUP), + EventPid::All => (-1, 0), } } } @@ -339,6 +352,8 @@ impl<'a> Default for Builder<'a> { cpu: None, kind: Event::Hardware(events::Hardware::INSTRUCTIONS), group: None, + sample_type_set: Default::default(), + sample_frequency: 0, } } } @@ -364,6 +379,13 @@ impl<'a> Builder<'a> { self } + /// Observe all processes on the machine. When observing all processes, it is not allowed to + /// also observe any cpu. + pub fn observe_all(mut self) -> Builder<'a> { + self.who = EventPid::All; + self + } + /// Observe code running in the given [cgroup][man-cgroups] (container). The /// `cgroup` argument should be a `File` referring to the cgroup's directory /// in the cgroupfs filesystem. @@ -421,6 +443,83 @@ impl<'a> Builder<'a> { self } + /// When sampling, include the current instruction pointer. + pub fn sample_ip(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::IP); + self + } + + /// When sampling, include the current process id / thread id. + pub fn sample_tid(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::TID); + self + } + + /// When sampling, include a timestamp in the sample. + pub fn sample_time(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::TIME); + self + } + + /// When sampling, include the address of the relevant tracepoint, breakpoint or software + /// event. + pub fn sample_address(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::ADDR); + self + } + + /// When sampling, include the current callchain. + pub fn sample_callchain(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::CALLCHAIN); + self + } + + /// When sampling, include a unique id. If part of a group, this will instead be the group + /// leader ID. 
+ pub fn sample_id(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::ID); + self + } + + /// When sampling, include a value representing the current CPU. + pub fn sample_cpu(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::CPU); + self + } + + /// When sampling, include in the sample the current sampling period. + pub fn sample_period(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::PERIOD); + self + } + + /// When sampling, include a unique ID. This is different from `sample_id` in that it is never + /// the group leader ID. + pub fn sample_stream_id(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::STREAM_ID); + self + } + + /// When sampling, include the raw sample. + pub fn sample_raw(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::RAW); + self + } + + /// When sampling, include a weight value that indicates how costly the event was. This allows + /// expensive events to stand out more clearly in profiles. + pub fn sample_weight(mut self) -> Builder<'a> { + self.sample_type_set.add(PerfSampleType::WEIGHT); + self + } + + /// Set the frequency to sample at in Hertz. If this frequency is too high, the kernel may + /// reject it. + pub fn sample_frequency(mut self, sample_frequency: u64) -> Builder<'a> { + self.sample_frequency = sample_frequency; + self + } + /// Place the counter in the given [`Group`]. Groups allow a set of counters /// to be enabled, disabled, or read as a single atomic operation, so that /// the counts can be usefully compared. @@ -431,19 +530,7 @@ impl<'a> Builder<'a> { self } - /// Construct a [`Counter`] according to the specifications made on this - /// `Builder`. - /// - /// A freshly built `Counter` is disabled. To begin counting events, you - /// must call [`enable`] on the `Counter` or the `Group` to which it belongs.
- /// - /// Unfortunately, problems in counter configuration are detected at this - /// point, by the kernel, not earlier when the offending request is made on - /// the `Builder`. The kernel's returned errors are not always helpful. - /// - /// [`Counter`]: struct.Counter.html - /// [`enable`]: struct.Counter.html#method.enable - pub fn counter(self) -> std::io::Result { + fn build(self, sample: bool) -> std::io::Result<(sys::bindings::perf_event_attr, File)> { let cpu = match self.cpu { Some(cpu) => cpu as c_int, None => -1, @@ -465,12 +552,41 @@ impl<'a> Builder<'a> { attrs.set_exclude_kernel(1); attrs.set_exclude_hv(1); + if sample { + attrs.set_freq(1); + attrs.set_precise_ip(3); + + attrs.set_watermark(1); + attrs.__bindgen_anon_2.wakeup_watermark = 1; + + attrs.__bindgen_anon_1.sample_freq = self.sample_frequency; + attrs.sample_type = self.sample_type_set.0; + } + let file = unsafe { File::from_raw_fd(check_syscall(|| { sys::perf_event_open(&mut attrs, pid, cpu, group_fd, flags as c_ulong) })?) }; + Ok((attrs, file)) + } + + /// Construct a [`Counter`] according to the specifications made on this + /// `Builder`. + /// + /// A freshly built `Counter` is disabled. To begin counting events, you + /// must call [`enable`] on the `Counter` or the `Group` to which it belongs. + /// + /// Unfortunately, problems in counter configuration are detected at this + /// point, by the kernel, not earlier when the offending request is made on + /// the `Builder`. The kernel's returned errors are not always helpful. + /// + /// [`Counter`]: struct.Counter.html + /// [`enable`]: struct.Counter.html#method.enable + pub fn counter(self) -> std::io::Result { + let (_, file) = self.build(false)?; + // If we're going to be part of a Group, retrieve the ID the kernel // assigned us, so we can find our results in a Counts structure. Even // if we're not part of a group, we'll use it in `Debug` output. 
@@ -479,6 +595,18 @@ impl<'a> Builder<'a> { Ok(Counter { file, id }) } + + /// Construct a [`SampleStream`]. + /// + /// A freshly built `SampleStream` is disabled. To being reading records from the read, you + /// must call [`enable`] on the `SampleStream` or the `Group` to which it belongs. + /// + /// [`SampleStream`]: struct.SampleStream.html + /// [`enable`]: struct.SampleStream.html#method.enable + pub fn sample_stream(self) -> std::io::Result { + let (attrs, file) = self.build(true)?; + SampleStream::new(attrs, file) + } } impl Counter { @@ -806,3 +934,241 @@ fn simple_build() { .counter() .expect("Couldn't build default Counter"); } + +// Use a pretty big buffer because we don't want to drop any entries +const SAMPLE_BUFFER_SIZE: usize = 528384; + +fn wait_for_readable_or_timeout(file: &File, timeout: Option) -> bool { + let mut pollfd = pollfd { + fd: file.as_raw_fd(), + events: POLLIN, + revents: 0, + }; + let timeout = timeout.map(|d| d.as_millis() as c_int).unwrap_or(-1); + let events = unsafe { poll(&mut pollfd, 1, timeout) }; + events == 0 +} + +// We create our own version of this rather than use bindgen's so we can have the atomics. 
+#[repr(C)] +struct PerfEventMmapPage { + /// version number of this structure + version: u32, + + /// lowest version this is compat with + compat_version: u32, + + /// seqlock for synchronization + lock: u32, + + /// hardware counter identifier + index: u32, + + /// add to hardware counter value + offset: i64, + + /// time event active + time_enabled: u64, + + /// time event on CPU + time_running: u64, + + capabilites: u64, + pmc_width: u16, + time_shift: u16, + time_mult: u32, + time_offset: u64, + __reserved: [u64; 120], /* Pad to 1 k */ + + /// head in the data section + data_head: AtomicU64, + + /// user-space written tail + data_tail: AtomicU64, + + /// where the buffer starts + data_offset: u64, + + /// data buffer size + data_size: u64, + + aux_head: u64, + aux_tail: u64, + aux_offset: u64, + aux_size: u64, +} + +/// A stream of samples being sent to us from the kernel. These samples represent instantaneous +/// states or events concerning the process(es) being profiled. +/// +/// Internally the samples are queued up in a ring-buffer. The kernel writes samples into the +/// buffer, and [`SampleStream.read`] deques them. If the buffer it full, the kernel will overwrite +/// old samples effectively dropping them. +pub struct SampleStream { + file: File, + mapped_memory: *mut c_void, + attrs: sys::bindings::perf_event_attr, +} + +unsafe impl Send for SampleStream {} +unsafe impl Sync for SampleStream {} + +impl SampleStream { + fn new(attrs: sys::bindings::perf_event_attr, file: File) -> std::io::Result { + let mapped_memory = check_syscall(|| unsafe { + mmap( + std::ptr::null_mut(), + SAMPLE_BUFFER_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED, + file.as_raw_fd(), + 0, + ) as isize + })? as *mut c_void; + Ok(Self { + file, + mapped_memory, + attrs, + }) + } + + /// Begin sampling. If read is called before the stream is enabled, it will block until it is. 
+ pub fn enable(&self) -> io::Result<()> { + check_syscall(|| unsafe { sys::ioctls::ENABLE(self.file.as_raw_fd(), 0) }).map(|_| ()) + } + + // If a thread is asleep in read, calling this function does not wake it up. As such the only + // utility to this function is to tell the kernel to stop sending us events when we destroy the + // stream. + fn disable(&self) -> io::Result<()> { + check_syscall(|| unsafe { sys::ioctls::DISABLE(self.file.as_raw_fd(), 0) }).map(|_| ()) + } + + /// Pop a sample from the buffer. If the buffer is empty, blocks waiting for there to be one + /// to return. If a timeout is given, it only blocks for up to the given timeout. When the + /// timeout is reached, None is returned. + pub fn read(&self, timeout: Option) -> io::Result> { + // XXX There is definitely a way to implement this function that has less copies and no + // heap allocations. If we made some circular reader type we could decode records directly + // from that. It just makes things a bit trickier. + + // wait for there to be data in the buffer, or the timeout. + if wait_for_readable_or_timeout(&self.file, timeout) { + return Ok(None); + } + + // The kernel gives us records in a ring buffer. As the kernel adds records to the head, we + // are consuming from the tail. If the buffer is full, the kernel drops records. + let header: *mut PerfEventMmapPage = unsafe { std::mem::transmute(self.mapped_memory) }; + let header = unsafe { &mut *header }; + + let tail = header.data_tail.load(Ordering::Relaxed); + let head = header.data_head.load(Ordering::Relaxed); + + // If we waited for the file to become readable and didn't time out, there should be + // something. + assert!(head != tail, "Unexpectedly no data in buffer"); + + // The actual data part of the collection comes some amount after the header. The header + // says exactly where. 
+ let data_slice = unsafe { + std::slice::from_raw_parts( + self.mapped_memory.offset(header.data_offset as isize) as *mut u8, + header.data_size as usize, + ) + }; + + // The tail of the ring-buffer is always increasing. To get the actual offset we need to + // look tail modulo the size of the buffer. + let header_index = (tail as usize) % data_slice.len(); + + // Since this is a ring-buffer, whatever we are reading can possibly go off the end of the + // buffer and loop back around to the front. We are forced to piece it together. + fn read_circular(data: &[u8], index: usize, length: usize) -> Vec { + let first_part = std::cmp::min(length, data.len() - index); + let mut record = data[index..(index + first_part)].to_vec(); + let second_part = length - first_part; + record.extend_from_slice(&data[..second_part]); + record + } + + // Each record has a header telling us the size and type. + const HEADER_SIZE: usize = std::mem::size_of::(); + let record_header_data: Box<[u8; HEADER_SIZE]> = + read_circular(data_slice, header_index, HEADER_SIZE) + .into_boxed_slice() + .try_into() + .unwrap(); + let record_header: Box = + unsafe { std::mem::transmute(record_header_data) }; + + // Decode the record + let record_index = (header_index + HEADER_SIZE) % data_slice.len(); + let record_body = read_circular(data_slice, record_index, record_header.size as usize); + let record = + PerfRecord::decode(&self.attrs, record_header.type_, &*record_body).map_err(|_| { + std::io::Error::new(std::io::ErrorKind::Other, "failed to decode event") + })?; + + // Update the tail of the buffer to let the kernel know we have consumed this record. 
+ header + .data_tail + .store(tail + record_header.size as u64, Ordering::Relaxed); + + Ok(Some(record)) + } +} + +impl Drop for SampleStream { + fn drop(&mut self) { + // Only error we reasonably expect is EINVAL + self.disable().unwrap(); + check_syscall(|| unsafe { munmap(self.mapped_memory, SAMPLE_BUFFER_SIZE) }).unwrap(); + } +} + +#[test] +fn sample_stream() -> std::io::Result<()> { + use std::sync::atomic::AtomicBool; + + let sample_stream = Builder::new() + .kind(events::Hardware::CPU_CYCLES) + // This frequency isn't guaranteed to work. + .sample_frequency(4000) + .sample_ip() + .sample_tid() + .sample_time() + .sample_cpu() + .sample_period() + .sample_callchain() + .sample_stream()?; + + sample_stream.enable()?; + + static DONE: AtomicBool = AtomicBool::new(false); + + let current_pid = unsafe { libc::getpid() }; + + // Sample on a different thread and create samples on the main thread until we get at least + // ten. + std::thread::spawn(move || { + for _ in 0..10 { + if let Some(PerfRecord::Sample(sample)) = sample_stream.read(None).unwrap() { + // We should only get samples for the pid we asked for. + assert_eq!(sample.pid.unwrap(), current_pid); + + // XXX its hard to verify other stuff about the sample since we can't predict what + // the value are. + } else { + panic!(); + } + } + DONE.store(true, Ordering::Relaxed); + }); + + while !DONE.load(Ordering::Relaxed) { + std::thread::sleep(std::time::Duration::from_millis(1)); + } + + Ok(()) +} diff --git a/src/sample.rs b/src/sample.rs new file mode 100644 index 0000000..df73b47 --- /dev/null +++ b/src/sample.rs @@ -0,0 +1,436 @@ +//! Types relating to sampling. +//! +//! When sampling, it is possible to receive instantaneous data or events concerning the +//! process(es) being profiled. 
+ +#![allow(non_camel_case_types)] + +use byte::{BytesExt, Result}; +use libc::pid_t; +use perf_event_open_sys as sys; +use std::os::raw::c_void; + +/// Controls the various fields that are provided in [`PerfRecordSample`] when sampling. +/// +/// Corresponds to the `sample_type` field `perf_event_attr` in the [`perf_event_open`][man] man +/// page. +/// +/// [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html +/// +/// Not all possible values of this enum are currently included +#[derive(Copy, Clone)] +#[repr(u64)] +pub enum PerfSampleType { + /// Fill out the `ip` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + IP = sys::bindings::perf_event_sample_format_PERF_SAMPLE_IP, + + /// Fill out the `pid` / `tid` fields of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + TID = sys::bindings::perf_event_sample_format_PERF_SAMPLE_TID, + + /// Fill out the `timestamp` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + TIME = sys::bindings::perf_event_sample_format_PERF_SAMPLE_TIME, + + /// Fill out the `addr` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + ADDR = sys::bindings::perf_event_sample_format_PERF_SAMPLE_ADDR, + + /// Fill out the `callchain` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + CALLCHAIN = sys::bindings::perf_event_sample_format_PERF_SAMPLE_CALLCHAIN, + + /// Fill out the `id` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + ID = sys::bindings::perf_event_sample_format_PERF_SAMPLE_ID, + + /// Fill out the `cpu` field of [`PerfRecordSample`] when sampling. 
+ /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + CPU = sys::bindings::perf_event_sample_format_PERF_SAMPLE_CPU, + + /// Fill out the `period` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + PERIOD = sys::bindings::perf_event_sample_format_PERF_SAMPLE_PERIOD, + + /// Fill out the `stream_id` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + STREAM_ID = sys::bindings::perf_event_sample_format_PERF_SAMPLE_STREAM_ID, + + /// Fill out the `raw_sample` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + RAW = sys::bindings::perf_event_sample_format_PERF_SAMPLE_RAW, + + /// Fill out the `weight` field of [`PerfRecordSample`] when sampling. + /// + /// [`PerfRecordSample`]: struct.PerfRecordSample.html + WEIGHT = sys::bindings::perf_event_sample_format_PERF_SAMPLE_WEIGHT, +} + +/// A set of PerfSampleType that is implemented using bit math. +#[derive(Default)] +pub struct PerfSampleTypeSet(pub sys::bindings::perf_event_sample_format); + +impl PerfSampleTypeSet { + /// Add the given PerfSampleType to the set + pub fn add(&mut self, sample_type: PerfSampleType) { + self.0 |= sample_type as sys::bindings::perf_event_sample_format; + } + + /// Returns true if the set contains the given PerfSampleType + pub fn contains(&self, sample_type: PerfSampleType) -> bool { + self.0 & sample_type as sys::bindings::perf_event_sample_format != 0 + } +} + +/// This record indicates a throttle / unthrottle event. +#[derive(Debug)] +pub struct PerfRecordThrottle { + /// Timestamp of when the record was created + pub time: u64, + /// A unique ID. If the event is a member of an event group, the group leader ID is + /// returned. + pub id: u64, + /// A unique ID. Unlike the above field, this is always the actual ID and never the group + /// leader ID. 
+ pub stream_id: u64, +} + +impl PerfRecordThrottle { + fn decode(_attrs: &sys::bindings::perf_event_attr, data: &[u8]) -> Result { + let mut offset = 0; + Ok(Self { + time: data.read(&mut offset)?, + id: data.read(&mut offset)?, + stream_id: data.read(&mut offset)?, + }) + } +} + +#[test] +fn decode_perf_record_throttle() { + let attrs = sys::bindings::perf_event_attr::default(); + let record = PerfRecordThrottle::decode( + &attrs, + &[ + 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, + ], + ) + .unwrap(); + assert_eq!(record.time, 1); + assert_eq!(record.id, 2); + assert_eq!(record.stream_id, 3); +} + +/// This record is a sample of the current state of some process. +/// +/// Corresponds to the anonymous struct under `PERF_RECORD_SAMPLE` in [`perf_event_open`][man] man +/// page. +/// +/// [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html +#[derive(Debug, Default, PartialEq, Eq)] +pub struct PerfRecordSample { + /// Instruction pointer. Included if [`PerfSampleType::IP`] is enabled. + pub ip: Option<*const c_void>, + + /// Process id. Included if [`PerfSampleType::TID`] is enabled. + pub pid: Option, + + /// Thread id. Included if [`PerfSampleType::TID`] is enabled. + pub tid: Option, + + /// Timestamp of when the sample was taken. Obtained via local_clock() which is a hardware + /// timestamp if available and the jiffies value if not. + pub time: Option, + + /// Usually the address of a tracepoint, breakpoint, or software event; otherwise the value is + /// 0. Included if [`PerfSampleType::ADDR`] is enabled. + pub addr: Option, + + /// A unique ID. If the event is a member of an event group, the group leader ID is + /// returned. Included if [`PerfSampleType::ID`] is enabled. + pub id: Option, + + /// A unique ID. Unlike the above field, this is always the actual ID and never the group + /// leader ID. Included if [`PerfSampleType::STREAM_ID`] is enabled. 
+ pub stream_id: Option, + + /// Value indicating which CPU was being used. Included if [`PerfSampleType::CPU`] is enabled. + pub cpu: Option, + + /// Value indicating the current sampling period. Included if [`PerfSampleType::PERIOD`] is + /// enabled. + pub period: Option, + + // XXX placeholder; read format stuff not supported yet. + _v: (), + + /// The current callchain. Included if [`PerfSampleType::CALLCHAIN`] is enabled. + pub callchain: Option>, + + /// This contains the raw record data. Included if [`PerfSampleType::RAW`] is enabled. + /// + /// This raw record data is opaque with respect to the ABI. The ABI doesn't make any promises + /// with respect to the stability of its content, it may vary depending on the event, hardware, + /// and kernel versions. + pub raw_sample: Option>, + + // XXX placeholder; branch stack stuff not supported yet. + _lbr: (), + + // XXX placeholder; user register stuff not supported yet. + _user_regs: (), + + // XXX placeholder; user stack stuff not supported yet. + _user_stack: (), + + /// Value provided by the hardware that indicates how costly the event was. Included if + /// [`PerfSampleType::WEIGHT`] is enabled. + /// + /// This allows expensive events to stand out more clearly in profiles. + pub weight: Option, + + // XXX placeholder; data_src stuff not supported yet. + _data_src: (), + + // XXX placeholder; transaction stuff not supported yet. + _transaction: (), + + // XXX placeholder; cpu register stuff not supported yet. + _cpu_regs: (), +} + +impl PerfRecordSample { + fn decode(attrs: &sys::bindings::perf_event_attr, data: &[u8]) -> Result { + let sample_type = PerfSampleTypeSet(attrs.sample_type); + let mut offset = 0; + + let ip = if sample_type.contains(PerfSampleType::IP) { + Some(data.read::(&mut offset)? 
as *const _) + } else { + None + }; + + let (pid, tid) = if sample_type.contains(PerfSampleType::TID) { + (Some(data.read(&mut offset)?), Some(data.read(&mut offset)?)) + } else { + (None, None) + }; + + let time = if sample_type.contains(PerfSampleType::TIME) { + Some(data.read(&mut offset)?) + } else { + None + }; + + let addr = if sample_type.contains(PerfSampleType::ADDR) { + Some(data.read(&mut offset)?) + } else { + None + }; + + let id = if sample_type.contains(PerfSampleType::ID) { + Some(data.read(&mut offset)?) + } else { + None + }; + + let stream_id = if sample_type.contains(PerfSampleType::STREAM_ID) { + Some(data.read(&mut offset)?) + } else { + None + }; + + let cpu = if sample_type.contains(PerfSampleType::CPU) { + let value = data.read(&mut offset)?; + let _res: u32 = data.read(&mut offset)?; + Some(value) + } else { + None + }; + + let period = if sample_type.contains(PerfSampleType::PERIOD) { + Some(data.read(&mut offset)?) + } else { + None + }; + + let callchain = if sample_type.contains(PerfSampleType::CALLCHAIN) { + let len: u64 = data.read(&mut offset)?; + let mut callchain = vec![]; + for _ in 0..len { + callchain.push(data.read::(&mut offset)? as *const _); + } + Some(callchain) + } else { + None + }; + + let raw_sample = if sample_type.contains(PerfSampleType::RAW) { + let len: u32 = data.read(&mut offset)?; + let mut sample = vec![]; + for _ in 0..len { + sample.push(data.read(&mut offset)?); + } + Some(sample) + } else { + None + }; + + let weight = if sample_type.contains(PerfSampleType::WEIGHT) { + Some(data.read(&mut offset)?) 
+ } else { + None + }; + + Ok(Self { + ip, + pid, + tid, + time, + addr, + id, + stream_id, + cpu, + period, + callchain, + raw_sample, + weight, + ..Default::default() + }) + } +} + +#[test] +fn decode_perf_record_sample_empty() { + let attrs = sys::bindings::perf_event_attr::default(); + let record = PerfRecordSample::decode(&attrs, &[]).unwrap(); + assert_eq!(record, Default::default()); +} + +#[cfg(test)] +fn make_test_record(sample_type: PerfSampleType, data: &[u8]) -> PerfRecordSample { + let mut sample_type_set = PerfSampleTypeSet::default(); + sample_type_set.add(sample_type); + let mut attrs = sys::bindings::perf_event_attr::default(); + attrs.sample_type = sample_type_set.0; + PerfRecordSample::decode(&attrs, data).unwrap() +} + +#[test] +fn decode_perf_record_sample_ip() { + let record = make_test_record(PerfSampleType::IP, &[5, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(record.ip, Some(5 as *const _)); +} + +#[test] +fn decode_perf_record_sample_pid_tid() { + let record = make_test_record(PerfSampleType::TID, &[5, 0, 0, 0, 6, 0, 0, 0]); + assert_eq!(record.pid, Some(5)); + assert_eq!(record.tid, Some(6)); +} + +#[test] +fn decode_perf_record_sample_time() { + let record = make_test_record(PerfSampleType::TIME, &[8, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(record.time, Some(8)); +} + +#[test] +fn decode_perf_record_sample_addr() { + let record = make_test_record(PerfSampleType::ADDR, &[9, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(record.addr, Some(9)); +} + +#[test] +fn decode_perf_record_sample_id() { + let record = make_test_record(PerfSampleType::ID, &[10, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(record.id, Some(10)); +} + +#[test] +fn decode_perf_record_sample_stream_id() { + let record = make_test_record(PerfSampleType::STREAM_ID, &[11, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(record.stream_id, Some(11)); +} + +#[test] +fn decode_perf_record_sample_cpu() { + let record = make_test_record(PerfSampleType::CPU, &[12, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(record.cpu, Some(12)); +} + 
+#[test] +fn decode_perf_record_sample_period() { + let record = make_test_record(PerfSampleType::PERIOD, &[13, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(record.period, Some(13)); +} + +#[test] +fn decode_perf_record_sample_call_chain() { + let record = make_test_record( + PerfSampleType::CALLCHAIN, + &[ + 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, + ], + ); + assert_eq!(record.callchain, Some(vec![1 as *const _, 2 as *const _])); +} + +#[test] +fn decode_perf_record_sample_raw_sample() { + let record = make_test_record(PerfSampleType::RAW, &[4, 0, 0, 0, 3, 4, 5, 6]); + assert_eq!(record.raw_sample, Some(vec![3, 4, 5, 6])); +} + +#[test] +fn decode_perf_record_sample_weight() { + let record = make_test_record(PerfSampleType::WEIGHT, &[5, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(record.weight, Some(5)); +} + +/// This is a single sample representing the instantaneous state or an event concerning the process +/// being profiled. +#[derive(Debug)] +pub enum PerfRecord { + /// Throttle record + Throttle(PerfRecordThrottle), + + /// Unthrottle record + Unthrottle(PerfRecordThrottle), + + /// Sample record + Sample(PerfRecordSample), +} + +impl PerfRecord { + /// Decode the `PerfRecord` from the information given to us from the kernel. The given + /// `perf_event_attr` is the one passed to `perf_event_open`. The given type is that from + /// `perf_event_header`, and the data is the record payload. + /// + /// Returns the decoded record or an error if the record was malformed for some reason. + pub fn decode(attrs: &sys::bindings::perf_event_attr, type_: u32, data: &[u8]) -> Result { + Ok(match type_ { + sys::bindings::perf_event_type_PERF_RECORD_SAMPLE => { + PerfRecord::Sample(PerfRecordSample::decode(attrs, data)?) + } + sys::bindings::perf_event_type_PERF_RECORD_THROTTLE => { + PerfRecord::Throttle(PerfRecordThrottle::decode(attrs, data)?) 
+ } + sys::bindings::perf_event_type_PERF_RECORD_UNTHROTTLE => { + PerfRecord::Unthrottle(PerfRecordThrottle::decode(attrs, data)?) + } + t => panic!("Unknown perf_event_type {}", t), + }) + } +} From e73689f3a927843ce98c4caa7f81fa826717b478 Mon Sep 17 00:00:00 2001 From: Remi Bernotavicius Date: Sun, 19 Apr 2020 20:29:41 -0700 Subject: [PATCH 4/7] Fix / update comments based on feedback --- src/lib.rs | 10 ++++------ src/sample.rs | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4c7534b..3bfd48d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -598,7 +598,7 @@ impl<'a> Builder<'a> { /// Construct a [`SampleStream`]. /// - /// A freshly built `SampleStream` is disabled. To being reading records from the read, you + /// A freshly built `SampleStream` is disabled. To begin reading records from the stream, you /// must call [`enable`] on the `SampleStream` or the `Group` to which it belongs. /// /// [`SampleStream`]: struct.SampleStream.html @@ -1002,8 +1002,8 @@ struct PerfEventMmapPage { /// states or events concerning the process(es) being profiled. /// /// Internally the samples are queued up in a ring-buffer. The kernel writes samples into the -/// buffer, and [`SampleStream.read`] deques them. If the buffer it full, the kernel will overwrite -/// old samples effectively dropping them. +/// buffer, and [`SampleStream.read`] dequeues them. If the buffer is full, the kernel will +/// overwrite old samples effectively dropping them. pub struct SampleStream { file: File, mapped_memory: *mut c_void, @@ -1156,9 +1156,6 @@ fn sample_stream() -> std::io::Result<()> { if let Some(PerfRecord::Sample(sample)) = sample_stream.read(None).unwrap() { // We should only get samples for the pid we asked for. assert_eq!(sample.pid.unwrap(), current_pid); - - // XXX its hard to verify other stuff about the sample since we can't predict what - // the value are. 
} else { panic!(); } @@ -1166,6 +1163,7 @@ fn sample_stream() -> std::io::Result<()> { DONE.store(true, Ordering::Relaxed); }); + // This busy-wait loop creates activity for the sampling thread to observe. while !DONE.load(Ordering::Relaxed) { std::thread::sleep(std::time::Duration::from_millis(1)); } diff --git a/src/sample.rs b/src/sample.rs index df73b47..5b2c179 100644 --- a/src/sample.rs +++ b/src/sample.rs @@ -17,7 +17,7 @@ use std::os::raw::c_void; /// /// [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html /// -/// Not all possible values of this enum are currently included +/// Not all possible values of this enum are currently included. #[derive(Copy, Clone)] #[repr(u64)] pub enum PerfSampleType { From 19b25325f98bd1d47f15a9eee9f1b7b1bd2efdb1 Mon Sep 17 00:00:00 2001 From: Remi Bernotavicius Date: Sun, 19 Apr 2020 20:36:31 -0700 Subject: [PATCH 5/7] Apply feedback to sample stream test --- src/lib.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3bfd48d..70493e6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1129,7 +1129,7 @@ impl Drop for SampleStream { #[test] fn sample_stream() -> std::io::Result<()> { - use std::sync::atomic::AtomicBool; + use std::sync::{atomic::AtomicBool, Arc}; let sample_stream = Builder::new() .kind(events::Hardware::CPU_CYCLES) @@ -1145,13 +1145,14 @@ fn sample_stream() -> std::io::Result<()> { sample_stream.enable()?; - static DONE: AtomicBool = AtomicBool::new(false); + let done = Arc::new(AtomicBool::new(false)); let current_pid = unsafe { libc::getpid() }; // Sample on a different thread and create samples on the main thread until we get at least // ten. - std::thread::spawn(move || { + let other_done = done.clone(); + let handle = std::thread::spawn(move || { for _ in 0..10 { if let Some(PerfRecord::Sample(sample)) = sample_stream.read(None).unwrap() { // We should only get samples for the pid we asked for. 
@@ -1160,13 +1161,15 @@ fn sample_stream() -> std::io::Result<()> { panic!(); } } - DONE.store(true, Ordering::Relaxed); + other_done.store(true, Ordering::Relaxed); }); // This busy-wait loop creates activity for the sampling thread to observe. - while !DONE.load(Ordering::Relaxed) { + while !done.load(Ordering::Relaxed) { std::thread::sleep(std::time::Duration::from_millis(1)); } + handle.join().unwrap(); + Ok(()) } From c999610033b1befa90ec6e06cddac88faf91c9c9 Mon Sep 17 00:00:00 2001 From: Remi Bernotavicius Date: Sun, 19 Apr 2020 20:45:40 -0700 Subject: [PATCH 6/7] Apply feedback to stream implementation --- src/lib.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 70493e6..61fdfe3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -936,7 +936,12 @@ fn simple_build() { } // Use a pretty big buffer because we don't want to drop any entries -const SAMPLE_BUFFER_SIZE: usize = 528384; +const SAMPLE_BUFFER_PAGES: usize = 128 + 1; + +/// Size of a page of memory in bytes. +fn page_size() -> usize { + (unsafe { libc::sysconf(libc::_SC_PAGESIZE) }) as usize +} fn wait_for_readable_or_timeout(file: &File, timeout: Option) -> bool { let mut pollfd = pollfd { @@ -1018,7 +1023,7 @@ impl SampleStream { let mapped_memory = check_syscall(|| unsafe { mmap( std::ptr::null_mut(), - SAMPLE_BUFFER_SIZE, + SAMPLE_BUFFER_PAGES * page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, file.as_raw_fd(), @@ -1059,7 +1064,7 @@ impl SampleStream { // The kernel gives us records in a ring buffer. As the kernel adds records to the head, we // are consuming from the tail. If the buffer is full, the kernel drops records. 
- let header: *mut PerfEventMmapPage = unsafe { std::mem::transmute(self.mapped_memory) }; + let header = self.mapped_memory as *mut PerfEventMmapPage; let header = unsafe { &mut *header }; let tail = header.data_tail.load(Ordering::Relaxed); @@ -1123,7 +1128,8 @@ impl Drop for SampleStream { fn drop(&mut self) { // Only error we reasonably expect is EINVAL self.disable().unwrap(); - check_syscall(|| unsafe { munmap(self.mapped_memory, SAMPLE_BUFFER_SIZE) }).unwrap(); + check_syscall(|| unsafe { munmap(self.mapped_memory, SAMPLE_BUFFER_PAGES * page_size()) }) + .unwrap(); } } From ca95bef910013f33ccc6bfc9a35195a81c19507b Mon Sep 17 00:00:00 2001 From: Remi Bernotavicius Date: Sun, 19 Apr 2020 20:54:26 -0700 Subject: [PATCH 7/7] Remove sample helpers on Builder in favor of one sample function --- examples/sample.rs | 14 +++++------ src/lib.rs | 62 +++++++--------------------------------------- 2 files changed, 16 insertions(+), 60 deletions(-) diff --git a/examples/sample.rs b/examples/sample.rs index a1eeb5d..0afc28d 100644 --- a/examples/sample.rs +++ b/examples/sample.rs @@ -1,4 +1,4 @@ -use perf_event::{events, Builder}; +use perf_event::{events, sample::PerfSampleType, Builder}; use std::time::{Duration, Instant}; fn main() -> std::io::Result<()> { @@ -12,13 +12,13 @@ fn main() -> std::io::Result<()> { .kind(events::Hardware::CPU_CYCLES) .one_cpu(cpu) .observe_all() - .sample_callchain() + .sample(PerfSampleType::CALLCHAIN) .sample_frequency(4000) - .sample_ip() - .sample_tid() - .sample_time() - .sample_cpu() - .sample_period() + .sample(PerfSampleType::IP) + .sample(PerfSampleType::TID) + .sample(PerfSampleType::TIME) + .sample(PerfSampleType::CPU) + .sample(PerfSampleType::PERIOD) .sample_stream()?; sample_stream.enable()?; diff --git a/src/lib.rs b/src/lib.rs index 61fdfe3..26f433f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -443,53 +443,9 @@ impl<'a> Builder<'a> { self } - /// When sampling, include the current instruction pointer. 
- pub fn sample_ip(mut self) -> Builder<'a> { - self.sample_type_set.add(PerfSampleType::IP); - self - } - - /// When sampling, include the current process id / thread id. - pub fn sample_tid(mut self) -> Builder<'a> { - self.sample_type_set.add(PerfSampleType::TID); - self - } - - /// When sampling, include a timestamp in the sample. - pub fn sample_time(mut self) -> Builder<'a> { - self.sample_type_set.add(PerfSampleType::TIME); - self - } - - /// When sampling, include the address of the relevant tracepoint, breakpoint or software - /// event. - pub fn sample_address(mut self) -> Builder<'a> { - self.sample_type_set.add(PerfSampleType::ADDR); - self - } - - /// When sampling, include the current callchain. - pub fn sample_callchain(mut self) -> Builder<'a> { - self.sample_type_set.add(PerfSampleType::CALLCHAIN); - self - } - - /// When sampling, include a unique id. If part of a group, this will instead be the group - /// leader ID. - pub fn sample_id(mut self) -> Builder<'a> { - self.sample_type_set.add(PerfSampleType::ID); - self - } - - /// When sampling, include a value representing the current CPU. - pub fn sample_cpu(mut self) -> Builder<'a> { - self.sample_type_set.add(PerfSampleType::CPU); - self - } - - /// When sampling, include in the sample the current sampling period. - pub fn sample_period(mut self) -> Builder<'a> { - self.sample_type_set.add(PerfSampleType::PERIOD); + /// When sampling, include the given type. + pub fn sample(mut self, type_: PerfSampleType) -> Builder<'a> { + self.sample_type_set.add(type_); self } @@ -1141,12 +1097,12 @@ fn sample_stream() -> std::io::Result<()> { .kind(events::Hardware::CPU_CYCLES) // This frequency isn't guaranteed to work. 
.sample_frequency(4000) - .sample_ip() - .sample_tid() - .sample_time() - .sample_cpu() - .sample_period() - .sample_callchain() + .sample(PerfSampleType::IP) + .sample(PerfSampleType::TID) + .sample(PerfSampleType::TIME) + .sample(PerfSampleType::CPU) + .sample(PerfSampleType::PERIOD) + .sample(PerfSampleType::CALLCHAIN) .sample_stream()?; sample_stream.enable()?;