Skip to content

Commit

Permalink
perf: Add PERF_RECORD_NAMESPACES to include namespaces related info
Browse files Browse the repository at this point in the history
With the advert of container technologies like docker, that depend on
namespaces for isolation, there is a need for tracing support for
namespaces. This patch introduces new PERF_RECORD_NAMESPACES event for
recording namespaces related info. By recording info for every
namespace, it is left to userspace to take a call on the definition of a
container and trace containers by updating perf tool accordingly.

Each namespace has a combination of device and inode numbers. Though
every namespace has the same device number currently, that may change in
future to avoid the need for a namespace of namespaces. Considering such
possibility, record both device and inode numbers separately for each
namespace.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Sargun Dhillon <sargun@sargun.me>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/148891929686.25309.2827618988917007768.stgit@hbathini.in.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
Hari Bathini authored and acmel committed Mar 13, 2017
1 parent 3ef5b40 commit e422267
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 1 deletion.
2 changes: 2 additions & 0 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks

extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);

/* Callchains */
Expand Down Expand Up @@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
Expand Down
32 changes: 31 additions & 1 deletion include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;

union {
__u32 wakeup_events; /* wakeup every n events */
Expand Down Expand Up @@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};

struct perf_ns_link_info {
__u64 dev;
__u64 ino;
};

enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,

NR_NAMESPACES, /* number of available namespaces */
};

enum perf_event_type {

/*
Expand Down Expand Up @@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,

/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,

PERF_RECORD_MAX, /* non-ABI */
};

Expand Down
139 changes: 139 additions & 0 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>

#include "internal.h"

Expand Down Expand Up @@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);

static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
Expand Down Expand Up @@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
Expand Down Expand Up @@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
void perf_event_fork(struct task_struct *task)
{
perf_event_task(task, NULL, 1);
perf_event_namespaces(task);
}

/*
Expand Down Expand Up @@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
perf_event_comm_event(&comm_event);
}

/*
* namespaces tracking
*/

struct perf_namespaces_event {
struct task_struct *task;

struct {
struct perf_event_header header;

u32 pid;
u32 tid;
u64 nr_namespaces;
struct perf_ns_link_info link_info[NR_NAMESPACES];
} event_id;
};

static int perf_event_namespaces_match(struct perf_event *event)
{
return event->attr.namespaces;
}

static void perf_event_namespaces_output(struct perf_event *event,
void *data)
{
struct perf_namespaces_event *namespaces_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;

if (!perf_event_namespaces_match(event))
return;

perf_event_header__init_id(&namespaces_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
namespaces_event->event_id.header.size);
if (ret)
return;

namespaces_event->event_id.pid = perf_event_pid(event,
namespaces_event->task);
namespaces_event->event_id.tid = perf_event_tid(event,
namespaces_event->task);

perf_output_put(&handle, namespaces_event->event_id);

perf_event__output_id_sample(event, &handle, &sample);

perf_output_end(&handle);
}

static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
struct path ns_path;
struct inode *ns_inode;
void *error;

error = ns_get_path(&ns_path, task, ns_ops);
if (!error) {
ns_inode = ns_path.dentry->d_inode;
ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
ns_link_info->ino = ns_inode->i_ino;
}
}

void perf_event_namespaces(struct task_struct *task)
{
struct perf_namespaces_event namespaces_event;
struct perf_ns_link_info *ns_link_info;

if (!atomic_read(&nr_namespaces_events))
return;

namespaces_event = (struct perf_namespaces_event){
.task = task,
.event_id = {
.header = {
.type = PERF_RECORD_NAMESPACES,
.misc = 0,
.size = sizeof(namespaces_event.event_id),
},
/* .pid */
/* .tid */
.nr_namespaces = NR_NAMESPACES,
/* .link_info[NR_NAMESPACES] */
},
};

ns_link_info = namespaces_event.event_id.link_info;

perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
task, &mntns_operations);

#ifdef CONFIG_USER_NS
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
task, &userns_operations);
#endif
#ifdef CONFIG_NET_NS
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
task, &netns_operations);
#endif
#ifdef CONFIG_UTS_NS
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
task, &utsns_operations);
#endif
#ifdef CONFIG_IPC_NS
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
task, &ipcns_operations);
#endif
#ifdef CONFIG_PID_NS
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
task, &pidns_operations);
#endif
#ifdef CONFIG_CGROUPS
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
task, &cgroupns_operations);
#endif

perf_iterate_sb(perf_event_namespaces_output,
&namespaces_event,
NULL);
}

/*
* mmap tracking
*/
Expand Down Expand Up @@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
Expand Down Expand Up @@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EACCES;
}

if (attr.namespaces) {
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
}

if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
Expand Down
2 changes: 2 additions & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
}

perf_event_namespaces(current);

bad_unshare_cleanup_cred:
if (new_cred)
put_cred(new_cred);
Expand Down
3 changes: 3 additions & 0 deletions kernel/nsproxy.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>

static struct kmem_cache *nsproxy_cachep;

Expand Down Expand Up @@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
switch_task_namespaces(tsk, new_nsproxy);

perf_event_namespaces(tsk);
out:
fput(file);
return err;
Expand Down

0 comments on commit e422267

Please sign in to comment.