memlat: Optimize perf event reads when possible
We can skip the locking and other overhead of perf_event_read_value()
when we know in advance that the perf event in question can be read from
the current CPU. This occurs when either the perf event permits reads
from CPUs other than the one it's on, or when the CPU doing the read is
the same CPU that owns the perf event.
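
For a sense of the decision being made, here is a minimal sketch (read_event_count() is an assumed name, not code from this commit): perf_event_read_value() may take locks and IPI the CPU the event lives on, while perf_event_read_local() reads the counter in place.

static u64 read_event_count(struct perf_event *ev, bool any_cpu_readable)
{
	u64 total, enabled, running;

	/* The real code disables IRQs so the current CPU cannot change. */
	if (any_cpu_readable || ev->oncpu == raw_smp_processor_id()) {
		/* Lockless and IPI-free: the counter is readable right here. */
		if (!perf_event_read_local(ev, &total, NULL, NULL))
			return total;
	}

	/* Slow path: may lock and IPI the CPU that owns the event. */
	return perf_event_read_value(ev, &enabled, &running);
}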

Our PMU drivers only set two possible values for `readable_on_cpus`:
CPU_MASK_ALL or nothing. As such, we can simply check for CPU_MASK_ALL
up front to determine whether the perf event allows non-local reads.
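
For context, a PMU driver that wants its events readable from anywhere would mark them along these lines (a hypothetical snippet: my_pmu_event_init() is an invented name, and readable_on_cpus is a field specific to this kernel tree, not upstream perf):

static int my_pmu_event_init(struct perf_event *event)
{
	/* Advertise that any CPU may read this event's counter. */
	cpumask_setall(&event->readable_on_cpus);
	return 0;
}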

We can also narrow the scope of under_scm_call(): since we now know
which CPU we're reading a perf event from, the check can be made
per-CPU, which reduces its false positive rate.
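
Distilled from the scm.c hunks below, the per-CPU tracking reduces to this pattern (a sketch; do_smc() stands in for the real SMC trampoline):

static DEFINE_PER_CPU(atomic_t, scm_call_count);

static int scm_call(u64 x0, u64 x1)
{
	/* Only the calling CPU's counter is held across the call. */
	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
	int ret;

	atomic_inc(cnt);
	ret = do_smc(x0, x1); /* stand-in for the SMC trampoline */
	atomic_dec(cnt);

	return ret;
}

/* A slow SCM call on one CPU no longer flags every other CPU as busy. */
bool under_scm_call(int cpu)
{
	return atomic_read(per_cpu_ptr(&scm_call_count, cpu));
}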

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
kerneltoast authored and utziacre committed Jul 5, 2024
1 parent d012032 commit 50608db
Showing 3 changed files with 56 additions and 18 deletions.
43 changes: 35 additions & 8 deletions drivers/devfreq/arm-memlat-mon.c
@@ -48,6 +48,7 @@ struct event_data {
struct perf_event *pevent;
unsigned long prev_count;
unsigned long last_delta;
bool any_cpu_readable;
};

struct cpu_data {
@@ -165,7 +166,37 @@ static inline void read_event(struct event_data *event)
if (!event->pevent)
return;

total = perf_event_read_value(event->pevent, &enabled, &running);
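/* Fast path: the event can be read locklessly from any CPU. */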
if (event->any_cpu_readable) {
if (perf_event_read_local(event->pevent, &total, NULL, NULL))
return;
} else {
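/*
 * pevent->oncpu is -1 when the event isn't currently scheduled in;
 * read as an unsigned int, that fails the bounds check below.
 */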
unsigned int ev_cpu = READ_ONCE(event->pevent->oncpu);
bool local_read;
int ret;

if (ev_cpu >= nr_cpu_ids)
return;

local_irq_disable();
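/* With IRQs off, the current CPU cannot change during the check and read. */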
if ((local_read = (ev_cpu == raw_smp_processor_id())))
ret = perf_event_read_local(event->pevent, &total, NULL, NULL);
local_irq_enable();

if (!local_read) {
/*
* Some SCM calls take very long (20+ ms), so the perf
* event IPI could lag on the CPU running the SCM call.
*/
if (under_scm_call(ev_cpu))
return;

total = perf_event_read_value(event->pevent, &enabled,
&running);
} else if (ret) {
return;
}
}

ev_count = total - event->prev_count;
event->prev_count = total;
event->last_delta = ev_count;
@@ -226,13 +257,6 @@ static unsigned long get_cnt(struct memlat_hwmon *hw)
struct memlat_cpu_grp *cpu_grp = mon->cpu_grp;
unsigned int cpu;

/*
* Some of SCM call is very heavy(+20ms) so perf IPI could
* be stuck on the CPU which contributes long latency.
*/
if (under_scm_call())
return 0;

for_each_cpu(cpu, &mon->cpus) {
struct cpu_data *cpu_data = to_cpu_data(cpu_grp, cpu);
struct event_data *common_evs = cpu_data->common_evs;
@@ -291,6 +315,7 @@ static struct perf_event_attr *alloc_attr(void)
static int set_event(struct event_data *ev, int cpu, unsigned int event_id,
struct perf_event_attr *attr)
{
static struct cpumask all_cpu_mask = CPU_MASK_ALL;
struct perf_event *pevent;

if (!event_id)
@@ -303,6 +328,8 @@ static int set_event(struct event_data *ev, int cpu, unsigned int event_id,

ev->pevent = pevent;
perf_event_enable(pevent);
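/*
 * Cache whether this event can be read from any CPU; the PMU drivers
 * in this tree only ever set readable_on_cpus to CPU_MASK_ALL or
 * leave it empty.
 */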
ev->any_cpu_readable =
cpumask_equal(&pevent->readable_on_cpus, &all_cpu_mask);

return 0;
}
21 changes: 12 additions & 9 deletions drivers/soc/qcom/scm.c
@@ -28,7 +28,7 @@
#define SCM_INTERRUPTED 1
#define SCM_V2_EBUSY -12

static atomic_t scm_call_count = ATOMIC_INIT(0);
static DEFINE_PER_CPU(atomic_t, scm_call_count);
static DEFINE_MUTEX(scm_lock);

/*
@@ -147,11 +147,12 @@ static int ___scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
static int __scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;

atomic_inc(&scm_call_count);
atomic_inc(cnt);
ret = ___scm_call_armv8_64(x0, x1, x2, x3, x4, x5, ret1, ret2, ret3);
atomic_dec(&scm_call_count);
atomic_dec(cnt);

return ret;
}
@@ -209,11 +210,12 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;

atomic_inc(&scm_call_count);
atomic_inc(cnt);
ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
atomic_dec(&scm_call_count);
atomic_dec(cnt);

return ret;
}
@@ -271,11 +273,12 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;

atomic_inc(&scm_call_count);
atomic_inc(cnt);
ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
atomic_dec(&scm_call_count);
atomic_dec(cnt);

return ret;
}
@@ -773,7 +776,7 @@ early_initcall(scm_mem_protection_init);

#endif

bool under_scm_call(void)
bool under_scm_call(int cpu)
{
return atomic_read(&scm_call_count);
return atomic_read(per_cpu_ptr(&scm_call_count, cpu));
}
10 changes: 9 additions & 1 deletion include/soc/qcom/scm.h
@@ -108,7 +108,7 @@ extern int scm_get_feat_version(u32 feat);
extern bool is_scm_armv8(void);

extern struct mutex scm_lmh_lock;
extern bool under_scm_call(void);
extern bool under_scm_call(int cpu);

#else

@@ -167,7 +167,15 @@ static inline bool scm_is_secure_device(void)
return false;
}

static inline int scm_enable_mem_protection(void)
{
return 0;
}

extern bool under_scm_call(int cpu)
{
return false;
}
