diff --git a/drivers/devfreq/arm-memlat-mon.c b/drivers/devfreq/arm-memlat-mon.c
index 6ba234f5917a..47b42f21f497 100644
--- a/drivers/devfreq/arm-memlat-mon.c
+++ b/drivers/devfreq/arm-memlat-mon.c
@@ -48,6 +48,7 @@ struct event_data {
 	struct perf_event *pevent;
 	unsigned long prev_count;
 	unsigned long last_delta;
+	bool any_cpu_readable;
 };
 
 struct cpu_data {
@@ -165,7 +166,37 @@ static inline void read_event(struct event_data *event)
 	if (!event->pevent)
 		return;
 
-	total = perf_event_read_value(event->pevent, &enabled, &running);
+	if (event->any_cpu_readable) {
+		if (perf_event_read_local(event->pevent, &total, NULL, NULL))
+			return;
+	} else {
+		unsigned int ev_cpu = READ_ONCE(event->pevent->oncpu);
+		bool local_read;
+		int ret;
+
+		if (ev_cpu >= nr_cpu_ids)
+			return;
+
+		local_irq_disable();
+		if ((local_read = (ev_cpu == raw_smp_processor_id())))
+			ret = perf_event_read_local(event->pevent, &total, NULL, NULL);
+		local_irq_enable();
+
+		if (!local_read) {
+			/*
+			 * Some SCM calls take very long (20+ ms), so the perf
+			 * event IPI could lag on the CPU running the SCM call.
+			 */
+			if (under_scm_call(ev_cpu))
+				return;
+
+			total = perf_event_read_value(event->pevent, &enabled,
+						      &running);
+		} else if (ret) {
+			return;
+		}
+	}
+
 	ev_count = total - event->prev_count;
 	event->prev_count = total;
 	event->last_delta = ev_count;
@@ -226,13 +257,6 @@ static unsigned long get_cnt(struct memlat_hwmon *hw)
 	struct memlat_cpu_grp *cpu_grp = mon->cpu_grp;
 	unsigned int cpu;
 
-	/*
-	 * Some of SCM call is very heavy(+20ms) so perf IPI could
-	 * be stuck on the CPU which contributes long latency.
-	 */
-	if (under_scm_call())
-		return 0;
-
 	for_each_cpu(cpu, &mon->cpus) {
 		struct cpu_data *cpu_data = to_cpu_data(cpu_grp, cpu);
 		struct event_data *common_evs = cpu_data->common_evs;
@@ -291,6 +315,7 @@ static struct perf_event_attr *alloc_attr(void)
 static int set_event(struct event_data *ev, int cpu, unsigned int event_id,
 		     struct perf_event_attr *attr)
 {
+	static struct cpumask all_cpu_mask = CPU_MASK_ALL;
 	struct perf_event *pevent;
 
 	if (!event_id)
@@ -303,6 +328,8 @@ static int set_event(struct event_data *ev, int cpu, unsigned int event_id,
 
 	ev->pevent = pevent;
 	perf_event_enable(pevent);
+	ev->any_cpu_readable =
+		cpumask_equal(&pevent->readable_on_cpus, &all_cpu_mask);
 
 	return 0;
 }
diff --git a/drivers/soc/qcom/scm.c b/drivers/soc/qcom/scm.c
index 6f9221939b9b..bf418e81d27b 100644
--- a/drivers/soc/qcom/scm.c
+++ b/drivers/soc/qcom/scm.c
@@ -28,7 +28,7 @@
 #define SCM_INTERRUPTED		1
 #define SCM_V2_EBUSY		-12
 
-static atomic_t scm_call_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(atomic_t, scm_call_count);
 static DEFINE_MUTEX(scm_lock);
 
 /*
@@ -147,11 +147,12 @@ static int ___scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
 static int __scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
 				u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_64(x0, x1, x2, x3, x4, x5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -209,11 +210,12 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 				u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -271,11 +273,12 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 				u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -773,7 +776,7 @@ early_initcall(scm_mem_protection_init);
 #endif
 
-bool under_scm_call(void)
+bool under_scm_call(int cpu)
 {
-	return atomic_read(&scm_call_count);
+	return atomic_read(per_cpu_ptr(&scm_call_count, cpu));
 }
diff --git a/include/soc/qcom/scm.h b/include/soc/qcom/scm.h
index ae4ec932d07f..f67c4e3d391c 100644
--- a/include/soc/qcom/scm.h
+++ b/include/soc/qcom/scm.h
@@ -108,7 +108,7 @@ extern int scm_get_feat_version(u32 feat);
 extern bool is_scm_armv8(void);
 extern struct mutex scm_lmh_lock;
 
-extern bool under_scm_call(void);
+extern bool under_scm_call(int cpu);
 
 #else
 
@@ -167,7 +167,11 @@ static inline bool scm_is_secure_device(void)
 	return false;
 }
 
-extern bool under_scm_call(void)
+static inline int scm_enable_mem_protection(void)
+{
+	return 0;
+}
+extern bool under_scm_call(int cpu)
 {
 	return false;
 }