From 4ae4e26474ea957d2d16acae0445ac4aab0d644b Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 7 May 2024 10:18:22 +1000 Subject: [PATCH 1/4] spl-generic: bring up kstats subsystem before taskq For spl-taskq to use the kstats infrastructure, it has to be available first. Sponsored-by: Klara, Inc. Sponsored-by: Syneto Signed-off-by: Rob Norris --- module/os/linux/spl/spl-generic.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 986db1518456..6ee0236d289a 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -868,16 +868,16 @@ spl_init(void) if ((rc = spl_tsd_init())) goto out2; - if ((rc = spl_taskq_init())) + if ((rc = spl_proc_init())) goto out3; - if ((rc = spl_kmem_cache_init())) + if ((rc = spl_kstat_init())) goto out4; - if ((rc = spl_proc_init())) + if ((rc = spl_taskq_init())) goto out5; - if ((rc = spl_kstat_init())) + if ((rc = spl_kmem_cache_init())) goto out6; if ((rc = spl_zlib_init())) @@ -891,13 +891,13 @@ spl_init(void) out8: spl_zlib_fini(); out7: - spl_kstat_fini(); + spl_kmem_cache_fini(); out6: - spl_proc_fini(); + spl_taskq_fini(); out5: - spl_kmem_cache_fini(); + spl_kstat_fini(); out4: - spl_taskq_fini(); + spl_proc_fini(); out3: spl_tsd_fini(); out2: @@ -913,10 +913,10 @@ spl_fini(void) { spl_zone_fini(); spl_zlib_fini(); - spl_kstat_fini(); - spl_proc_fini(); spl_kmem_cache_fini(); spl_taskq_fini(); + spl_kstat_fini(); + spl_proc_fini(); spl_tsd_fini(); spl_kvmem_fini(); spl_random_fini(); From 5b8378b59dc76aab70545e501899ea8537318546 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Fri, 3 May 2024 14:42:51 +1000 Subject: [PATCH 2/4] spl-taskq: per-taskq kstats This exposes a variety of per-taskq stats under /proc/spl/kstat/taskq, one file per taskq, named for the taskq name.instance. 
These include a small amount of info about the taskq config, the current state of the threads and queues, and various counters for thread and queue activity since the taskq was created. To assist with decrementing queue size counters, the list an entry is on is encoded in spare bits in the entry flags. Sponsored-by: Klara, Inc. Sponsored-by: Syneto Signed-off-by: Rob Norris --- include/os/linux/spl/sys/taskq.h | 42 +++++ module/os/linux/spl/spl-taskq.c | 314 +++++++++++++++++++++++++++++-- 2 files changed, 342 insertions(+), 14 deletions(-) diff --git a/include/os/linux/spl/sys/taskq.h b/include/os/linux/spl/sys/taskq.h index b73dab631e04..8051de36ba82 100644 --- a/include/os/linux/spl/sys/taskq.h +++ b/include/os/linux/spl/sys/taskq.h @@ -20,6 +20,10 @@ * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . */ +/* + * Copyright (c) 2024, Klara Inc. + * Copyright (c) 2024, Syneto + */ #ifndef _SPL_TASKQ_H #define _SPL_TASKQ_H @@ -33,6 +37,9 @@ #include #include #include +#include + +typedef struct kstat_s kstat_t; #define TASKQ_NAMELEN 31 @@ -74,6 +81,32 @@ typedef enum tq_lock_role { typedef unsigned long taskqid_t; typedef void (task_func_t)(void *); +typedef struct taskq_sums { + /* gauges (inc/dec counters, current value) */ + wmsum_t tqs_threads_active; /* threads running a task */ + wmsum_t tqs_threads_idle; /* threads waiting for work */ + wmsum_t tqs_threads_total; /* total threads */ + wmsum_t tqs_tasks_pending; /* tasks waiting to execute */ + wmsum_t tqs_tasks_priority; /* hi-pri tasks waiting */ + wmsum_t tqs_tasks_total; /* total waiting tasks */ + wmsum_t tqs_tasks_delayed; /* tasks deferred to future */ + wmsum_t tqs_entries_free; /* task entries on free list */ + + /* counters (inc only, since taskq creation) */ + wmsum_t tqs_threads_created; /* threads created */ + wmsum_t tqs_threads_destroyed; /* threads destroyed */ + wmsum_t tqs_tasks_dispatched; /* tasks dispatched */ + wmsum_t 
tqs_tasks_dispatched_delayed; /* tasks delayed to future */ + wmsum_t tqs_tasks_executed_normal; /* normal pri tasks executed */ + wmsum_t tqs_tasks_executed_priority; /* high pri tasks executed */ + wmsum_t tqs_tasks_executed; /* total tasks executed */ + wmsum_t tqs_tasks_delayed_requeued; /* delayed tasks requeued */ + wmsum_t tqs_tasks_cancelled; /* tasks cancelled before run */ + wmsum_t tqs_thread_wakeups; /* total thread wakeups */ + wmsum_t tqs_thread_wakeups_nowork; /* thread woken but no tasks */ + wmsum_t tqs_thread_sleeps; /* total thread sleeps */ +} taskq_sums_t; + typedef struct taskq { spinlock_t tq_lock; /* protects taskq_t */ char *tq_name; /* taskq name */ @@ -105,6 +138,8 @@ typedef struct taskq { struct hlist_node tq_hp_cb_node; boolean_t tq_hp_support; unsigned long lastspawnstop; /* when to purge dynamic */ + taskq_sums_t tq_sums; + kstat_t *tq_ksp; } taskq_t; typedef struct taskq_ent { @@ -123,6 +158,13 @@ typedef struct taskq_ent { #define TQENT_FLAG_PREALLOC 0x1 #define TQENT_FLAG_CANCEL 0x2 +/* bits 2-3 are which list tqent is on */ +#define TQENT_LIST_NONE 0x0 +#define TQENT_LIST_PENDING 0x4 +#define TQENT_LIST_PRIORITY 0x8 +#define TQENT_LIST_DELAY 0xc +#define TQENT_LIST_MASK 0xc + typedef struct taskq_thread { struct list_head tqt_thread_list; struct list_head tqt_active_list; diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index e7b812c3b5b5..61012bfb36d3 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -22,16 +22,98 @@ * * Solaris Porting Layer (SPL) Task Queue Implementation. */ +/* + * Copyright (c) 2024, Klara Inc. 
+ * Copyright (c) 2024, Syneto + */ #include #include #include #include #include +#include +#include +#include #ifdef HAVE_CPU_HOTPLUG #include #endif +typedef struct taskq_kstats { + /* static values, for completeness */ + kstat_named_t tqks_threads_max; + kstat_named_t tqks_entry_pool_min; + kstat_named_t tqks_entry_pool_max; + + /* gauges (inc/dec counters, current value) */ + kstat_named_t tqks_threads_active; + kstat_named_t tqks_threads_idle; + kstat_named_t tqks_threads_total; + kstat_named_t tqks_tasks_pending; + kstat_named_t tqks_tasks_priority; + kstat_named_t tqks_tasks_total; + kstat_named_t tqks_tasks_delayed; + kstat_named_t tqks_entries_free; + + /* counters (inc only, since taskq creation) */ + kstat_named_t tqks_threads_created; + kstat_named_t tqks_threads_destroyed; + kstat_named_t tqks_tasks_dispatched; + kstat_named_t tqks_tasks_dispatched_delayed; + kstat_named_t tqks_tasks_executed_normal; + kstat_named_t tqks_tasks_executed_priority; + kstat_named_t tqks_tasks_executed; + kstat_named_t tqks_tasks_delayed_requeued; + kstat_named_t tqks_tasks_cancelled; + kstat_named_t tqks_thread_wakeups; + kstat_named_t tqks_thread_wakeups_nowork; + kstat_named_t tqks_thread_sleeps; +} taskq_kstats_t; + +static taskq_kstats_t taskq_kstats_template = { + { "threads_max", KSTAT_DATA_UINT64 }, + { "entry_pool_min", KSTAT_DATA_UINT64 }, + { "entry_pool_max", KSTAT_DATA_UINT64 }, + { "threads_active", KSTAT_DATA_UINT64 }, + { "threads_idle", KSTAT_DATA_UINT64 }, + { "threads_total", KSTAT_DATA_UINT64 }, + { "tasks_pending", KSTAT_DATA_UINT64 }, + { "tasks_priority", KSTAT_DATA_UINT64 }, + { "tasks_total", KSTAT_DATA_UINT64 }, + { "tasks_delayed", KSTAT_DATA_UINT64 }, + { "entries_free", KSTAT_DATA_UINT64 }, + + { "threads_created", KSTAT_DATA_UINT64 }, + { "threads_destroyed", KSTAT_DATA_UINT64 }, + { "tasks_dispatched", KSTAT_DATA_UINT64 }, + { "tasks_dispatched_delayed", KSTAT_DATA_UINT64 }, + { "tasks_executed_normal", KSTAT_DATA_UINT64 }, + { 
"tasks_executed_priority", KSTAT_DATA_UINT64 }, + { "tasks_executed", KSTAT_DATA_UINT64 }, + { "tasks_delayed_requeued", KSTAT_DATA_UINT64 }, + { "tasks_cancelled", KSTAT_DATA_UINT64 }, + { "thread_wakeups", KSTAT_DATA_UINT64 }, + { "thread_wakeups_nowork", KSTAT_DATA_UINT64 }, + { "thread_sleeps", KSTAT_DATA_UINT64 }, +}; + +#define TQSTAT_INC(tq, stat) wmsum_add(&tq->tq_sums.tqs_##stat, 1) +#define TQSTAT_DEC(tq, stat) wmsum_add(&tq->tq_sums.tqs_##stat, -1) + +#define _TQSTAT_MOD_LIST(mod, tq, t) do { \ + switch (t->tqent_flags & TQENT_LIST_MASK) { \ + case TQENT_LIST_NONE: ASSERT(list_empty(&t->tqent_list)); break;\ + case TQENT_LIST_PENDING: mod(tq, tasks_pending); break; \ + case TQENT_LIST_PRIORITY: mod(tq, tasks_priority); break; \ + case TQENT_LIST_DELAY: mod(tq, tasks_delayed); break; \ + } \ +} while (0) +#define TQSTAT_INC_LIST(tq, t) _TQSTAT_MOD_LIST(TQSTAT_INC, tq, t) +#define TQSTAT_DEC_LIST(tq, t) _TQSTAT_MOD_LIST(TQSTAT_DEC, tq, t) + +#define TQENT_SET_LIST(t, l) \ + t->tqent_flags = (t->tqent_flags & ~TQENT_LIST_MASK) | l; + static int spl_taskq_thread_bind = 0; module_param(spl_taskq_thread_bind, int, 0644); MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default"); @@ -134,6 +216,7 @@ task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags) ASSERT(!timer_pending(&t->tqent_timer)); list_del_init(&t->tqent_list); + TQSTAT_DEC(tq, entries_free); return (t); } @@ -204,12 +287,11 @@ task_done(taskq_t *tq, taskq_ent_t *t) { ASSERT(tq); ASSERT(t); + ASSERT(list_empty(&t->tqent_list)); /* Wake tasks blocked in taskq_wait_id() */ wake_up_all(&t->tqent_waitq); - list_del_init(&t->tqent_list); - if (tq->tq_nalloc <= tq->tq_minalloc) { t->tqent_id = TASKQID_INVALID; t->tqent_func = NULL; @@ -217,6 +299,7 @@ task_done(taskq_t *tq, taskq_ent_t *t) t->tqent_flags = 0; list_add_tail(&t->tqent_list, &tq->tq_free_list); + TQSTAT_INC(tq, entries_free); } else { task_free(tq, t); } @@ -263,6 +346,8 @@ task_expire_impl(taskq_ent_t *t) 
spin_unlock_irqrestore(&tq->tq_lock, flags); wake_up(&tq->tq_work_waitq); + + TQSTAT_INC(tq, tasks_delayed_requeued); } static void @@ -534,7 +619,10 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id) t = taskq_find(tq, id); if (t && t != ERR_PTR(-EBUSY)) { list_del_init(&t->tqent_list); + TQSTAT_DEC_LIST(tq, t); + t->tqent_flags |= TQENT_FLAG_CANCEL; + TQSTAT_INC(tq, tasks_cancelled); /* * When canceling the lowest outstanding task id we @@ -604,13 +692,19 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) spin_lock(&t->tqent_lock); /* Queue to the front of the list to enforce TQ_NOQUEUE semantics */ - if (flags & TQ_NOQUEUE) + if (flags & TQ_NOQUEUE) { + TQENT_SET_LIST(t, TQENT_LIST_PRIORITY); list_add(&t->tqent_list, &tq->tq_prio_list); /* Queue to the priority list instead of the pending list */ - else if (flags & TQ_FRONT) + } else if (flags & TQ_FRONT) { + TQENT_SET_LIST(t, TQENT_LIST_PRIORITY); list_add_tail(&t->tqent_list, &tq->tq_prio_list); - else + } else { + TQENT_SET_LIST(t, TQENT_LIST_PENDING); list_add_tail(&t->tqent_list, &tq->tq_pend_list); + } + TQSTAT_INC_LIST(tq, t); + TQSTAT_INC(tq, tasks_total); t->tqent_id = rc = tq->tq_next_id; tq->tq_next_id++; @@ -629,6 +723,8 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) wake_up(&tq->tq_work_waitq); + TQSTAT_INC(tq, tasks_dispatched); + /* Spawn additional taskq threads if required. 
*/ if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); @@ -662,6 +758,8 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, /* Queue to the delay list for subsequent execution */ list_add_tail(&t->tqent_list, &tq->tq_delay_list); + TQENT_SET_LIST(t, TQENT_LIST_DELAY); + TQSTAT_INC_LIST(tq, t); t->tqent_id = rc = tq->tq_next_id; tq->tq_next_id++; @@ -676,6 +774,8 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, spin_unlock(&t->tqent_lock); + TQSTAT_INC(tq, tasks_dispatched_delayed); + /* Spawn additional taskq threads if required. */ if (tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); @@ -724,10 +824,15 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, t->tqent_flags |= TQENT_FLAG_PREALLOC; /* Queue to the priority list instead of the pending list */ - if (flags & TQ_FRONT) + if (flags & TQ_FRONT) { + TQENT_SET_LIST(t, TQENT_LIST_PRIORITY); list_add_tail(&t->tqent_list, &tq->tq_prio_list); - else + } else { + TQENT_SET_LIST(t, TQENT_LIST_PENDING); list_add_tail(&t->tqent_list, &tq->tq_pend_list); + } + TQSTAT_INC_LIST(tq, t); + TQSTAT_INC(tq, tasks_total); t->tqent_id = tq->tq_next_id; tq->tq_next_id++; @@ -742,6 +847,8 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, wake_up(&tq->tq_work_waitq); + TQSTAT_INC(tq, tasks_dispatched); + /* Spawn additional taskq threads if required. 
*/ if (tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); @@ -908,6 +1015,8 @@ taskq_thread(void *args) wake_up(&tq->tq_wait_waitq); set_current_state(TASK_INTERRUPTIBLE); + TQSTAT_INC(tq, threads_total); + while (!kthread_should_stop()) { if (list_empty(&tq->tq_pend_list) && @@ -919,9 +1028,15 @@ taskq_thread(void *args) add_wait_queue_exclusive(&tq->tq_work_waitq, &wait); spin_unlock_irqrestore(&tq->tq_lock, flags); + TQSTAT_INC(tq, thread_sleeps); + TQSTAT_INC(tq, threads_idle); + schedule(); seq_tasks = 0; + TQSTAT_DEC(tq, threads_idle); + TQSTAT_INC(tq, thread_wakeups); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); remove_wait_queue(&tq->tq_work_waitq, &wait); @@ -931,6 +1046,8 @@ taskq_thread(void *args) if ((t = taskq_next_ent(tq)) != NULL) { list_del_init(&t->tqent_list); + TQSTAT_DEC_LIST(tq, t); + TQSTAT_DEC(tq, tasks_total); /* * A TQENT_FLAG_PREALLOC task may be reused or freed @@ -955,6 +1072,7 @@ taskq_thread(void *args) tq->tq_nactive++; spin_unlock_irqrestore(&tq->tq_lock, flags); + TQSTAT_INC(tq, threads_active); DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t); /* Perform the requested task */ @@ -962,8 +1080,17 @@ taskq_thread(void *args) DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t); + TQSTAT_DEC(tq, threads_active); + if ((t->tqent_flags & TQENT_LIST_MASK) == + TQENT_LIST_PENDING) + TQSTAT_INC(tq, tasks_executed_normal); + else + TQSTAT_INC(tq, tasks_executed_priority); + TQSTAT_INC(tq, tasks_executed); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); + tq->tq_nactive--; list_del_init(&tqt->tqt_active_list); tqt->tqt_task = NULL; @@ -989,7 +1116,8 @@ taskq_thread(void *args) tqt->tqt_id = TASKQID_INVALID; tqt->tqt_flags = 0; wake_up_all(&tq->tq_wait_waitq); - } + } else + TQSTAT_INC(tq, thread_wakeups_nowork); set_current_state(TASK_INTERRUPTIBLE); @@ -998,6 +1126,10 @@ taskq_thread(void *args) __set_current_state(TASK_RUNNING); tq->tq_nthreads--; 
list_del_init(&tqt->tqt_thread_list); + + TQSTAT_DEC(tq, threads_total); + TQSTAT_INC(tq, threads_destroyed); + error: kmem_free(tqt, sizeof (taskq_thread_t)); spin_unlock_irqrestore(&tq->tq_lock, flags); @@ -1037,9 +1169,156 @@ taskq_thread_create(taskq_t *tq) wake_up_process(tqt->tqt_thread); + TQSTAT_INC(tq, threads_created); + return (tqt); } +static void +taskq_stats_init(taskq_t *tq) +{ + taskq_sums_t *tqs = &tq->tq_sums; + wmsum_init(&tqs->tqs_threads_active, 0); + wmsum_init(&tqs->tqs_threads_idle, 0); + wmsum_init(&tqs->tqs_threads_total, 0); + wmsum_init(&tqs->tqs_tasks_pending, 0); + wmsum_init(&tqs->tqs_tasks_priority, 0); + wmsum_init(&tqs->tqs_tasks_total, 0); + wmsum_init(&tqs->tqs_tasks_delayed, 0); + wmsum_init(&tqs->tqs_entries_free, 0); + wmsum_init(&tqs->tqs_threads_created, 0); + wmsum_init(&tqs->tqs_threads_destroyed, 0); + wmsum_init(&tqs->tqs_tasks_dispatched, 0); + wmsum_init(&tqs->tqs_tasks_dispatched_delayed, 0); + wmsum_init(&tqs->tqs_tasks_executed_normal, 0); + wmsum_init(&tqs->tqs_tasks_executed_priority, 0); + wmsum_init(&tqs->tqs_tasks_executed, 0); + wmsum_init(&tqs->tqs_tasks_delayed_requeued, 0); + wmsum_init(&tqs->tqs_tasks_cancelled, 0); + wmsum_init(&tqs->tqs_thread_wakeups, 0); + wmsum_init(&tqs->tqs_thread_wakeups_nowork, 0); + wmsum_init(&tqs->tqs_thread_sleeps, 0); +} + +static void +taskq_stats_fini(taskq_t *tq) +{ + taskq_sums_t *tqs = &tq->tq_sums; + wmsum_fini(&tqs->tqs_threads_active); + wmsum_fini(&tqs->tqs_threads_idle); + wmsum_fini(&tqs->tqs_threads_total); + wmsum_fini(&tqs->tqs_tasks_pending); + wmsum_fini(&tqs->tqs_tasks_priority); + wmsum_fini(&tqs->tqs_tasks_total); + wmsum_fini(&tqs->tqs_tasks_delayed); + wmsum_fini(&tqs->tqs_entries_free); + wmsum_fini(&tqs->tqs_threads_created); + wmsum_fini(&tqs->tqs_threads_destroyed); + wmsum_fini(&tqs->tqs_tasks_dispatched); + wmsum_fini(&tqs->tqs_tasks_dispatched_delayed); + wmsum_fini(&tqs->tqs_tasks_executed_normal); + wmsum_fini(&tqs->tqs_tasks_executed_priority); 
+ wmsum_fini(&tqs->tqs_tasks_executed); + wmsum_fini(&tqs->tqs_tasks_delayed_requeued); + wmsum_fini(&tqs->tqs_tasks_cancelled); + wmsum_fini(&tqs->tqs_thread_wakeups); + wmsum_fini(&tqs->tqs_thread_wakeups_nowork); + wmsum_fini(&tqs->tqs_thread_sleeps); +} + +static int +taskq_kstats_update(kstat_t *ksp, int rw) +{ + if (rw == KSTAT_WRITE) + return (EACCES); + + taskq_t *tq = ksp->ks_private; + taskq_kstats_t *tqks = ksp->ks_data; + + tqks->tqks_threads_max.value.ui64 = tq->tq_maxthreads; + tqks->tqks_entry_pool_min.value.ui64 = tq->tq_minalloc; + tqks->tqks_entry_pool_max.value.ui64 = tq->tq_maxalloc; + + taskq_sums_t *tqs = &tq->tq_sums; + + tqks->tqks_threads_active.value.ui64 = + wmsum_value(&tqs->tqs_threads_active); + tqks->tqks_threads_idle.value.ui64 = + wmsum_value(&tqs->tqs_threads_idle); + tqks->tqks_threads_total.value.ui64 = + wmsum_value(&tqs->tqs_threads_total); + tqks->tqks_tasks_pending.value.ui64 = + wmsum_value(&tqs->tqs_tasks_pending); + tqks->tqks_tasks_priority.value.ui64 = + wmsum_value(&tqs->tqs_tasks_priority); + tqks->tqks_tasks_total.value.ui64 = + wmsum_value(&tqs->tqs_tasks_total); + tqks->tqks_tasks_delayed.value.ui64 = + wmsum_value(&tqs->tqs_tasks_delayed); + tqks->tqks_entries_free.value.ui64 = + wmsum_value(&tqs->tqs_entries_free); + tqks->tqks_threads_created.value.ui64 = + wmsum_value(&tqs->tqs_threads_created); + tqks->tqks_threads_destroyed.value.ui64 = + wmsum_value(&tqs->tqs_threads_destroyed); + tqks->tqks_tasks_dispatched.value.ui64 = + wmsum_value(&tqs->tqs_tasks_dispatched); + tqks->tqks_tasks_dispatched_delayed.value.ui64 = + wmsum_value(&tqs->tqs_tasks_dispatched_delayed); + tqks->tqks_tasks_executed_normal.value.ui64 = + wmsum_value(&tqs->tqs_tasks_executed_normal); + tqks->tqks_tasks_executed_priority.value.ui64 = + wmsum_value(&tqs->tqs_tasks_executed_priority); + tqks->tqks_tasks_executed.value.ui64 = + wmsum_value(&tqs->tqs_tasks_executed); + tqks->tqks_tasks_delayed_requeued.value.ui64 = + 
wmsum_value(&tqs->tqs_tasks_delayed_requeued); + tqks->tqks_tasks_cancelled.value.ui64 = + wmsum_value(&tqs->tqs_tasks_cancelled); + tqks->tqks_thread_wakeups.value.ui64 = + wmsum_value(&tqs->tqs_thread_wakeups); + tqks->tqks_thread_wakeups_nowork.value.ui64 = + wmsum_value(&tqs->tqs_thread_wakeups_nowork); + tqks->tqks_thread_sleeps.value.ui64 = + wmsum_value(&tqs->tqs_thread_sleeps); + + return (0); +} + +static void +taskq_kstats_init(taskq_t *tq) +{ + char name[TASKQ_NAMELEN+5]; /* 5 for dot, 3x instance digits, null */ + snprintf(name, sizeof (name), "%s.%d", tq->tq_name, tq->tq_instance); + + kstat_t *ksp = kstat_create("taskq", 0, name, "misc", + KSTAT_TYPE_NAMED, sizeof (taskq_kstats_t) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (ksp == NULL) + return; + + ksp->ks_private = tq; + ksp->ks_update = taskq_kstats_update; + ksp->ks_data = kmem_alloc(sizeof (taskq_kstats_t), KM_SLEEP); + memcpy(ksp->ks_data, &taskq_kstats_template, sizeof (taskq_kstats_t)); + kstat_install(ksp); + + tq->tq_ksp = ksp; +} + +static void +taskq_kstats_fini(taskq_t *tq) +{ + if (tq->tq_ksp == NULL) + return; + + kmem_free(tq->tq_ksp->ks_data, sizeof (taskq_kstats_t)); + kstat_delete(tq->tq_ksp); + + tq->tq_ksp = NULL; +} + taskq_t * taskq_create(const char *name, int threads_arg, pri_t pri, int minalloc, int maxalloc, uint_t flags) @@ -1104,6 +1383,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri, init_waitqueue_head(&tq->tq_wait_waitq); tq->tq_lock_class = TQ_LOCK_GENERAL; INIT_LIST_HEAD(&tq->tq_taskqs); + taskq_stats_init(tq); if (flags & TASKQ_PREPOPULATE) { spin_lock_irqsave_nested(&tq->tq_lock, irqflags, @@ -1137,14 +1417,17 @@ taskq_create(const char *name, int threads_arg, pri_t pri, if (rc) { taskq_destroy(tq); - tq = NULL; - } else { - down_write(&tq_list_sem); - tq->tq_instance = taskq_find_by_name(name) + 1; - list_add_tail(&tq->tq_taskqs, &tq_list); - up_write(&tq_list_sem); + return (NULL); } + down_write(&tq_list_sem); + tq->tq_instance = 
taskq_find_by_name(name) + 1; + list_add_tail(&tq->tq_taskqs, &tq_list); + up_write(&tq_list_sem); + + /* Install kstats late, because the name includes tq_instance */ + taskq_kstats_init(tq); + return (tq); } EXPORT_SYMBOL(taskq_create); @@ -1177,6 +1460,8 @@ taskq_destroy(taskq_t *tq) taskq_wait(tq); + taskq_kstats_fini(tq); + /* remove taskq from global list used by the kstats */ down_write(&tq_list_sem); list_del(&tq->tq_taskqs); @@ -1230,6 +1515,7 @@ taskq_destroy(taskq_t *tq) spin_unlock_irqrestore(&tq->tq_lock, flags); + taskq_stats_fini(tq); kmem_strfree(tq->tq_name); kmem_free(tq, sizeof (taskq_t)); } From 5fe6f2779a1c30538a23d853ec4246cd58d39cbd Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 7 May 2024 10:26:20 +1000 Subject: [PATCH 3/4] spl-taskq: summary stats for all taskqs This adds /proc/spl/kstat/taskq/summary, which attempts to show a useful subset of stats for all taskqs in the system. Sponsored-by: Klara, Inc. Sponsored-by: Syneto Signed-off-by: Rob Norris --- module/os/linux/spl/spl-taskq.c | 98 +++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 61012bfb36d3..29b8f5426502 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -1557,6 +1557,100 @@ taskq_create_synced(const char *name, int nthreads, pri_t pri, } EXPORT_SYMBOL(taskq_create_synced); +static kstat_t *taskq_summary_ksp = NULL; + +static int +spl_taskq_kstat_headers(char *buf, size_t size) +{ + size_t n = snprintf(buf, size, + "%-20s | %-17s | %-23s\n" + "%-20s | %-17s | %-23s\n" + "%-20s | %-17s | %-23s\n", + "", "threads", "tasks on queue", + "taskq name", "tot [act idl] max", " pend [ norm high] dly", + "--------------------", "-----------------", + "-----------------------"); + return (n >= size ? 
ENOMEM : 0); +} + +static int +spl_taskq_kstat_data(char *buf, size_t size, void *data) +{ + struct list_head *tql = NULL; + taskq_t *tq; + char name[TASKQ_NAMELEN+5]; /* 5 for dot, 3x instance digits, null */ + char threads[25]; + char tasks[30]; + size_t n; + int err = 0; + + down_read(&tq_list_sem); + list_for_each_prev(tql, &tq_list) { + tq = list_entry(tql, taskq_t, tq_taskqs); + + mutex_enter(tq->tq_ksp->ks_lock); + taskq_kstats_update(tq->tq_ksp, KSTAT_READ); + taskq_kstats_t *tqks = tq->tq_ksp->ks_data; + + snprintf(name, sizeof (name), "%s.%d", tq->tq_name, + tq->tq_instance); + snprintf(threads, sizeof (threads), "%3llu [%3llu %3llu] %3llu", + tqks->tqks_threads_total.value.ui64, + tqks->tqks_threads_active.value.ui64, + tqks->tqks_threads_idle.value.ui64, + tqks->tqks_threads_max.value.ui64); + snprintf(tasks, sizeof (tasks), "%5llu [%5llu %5llu] %3llu", + tqks->tqks_tasks_total.value.ui64, + tqks->tqks_tasks_pending.value.ui64, + tqks->tqks_tasks_priority.value.ui64, + tqks->tqks_tasks_delayed.value.ui64); + + mutex_exit(tq->tq_ksp->ks_lock); + + n = snprintf(buf, size, "%-20s | %-17s | %-23s\n", + name, threads, tasks); + if (n >= size) { + err = ENOMEM; + break; + } + + buf = &buf[n]; + size -= n; + } + + up_read(&tq_list_sem); + + return (err); +} + +static void +spl_taskq_kstat_init(void) +{ + kstat_t *ksp = kstat_create("taskq", 0, "summary", "misc", + KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); + + if (ksp == NULL) + return; + + ksp->ks_data = (void *)(uintptr_t)1; + ksp->ks_ndata = 1; + kstat_set_raw_ops(ksp, spl_taskq_kstat_headers, + spl_taskq_kstat_data, NULL); + kstat_install(ksp); + + taskq_summary_ksp = ksp; +} + +static void +spl_taskq_kstat_fini(void) +{ + if (taskq_summary_ksp == NULL) + return; + + kstat_delete(taskq_summary_ksp); + taskq_summary_ksp = NULL; +} + static unsigned int spl_taskq_kick = 0; /* @@ -1737,12 +1831,16 @@ spl_taskq_init(void) */ dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC; + spl_taskq_kstat_init(); + return (0); } 
void spl_taskq_fini(void) { + spl_taskq_kstat_fini(); + taskq_destroy(dynamic_taskq); dynamic_taskq = NULL; From f789b9d1b4e4af9557dbe047e89ac8e0176f1d05 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 7 May 2024 10:17:12 +1000 Subject: [PATCH 4/4] spl-proc: remove old taskq stats These had minimal useful information for the admin, didn't work properly in some places, and knew far too much about taskq internals. With the new stats available, these should never be needed anymore. Sponsored-by: Klara, Inc. Sponsored-by: Syneto Signed-off-by: Rob Norris --- man/man4/spl.4 | 11 -- module/os/linux/spl/spl-proc.c | 268 --------------------------------- 2 files changed, 279 deletions(-) diff --git a/man/man4/spl.4 b/man/man4/spl.4 index 5cc12764e18c..22832c492db8 100644 --- a/man/man4/spl.4 +++ b/man/man4/spl.4 @@ -175,17 +175,6 @@ Increasing this value will result in a slower thread creation rate which may be preferable for some configurations. . -.It Sy spl_max_show_tasks Ns = Ns Sy 512 Pq uint -The maximum number of tasks per pending list in each taskq shown in -.Pa /proc/spl/taskq{,-all} . -Write -.Sy 0 -to turn off the limit. -The proc file will walk the lists with lock held, -reading it could cause a lock-up if the list grow too large -without limiting the output. -"(truncated)" will be shown if the list is larger than the limit. -. .It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 5000 Pq uint Minimum idle threads exit interval for dynamic taskqs. 
Smaller values allow idle threads exit more often and potentially be diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c index 2c0cdd9febf5..9fefcd03c410 100644 --- a/module/os/linux/spl/spl-proc.c +++ b/module/os/linux/spl/spl-proc.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -63,8 +62,6 @@ static struct ctl_table_header *spl_kstat = NULL; static struct proc_dir_entry *proc_spl = NULL; static struct proc_dir_entry *proc_spl_kmem = NULL; static struct proc_dir_entry *proc_spl_kmem_slab = NULL; -static struct proc_dir_entry *proc_spl_taskq_all = NULL; -static struct proc_dir_entry *proc_spl_taskq = NULL; struct proc_dir_entry *proc_spl_kstat = NULL; #ifdef DEBUG_KMEM @@ -177,195 +174,6 @@ proc_dohostid(CONST_CTL_TABLE *table, int write, return (0); } -static void -taskq_seq_show_headers(struct seq_file *f) -{ - seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n", - "taskq", "act", "nthr", "spwn", "maxt", "pri", - "mina", "maxa", "cura", "flags"); -} - -/* indices into the lheads array below */ -#define LHEAD_PEND 0 -#define LHEAD_PRIO 1 -#define LHEAD_DELAY 2 -#define LHEAD_WAIT 3 -#define LHEAD_ACTIVE 4 -#define LHEAD_SIZE 5 - -static unsigned int spl_max_show_tasks = 512; -/* CSTYLED */ -module_param(spl_max_show_tasks, uint, 0644); -MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc"); - -static int -taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag) -{ - taskq_t *tq = p; - taskq_thread_t *tqt = NULL; - spl_wait_queue_entry_t *wq; - struct task_struct *tsk; - taskq_ent_t *tqe; - char name[100]; - struct list_head *lheads[LHEAD_SIZE], *lh; - static char *list_names[LHEAD_SIZE] = - {"pend", "prio", "delay", "wait", "active" }; - int i, j, have_lheads = 0; - unsigned long wflags, flags; - - spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); - spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags); - - /* get the various lists and check 
whether they're empty */ - lheads[LHEAD_PEND] = &tq->tq_pend_list; - lheads[LHEAD_PRIO] = &tq->tq_prio_list; - lheads[LHEAD_DELAY] = &tq->tq_delay_list; -#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY - lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head; -#else - lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list; -#endif - lheads[LHEAD_ACTIVE] = &tq->tq_active_list; - - for (i = 0; i < LHEAD_SIZE; ++i) { - if (list_empty(lheads[i])) - lheads[i] = NULL; - else - ++have_lheads; - } - - /* early return in non-"all" mode if lists are all empty */ - if (!allflag && !have_lheads) { - spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags); - spin_unlock_irqrestore(&tq->tq_lock, flags); - return (0); - } - - /* unlock the waitq quickly */ - if (!lheads[LHEAD_WAIT]) - spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags); - - /* show the base taskq contents */ - snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance); - seq_printf(f, "%-25s ", name); - seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n", - tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn, - tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc, - tq->tq_nalloc, tq->tq_flags); - - /* show the active list */ - if (lheads[LHEAD_ACTIVE]) { - j = 0; - list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) { - if (j == 0) - seq_printf(f, "\t%s:", - list_names[LHEAD_ACTIVE]); - else if (j == 2) { - seq_printf(f, "\n\t "); - j = 0; - } - seq_printf(f, " [%d]%pf(%ps)", - tqt->tqt_thread->pid, - tqt->tqt_task->tqent_func, - tqt->tqt_task->tqent_arg); - ++j; - } - seq_printf(f, "\n"); - } - - for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i) - if (lheads[i]) { - j = 0; - list_for_each(lh, lheads[i]) { - if (spl_max_show_tasks != 0 && - j >= spl_max_show_tasks) { - seq_printf(f, "\n\t(truncated)"); - break; - } - /* show the wait waitq list */ - if (i == LHEAD_WAIT) { -#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY - wq = list_entry(lh, - spl_wait_queue_entry_t, entry); -#else - wq = list_entry(lh, - 
spl_wait_queue_entry_t, task_list); -#endif - if (j == 0) - seq_printf(f, "\t%s:", - list_names[i]); - else if (j % 8 == 0) - seq_printf(f, "\n\t "); - - tsk = wq->private; - seq_printf(f, " %d", tsk->pid); - /* pend, prio and delay lists */ - } else { - tqe = list_entry(lh, taskq_ent_t, - tqent_list); - if (j == 0) - seq_printf(f, "\t%s:", - list_names[i]); - else if (j % 2 == 0) - seq_printf(f, "\n\t "); - - seq_printf(f, " %pf(%ps)", - tqe->tqent_func, - tqe->tqent_arg); - } - ++j; - } - seq_printf(f, "\n"); - } - if (lheads[LHEAD_WAIT]) - spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags); - spin_unlock_irqrestore(&tq->tq_lock, flags); - - return (0); -} - -static int -taskq_all_seq_show(struct seq_file *f, void *p) -{ - return (taskq_seq_show_impl(f, p, B_TRUE)); -} - -static int -taskq_seq_show(struct seq_file *f, void *p) -{ - return (taskq_seq_show_impl(f, p, B_FALSE)); -} - -static void * -taskq_seq_start(struct seq_file *f, loff_t *pos) -{ - struct list_head *p; - loff_t n = *pos; - - down_read(&tq_list_sem); - if (!n) - taskq_seq_show_headers(f); - - p = tq_list.next; - while (n--) { - p = p->next; - if (p == &tq_list) - return (NULL); - } - - return (list_entry(p, taskq_t, tq_taskqs)); -} - -static void * -taskq_seq_next(struct seq_file *f, void *p, loff_t *pos) -{ - taskq_t *tq = p; - - ++*pos; - return ((tq->tq_taskqs.next == &tq_list) ? 
- NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs)); -} - static void slab_seq_show_headers(struct seq_file *f) { @@ -501,66 +309,6 @@ static const kstat_proc_op_t proc_slab_operations = { #endif }; -static void -taskq_seq_stop(struct seq_file *f, void *v) -{ - up_read(&tq_list_sem); -} - -static const struct seq_operations taskq_all_seq_ops = { - .show = taskq_all_seq_show, - .start = taskq_seq_start, - .next = taskq_seq_next, - .stop = taskq_seq_stop, -}; - -static const struct seq_operations taskq_seq_ops = { - .show = taskq_seq_show, - .start = taskq_seq_start, - .next = taskq_seq_next, - .stop = taskq_seq_stop, -}; - -static int -proc_taskq_all_open(struct inode *inode, struct file *filp) -{ - return (seq_open(filp, &taskq_all_seq_ops)); -} - -static int -proc_taskq_open(struct inode *inode, struct file *filp) -{ - return (seq_open(filp, &taskq_seq_ops)); -} - -static const kstat_proc_op_t proc_taskq_all_operations = { -#ifdef HAVE_PROC_OPS_STRUCT - .proc_open = proc_taskq_all_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = seq_release, -#else - .open = proc_taskq_all_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -#endif -}; - -static const kstat_proc_op_t proc_taskq_operations = { -#ifdef HAVE_PROC_OPS_STRUCT - .proc_open = proc_taskq_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = seq_release, -#else - .open = proc_taskq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -#endif -}; - static struct ctl_table spl_kmem_table[] = { #ifdef DEBUG_KMEM { @@ -677,8 +425,6 @@ static void spl_proc_cleanup(void) remove_proc_entry("kstat", proc_spl); remove_proc_entry("slab", proc_spl_kmem); remove_proc_entry("kmem", proc_spl); - remove_proc_entry("taskq-all", proc_spl); - remove_proc_entry("taskq", proc_spl); remove_proc_entry("spl", NULL); #ifndef HAVE_REGISTER_SYSCTL_TABLE @@ -761,20 +507,6 @@ spl_proc_init(void) goto out; } - proc_spl_taskq_all = 
proc_create_data("taskq-all", 0444, proc_spl, - &proc_taskq_all_operations, NULL); - if (proc_spl_taskq_all == NULL) { - rc = -EUNATCH; - goto out; - } - - proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl, - &proc_taskq_operations, NULL); - if (proc_spl_taskq == NULL) { - rc = -EUNATCH; - goto out; - } - proc_spl_kmem = proc_mkdir("kmem", proc_spl); if (proc_spl_kmem == NULL) { rc = -EUNATCH;