diff --git a/include/os/linux/spl/sys/taskq.h b/include/os/linux/spl/sys/taskq.h
index b73dab631e04..8051de36ba82 100644
--- a/include/os/linux/spl/sys/taskq.h
+++ b/include/os/linux/spl/sys/taskq.h
@@ -20,6 +20,10 @@
  * You should have received a copy of the GNU General Public License along
  * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
  */
+/*
+ * Copyright (c) 2024, Klara Inc.
+ * Copyright (c) 2024, Syneto
+ */
 
 #ifndef _SPL_TASKQ_H
 #define	_SPL_TASKQ_H
@@ -33,6 +37,9 @@
 #include <sys/thread.h>
 #include <sys/rwlock.h>
 #include <sys/wait.h>
+#include <sys/wmsum.h>
+
+typedef struct kstat_s kstat_t;
 
 #define	TASKQ_NAMELEN		31
@@ -74,6 +81,32 @@ typedef enum tq_lock_role {
 typedef unsigned long taskqid_t;
 typedef void (task_func_t)(void *);
 
+typedef struct taskq_sums {
+	/* gauges (inc/dec counters, current value) */
+	wmsum_t tqs_threads_active;	/* threads running a task */
+	wmsum_t tqs_threads_idle;	/* threads waiting for work */
+	wmsum_t tqs_threads_total;	/* total threads */
+	wmsum_t tqs_tasks_pending;	/* tasks waiting to execute */
+	wmsum_t tqs_tasks_priority;	/* hi-pri tasks waiting */
+	wmsum_t tqs_tasks_total;	/* total waiting tasks */
+	wmsum_t tqs_tasks_delayed;	/* tasks deferred to future */
+	wmsum_t tqs_entries_free;	/* task entries on free list */
+
+	/* counters (inc only, since taskq creation) */
+	wmsum_t tqs_threads_created;	/* threads created */
+	wmsum_t tqs_threads_destroyed;	/* threads destroyed */
+	wmsum_t tqs_tasks_dispatched;	/* tasks dispatched */
+	wmsum_t tqs_tasks_dispatched_delayed;	/* tasks delayed to future */
+	wmsum_t tqs_tasks_executed_normal;	/* normal pri tasks executed */
+	wmsum_t tqs_tasks_executed_priority;	/* high pri tasks executed */
+	wmsum_t tqs_tasks_executed;	/* total tasks executed */
+	wmsum_t tqs_tasks_delayed_requeued;	/* delayed tasks requeued */
+	wmsum_t tqs_tasks_cancelled;	/* tasks cancelled before run */
+	wmsum_t tqs_thread_wakeups;	/* total thread wakeups */
+	wmsum_t tqs_thread_wakeups_nowork;	/* thread woken but no tasks */
+	wmsum_t tqs_thread_sleeps;	/* total thread sleeps */
+} taskq_sums_t;
+
 typedef struct taskq {
 	spinlock_t		tq_lock;	/* protects taskq_t */
 	char			*tq_name;	/* taskq name */
@@ -105,6 +138,8 @@ typedef struct taskq {
 	struct hlist_node	tq_hp_cb_node;
 	boolean_t		tq_hp_support;
 	unsigned long		lastspawnstop;	/* when to purge dynamic */
+	taskq_sums_t		tq_sums;
+	kstat_t			*tq_ksp;
 } taskq_t;
 
 typedef struct taskq_ent {
@@ -123,6 +158,13 @@ typedef struct taskq_ent {
 #define	TQENT_FLAG_PREALLOC	0x1
 #define	TQENT_FLAG_CANCEL	0x2
 
+/* bits 2-3 are which list tqent is on */
+#define	TQENT_LIST_NONE		0x0
+#define	TQENT_LIST_PENDING	0x4
+#define	TQENT_LIST_PRIORITY	0x8
+#define	TQENT_LIST_DELAY	0xc
+#define	TQENT_LIST_MASK		0xc
+
 typedef struct taskq_thread {
 	struct list_head	tqt_thread_list;
 	struct list_head	tqt_active_list;
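Note: the TQENT_LIST_* values above pack a two-bit "which list is this entry on" tag into the same flags word as the existing TQENT_FLAG_* bits, which is what lets the stat macros later in the patch find the right gauge without an extra field. A minimal userspace sketch of that encoding, reusing the header's values (everything else here is illustrative scaffolding, not patch code):

```c
#include <assert.h>
#include <stdio.h>

#define TQENT_FLAG_PREALLOC	0x1	/* bits 0-1: existing flags */
#define TQENT_FLAG_CANCEL	0x2
#define TQENT_LIST_NONE		0x0	/* bits 2-3: list tag */
#define TQENT_LIST_PENDING	0x4
#define TQENT_LIST_PRIORITY	0x8
#define TQENT_LIST_DELAY	0xc
#define TQENT_LIST_MASK		0xc

int main(void)
{
	unsigned flags = TQENT_FLAG_PREALLOC;

	/* Move the entry to the priority list: clear bits 2-3, then set. */
	flags = (flags & ~TQENT_LIST_MASK) | TQENT_LIST_PRIORITY;

	/* The list tag and the flag bits do not disturb each other. */
	assert((flags & TQENT_LIST_MASK) == TQENT_LIST_PRIORITY);
	assert(flags & TQENT_FLAG_PREALLOC);

	printf("on list %#x\n", flags & TQENT_LIST_MASK);
	return (0);
}
```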
diff --git a/man/man4/spl.4 b/man/man4/spl.4
index 5cc12764e18c..22832c492db8 100644
--- a/man/man4/spl.4
+++ b/man/man4/spl.4
@@ -175,17 +175,6 @@
 Increasing this value will result in a slower thread creation rate which
 may be preferable for some configurations.
 .
-.It Sy spl_max_show_tasks Ns = Ns Sy 512 Pq uint
-The maximum number of tasks per pending list in each taskq shown in
-.Pa /proc/spl/taskq{,-all} .
-Write
-.Sy 0
-to turn off the limit.
-The proc file will walk the lists with lock held,
-reading it could cause a lock-up if the list grow too large
-without limiting the output.
-"(truncated)" will be shown if the list is larger than the limit.
-.
 .It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 5000 Pq uint
 Minimum idle threads exit interval for dynamic taskqs.
 Smaller values allow idle threads exit more often and potentially be
diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
index 986db1518456..6ee0236d289a 100644
--- a/module/os/linux/spl/spl-generic.c
+++ b/module/os/linux/spl/spl-generic.c
@@ -868,16 +868,16 @@ spl_init(void)
 	if ((rc = spl_tsd_init()))
 		goto out2;
 
-	if ((rc = spl_taskq_init()))
+	if ((rc = spl_proc_init()))
 		goto out3;
 
-	if ((rc = spl_kmem_cache_init()))
+	if ((rc = spl_kstat_init()))
 		goto out4;
 
-	if ((rc = spl_proc_init()))
+	if ((rc = spl_taskq_init()))
 		goto out5;
 
-	if ((rc = spl_kstat_init()))
+	if ((rc = spl_kmem_cache_init()))
 		goto out6;
 
 	if ((rc = spl_zlib_init()))
@@ -891,13 +891,13 @@ spl_init(void)
 out8:
 	spl_zlib_fini();
 out7:
-	spl_kstat_fini();
+	spl_kmem_cache_fini();
 out6:
-	spl_proc_fini();
+	spl_taskq_fini();
 out5:
-	spl_kmem_cache_fini();
+	spl_kstat_fini();
 out4:
-	spl_taskq_fini();
+	spl_proc_fini();
 out3:
 	spl_tsd_fini();
 out2:
@@ -913,10 +913,10 @@ spl_fini(void)
 {
 	spl_zone_fini();
 	spl_zlib_fini();
-	spl_kstat_fini();
-	spl_proc_fini();
 	spl_kmem_cache_fini();
 	spl_taskq_fini();
+	spl_kstat_fini();
+	spl_proc_fini();
 	spl_tsd_fini();
 	spl_kvmem_fini();
 	spl_random_fini();
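Note: the spl-generic.c reshuffle exists so that proc and kstat are up before the taskq subsystem registers its kstats, and torn down after it, with spl_fini() mirroring the init order in exact reverse. A standalone sketch of the same goto-unwind idiom, with stub functions standing in for the real spl_*_init/fini routines:

```c
#include <stdio.h>

static int proc_init(void)	{ puts("proc up");	return (0); }
static int kstat_init(void)	{ puts("kstat up");	return (0); }
static int taskq_init(void)	{ puts("taskq up");	return (0); }
static void proc_fini(void)	{ puts("proc down"); }
static void kstat_fini(void)	{ puts("kstat down"); }
static void taskq_fini(void)	{ puts("taskq down"); }

static int
subsystems_init(void)
{
	int rc;

	if ((rc = proc_init()))
		goto out0;
	if ((rc = kstat_init()))	/* kstat publishes under proc */
		goto out1;
	if ((rc = taskq_init()))	/* taskq kstats need kstat */
		goto out2;
	return (0);

	/* unwind in reverse: each label undoes what came before it */
out2:
	kstat_fini();
out1:
	proc_fini();
out0:
	return (rc);
}

int main(void)
{
	if (subsystems_init() == 0) {
		taskq_fini();
		kstat_fini();
		proc_fini();
	}
	return (0);
}
```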
diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c
index 2c0cdd9febf5..9fefcd03c410 100644
--- a/module/os/linux/spl/spl-proc.c
+++ b/module/os/linux/spl/spl-proc.c
@@ -31,7 +31,6 @@
 #include <sys/kmem.h>
 #include <sys/kmem_cache.h>
 #include <sys/vmem.h>
-#include <sys/taskq.h>
 #include <sys/proc.h>
 #include <linux/ctype.h>
 #include <linux/kmod.h>
@@ -63,8 +62,6 @@ static struct ctl_table_header *spl_kstat = NULL;
 static struct proc_dir_entry *proc_spl = NULL;
 static struct proc_dir_entry *proc_spl_kmem = NULL;
 static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
-static struct proc_dir_entry *proc_spl_taskq_all = NULL;
-static struct proc_dir_entry *proc_spl_taskq = NULL;
 struct proc_dir_entry *proc_spl_kstat = NULL;
 
 #ifdef DEBUG_KMEM
@@ -177,195 +174,6 @@ proc_dohostid(CONST_CTL_TABLE *table, int write,
 	return (0);
 }
 
-static void
-taskq_seq_show_headers(struct seq_file *f)
-{
-	seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
-	    "taskq", "act", "nthr", "spwn", "maxt", "pri",
-	    "mina", "maxa", "cura", "flags");
-}
-
-/* indices into the lheads array below */
-#define	LHEAD_PEND	0
-#define	LHEAD_PRIO	1
-#define	LHEAD_DELAY	2
-#define	LHEAD_WAIT	3
-#define	LHEAD_ACTIVE	4
-#define	LHEAD_SIZE	5
-
-static unsigned int spl_max_show_tasks = 512;
-/* CSTYLED */
-module_param(spl_max_show_tasks, uint, 0644);
-MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
-
-static int
-taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
-{
-	taskq_t *tq = p;
-	taskq_thread_t *tqt = NULL;
-	spl_wait_queue_entry_t *wq;
-	struct task_struct *tsk;
-	taskq_ent_t *tqe;
-	char name[100];
-	struct list_head *lheads[LHEAD_SIZE], *lh;
-	static char *list_names[LHEAD_SIZE] =
-	    {"pend", "prio", "delay", "wait", "active" };
-	int i, j, have_lheads = 0;
-	unsigned long wflags, flags;
-
-	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
-	spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
-
-	/* get the various lists and check whether they're empty */
-	lheads[LHEAD_PEND] = &tq->tq_pend_list;
-	lheads[LHEAD_PRIO] = &tq->tq_prio_list;
-	lheads[LHEAD_DELAY] = &tq->tq_delay_list;
-#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
-	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
-#else
-	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
-#endif
-	lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
-
-	for (i = 0; i < LHEAD_SIZE; ++i) {
-		if (list_empty(lheads[i]))
-			lheads[i] = NULL;
-		else
-			++have_lheads;
-	}
-
-	/* early return in non-"all" mode if lists are all empty */
-	if (!allflag && !have_lheads) {
-		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
-		spin_unlock_irqrestore(&tq->tq_lock, flags);
-		return (0);
-	}
-
-	/* unlock the waitq quickly */
-	if (!lheads[LHEAD_WAIT])
-		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
-
-	/* show the base taskq contents */
-	snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance);
-	seq_printf(f, "%-25s ", name);
-	seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
-	    tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
-	    tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
-	    tq->tq_nalloc, tq->tq_flags);
-
-	/* show the active list */
-	if (lheads[LHEAD_ACTIVE]) {
-		j = 0;
-		list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
-			if (j == 0)
-				seq_printf(f, "\t%s:",
-				    list_names[LHEAD_ACTIVE]);
-			else if (j == 2) {
-				seq_printf(f, "\n\t ");
-				j = 0;
-			}
-			seq_printf(f, " [%d]%pf(%ps)",
-			    tqt->tqt_thread->pid,
-			    tqt->tqt_task->tqent_func,
-			    tqt->tqt_task->tqent_arg);
-			++j;
-		}
-		seq_printf(f, "\n");
-	}
-
-	for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
-		if (lheads[i]) {
-			j = 0;
-			list_for_each(lh, lheads[i]) {
-				if (spl_max_show_tasks != 0 &&
-				    j >= spl_max_show_tasks) {
-					seq_printf(f, "\n\t(truncated)");
-					break;
-				}
-				/* show the wait waitq list */
-				if (i == LHEAD_WAIT) {
-#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
-					wq = list_entry(lh,
-					    spl_wait_queue_entry_t, entry);
-#else
-					wq = list_entry(lh,
-					    spl_wait_queue_entry_t, task_list);
-#endif
-					if (j == 0)
-						seq_printf(f, "\t%s:",
-						    list_names[i]);
-					else if (j % 8 == 0)
-						seq_printf(f, "\n\t ");
-
-					tsk = wq->private;
-					seq_printf(f, " %d", tsk->pid);
-				/* pend, prio and delay lists */
-				} else {
-					tqe = list_entry(lh, taskq_ent_t,
-					    tqent_list);
-					if (j == 0)
-						seq_printf(f, "\t%s:",
-						    list_names[i]);
-					else if (j % 2 == 0)
-						seq_printf(f, "\n\t ");
-
-					seq_printf(f, " %pf(%ps)",
-					    tqe->tqent_func,
-					    tqe->tqent_arg);
-				}
-				++j;
-			}
-			seq_printf(f, "\n");
-		}
-	if (lheads[LHEAD_WAIT])
-		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
-	spin_unlock_irqrestore(&tq->tq_lock, flags);
-
-	return (0);
-}
-
-static int
-taskq_all_seq_show(struct seq_file *f, void *p)
-{
-	return (taskq_seq_show_impl(f, p, B_TRUE));
-}
-
-static int
-taskq_seq_show(struct seq_file *f, void *p)
-{
-	return (taskq_seq_show_impl(f, p, B_FALSE));
-}
-
-static void *
-taskq_seq_start(struct seq_file *f, loff_t *pos)
-{
-	struct list_head *p;
-	loff_t n = *pos;
-
-	down_read(&tq_list_sem);
-	if (!n)
-		taskq_seq_show_headers(f);
-
-	p = tq_list.next;
-	while (n--) {
-		p = p->next;
-		if (p == &tq_list)
-			return (NULL);
-	}
-
-	return (list_entry(p, taskq_t, tq_taskqs));
-}
-
-static void *
-taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
-{
-	taskq_t *tq = p;
-
-	++*pos;
-	return ((tq->tq_taskqs.next == &tq_list) ?
-	    NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs));
-}
-
 static void
 slab_seq_show_headers(struct seq_file *f)
 {
@@ -501,66 +309,6 @@ static const kstat_proc_op_t proc_slab_operations = {
 #endif
 };
 
-static void
-taskq_seq_stop(struct seq_file *f, void *v)
-{
-	up_read(&tq_list_sem);
-}
-
-static const struct seq_operations taskq_all_seq_ops = {
-	.show = taskq_all_seq_show,
-	.start = taskq_seq_start,
-	.next = taskq_seq_next,
-	.stop = taskq_seq_stop,
-};
-
-static const struct seq_operations taskq_seq_ops = {
-	.show = taskq_seq_show,
-	.start = taskq_seq_start,
-	.next = taskq_seq_next,
-	.stop = taskq_seq_stop,
-};
-
-static int
-proc_taskq_all_open(struct inode *inode, struct file *filp)
-{
-	return (seq_open(filp, &taskq_all_seq_ops));
-}
-
-static int
-proc_taskq_open(struct inode *inode, struct file *filp)
-{
-	return (seq_open(filp, &taskq_seq_ops));
-}
-
-static const kstat_proc_op_t proc_taskq_all_operations = {
-#ifdef HAVE_PROC_OPS_STRUCT
-	.proc_open = proc_taskq_all_open,
-	.proc_read = seq_read,
-	.proc_lseek = seq_lseek,
-	.proc_release = seq_release,
-#else
-	.open = proc_taskq_all_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-#endif
-};
-
-static const kstat_proc_op_t proc_taskq_operations = {
-#ifdef HAVE_PROC_OPS_STRUCT
-	.proc_open = proc_taskq_open,
-	.proc_read = seq_read,
-	.proc_lseek = seq_lseek,
-	.proc_release = seq_release,
-#else
-	.open = proc_taskq_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-#endif
-};
-
 static struct ctl_table spl_kmem_table[] = {
 #ifdef DEBUG_KMEM
 	{
@@ -677,8 +425,6 @@ static void spl_proc_cleanup(void)
 	remove_proc_entry("kstat", proc_spl);
 	remove_proc_entry("slab", proc_spl_kmem);
 	remove_proc_entry("kmem", proc_spl);
-	remove_proc_entry("taskq-all", proc_spl);
-	remove_proc_entry("taskq", proc_spl);
 	remove_proc_entry("spl", NULL);
 
 #ifndef HAVE_REGISTER_SYSCTL_TABLE
@@ -761,20 +507,6 @@ spl_proc_init(void)
 		goto out;
 	}
 
-	proc_spl_taskq_all = proc_create_data("taskq-all", 0444, proc_spl,
-	    &proc_taskq_all_operations, NULL);
-	if (proc_spl_taskq_all == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-
-	proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl,
-	    &proc_taskq_operations, NULL);
-	if (proc_spl_taskq == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-
 	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
 	if (proc_spl_kmem == NULL) {
 		rc = -EUNATCH;
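Note: with /proc/spl/taskq and /proc/spl/taskq-all gone, the equivalent information now comes from the kstats added below. A small, hypothetical consumer, assuming the usual Linux SPL kstat layout of /proc/spl/kstat/&lt;module&gt;/&lt;name&gt; (the "summary" kstat is created later in this patch):

```c
/* Illustrative stand-in for `cat /proc/spl/taskq`: dump the summary
 * kstat instead. Adjust the path if your install differs. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/spl/kstat/taskq/summary", "r");
	if (f == NULL) {
		perror("open summary kstat");
		return (1);
	}

	char line[256];
	while (fgets(line, sizeof (line), f) != NULL)
		fputs(line, stdout);

	fclose(f);
	return (0);
}
```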
diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c
index e7b812c3b5b5..29b8f5426502 100644
--- a/module/os/linux/spl/spl-taskq.c
+++ b/module/os/linux/spl/spl-taskq.c
@@ -22,16 +22,98 @@
  *
  *  Solaris Porting Layer (SPL) Task Queue Implementation.
  */
+/*
+ * Copyright (c) 2024, Klara Inc.
+ * Copyright (c) 2024, Syneto
+ */
 
 #include <sys/timer.h>
 #include <sys/taskq.h>
 #include <sys/kmem.h>
 #include <sys/tsd.h>
 #include <sys/trace_spl.h>
+#include <sys/time.h>
+#include <sys/atomic.h>
+#include <sys/kstat.h>
 #ifdef HAVE_CPU_HOTPLUG
 #include <linux/cpuhotplug.h>
 #endif
 
+typedef struct taskq_kstats {
+	/* static values, for completeness */
+	kstat_named_t tqks_threads_max;
+	kstat_named_t tqks_entry_pool_min;
+	kstat_named_t tqks_entry_pool_max;
+
+	/* gauges (inc/dec counters, current value) */
+	kstat_named_t tqks_threads_active;
+	kstat_named_t tqks_threads_idle;
+	kstat_named_t tqks_threads_total;
+	kstat_named_t tqks_tasks_pending;
+	kstat_named_t tqks_tasks_priority;
+	kstat_named_t tqks_tasks_total;
+	kstat_named_t tqks_tasks_delayed;
+	kstat_named_t tqks_entries_free;
+
+	/* counters (inc only, since taskq creation) */
+	kstat_named_t tqks_threads_created;
+	kstat_named_t tqks_threads_destroyed;
+	kstat_named_t tqks_tasks_dispatched;
+	kstat_named_t tqks_tasks_dispatched_delayed;
+	kstat_named_t tqks_tasks_executed_normal;
+	kstat_named_t tqks_tasks_executed_priority;
+	kstat_named_t tqks_tasks_executed;
+	kstat_named_t tqks_tasks_delayed_requeued;
+	kstat_named_t tqks_tasks_cancelled;
+	kstat_named_t tqks_thread_wakeups;
+	kstat_named_t tqks_thread_wakeups_nowork;
+	kstat_named_t tqks_thread_sleeps;
+} taskq_kstats_t;
+
+static taskq_kstats_t taskq_kstats_template = {
+	{ "threads_max",		KSTAT_DATA_UINT64 },
+	{ "entry_pool_min",		KSTAT_DATA_UINT64 },
+	{ "entry_pool_max",		KSTAT_DATA_UINT64 },
+	{ "threads_active",		KSTAT_DATA_UINT64 },
+	{ "threads_idle",		KSTAT_DATA_UINT64 },
+	{ "threads_total",		KSTAT_DATA_UINT64 },
+	{ "tasks_pending",		KSTAT_DATA_UINT64 },
+	{ "tasks_priority",		KSTAT_DATA_UINT64 },
+	{ "tasks_total",		KSTAT_DATA_UINT64 },
+	{ "tasks_delayed",		KSTAT_DATA_UINT64 },
+	{ "entries_free",		KSTAT_DATA_UINT64 },
+
+	{ "threads_created",		KSTAT_DATA_UINT64 },
+	{ "threads_destroyed",		KSTAT_DATA_UINT64 },
+	{ "tasks_dispatched",		KSTAT_DATA_UINT64 },
+	{ "tasks_dispatched_delayed",	KSTAT_DATA_UINT64 },
+	{ "tasks_executed_normal",	KSTAT_DATA_UINT64 },
+	{ "tasks_executed_priority",	KSTAT_DATA_UINT64 },
+	{ "tasks_executed",		KSTAT_DATA_UINT64 },
+	{ "tasks_delayed_requeued",	KSTAT_DATA_UINT64 },
+	{ "tasks_cancelled",		KSTAT_DATA_UINT64 },
+	{ "thread_wakeups",		KSTAT_DATA_UINT64 },
+	{ "thread_wakeups_nowork",	KSTAT_DATA_UINT64 },
+	{ "thread_sleeps",		KSTAT_DATA_UINT64 },
+};
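Note: taskq_kstats_template is a fully initialized prototype that later gets memcpy'd into each taskq's private ks_data, so stat names and types are written exactly once and every taskq gets its own copy to fill with values. A self-contained model of that template-copy pattern, with simplified stand-in types (not the real kstat structures):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
	char name[32];
	unsigned long long value;
} named_stat_t;

typedef struct {
	named_stat_t threads_max;
	named_stat_t tasks_pending;
} stats_t;

/* one static template, names declared once */
static const stats_t stats_template = {
	{ "threads_max", 0 },
	{ "tasks_pending", 0 },
};

int main(void)
{
	/* per-instance copy, as taskq_kstats_init() does with ks_data */
	stats_t *st = malloc(sizeof (stats_t));
	if (st == NULL)
		return (1);
	memcpy(st, &stats_template, sizeof (stats_t));

	st->threads_max.value = 16;	/* instance-specific value */
	printf("%s = %llu\n", st->threads_max.name, st->threads_max.value);

	free(st);
	return (0);
}
```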
+
+#define	TQSTAT_INC(tq, stat)	wmsum_add(&tq->tq_sums.tqs_##stat, 1)
+#define	TQSTAT_DEC(tq, stat)	wmsum_add(&tq->tq_sums.tqs_##stat, -1)
+
+#define	_TQSTAT_MOD_LIST(mod, tq, t) do { \
+	switch (t->tqent_flags & TQENT_LIST_MASK) { \
+	case TQENT_LIST_NONE: ASSERT(list_empty(&t->tqent_list)); break;\
+	case TQENT_LIST_PENDING: mod(tq, tasks_pending); break; \
+	case TQENT_LIST_PRIORITY: mod(tq, tasks_priority); break; \
+	case TQENT_LIST_DELAY: mod(tq, tasks_delayed); break; \
+	} \
+} while (0)
+#define	TQSTAT_INC_LIST(tq, t)	_TQSTAT_MOD_LIST(TQSTAT_INC, tq, t)
+#define	TQSTAT_DEC_LIST(tq, t)	_TQSTAT_MOD_LIST(TQSTAT_DEC, tq, t)
+
+#define	TQENT_SET_LIST(t, l) \
+	t->tqent_flags = (t->tqent_flags & ~TQENT_LIST_MASK) | l;
+
 static int spl_taskq_thread_bind = 0;
 module_param(spl_taskq_thread_bind, int, 0644);
 MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
@@ -134,6 +216,7 @@ task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags)
 	ASSERT(!timer_pending(&t->tqent_timer));
 
 	list_del_init(&t->tqent_list);
+	TQSTAT_DEC(tq, entries_free);
 	return (t);
 }
@@ -204,12 +287,11 @@ task_done(taskq_t *tq, taskq_ent_t *t)
 {
 	ASSERT(tq);
 	ASSERT(t);
+	ASSERT(list_empty(&t->tqent_list));
 
 	/* Wake tasks blocked in taskq_wait_id() */
 	wake_up_all(&t->tqent_waitq);
 
-	list_del_init(&t->tqent_list);
-
 	if (tq->tq_nalloc <= tq->tq_minalloc) {
 		t->tqent_id = TASKQID_INVALID;
 		t->tqent_func = NULL;
@@ -217,6 +299,7 @@ task_done(taskq_t *tq, taskq_ent_t *t)
 		t->tqent_flags = 0;
 
 		list_add_tail(&t->tqent_list, &tq->tq_free_list);
+		TQSTAT_INC(tq, entries_free);
 	} else {
 		task_free(tq, t);
 	}
@@ -263,6 +346,8 @@ task_expire_impl(taskq_ent_t *t)
 	spin_unlock_irqrestore(&tq->tq_lock, flags);
 
 	wake_up(&tq->tq_work_waitq);
+
+	TQSTAT_INC(tq, tasks_delayed_requeued);
 }
 
 static void
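Note: the TQSTAT macros lean on token pasting so call sites name the statistic rather than spell out the field. A compilable userspace model of how they resolve; wmsum_t is reduced to a plain counter here (in the kernel it is a scalable per-CPU sum, but wmsum_add/wmsum_value mirror the real API shape):

```c
#include <stdio.h>

typedef long long wmsum_t;
static void wmsum_add(wmsum_t *w, long long d) { *w += d; }

typedef struct {
	wmsum_t tqs_tasks_pending;
	wmsum_t tqs_tasks_dispatched;
} taskq_sums_t;
typedef struct { taskq_sums_t tq_sums; } taskq_t;

/* TQSTAT_INC(tq, x) pastes into wmsum_add(&tq->tq_sums.tqs_x, 1) */
#define TQSTAT_INC(tq, stat)	wmsum_add(&(tq)->tq_sums.tqs_##stat, 1)
#define TQSTAT_DEC(tq, stat)	wmsum_add(&(tq)->tq_sums.tqs_##stat, -1)

int main(void)
{
	taskq_t tq = {{ 0, 0 }};

	TQSTAT_INC(&tq, tasks_dispatched);	/* counter: only ever grows */
	TQSTAT_INC(&tq, tasks_pending);		/* gauge: inc on enqueue... */
	TQSTAT_DEC(&tq, tasks_pending);		/* ...dec on dequeue */

	printf("dispatched=%lld pending=%lld\n",
	    tq.tq_sums.tqs_tasks_dispatched, tq.tq_sums.tqs_tasks_pending);
	return (0);
}
```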
@@ -534,7 +619,10 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id)
 	t = taskq_find(tq, id);
 	if (t && t != ERR_PTR(-EBUSY)) {
 		list_del_init(&t->tqent_list);
+		TQSTAT_DEC_LIST(tq, t);
+
 		t->tqent_flags |= TQENT_FLAG_CANCEL;
+		TQSTAT_INC(tq, tasks_cancelled);
 
 		/*
 		 * When canceling the lowest outstanding task id we
@@ -604,13 +692,19 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
 	spin_lock(&t->tqent_lock);
 
 	/* Queue to the front of the list to enforce TQ_NOQUEUE semantics */
-	if (flags & TQ_NOQUEUE)
+	if (flags & TQ_NOQUEUE) {
+		TQENT_SET_LIST(t, TQENT_LIST_PRIORITY);
 		list_add(&t->tqent_list, &tq->tq_prio_list);
 	/* Queue to the priority list instead of the pending list */
-	else if (flags & TQ_FRONT)
+	} else if (flags & TQ_FRONT) {
+		TQENT_SET_LIST(t, TQENT_LIST_PRIORITY);
 		list_add_tail(&t->tqent_list, &tq->tq_prio_list);
-	else
+	} else {
+		TQENT_SET_LIST(t, TQENT_LIST_PENDING);
 		list_add_tail(&t->tqent_list, &tq->tq_pend_list);
+	}
+	TQSTAT_INC_LIST(tq, t);
+	TQSTAT_INC(tq, tasks_total);
 
 	t->tqent_id = rc = tq->tq_next_id;
 	tq->tq_next_id++;
@@ -629,6 +723,8 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
 
 	wake_up(&tq->tq_work_waitq);
 
+	TQSTAT_INC(tq, tasks_dispatched);
+
 	/* Spawn additional taskq threads if required. */
 	if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
 		(void) taskq_thread_spawn(tq);
@@ -662,6 +758,8 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
 
 		/* Queue to the delay list for subsequent execution */
 		list_add_tail(&t->tqent_list, &tq->tq_delay_list);
+		TQENT_SET_LIST(t, TQENT_LIST_DELAY);
+		TQSTAT_INC_LIST(tq, t);
 
 		t->tqent_id = rc = tq->tq_next_id;
 		tq->tq_next_id++;
@@ -676,6 +774,8 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
 
 	spin_unlock(&t->tqent_lock);
 
+	TQSTAT_INC(tq, tasks_dispatched_delayed);
+
 	/* Spawn additional taskq threads if required. */
 	if (tq->tq_nactive == tq->tq_nthreads)
 		(void) taskq_thread_spawn(tq);
@@ -724,10 +824,15 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
 		t->tqent_flags |= TQENT_FLAG_PREALLOC;
 
 	/* Queue to the priority list instead of the pending list */
-	if (flags & TQ_FRONT)
+	if (flags & TQ_FRONT) {
+		TQENT_SET_LIST(t, TQENT_LIST_PRIORITY);
 		list_add_tail(&t->tqent_list, &tq->tq_prio_list);
-	else
+	} else {
+		TQENT_SET_LIST(t, TQENT_LIST_PENDING);
 		list_add_tail(&t->tqent_list, &tq->tq_pend_list);
+	}
+	TQSTAT_INC_LIST(tq, t);
+	TQSTAT_INC(tq, tasks_total);
 
 	t->tqent_id = tq->tq_next_id;
 	tq->tq_next_id++;
@@ -742,6 +847,8 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
 
 	wake_up(&tq->tq_work_waitq);
 
+	TQSTAT_INC(tq, tasks_dispatched);
+
 	/* Spawn additional taskq threads if required. */
 	if (tq->tq_nactive == tq->tq_nthreads)
 		(void) taskq_thread_spawn(tq);
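Note: all three dispatch paths follow the same pattern: pick a list from the flags, tag the entry with TQENT_SET_LIST, then bump the matching gauge plus tasks_total. A condensed model of the flag-to-list decision; the TQ_* values are local stand-ins, and the real TQ_NOQUEUE case additionally queues at the head of the priority list rather than the tail:

```c
#include <stdio.h>

#define TQ_FRONT	0x1	/* illustrative values only */
#define TQ_NOQUEUE	0x2

static const char *
dispatch_list(unsigned flags)
{
	/* TQ_NOQUEUE and TQ_FRONT both land on the priority list;
	 * everything else waits its turn on the pending list. */
	if (flags & (TQ_NOQUEUE | TQ_FRONT))
		return ("priority");
	return ("pending");
}

int main(void)
{
	printf("default  -> %s list (tasks_pending++)\n", dispatch_list(0));
	printf("TQ_FRONT -> %s list (tasks_priority++)\n",
	    dispatch_list(TQ_FRONT));
	return (0);
}
```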
@@ -908,6 +1015,8 @@ taskq_thread(void *args)
 	wake_up(&tq->tq_wait_waitq);
 	set_current_state(TASK_INTERRUPTIBLE);
 
+	TQSTAT_INC(tq, threads_total);
+
 	while (!kthread_should_stop()) {
 
 		if (list_empty(&tq->tq_pend_list) &&
@@ -919,9 +1028,15 @@ taskq_thread(void *args)
 			add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
 			spin_unlock_irqrestore(&tq->tq_lock, flags);
 
+			TQSTAT_INC(tq, thread_sleeps);
+			TQSTAT_INC(tq, threads_idle);
+
 			schedule();
 			seq_tasks = 0;
 
+			TQSTAT_DEC(tq, threads_idle);
+			TQSTAT_INC(tq, thread_wakeups);
+
 			spin_lock_irqsave_nested(&tq->tq_lock, flags,
 			    tq->tq_lock_class);
 			remove_wait_queue(&tq->tq_work_waitq, &wait);
@@ -931,6 +1046,8 @@ taskq_thread(void *args)
 
 		if ((t = taskq_next_ent(tq)) != NULL) {
 			list_del_init(&t->tqent_list);
+			TQSTAT_DEC_LIST(tq, t);
+			TQSTAT_DEC(tq, tasks_total);
 
 			/*
 			 * A TQENT_FLAG_PREALLOC task may be reused or freed
@@ -955,6 +1072,7 @@ taskq_thread(void *args)
 			tq->tq_nactive++;
 			spin_unlock_irqrestore(&tq->tq_lock, flags);
 
+			TQSTAT_INC(tq, threads_active);
 			DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t);
 
 			/* Perform the requested task */
@@ -962,8 +1080,17 @@ taskq_thread(void *args)
 
 			DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t);
 
+			TQSTAT_DEC(tq, threads_active);
+			if ((t->tqent_flags & TQENT_LIST_MASK) ==
+			    TQENT_LIST_PENDING)
+				TQSTAT_INC(tq, tasks_executed_normal);
+			else
+				TQSTAT_INC(tq, tasks_executed_priority);
+			TQSTAT_INC(tq, tasks_executed);
+
 			spin_lock_irqsave_nested(&tq->tq_lock, flags,
 			    tq->tq_lock_class);
+
 			tq->tq_nactive--;
 			list_del_init(&tqt->tqt_active_list);
 			tqt->tqt_task = NULL;
@@ -989,7 +1116,8 @@ taskq_thread(void *args)
 			tqt->tqt_id = TASKQID_INVALID;
 			tqt->tqt_flags = 0;
 			wake_up_all(&tq->tq_wait_waitq);
-		}
+		} else
+			TQSTAT_INC(tq, thread_wakeups_nowork);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -998,6 +1126,10 @@ taskq_thread(void *args)
 	__set_current_state(TASK_RUNNING);
 	tq->tq_nthreads--;
 	list_del_init(&tqt->tqt_thread_list);
+
+	TQSTAT_DEC(tq, threads_total);
+	TQSTAT_INC(tq, threads_destroyed);
+
 error:
 	kmem_free(tqt, sizeof (taskq_thread_t));
 	spin_unlock_irqrestore(&tq->tq_lock, flags);
@@ -1037,9 +1169,156 @@ taskq_thread_create(taskq_t *tq)
 
 	wake_up_process(tqt->tqt_thread);
 
+	TQSTAT_INC(tq, threads_created);
+
 	return (tqt);
 }
 
+static void
+taskq_stats_init(taskq_t *tq)
+{
+	taskq_sums_t *tqs = &tq->tq_sums;
+	wmsum_init(&tqs->tqs_threads_active, 0);
+	wmsum_init(&tqs->tqs_threads_idle, 0);
+	wmsum_init(&tqs->tqs_threads_total, 0);
+	wmsum_init(&tqs->tqs_tasks_pending, 0);
+	wmsum_init(&tqs->tqs_tasks_priority, 0);
+	wmsum_init(&tqs->tqs_tasks_total, 0);
+	wmsum_init(&tqs->tqs_tasks_delayed, 0);
+	wmsum_init(&tqs->tqs_entries_free, 0);
+	wmsum_init(&tqs->tqs_threads_created, 0);
+	wmsum_init(&tqs->tqs_threads_destroyed, 0);
+	wmsum_init(&tqs->tqs_tasks_dispatched, 0);
+	wmsum_init(&tqs->tqs_tasks_dispatched_delayed, 0);
+	wmsum_init(&tqs->tqs_tasks_executed_normal, 0);
+	wmsum_init(&tqs->tqs_tasks_executed_priority, 0);
+	wmsum_init(&tqs->tqs_tasks_executed, 0);
+	wmsum_init(&tqs->tqs_tasks_delayed_requeued, 0);
+	wmsum_init(&tqs->tqs_tasks_cancelled, 0);
+	wmsum_init(&tqs->tqs_thread_wakeups, 0);
+	wmsum_init(&tqs->tqs_thread_wakeups_nowork, 0);
+	wmsum_init(&tqs->tqs_thread_sleeps, 0);
+}
+
+static void
+taskq_stats_fini(taskq_t *tq)
+{
+	taskq_sums_t *tqs = &tq->tq_sums;
+	wmsum_fini(&tqs->tqs_threads_active);
+	wmsum_fini(&tqs->tqs_threads_idle);
+	wmsum_fini(&tqs->tqs_threads_total);
+	wmsum_fini(&tqs->tqs_tasks_pending);
+	wmsum_fini(&tqs->tqs_tasks_priority);
+	wmsum_fini(&tqs->tqs_tasks_total);
+	wmsum_fini(&tqs->tqs_tasks_delayed);
+	wmsum_fini(&tqs->tqs_entries_free);
+	wmsum_fini(&tqs->tqs_threads_created);
+	wmsum_fini(&tqs->tqs_threads_destroyed);
+	wmsum_fini(&tqs->tqs_tasks_dispatched);
+	wmsum_fini(&tqs->tqs_tasks_dispatched_delayed);
+	wmsum_fini(&tqs->tqs_tasks_executed_normal);
+	wmsum_fini(&tqs->tqs_tasks_executed_priority);
+	wmsum_fini(&tqs->tqs_tasks_executed);
+	wmsum_fini(&tqs->tqs_tasks_delayed_requeued);
+	wmsum_fini(&tqs->tqs_tasks_cancelled);
+	wmsum_fini(&tqs->tqs_thread_wakeups);
+	wmsum_fini(&tqs->tqs_thread_wakeups_nowork);
+	wmsum_fini(&tqs->tqs_thread_sleeps);
+}
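Note: because threads_active and threads_idle are bumped at the exact transition points inside taskq_thread(), sampling them against threads_total gives a rough utilization figure. A hypothetical monitor, assuming the plain-text named-kstat layout used by other SPL kstats (header rows, then name/type/value columns) and an example taskq file name:

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* example name: <taskq name>.<instance>; adjust for your system */
	FILE *f = fopen("/proc/spl/kstat/taskq/z_wr_iss.0", "r");
	if (f == NULL) {
		perror("open taskq kstat");
		return (1);
	}

	char line[256], name[64];
	int type;
	unsigned long long val, active = 0, total = 0;

	while (fgets(line, sizeof (line), f) != NULL) {
		/* header lines fail this parse and are skipped */
		if (sscanf(line, "%63s %d %llu", name, &type, &val) != 3)
			continue;
		if (strcmp(name, "threads_active") == 0)
			active = val;
		else if (strcmp(name, "threads_total") == 0)
			total = val;
	}
	fclose(f);

	if (total != 0)
		printf("utilization: %llu/%llu threads busy\n", active, total);
	return (0);
}
```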
+
+static int
+taskq_kstats_update(kstat_t *ksp, int rw)
+{
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	taskq_t *tq = ksp->ks_private;
+	taskq_kstats_t *tqks = ksp->ks_data;
+
+	tqks->tqks_threads_max.value.ui64 = tq->tq_maxthreads;
+	tqks->tqks_entry_pool_min.value.ui64 = tq->tq_minalloc;
+	tqks->tqks_entry_pool_max.value.ui64 = tq->tq_maxalloc;
+
+	taskq_sums_t *tqs = &tq->tq_sums;
+
+	tqks->tqks_threads_active.value.ui64 =
+	    wmsum_value(&tqs->tqs_threads_active);
+	tqks->tqks_threads_idle.value.ui64 =
+	    wmsum_value(&tqs->tqs_threads_idle);
+	tqks->tqks_threads_total.value.ui64 =
+	    wmsum_value(&tqs->tqs_threads_total);
+	tqks->tqks_tasks_pending.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_pending);
+	tqks->tqks_tasks_priority.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_priority);
+	tqks->tqks_tasks_total.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_total);
+	tqks->tqks_tasks_delayed.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_delayed);
+	tqks->tqks_entries_free.value.ui64 =
+	    wmsum_value(&tqs->tqs_entries_free);
+	tqks->tqks_threads_created.value.ui64 =
+	    wmsum_value(&tqs->tqs_threads_created);
+	tqks->tqks_threads_destroyed.value.ui64 =
+	    wmsum_value(&tqs->tqs_threads_destroyed);
+	tqks->tqks_tasks_dispatched.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_dispatched);
+	tqks->tqks_tasks_dispatched_delayed.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_dispatched_delayed);
+	tqks->tqks_tasks_executed_normal.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_executed_normal);
+	tqks->tqks_tasks_executed_priority.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_executed_priority);
+	tqks->tqks_tasks_executed.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_executed);
+	tqks->tqks_tasks_delayed_requeued.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_delayed_requeued);
+	tqks->tqks_tasks_cancelled.value.ui64 =
+	    wmsum_value(&tqs->tqs_tasks_cancelled);
+	tqks->tqks_thread_wakeups.value.ui64 =
+	    wmsum_value(&tqs->tqs_thread_wakeups);
+	tqks->tqks_thread_wakeups_nowork.value.ui64 =
+	    wmsum_value(&tqs->tqs_thread_wakeups_nowork);
+	tqks->tqks_thread_sleeps.value.ui64 =
+	    wmsum_value(&tqs->tqs_thread_sleeps);
+
+	return (0);
+}
+
+static void
+taskq_kstats_init(taskq_t *tq)
+{
+	char name[TASKQ_NAMELEN+5]; /* 5 for dot, 3x instance digits, null */
+	snprintf(name, sizeof (name), "%s.%d", tq->tq_name, tq->tq_instance);
+
+	kstat_t *ksp = kstat_create("taskq", 0, name, "misc",
+	    KSTAT_TYPE_NAMED, sizeof (taskq_kstats_t) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+
+	if (ksp == NULL)
+		return;
+
+	ksp->ks_private = tq;
+	ksp->ks_update = taskq_kstats_update;
+	ksp->ks_data = kmem_alloc(sizeof (taskq_kstats_t), KM_SLEEP);
+	memcpy(ksp->ks_data, &taskq_kstats_template, sizeof (taskq_kstats_t));
+	kstat_install(ksp);
+
+	tq->tq_ksp = ksp;
+}
+
+static void
+taskq_kstats_fini(taskq_t *tq)
+{
+	if (tq->tq_ksp == NULL)
+		return;
+
+	kmem_free(tq->tq_ksp->ks_data, sizeof (taskq_kstats_t));
+	kstat_delete(tq->tq_ksp);
+
+	tq->tq_ksp = NULL;
+}
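Note: taskq_kstats_update() is a read-side snapshot: the reader triggers it, the hot dispatch/execute paths never touch the kstat copy, and the wmsums are only folded into the kstat_named_t values on demand. A toy model of that update-on-read flow, with the kernel types stubbed out:

```c
#include <stdio.h>

typedef long long wmsum_t;
static long long wmsum_value(const wmsum_t *w) { return (*w); }

typedef struct { unsigned long long ui64; } kval_t;
typedef struct { kval_t tasks_dispatched; } kstats_t;
typedef struct {
	wmsum_t sum_tasks_dispatched;	/* live, written by hot paths */
	kstats_t ks;			/* snapshot, written on read */
} tq_t;

/* analogous to taskq_kstats_update(): refresh snapshot from live sums */
static void
update(tq_t *tq)
{
	tq->ks.tasks_dispatched.ui64 = wmsum_value(&tq->sum_tasks_dispatched);
}

int main(void)
{
	tq_t tq = { .sum_tasks_dispatched = 42 };

	update(&tq);	/* what a read of the kstat file triggers */
	printf("tasks_dispatched = %llu\n", tq.ks.tasks_dispatched.ui64);
	return (0);
}
```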
+
 taskq_t *
 taskq_create(const char *name, int threads_arg, pri_t pri,
     int minalloc, int maxalloc, uint_t flags)
@@ -1104,6 +1383,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
 	init_waitqueue_head(&tq->tq_wait_waitq);
 	tq->tq_lock_class = TQ_LOCK_GENERAL;
 	INIT_LIST_HEAD(&tq->tq_taskqs);
+	taskq_stats_init(tq);
 
 	if (flags & TASKQ_PREPOPULATE) {
 		spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
@@ -1137,14 +1417,17 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
 
 	if (rc) {
 		taskq_destroy(tq);
-		tq = NULL;
-	} else {
-		down_write(&tq_list_sem);
-		tq->tq_instance = taskq_find_by_name(name) + 1;
-		list_add_tail(&tq->tq_taskqs, &tq_list);
-		up_write(&tq_list_sem);
+		return (NULL);
 	}
 
+	down_write(&tq_list_sem);
+	tq->tq_instance = taskq_find_by_name(name) + 1;
+	list_add_tail(&tq->tq_taskqs, &tq_list);
+	up_write(&tq_list_sem);
+
+	/* Install kstats late, because the name includes tq_instance */
+	taskq_kstats_init(tq);
+
 	return (tq);
 }
 EXPORT_SYMBOL(taskq_create);
@@ -1177,6 +1460,8 @@ taskq_destroy(taskq_t *tq)
 
 	taskq_wait(tq);
 
+	taskq_kstats_fini(tq);
+
 	/* remove taskq from global list used by the kstats */
 	down_write(&tq_list_sem);
 	list_del(&tq->tq_taskqs);
@@ -1230,6 +1515,7 @@ taskq_destroy(taskq_t *tq)
 
 	spin_unlock_irqrestore(&tq->tq_lock, flags);
 
+	taskq_stats_fini(tq);
 	kmem_strfree(tq->tq_name);
 	kmem_free(tq, sizeof (taskq_t));
 }
@@ -1271,6 +1557,100 @@ taskq_create_synced(const char *name, int nthreads, pri_t pri,
 }
 EXPORT_SYMBOL(taskq_create_synced);
 
+static kstat_t *taskq_summary_ksp = NULL;
+
+static int
+spl_taskq_kstat_headers(char *buf, size_t size)
+{
+	size_t n = snprintf(buf, size,
+	    "%-20s | %-17s | %-23s\n"
+	    "%-20s | %-17s | %-23s\n"
+	    "%-20s | %-17s | %-23s\n",
+	    "", "threads", "tasks on queue",
+	    "taskq name", "tot [act idl] max", " pend [ norm high] dly",
+	    "--------------------", "-----------------",
+	    "-----------------------");
+	return (n >= size ? ENOMEM : 0);
+}
+
+static int
+spl_taskq_kstat_data(char *buf, size_t size, void *data)
+{
+	struct list_head *tql = NULL;
+	taskq_t *tq;
+	char name[TASKQ_NAMELEN+5]; /* 5 for dot, 3x instance digits, null */
+	char threads[25];
+	char tasks[30];
+	size_t n;
+	int err = 0;
+
+	down_read(&tq_list_sem);
+	list_for_each_prev(tql, &tq_list) {
+		tq = list_entry(tql, taskq_t, tq_taskqs);
+
+		mutex_enter(tq->tq_ksp->ks_lock);
+		taskq_kstats_update(tq->tq_ksp, KSTAT_READ);
+		taskq_kstats_t *tqks = tq->tq_ksp->ks_data;
+
+		snprintf(name, sizeof (name), "%s.%d", tq->tq_name,
+		    tq->tq_instance);
+		snprintf(threads, sizeof (threads), "%3llu [%3llu %3llu] %3llu",
+		    tqks->tqks_threads_total.value.ui64,
+		    tqks->tqks_threads_active.value.ui64,
+		    tqks->tqks_threads_idle.value.ui64,
+		    tqks->tqks_threads_max.value.ui64);
+		snprintf(tasks, sizeof (tasks), "%5llu [%5llu %5llu] %3llu",
+		    tqks->tqks_tasks_total.value.ui64,
+		    tqks->tqks_tasks_pending.value.ui64,
+		    tqks->tqks_tasks_priority.value.ui64,
+		    tqks->tqks_tasks_delayed.value.ui64);
+
+		mutex_exit(tq->tq_ksp->ks_lock);
+
+		n = snprintf(buf, size, "%-20s | %-17s | %-23s\n",
+		    name, threads, tasks);
+		if (n >= size) {
+			err = ENOMEM;
+			break;
+		}
+
+		buf = &buf[n];
+		size -= n;
+	}
+
+	up_read(&tq_list_sem);
+
+	return (err);
+}
+
+static void
+spl_taskq_kstat_init(void)
+{
+	kstat_t *ksp = kstat_create("taskq", 0, "summary", "misc",
+	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
+
+	if (ksp == NULL)
+		return;
+
+	ksp->ks_data = (void *)(uintptr_t)1;
+	ksp->ks_ndata = 1;
+	kstat_set_raw_ops(ksp, spl_taskq_kstat_headers,
+	    spl_taskq_kstat_data, NULL);
+	kstat_install(ksp);
+
+	taskq_summary_ksp = ksp;
+}
+
+static void
+spl_taskq_kstat_fini(void)
+{
+	if (taskq_summary_ksp == NULL)
+		return;
+
+	kstat_delete(taskq_summary_ksp);
+	taskq_summary_ksp = NULL;
+}
+
 static unsigned int spl_taskq_kick = 0;
 
 /*
@@ -1451,12 +1831,16 @@ spl_taskq_init(void)
 	 */
 	dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
 
+	spl_taskq_kstat_init();
+
 	return (0);
 }
 
 void
 spl_taskq_fini(void)
 {
+	spl_taskq_kstat_fini();
+
 	taskq_destroy(dynamic_taskq);
 	dynamic_taskq = NULL;
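Note: for reference, the header and data format strings above produce output along these lines; the taskq names and values are made up for illustration:

```
                     | threads           | tasks on queue
taskq name           | tot [act idl] max |  pend [ norm high] dly
-------------------- | ----------------- | -----------------------
spl_system_taskq.0   |  64 [  0  64]  64 |     0 [    0     0]   0
z_wr_iss.0           |   6 [  6   0]   6 |    18 [   18     0]   0
```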