From e1cd8395b36e951d4b1dc06395b57aae03ea3708 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 26 Sep 2016 00:14:09 +0300 Subject: [PATCH] lkl: add support for direct syscalls This patch reworks the LKL syscall interface to avoid the performance penalty caused by the context switch between the host thread and the associated syscall kernel thread. The main idea is to associate Linux threads (struct task_struct) with existing host threads instead of creating a new corresponding "syscall thread" for host threads that issue system calls. When issuing a system call from a host thread, all we need to do is: (a) make sure we exclusively have the cpu by calling lkl_cpu_get() and (b) make sure we are in the right Linux thread context by calling switch_to_host_task(). When a host thread finishes a system call, while it does yield the cpu, it does not schedule out the Linux thread context to keep it available for the next system call. This means that while interrupts will run, kernel threads will be delayed until the next system call. The patch improves the LKL syscall latency by ~60 times on my machine on synthetic benchmarks. This patch also removes the lkl_create_syscall_thread() and lkl_stop_syscall_thread() as they are not necessary anymore. 
Signed-off-by: Octavian Purdila --- arch/lkl/include/asm/syscalls.h | 4 +- arch/lkl/include/asm/unistd.h | 1 - arch/lkl/include/uapi/asm/unistd.h | 3 +- arch/lkl/kernel/setup.c | 13 +- arch/lkl/kernel/syscalls.c | 412 +++++------------------------ tools/lkl/include/lkl.h | 18 -- 6 files changed, 82 insertions(+), 369 deletions(-) diff --git a/arch/lkl/include/asm/syscalls.h b/arch/lkl/include/asm/syscalls.h index b1e2d08e1bddf3..43956b4bbf0ad6 100644 --- a/arch/lkl/include/asm/syscalls.h +++ b/arch/lkl/include/asm/syscalls.h @@ -1,8 +1,8 @@ #ifndef _ASM_LKL_SYSCALLS_H #define _ASM_LKL_SYSCALLS_H -int initial_syscall_thread(void *); -void free_initial_syscall_thread(void); +int syscalls_init(void); +void syscalls_cleanup(void); long lkl_syscall(long no, long *params); #define sys_mmap sys_mmap_pgoff diff --git a/arch/lkl/include/asm/unistd.h b/arch/lkl/include/asm/unistd.h index 39d62476b35f5a..c3451dfdb4e937 100644 --- a/arch/lkl/include/asm/unistd.h +++ b/arch/lkl/include/asm/unistd.h @@ -1,6 +1,5 @@ #include -__SYSCALL(__NR_create_syscall_thread, sys_create_syscall_thread) __SYSCALL(__NR_virtio_mmio_device_add, sys_virtio_mmio_device_add) #define __SC_ASCII(t, a) #t "," #a diff --git a/arch/lkl/include/uapi/asm/unistd.h b/arch/lkl/include/uapi/asm/unistd.h index ad47ffe9eb6dd3..654215e8189cc6 100644 --- a/arch/lkl/include/uapi/asm/unistd.h +++ b/arch/lkl/include/uapi/asm/unistd.h @@ -8,5 +8,4 @@ #include -#define __NR_create_syscall_thread (__NR_arch_specific_syscall + 0) -#define __NR_virtio_mmio_device_add (__NR_arch_specific_syscall + 1) +#define __NR_virtio_mmio_device_add (__NR_arch_specific_syscall + 0) diff --git a/arch/lkl/kernel/setup.c b/arch/lkl/kernel/setup.c index cf81789949a166..59add61bfb0eb6 100644 --- a/arch/lkl/kernel/setup.c +++ b/arch/lkl/kernel/setup.c @@ -75,8 +75,12 @@ int __init lkl_start_kernel(struct lkl_host_operations *ops, } lkl_ops->sem_down(init_sem); + current_thread_info()->tid = lkl_ops->thread_self(); + 
lkl_cpu_change_owner(current_thread_info()->tid); + lkl_cpu_put(); is_running = 1; + return 0; out_free_init_sem: @@ -118,7 +122,7 @@ long lkl_sys_halt(void) lkl_cpu_wait_shutdown(); - free_initial_syscall_thread(); + syscalls_cleanup(); threads_cleanup(); /* Shutdown the clockevents source. */ tick_suspend_local(); @@ -151,9 +155,12 @@ static int lkl_run_init(struct linux_binprm *bprm) set_binfmt(&lkl_run_init_binfmt); - initial_syscall_thread(init_sem); + init_pid_ns.child_reaper = 0; + + syscalls_init(); - kernel_halt(); + lkl_ops->sem_up(init_sem); + lkl_ops->thread_exit(); return 0; } diff --git a/arch/lkl/kernel/syscalls.c b/arch/lkl/kernel/syscalls.c index e0589cdbe04bfc..790d6c9d4c42c8 100644 --- a/arch/lkl/kernel/syscalls.c +++ b/arch/lkl/kernel/syscalls.c @@ -13,9 +13,8 @@ #include #include #include +#include -struct syscall_thread_data; -static asmlinkage long sys_create_syscall_thread(struct syscall_thread_data *); static asmlinkage long sys_virtio_mmio_device_add(long base, long size, unsigned int irq); @@ -33,399 +32,126 @@ syscall_handler_t syscall_table[__NR_syscalls] = { #endif }; -struct syscall { - long no, *params, ret; -}; - -static struct syscall_thread_data { - struct syscall *s; - void *mutex, *completion; - int irq; - /* to be accessed from Linux context only */ - wait_queue_head_t wqh; - struct list_head list; - bool stop; - struct completion stopped; -} default_syscall_thread_data; - -static LIST_HEAD(syscall_threads); - -static struct syscall *dequeue_syscall(struct syscall_thread_data *data) -{ - - return (struct syscall *)__sync_fetch_and_and((long *)&data->s, 0); -} - -static long run_syscall(struct syscall *s) +static long run_syscall(long no, long *params) { long ret; - if (s->no < 0 || s->no >= __NR_syscalls) - ret = -ENOSYS; - else { - ret = syscall_table[s->no](s->params[0], s->params[1], - s->params[2], s->params[3], - s->params[4], s->params[5]); - } - s->ret = ret; + if (no < 0 || no >= __NR_syscalls) + return -ENOSYS; + + ret 
= syscall_table[no](params[0], params[1], params[2], params[3], + params[4], params[5]); task_work_run(); return ret; } -static irqreturn_t syscall_irq_handler(int irq, void *dev_id) -{ - struct syscall_thread_data *data = (struct syscall_thread_data *)dev_id; - - wake_up(&data->wqh); - return IRQ_HANDLED; -} +#define CLONE_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_THREAD | \ + CLONE_SIGHAND | SIGCHLD) -static void cleanup_syscall_threads(void); +static int host_task_id; +static struct task_struct *host0; -int syscall_thread(void *_data) +static int new_host_task(struct task_struct **task) { - struct syscall_thread_data *data; - struct syscall *s; - int ret; - static int count; - - data = (struct syscall_thread_data *)_data; - init_waitqueue_head(&data->wqh); - list_add(&data->list, &syscall_threads); - init_completion(&data->stopped); - - snprintf(current->comm, sizeof(current->comm), "ksyscalld%d", count++); - - data->irq = lkl_get_free_irq("syscall"); - if (data->irq < 0) { - pr_err("lkl: %s: failed to allocate irq: %d\n", __func__, - data->irq); - return data->irq; - } - - ret = request_irq(data->irq, syscall_irq_handler, 0, current->comm, - data); - if (ret) { - pr_err("lkl: %s: failed to request irq %d: %d\n", __func__, - data->irq, ret); - lkl_put_irq(data->irq, "syscall"); - data->irq = -1; - return ret; - } - - pr_info("lkl: syscall thread %s initialized (irq%d)\n", current->comm, - data->irq); - - /* system call thread is ready */ - lkl_ops->sem_up(data->completion); - - while (1) { - wait_event(data->wqh, - (s = dequeue_syscall(data)) != NULL || data->stop); - - if (data->stop || s->no == __NR_reboot) - break; - - run_syscall(s); - - lkl_ops->sem_up(data->completion); - } + pid_t pid; - if (data == &default_syscall_thread_data) - cleanup_syscall_threads(); + switch_to_host_task(host0); - pr_info("lkl: exiting syscall thread %s\n", current->comm); + pid = kernel_thread(NULL, NULL, CLONE_FLAGS); + if (pid < 0) + return pid; - list_del(&data->list); 
+ rcu_read_lock(); + *task = find_task_by_pid_ns(pid, &init_pid_ns); + rcu_read_unlock(); - free_irq(data->irq, data); - lkl_put_irq(data->irq, "syscall"); + host_task_id++; - if (data->stop) { - complete(&data->stopped); - } else { - s->ret = 0; - lkl_ops->sem_up(data->completion); - } + snprintf((*task)->comm, sizeof((*task)->comm), "host%d", host_task_id); return 0; } -static unsigned int syscall_thread_data_key; - -static int syscall_thread_data_init(struct syscall_thread_data *data, - void *completion) +static void del_host_task(void *arg) { - data->mutex = lkl_ops->sem_alloc(1); - if (!data->mutex) - return -ENOMEM; + struct task_struct *task = (struct task_struct *)arg; - if (!completion) - data->completion = lkl_ops->sem_alloc(0); - else - data->completion = completion; - if (!data->completion) { - lkl_ops->sem_free(data->mutex); - data->mutex = NULL; - return -ENOMEM; - } + if (lkl_cpu_get() < 0) + return; - return 0; + switch_to_host_task(task); + host_task_id--; + thread_set_sched_exit(); + do_exit(0); } -static long __lkl_syscall(struct syscall_thread_data *data, long no, - long *params) -{ - struct syscall s; - - s.no = no; - s.params = params; - - lkl_ops->sem_down(data->mutex); - data->s = &s; - lkl_trigger_irq(data->irq); - lkl_ops->sem_down(data->completion); - lkl_ops->sem_up(data->mutex); +static unsigned int task_key; - return s.ret; -} - -static struct syscall_thread_data *__lkl_create_syscall_thread(void) +long lkl_syscall(long no, long *params) { - struct syscall_thread_data *data; - long params[6], ret; - - if (!lkl_ops->tls_set) - return ERR_PTR(-ENOTSUPP); - - data = lkl_ops->mem_alloc(sizeof(*data)); - if (!data) - return ERR_PTR(-ENOMEM); - - memset(data, 0, sizeof(*data)); - - ret = syscall_thread_data_init(data, NULL); - if (ret < 0) - goto out_free; - - ret = lkl_ops->tls_set(syscall_thread_data_key, data); - if (ret < 0) - goto out_free; + struct task_struct *task = host0; + static int count; + long ret; - params[0] = (long)data; - 
ret = __lkl_syscall(&default_syscall_thread_data, - __NR_create_syscall_thread, params); + ret = lkl_cpu_get(); if (ret < 0) - goto out_free; - - lkl_ops->sem_down(data->completion); - - return data; - -out_free: - lkl_ops->sem_free(data->completion); - lkl_ops->sem_free(data->mutex); - lkl_ops->mem_free(data); - - return ERR_PTR(ret); -} - -int lkl_create_syscall_thread(void) -{ - struct syscall_thread_data *data = __lkl_create_syscall_thread(); - - if (IS_ERR(data)) - return PTR_ERR(data); - return 0; -} - -static int kernel_stop_syscall_thread(struct syscall_thread_data *data) -{ - data->stop = true; - wake_up(&data->wqh); - wait_for_completion(&data->stopped); - - return 0; -} - -static int __lkl_stop_syscall_thread(struct syscall_thread_data *data, - bool host) -{ - long ret, params[6]; - - if (host) - ret = __lkl_syscall(data, __NR_reboot, params); - else - ret = kernel_stop_syscall_thread(data); - if (ret) return ret; - lkl_ops->sem_free(data->completion); - lkl_ops->sem_free(data->mutex); - lkl_ops->mem_free(data); - - return 0; -} + count++; -int lkl_stop_syscall_thread(void) -{ - struct syscall_thread_data *data = NULL; - int ret; - - if (lkl_ops->tls_get) - data = lkl_ops->tls_get(syscall_thread_data_key); - if (!data) - return -EINVAL; - - ret = __lkl_stop_syscall_thread(data, true); - if (!ret && lkl_ops->tls_set) - lkl_ops->tls_set(syscall_thread_data_key, NULL); - return ret; -} - -static int auto_syscall_threads = true; -static int __init setup_auto_syscall_threads(char *str) -{ - get_option (&str, &auto_syscall_threads); - - return 1; -} -__setup("lkl_auto_syscall_threads=", setup_auto_syscall_threads); - - -long lkl_syscall(long no, long *params) -{ - struct syscall_thread_data *data = NULL; - - if (auto_syscall_threads && lkl_ops->tls_get) { - data = lkl_ops->tls_get(syscall_thread_data_key); - if (!data) { - data = __lkl_create_syscall_thread(); - if (!data) - lkl_puts("failed to create syscall thread\n"); + if (lkl_ops->tls_get) { + task = 
lkl_ops->tls_get(task_key); + if (!task) { + ret = new_host_task(&task); + if (ret) + goto out; + lkl_ops->tls_set(task_key, task); } } - if (!data || no == __NR_reboot) - data = &default_syscall_thread_data; - return __lkl_syscall(data, no, params); -} + switch_to_host_task(task); -static asmlinkage long -sys_create_syscall_thread(struct syscall_thread_data *data) -{ - pid_t pid; + ret = run_syscall(no, params); - pid = kernel_thread(syscall_thread, data, CLONE_VM | CLONE_FS | - CLONE_FILES | CLONE_THREAD | CLONE_SIGHAND | SIGCHLD); - if (pid < 0) - return pid; - - return 0; -} - - -/* - * A synchronization algorithm between cleanup_syscall_threads (which terminates - * all remaining syscall threads) and destructors functions (which frees a - * syscall thread as soon as the associated host thread terminates) is required - * since destructor functions run in host context and is not subject to kernel - * scheduling. - * - * An atomic counter is used to count the number of running destructor functions - * and allows the cleanup function to wait for the running destructor functions - * to complete. - * - * The cleanup functions adds MAX_SYSCALL_THREADS to this counter and this - * allows the destructor functions to check if the cleanup process has started - * and abort execution. This prevents "late" destructors from trying to free the - * syscall threads. - * - * This algorithm assumes that we never have more the MAX_SYSCALL_THREADS - * running. - */ -#define MAX_SYSCALL_THREADS 1000000 -static unsigned int destrs; - -/* - * This is called when the host thread terminates if auto_syscall_threads is - * enabled. We use it to remove the associated kernel syscall thread since it is - * not going to be used anymore. - * - * Note that this run in host context, not kernel context. 
- * - * To avoid races between the destructor and lkl_sys_halt we announce that a - * destructor is running and also check to see if lkl_sys_halt is running, in - * which case we bail out - the kernel thread is going to be / has been stopped - * by lkl_sys_halt. - */ -static void syscall_thread_destructor(void *_data) -{ - struct syscall_thread_data *data = _data; - - if (!data) - return; - - if (__sync_fetch_and_add(&destrs, 1) < MAX_SYSCALL_THREADS) - __lkl_stop_syscall_thread(data, true); - __sync_fetch_and_sub(&destrs, 1); -} - -static void cleanup_syscall_threads(void) -{ - struct syscall_thread_data *i = NULL, *aux; - - /* announce destructors that we are stopping */ - __sync_fetch_and_add(&destrs, MAX_SYSCALL_THREADS); + if (count > 1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!thread_set_sched_jmp()) + schedule(); + count--; + return ret; + } - /* wait for any pending destructors to complete */ - while (__sync_fetch_and_add(&destrs, 0) > MAX_SYSCALL_THREADS) - schedule_timeout(1); +out: + count--; + lkl_cpu_put(); - /* no more destructors, we can safely remove the remaining threads */ - list_for_each_entry_safe(i, aux, &syscall_threads, list) { - if (i == &default_syscall_thread_data) - continue; - __lkl_stop_syscall_thread(i, false); - } + return ret; } -int initial_syscall_thread(void *sem) +int syscalls_init(void) { - void (*destr)(void *) = NULL; int ret = 0; - if (auto_syscall_threads) - destr = syscall_thread_destructor; - - if (lkl_ops->tls_alloc) - ret = lkl_ops->tls_alloc(&syscall_thread_data_key, destr); - if (ret) - return ret; - - init_pid_ns.child_reaper = 0; - - ret = syscall_thread_data_init(&default_syscall_thread_data, sem); - if (ret) - goto out; - - ret = syscall_thread(&default_syscall_thread_data); - -out: - if (lkl_ops->tls_free) - lkl_ops->tls_free(syscall_thread_data_key); + snprintf(current->comm, sizeof(current->comm), "host0"); + set_thread_flag(TIF_HOST_THREAD); + host0 = current; + if (lkl_ops->tls_alloc) { + ret = 
lkl_ops->tls_alloc(&task_key, del_host_task); + if (ret) + return ret; + } return ret; } -void free_initial_syscall_thread(void) +void syscalls_cleanup(void) { - lkl_ops->sem_free(default_syscall_thread_data.mutex); - lkl_ops->sem_free(default_syscall_thread_data.completion); + if (lkl_ops->tls_free) + lkl_ops->tls_free(task_key); } SYSCALL_DEFINE3(virtio_mmio_device_add, long, base, long, size, unsigned int, diff --git a/tools/lkl/include/lkl.h b/tools/lkl/include/lkl.h index 3ceee86b61c504..d2c9797770a937 100644 --- a/tools/lkl/include/lkl.h +++ b/tools/lkl/include/lkl.h @@ -340,24 +340,6 @@ void lkl_netdev_free(struct lkl_netdev *nd); */ int lkl_netdev_get_ifindex(int id); -/** - * lkl_create_syscall_thread - create an additional system call thread - * - * Create a new system call thread. All subsequent system calls issued from this - * host thread are queued to the newly created system call thread. - * - * System call threads must be stopped up by calling @lkl_stop_syscall_thread - * before @lkl_halt is called. - */ -int lkl_create_syscall_thread(void); - -/** - * lkl_stop_syscall_thread - stop the associated system call thread - * - * Stop the system call thread associated with this host thread, if any. - */ -int lkl_stop_syscall_thread(void); - /** * lkl_netdev_tap_create - create TAP net_device for the virtio net backend *