From 8cd1bdcfe0b8131c17c43991c321d7004ebd0c14 Mon Sep 17 00:00:00 2001
From: Yuan Liu
Date: Tue, 1 Nov 2016 16:10:12 -0700
Subject: [PATCH] lkl: Direct irq and fix direct syscall degradation

There are two major issues in the current direct syscall implementation:

1. When there is already a thread pending in a syscall, a direct syscall
   degrades to waking up the idle thread, and performance is poor in that
   case. This is actually common in applications that have a traffic-less
   control connection.
2. An IRQ is not direct if LKL is in idle.

Both issues stem from the same limitation: LKL cannot reschedule while it
is in idle. This patch adds that support.

There are two downsides to this patch:

1. It needs to change kernel/sched/idle.c to expose cpu_idle_loop.
2. lkl_idle_tail_schedule must be kept in sync with idle.c.

These downsides seem acceptable given the performance gained from this
patch. For the common case it saves one context switch (direct irq), and I
observe a 10% TCP_RR improvement on my desktop.

Signed-off-by: Yuan Liu
---
 arch/lkl/include/asm/cpu.h         |  5 +-
 arch/lkl/include/asm/thread_info.h |  1 +
 arch/lkl/kernel/cpu.c              | 94 +++++++++++++++++++++++++-----
 arch/lkl/kernel/syscalls.c         | 12 ----
 arch/lkl/kernel/threads.c          | 35 +++++++++--
 5 files changed, 115 insertions(+), 32 deletions(-)
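Note (illustration only, not part of the patch to be applied): the reschedule
paths in this series rely on thread_set_sched_jmp() recording a jump point
before calling schedule() or lkl_idle_tail_schedule(), so that __switch_to()
can later longjmp the host thread straight back instead of parking it on its
scheduling semaphore. A minimal standalone sketch of that pattern, with plain
setjmp()/longjmp() standing in for the lkl_ops jump-buffer helpers
(fake_schedule() and sched_jb are made-up names for illustration):

/*
 * Hypothetical standalone demo: save a jump point, enter the "scheduler",
 * and return to the caller via longjmp instead of blocking.
 */
#include <setjmp.h>
#include <stdio.h>

static jmp_buf sched_jb;

static void fake_schedule(void)
{
	/* the "scheduler" decides to resume the saved context directly */
	printf("fake_schedule(): jumping back to the saved point\n");
	longjmp(sched_jb, 1);
}

int main(void)
{
	if (!setjmp(sched_jb)) {
		/* first pass: jump point saved, call into the scheduler */
		fake_schedule();
	} else {
		/* second pass: resumed via longjmp, as if schedule() returned */
		printf("resumed after fake_schedule(), host thread keeps running\n");
	}
	return 0;
}

The point is only that control comes back to the saved point without the
calling host thread ever blocking, which is what makes the direct syscall
and direct irq paths below possible.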
diff --git a/arch/lkl/include/asm/cpu.h b/arch/lkl/include/asm/cpu.h
index 1bffb16a51f467..67436cd72f43de 100644
--- a/arch/lkl/include/asm/cpu.h
+++ b/arch/lkl/include/asm/cpu.h
@@ -7,8 +7,11 @@ int lkl_cpu_try_run_irq(int irq);
 int lkl_cpu_init(void);
 void lkl_cpu_shutdown(void);
 void lkl_cpu_wait_shutdown(void);
-void lkl_cpu_wakeup(void);
+void lkl_cpu_wakeup_idle(void);
 void lkl_cpu_change_owner(lkl_thread_t owner);
 void lkl_cpu_set_irqs_pending(void);
+void lkl_idle_tail_schedule(void);
+int lkl_cpu_idle_pending(void);
+extern void cpu_idle_loop(void);
 
 #endif /* _ASM_LKL_CPU_H */
diff --git a/arch/lkl/include/asm/thread_info.h b/arch/lkl/include/asm/thread_info.h
index cd4b91dd1464b6..2202be67b7bce7 100644
--- a/arch/lkl/include/asm/thread_info.h
+++ b/arch/lkl/include/asm/thread_info.h
@@ -60,6 +60,7 @@ void threads_cleanup(void);
 #define TIF_SCHED_JB		7
 #define TIF_SCHED_EXIT		8
 #define TIF_HOST_THREAD		9
+#define TIF_IDLE		10
 
 static inline void set_ti_thread_flag(struct thread_info *ti, int flag);
diff --git a/arch/lkl/kernel/cpu.c b/arch/lkl/kernel/cpu.c
index c99db15abe3f17..7bf282526a02e1 100644
--- a/arch/lkl/kernel/cpu.c
+++ b/arch/lkl/kernel/cpu.c
@@ -1,5 +1,8 @@
+#include
+#include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -50,6 +53,10 @@ struct lkl_cpu {
 	struct lkl_sem *sem;
 	/* semaphore for the idle thread */
 	struct lkl_sem *idle_sem;
+	/* if the idle thread is pending */
+	bool idle_pending;
+	/* jmp_buf used for the idle thread to restart */
+	struct lkl_jmp_buf idle_jb;
 	/* semaphore used for shutdown */
 	struct lkl_sem *shutdown_sem;
 } cpu;
@@ -126,18 +133,19 @@ void lkl_cpu_put(void)
 		lkl_ops->mutex_lock(cpu.lock);
 	}
 
-	if (need_resched()) {
+	if (need_resched() && cpu.count == 1) {
+		if (in_interrupt())
+			lkl_bug("%s: in interrupt\n", __func__);
+		lkl_ops->mutex_unlock(cpu.lock);
 		if (test_thread_flag(TIF_HOST_THREAD)) {
-			if (cpu.count == 1 && !in_interrupt()) {
-				lkl_ops->mutex_unlock(cpu.lock);
-				set_current_state(TASK_UNINTERRUPTIBLE);
-				if (!thread_set_sched_jmp())
-					schedule();
-				return;
-			}
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (!thread_set_sched_jmp())
+				schedule();
 		} else {
-			lkl_cpu_wakeup();
+			if (!thread_set_sched_jmp())
+				lkl_idle_tail_schedule();
 		}
+		return;
 	}
 
 	if (--cpu.count > 0) {
@@ -210,20 +218,36 @@ void arch_cpu_idle(void)
 		lkl_ops->thread_exit();
 	}
 
-	/* enable irqs now to allow direct irqs to run */
 	local_irq_enable();
+	if (need_resched())
+		return;
+
+	cpu.idle_pending = true;
 	lkl_cpu_put();
 
 	lkl_ops->sem_down(cpu.idle_sem);
 
-	lkl_cpu_get();
+	cpu.idle_pending = false;
+	lkl_ops->jmp_buf_longjmp(&cpu.idle_jb, 1);
+}
+
-	run_irqs();
+void arch_cpu_idle_prepare(void)
+{
+	set_ti_thread_flag(current_thread_info(), TIF_IDLE);
+	/*
+	 * We hijack the idle loop here so that we can let the idle thread
+	 * jump back to the beginning.
+	 */
+	while (1) {
+		if (!lkl_ops->jmp_buf_set(&cpu.idle_jb))
+			cpu_idle_loop();
+	}
 }
 
-void lkl_cpu_wakeup(void)
+void lkl_cpu_wakeup_idle(void)
 {
 	lkl_ops->sem_up(cpu.idle_sem);
 }
@@ -242,3 +266,47 @@ int lkl_cpu_init(void)
 
 	return 0;
 }
+
+/*
+ * Simulate the exit path of the idle loop so that we can schedule when LKL
+ * is in idle.
+ * It is just a duplication of the code in idle.c, so a better way would be
+ * to refactor idle.c to expose such a function.
+ */
+void lkl_idle_tail_schedule(void)
+{
+
+	if (!cpu.idle_pending ||
+	    !test_bit(TIF_IDLE, &current_thread_info()->flags))
+		lkl_bug("%s: not in idle\n", __func__);
+
+	start_critical_timings();
+	__current_set_polling();
+
+	if (WARN_ON_ONCE(irqs_disabled()))
+		local_irq_enable();
+
+	rcu_idle_exit();
+	arch_cpu_idle_exit();
+	preempt_set_need_resched();
+	tick_nohz_idle_exit();
+	__current_clr_polling();
+
+	/*
+	 * memory barrier copied from idle.c
+	 */
+	smp_mb__after_atomic();
+
+	/*
+	 * Did not find a way to include kernel/sched/sched.h for
+	 * sched_ttwu_pending().
+	 * Anyway, it is a no-op when CONFIG_SMP is not set.
+	 */
+
+	schedule_preempt_disabled();
+}
+
+int lkl_cpu_idle_pending(void)
+{
+	return cpu.idle_pending;
+}
diff --git a/arch/lkl/kernel/syscalls.c b/arch/lkl/kernel/syscalls.c
index 790d6c9d4c42c8..ba733b8a8e4030 100644
--- a/arch/lkl/kernel/syscalls.c
+++ b/arch/lkl/kernel/syscalls.c
@@ -93,15 +93,12 @@ static unsigned int task_key;
 long lkl_syscall(long no, long *params)
 {
 	struct task_struct *task = host0;
-	static int count;
 	long ret;
 
 	ret = lkl_cpu_get();
 	if (ret < 0)
 		return ret;
 
-	count++;
-
 	if (lkl_ops->tls_get) {
 		task = lkl_ops->tls_get(task_key);
 		if (!task) {
@@ -116,16 +113,7 @@ long lkl_syscall(long no, long *params)
 
 	ret = run_syscall(no, params);
 
-	if (count > 1) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!thread_set_sched_jmp())
-			schedule();
-		count--;
-		return ret;
-	}
-
 out:
-	count--;
 	lkl_cpu_put();
 
 	return ret;
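Note (illustration only, not part of the patch): the restart path above works
because arch_cpu_idle_prepare() wraps cpu_idle_loop() in a jump point, and
arch_cpu_idle() longjmps back to it after being woken, so the idle loop always
re-enters from a clean state once lkl_idle_tail_schedule() has replayed the
loop's exit path. A standalone sketch of that control flow, with plain
setjmp()/longjmp() standing in for lkl_ops->jmp_buf_set()/jmp_buf_longjmp()
and a counter standing in for cpu.idle_sem wake-ups (fake_idle_loop() is a
made-up name):

/*
 * Hypothetical standalone demo: an outer loop records a jump point and the
 * idle body longjmps back to it on "wakeup", so each entry starts clean.
 */
#include <setjmp.h>
#include <stdio.h>

static jmp_buf idle_jb;
static int wakeups;

static void fake_idle_loop(void)
{
	printf("idle loop entry %d\n", wakeups);
	if (++wakeups < 3) {
		/* "woken up": restart the idle loop from the very top */
		longjmp(idle_jb, 1);
	}
	/* third entry: fall through and let the demo finish */
}

int main(void)
{
	/* stands in for the while (1) in arch_cpu_idle_prepare() */
	while (wakeups < 3) {
		if (!setjmp(idle_jb))
			fake_idle_loop();
		/* a longjmp lands here and the loop re-enters the idle body */
	}
	return 0;
}

Every longjmp lands just after setjmp() returned non-zero, so the next
iteration re-enters the idle body from its first line rather than resuming
in the middle of its exit path.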
diff --git a/arch/lkl/kernel/threads.c b/arch/lkl/kernel/threads.c
index 049344b585209a..5480ecdc806d8d 100644
--- a/arch/lkl/kernel/threads.c
+++ b/arch/lkl/kernel/threads.c
@@ -84,25 +84,48 @@ struct thread_info *_current_thread_info = &init_thread_union.thread_info;
  */
 static struct task_struct *abs_prev = &init_task;
 
+/*
+ * Reimplement to make sure there is no atomic op.
+ * Copied from include/asm-generic/bitops/non-atomic.h
+ */
+static inline int test_bit_no_atomic(int nr, const unsigned long *addr)
+{
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
 struct task_struct *__switch_to(struct task_struct *prev,
 				struct task_struct *next)
 {
 	struct thread_info *_prev = task_thread_info(prev);
 	struct thread_info *_next = task_thread_info(next);
 	unsigned long _prev_flags = _prev->flags;
+	bool wakeup_idle = test_bit_no_atomic(TIF_IDLE, &_next->flags) &&
+			   lkl_cpu_idle_pending();
 
 	_current_thread_info = task_thread_info(next);
 	_next->prev_sched = prev;
 	abs_prev = prev;
 
 	BUG_ON(!_next->tid);
-	lkl_cpu_change_owner(_next->tid);
 
-	lkl_ops->sem_up(_next->sched_sem);
-	if (test_bit(TIF_SCHED_JB, &_prev_flags)) {
+	if (test_bit_no_atomic(TIF_SCHED_JB, &_prev_flags)) {
+		/* Atomic. Must be done before waking up next */
 		clear_ti_thread_flag(_prev, TIF_SCHED_JB);
+	}
+	if (wakeup_idle)
+		schedule_tail(abs_prev);
+	lkl_cpu_change_owner(_next->tid);
+
+	/* No kernel code is allowed after waking up next */
+	if (wakeup_idle)
+		lkl_cpu_wakeup_idle();
+	else
+		lkl_ops->sem_up(_next->sched_sem);
+
+	if (test_bit_no_atomic(TIF_SCHED_JB, &_prev_flags)) {
 		lkl_ops->jmp_buf_longjmp(&_prev->sched_jb, 1);
-	} else if (test_bit(TIF_SCHED_EXIT, &_prev_flags)) {
+	} else if (test_bit_no_atomic(TIF_SCHED_EXIT, &_prev_flags)) {
 		lkl_ops->thread_exit();
 	} else {
 		lkl_ops->sem_down(_prev->sched_sem);
@@ -132,8 +155,8 @@ void switch_to_host_task(struct task_struct *task)
 		if (!thread_set_sched_jmp())
 			schedule();
 	} else {
-		lkl_cpu_wakeup();
-		lkl_cpu_put();
+		if (!thread_set_sched_jmp())
+			lkl_idle_tail_schedule();
 	}
 
 	lkl_ops->sem_down(task_thread_info(task)->sched_sem);
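Note (illustration only, not part of the patch): the "No kernel code is
allowed after waking up next" comment in __switch_to() encodes an ordering
rule: once the next host thread is released (lkl_cpu_wakeup_idle() or
sem_up()), it may immediately own the CPU, so the owner change and every flag
update must already be done, which is also why the flags are snapshotted with
the non-atomic test_bit_no_atomic() beforehand. A standalone sketch of that
handoff discipline with POSIX threads, where sem_post() stands in for
lkl_ops->sem_up() and cpu_owner is a made-up stand-in for the CPU
bookkeeping:

/*
 * Hypothetical standalone demo: finish all handoff bookkeeping, then post
 * the semaphore; nothing that touches shared state may come after the post.
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t next_sem;
static int cpu_owner = 1;	/* updated by the previous thread before handoff */

static void *next_thread(void *arg)
{
	(void)arg;
	sem_wait(&next_sem);
	/* by the time this runs, the handoff bookkeeping must be complete */
	printf("next thread running, cpu_owner=%d\n", cpu_owner);
	return NULL;
}

int main(void)
{
	pthread_t t;

	sem_init(&next_sem, 0, 0);
	pthread_create(&t, NULL, next_thread, NULL);

	cpu_owner = 2;		/* bookkeeping first (owner change, flags) */
	sem_post(&next_sem);	/* then wake the next thread; no shared-state
				 * access is allowed after this point */

	pthread_join(t, NULL);
	sem_destroy(&next_sem);
	return 0;
}

Compile with -pthread; the only point is that the post happens strictly after
the bookkeeping write, mirroring the order enforced in __switch_to().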