From bad9a43a20372a3b41e15e8aa03b918c76f360f0 Mon Sep 17 00:00:00 2001
From: Abhijeet Dharmapurikar
Date: Tue, 19 Mar 2013 16:05:49 -0700
Subject: [PATCH 1/3] irqchip: gic: fix irq_trigger return

The genirq layer expects 0 when the retrigger function is not able to
resend the irq in hardware, but the code returns -ENXIO. Fix it.

[ tglx: Reworked comment and changelog ]

Signed-off-by: Abhijeet Dharmapurikar
Signed-off-by: Stephen Boyd
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1363734349-32635-1-git-send-email-sboyd@codeaurora.org
Signed-off-by: Thomas Gleixner
---
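[ Note: the snippet below is an illustrative paraphrase of the caller in
  kernel/irq/resend.c (check_irq_resend()) from this era, trimmed for
  brevity; it is not part of the patch. The core treats a non-zero
  return from ->irq_retrigger() as "resent in hardware", so the stray
  -ENXIO wrongly suppressed the software resend fallback. ]

	if (!desc->irq_data.chip->irq_retrigger ||
	    !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) {
		/*
		 * Hardware retrigger is unavailable or declined:
		 * fall back to the software resend path.
		 */
	}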
 drivers/irqchip/irq-gic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index a32e0d5aa45f43..fc6aebf1e4b263 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -236,7 +236,8 @@ static int gic_retrigger(struct irq_data *d)
 	if (gic_arch_extn.irq_retrigger)
 		return gic_arch_extn.irq_retrigger(d);

-	return -ENXIO;
+	/* the genirq layer expects 0 if we can't retrigger in hardware */
+	return 0;
 }

 #ifdef CONFIG_SMP

From 84cc8fd2fe65866e49d70b38b3fdf7219dd92fe0 Mon Sep 17 00:00:00 2001
From: Michael Bohan
Date: Tue, 19 Mar 2013 19:19:25 -0700
Subject: [PATCH 2/3] hrtimer: Don't reinitialize a cpu_base lock on CPU_UP

The current code makes the assumption that a cpu_base lock won't be
held if the CPU corresponding to that cpu_base is offline, which isn't
always true.

If a hrtimer is not queued, then it will not be migrated by
migrate_hrtimers() when a CPU is offlined. Therefore, the hrtimer's
cpu_base may still point to a CPU which has subsequently gone offline
if the timer wasn't enqueued at the time the CPU went down.

Normally this wouldn't be a problem, but a cpu_base's lock is blindly
reinitialized each time a CPU is brought up. If a CPU is brought online
during the period that another thread is performing a hrtimer operation
on a stale hrtimer, then the lock will be reinitialized under its feet,
and a SPIN_BUG() like the following will be observed:

<0>[ 28.082085] BUG: spinlock already unlocked on CPU#0, swapper/0/0
<0>[ 28.087078] lock: 0xc4780b40, value 0x0 .magic: dead4ead, .owner: <none>/-1, .owner_cpu: -1
<4>[ 42.451150] [] (unwind_backtrace+0x0/0x120) from [] (do_raw_spin_unlock+0x44/0xdc)
<4>[ 42.460430] [] (do_raw_spin_unlock+0x44/0xdc) from [] (_raw_spin_unlock+0x8/0x30)
<4>[ 42.469632] [] (_raw_spin_unlock+0x8/0x30) from [] (__hrtimer_start_range_ns+0x1e4/0x4f8)
<4>[ 42.479521] [] (__hrtimer_start_range_ns+0x1e4/0x4f8) from [] (hrtimer_start+0x20/0x28)
<4>[ 42.489247] [] (hrtimer_start+0x20/0x28) from [] (rcu_idle_enter_common+0x1ac/0x320)
<4>[ 42.498709] [] (rcu_idle_enter_common+0x1ac/0x320) from [] (rcu_idle_enter+0xa0/0xb8)
<4>[ 42.508259] [] (rcu_idle_enter+0xa0/0xb8) from [] (cpu_idle+0x24/0xf0)
<4>[ 42.516503] [] (cpu_idle+0x24/0xf0) from [] (rest_init+0x88/0xa0)
<4>[ 42.524319] [] (rest_init+0x88/0xa0) from [] (start_kernel+0x3d0/0x434)

As an example, this particular crash occurred when hrtimer_start() was
executed on CPU #0. The code locked the hrtimer's current cpu_base
corresponding to CPU #1. CPU #0 then tried to switch the hrtimer's
cpu_base to an optimal CPU which was online. In this case, it selected
the cpu_base corresponding to CPU #3.

Before it could proceed, CPU #1 came online and reinitialized the
spinlock corresponding to its cpu_base. Thus now CPU #0 held a lock
which was reinitialized. When CPU #0 finally ended up unlocking the old
cpu_base corresponding to CPU #1 so that it could switch to CPU #3, we
hit this SPIN_BUG() above while in switch_hrtimer_base().

CPU #0                                  CPU #1
----                                    ----
...
hrtimer_start()
lock_hrtimer_base(base #1)
...                                     init_hrtimers_cpu()
switch_hrtimer_base()                   ...
...                                     raw_spin_lock_init(&cpu_base->lock)
raw_spin_unlock(&cpu_base->lock)
...

Solve this by statically initializing the lock.

Signed-off-by: Michael Bohan
Link: http://lkml.kernel.org/r/1363745965-23475-1-git-send-email-mbohan@codeaurora.org
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner
---
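[ Note: the snippet below is an illustrative sketch, not part of the
  patch, of the general pattern the fix relies on: a per-CPU structure
  whose lock is statically initialized is valid from boot onwards, so
  the CPU_UP path never touches it again and cannot reinitialize it
  underneath a current holder. The structure and names here are made
  up for illustration. ]

	struct example_cpu_base {
		raw_spinlock_t		lock;
		/* ... other per-cpu state ... */
	};

	static DEFINE_PER_CPU(struct example_cpu_base, example_bases) = {
		.lock = __RAW_SPIN_LOCK_UNLOCKED(example_bases.lock),
	};

	/*
	 * The hotplug-time init callback then only sets up state that
	 * really is per-onlining, and leaves the lock alone.
	 */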
 kernel/hrtimer.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cc47812d3feb0c..14be27feda491d 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -63,6 +63,7 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {

+	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
 	.clock_base =
 	{
 		{
@@ -1642,8 +1643,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;

-	raw_spin_lock_init(&cpu_base->lock);
-
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 		timerqueue_init_head(&cpu_base->clock_base[i].active);

From f2530dc71cf0822f90bb63ea4600caaef33a66bb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 9 Apr 2013 09:33:34 +0200
Subject: [PATCH 3/3] kthread: Prevent unpark race which puts threads on the wrong cpu

The smpboot threads rely on the park/unpark mechanism which binds per
cpu threads on a particular core. However, the functionality is racy:

CPU0                   CPU1                   CPU2

unpark(T)                                     wake_up_process(T)

         clear(SHOULD_PARK)

                       T runs

                       leave parkme() due to !SHOULD_PARK

bind_to(CPU2)

                       BUG_ON(wrong CPU)

We cannot let the tasks move themselves to the target CPU as one of
those tasks is actually the migration thread itself, which requires
that it starts running on the target cpu right away.

The solution to this problem is to prevent wakeups in park mode which
are not from unpark(). That way we can guarantee that the association
of the task to the target cpu is working correctly.

Add a new task state (TASK_PARKED) which prevents other wakeups and use
this state explicitly for the unpark wakeup.

Peter also noticed: since the task state is visible to userspace and
all the parked tasks are still in the PID space, it's a good hint in ps
and friends that these tasks aren't really there for the moment.

The migration thread has another related issue.

CPU0                   CPU1

Bring up CPU2
create_thread(T)
park(T)
 wait_for_completion()
                       parkme()
                       complete()
sched_set_stop_task()
                       schedule(TASK_PARKED)

The sched_set_stop_task() call is issued while the task is on the
runqueue of CPU1 and that confuses the hell out of the stop_task class
on that cpu. So we need the same synchronization before
sched_set_stop_task().

Reported-by: Dave Jones
Reported-and-tested-by: Dave Hansen
Reported-and-tested-by: Borislav Petkov
Acked-by: Peter Zijlstra
Cc: Srivatsa S. Bhat
Cc: dhillf@gmail.com
Cc: Ingo Molnar
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1304091635430.21884@ionos
Signed-off-by: Thomas Gleixner
---
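[ Note: the snippet below is a simplified sketch of the new pairing,
  condensed from the kthread.c hunks that follow; it is not additional
  code. The parked thread now sleeps in TASK_PARKED, a state that
  wake_up_process() (TASK_NORMAL) does not match, so only the targeted
  wakeup in __kthread_unpark() can make it runnable, and only after the
  cpumask has been fixed up via __kthread_bind(..., TASK_PARKED). ]

	/* parked side (__kthread_parkme), IS_PARKED handling omitted */
	__set_current_state(TASK_PARKED);
	while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
		schedule();
		__set_current_state(TASK_PARKED);
	}

	/* unpark side (__kthread_unpark) */
	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
	__kthread_bind(k, kthread->cpu, TASK_PARKED);	/* waits until the task is off the runqueue */
	wake_up_state(k, TASK_PARKED);			/* wakes TASK_PARKED sleepers only */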
 fs/proc/array.c              |  1 +
 include/linux/sched.h        |  5 ++--
 include/trace/events/sched.h |  2 +-
 kernel/kthread.c             | 52 +++++++++++++++++++-----------------
 kernel/smpboot.c             | 14 ++++++++--
 5 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index f7ed9ee46eb9d3..cbd0f1b324b972 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -143,6 +143,7 @@ static const char * const task_state_array[] = {
 	"x (dead)",		/*  64 */
 	"K (wakekill)",		/* 128 */
 	"W (waking)",		/* 256 */
+	"P (parked)",		/* 512 */
 };

 static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d35d2b6ddbfb69..e692a022527bda 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -163,9 +163,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_DEAD		64
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
-#define TASK_STATE_MAX		512
+#define TASK_PARKED		512
+#define TASK_STATE_MAX		1024

-#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"

 extern char ___assert_task_state[1 - 2*!!(
 		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 5a8671e8a67ff8..e5586caff67a97 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -147,7 +147,7 @@ TRACE_EVENT(sched_switch,
 		__print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
 				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
 				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
-				{ 128, "W" }) : "R",
+				{ 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
 		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 691dc2ef9baf24..9eb7fed0bbaa98 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task)

 static void __kthread_parkme(struct kthread *self)
 {
-	__set_current_state(TASK_INTERRUPTIBLE);
+	__set_current_state(TASK_PARKED);
 	while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
 		if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
 			complete(&self->parked);
 		schedule();
-		__set_current_state(TASK_INTERRUPTIBLE);
+		__set_current_state(TASK_PARKED);
 	}
 	clear_bit(KTHREAD_IS_PARKED, &self->flags);
 	__set_current_state(TASK_RUNNING);
@@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create_on_node);

-static void __kthread_bind(struct task_struct *p, unsigned int cpu)
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
 {
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	if (!wait_task_inactive(p, state)) {
+		WARN_ON(1);
+		return;
+	}
 	/* It's safe because the task is inactive. */
 	do_set_cpus_allowed(p, cpumask_of(cpu));
 	p->flags |= PF_THREAD_BOUND;
@@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu)
  */
 void kthread_bind(struct task_struct *p, unsigned int cpu)
 {
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
-		WARN_ON(1);
-		return;
-	}
-	__kthread_bind(p, cpu);
+	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(kthread_bind);

@@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k)
 	return NULL;
 }

+static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
+{
+	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	/*
+	 * We clear the IS_PARKED bit here as we don't wait
+	 * until the task has left the park code. So if we'd
+	 * park before that happens we'd see the IS_PARKED bit
+	 * which might be about to be cleared.
+	 */
+	if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+		if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+			__kthread_bind(k, kthread->cpu, TASK_PARKED);
+		wake_up_state(k, TASK_PARKED);
+	}
+}
+
 /**
  * kthread_unpark - unpark a thread created by kthread_create().
  * @k: thread created by kthread_create().
@@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k)
 {
 	struct kthread *kthread = task_get_live_kthread(k);

-	if (kthread) {
-		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-		/*
-		 * We clear the IS_PARKED bit here as we don't wait
-		 * until the task has left the park code. So if we'd
-		 * park before that happens we'd see the IS_PARKED bit
-		 * which might be about to be cleared.
-		 */
-		if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-			if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-				__kthread_bind(k, kthread->cpu);
-			wake_up_process(k);
-		}
-	}
+	if (kthread)
+		__kthread_unpark(k, kthread);
 	put_task_struct(k);
 }

@@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k)
 	trace_sched_kthread_stop(k);
 	if (kthread) {
 		set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
-		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+		__kthread_unpark(k, kthread);
 		wake_up_process(k);
 		wait_for_completion(&kthread->exited);
 	}
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 8eaed9aa9cf0c1..02fc5c9336735a 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 	}
 	get_task_struct(tsk);
 	*per_cpu_ptr(ht->store, cpu) = tsk;
-	if (ht->create)
-		ht->create(cpu);
+	if (ht->create) {
+		/*
+		 * Make sure that the task has actually scheduled out
+		 * into park position, before calling the create
+		 * callback. At least the migration thread callback
+		 * requires that the task is off the runqueue.
+		 */
+		if (!wait_task_inactive(tsk, TASK_PARKED))
+			WARN_ON(1);
+		else
+			ht->create(cpu);
+	}
 	return 0;
 }
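[ Note: for context, the ->create() callback that motivated the
  wait_task_inactive(tsk, TASK_PARKED) ordering in the smpboot hunk
  above is the one used by the stop/migration thread. The snippet below
  is a rough paraphrase of that callback (based on kernel/stop_machine.c
  of this era), simplified and not part of this patch; the per-cpu
  variable name is only illustrative. ]

	static void cpu_stop_create(unsigned int cpu)
	{
		/*
		 * Switch the freshly parked per-cpu stopper thread to the
		 * stop_sched_class. This must not happen while the task
		 * is still on a runqueue, hence the wait above.
		 */
		sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu));
	}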