arm64: locks: patch in lse instructions when supported by the CPU
On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of our locking functions so that
LSE atomic instructions are used for spinlocks and rwlocks.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
wildea01 committed Jul 27, 2015
1 parent c09d6a0 commit 81bb5c6
Showing 1 changed file with 108 additions and 29 deletions.
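The diff below leans on the ARM64_LSE_ATOMIC_INSN() helper that the new #include <asm/lse.h> provides. The following is a minimal sketch of the idea, not the verbatim kernel definition: the Kconfig guards and the ARM64_HAS_LSE_ATOMICS capability name are assumptions about how the macro hooks into the runtime alternatives framework.

    /*
     * Sketch only -- see <asm/lse.h> and <asm/alternative.h> for the real thing.
     * With LSE support built in, both instruction sequences are emitted and the
     * alternatives framework patches the LSE one in at boot on CPUs that report
     * the capability; otherwise the macro degrades to the plain LL/SC string.
     */
    #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
    #define ARM64_LSE_ATOMIC_INSN(llsc, lse)    \
            ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
    #else
    #define ARM64_LSE_ATOMIC_INSN(llsc, lse)    llsc
    #endif

Because the two alternative sequences are kept the same length, the shorter side of each pair is padded out, which is why several of the LSE (and some LL/SC) sequences in this diff carry explicit nops.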
137 changes: 108 additions & 29 deletions arch/arm64/include/asm/spinlock.h
@@ -16,6 +16,7 @@
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H

#include <asm/lse.h>
#include <asm/spinlock_types.h>
#include <asm/processor.h>

@@ -38,11 +39,21 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)

asm volatile(
/* Atomically increment the next ticket. */
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %3\n"
"1: ldaxr %w0, %3\n"
" add %w1, %w0, %w5\n"
" stxr %w2, %w1, %3\n"
" cbnz %w2, 1b\n"
" cbnz %w2, 1b\n",
/* LSE atomics */
" mov %w2, %w5\n"
" ldadda %w2, %w0, %3\n"
" nop\n"
" nop\n"
" nop\n"
)

/* Did we get the lock? */
" eor %w1, %w0, %w0, ror #16\n"
" cbz %w1, 3f\n"
@@ -67,15 +78,25 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
unsigned int tmp;
arch_spinlock_t lockval;

asm volatile(
" prfm pstl1strm, %2\n"
"1: ldaxr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 2f\n"
" add %w0, %w0, %3\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
"2:"
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %2\n"
"1: ldaxr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 2f\n"
" add %w0, %w0, %3\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
"2:",
/* LSE atomics */
" ldr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 1f\n"
" add %w1, %w0, %3\n"
" casa %w0, %w1, %2\n"
" and %w1, %w1, #0xffff\n"
" eor %w1, %w1, %w0, lsr #16\n"
"1:")
: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
: "I" (1 << TICKET_SHIFT)
: "memory");
@@ -85,10 +106,19 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)

static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
asm volatile(
" stlrh %w1, %0\n"
: "=Q" (lock->owner)
: "r" (lock->owner + 1)
unsigned long tmp;

asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" ldr %w1, %0\n"
" add %w1, %w1, #1\n"
" stlrh %w1, %0",
/* LSE atomics */
" mov %w1, #1\n"
" nop\n"
" staddlh %w1, %0")
: "=Q" (lock->owner), "=&r" (tmp)
:
: "memory");
}

@@ -123,13 +153,24 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned int tmp;

asm volatile(
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" sevl\n"
"1: wfe\n"
"2: ldaxr %w0, %1\n"
" cbnz %w0, 1b\n"
" stxr %w0, %w2, %1\n"
" cbnz %w0, 2b\n"
" nop",
/* LSE atomics */
"1: mov %w0, wzr\n"
"2: casa %w0, %w2, %1\n"
" cbz %w0, 3f\n"
" ldxr %w0, %1\n"
" cbz %w0, 2b\n"
" wfe\n"
" b 1b\n"
"3:")
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "memory");
@@ -139,12 +180,18 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned int tmp;

asm volatile(
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
"1: ldaxr %w0, %1\n"
" cbnz %w0, 2f\n"
" stxr %w0, %w2, %1\n"
" cbnz %w0, 1b\n"
"2:\n"
"2:",
/* LSE atomics */
" mov %w0, wzr\n"
" casa %w0, %w2, %1\n"
" nop\n"
" nop")
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "memory");
@@ -154,9 +201,10 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)

static inline void arch_write_unlock(arch_rwlock_t *rw)
{
asm volatile(
" stlr %w1, %0\n"
: "=Q" (rw->lock) : "r" (0) : "memory");
asm volatile(ARM64_LSE_ATOMIC_INSN(
" stlr wzr, %0",
" swpl wzr, wzr, %0")
: "=Q" (rw->lock) :: "memory");
}

/* write_can_lock - would write_trylock() succeed? */
@@ -173,52 +221,83 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
*
* The memory barriers are implicit with the load-acquire and store-release
* instructions.
*
* Note that in UNDEFINED cases, such as unlocking a lock twice, the LL/SC
* and LSE implementations may exhibit different behaviour (although this
* will have no effect on lockdep).
*/
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned int tmp, tmp2;

asm volatile(
" sevl\n"
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
"1: wfe\n"
"2: ldaxr %w0, %2\n"
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 1b\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 2b\n"
" nop\n"
" cbnz %w1, 2b",
/* LSE atomics */
"1: wfe\n"
"2: ldxr %w0, %2\n"
" adds %w1, %w0, #1\n"
" tbnz %w1, #31, 1b\n"
" casa %w0, %w1, %2\n"
" sbc %w0, %w1, %w0\n"
" cbnz %w0, 2b")
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "memory");
: "cc", "memory");
}

static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned int tmp, tmp2;

asm volatile(
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
"1: ldxr %w0, %2\n"
" sub %w0, %w0, #1\n"
" stlxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
" cbnz %w1, 1b",
/* LSE atomics */
" movn %w0, #0\n"
" nop\n"
" nop\n"
" staddl %w0, %2")
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "memory");
}

static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned int tmp, tmp2 = 1;
unsigned int tmp, tmp2;

asm volatile(
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" mov %w1, #1\n"
"1: ldaxr %w0, %2\n"
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 2f\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
"2:\n"
: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
"2:",
/* LSE atomics */
" ldr %w0, %2\n"
" adds %w1, %w0, #1\n"
" tbnz %w1, #31, 1f\n"
" casa %w0, %w1, %2\n"
" sbc %w1, %w1, %w0\n"
" nop\n"
"1:")
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "memory");
: "cc", "memory");

return !tmp2;
}
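For reference, the ticket-lock encoding these sequences manipulate is a 32-bit word holding two 16-bit tickets ("owner" and "next", with TICKET_SHIFT = 16); the lock is free when the two halves are equal. The C sketch below is purely illustrative -- it mirrors the arithmetic of the trylock paths above without their atomicity, and assumes the little-endian layout where owner occupies the low halfword.

    #include <stdbool.h>
    #include <stdint.h>

    #define TICKET_SHIFT 16

    /* Non-atomic illustration of what arch_spin_trylock() computes. */
    static bool ticket_trylock_sketch(uint32_t *lock)
    {
            uint32_t lockval = *lock;

            /* "eor %wN, %w0, %w0, ror #16": zero iff owner == next (unlocked). */
            if (lockval ^ ((lockval >> 16) | (lockval << 16)))
                    return false;

            /*
             * "add ..., 1 << TICKET_SHIFT": claim the next ticket; the real
             * code publishes this with STXR (LL/SC) or CASA (LSE).
             */
            *lock = lockval + (1u << TICKET_SHIFT);
            return true;
    }

The rwlock side uses a simpler encoding visible in the operands above: readers count up from zero in the low bits, and a writer takes the lock by installing 0x80000000, so bit 31 doubles as the "writer held" flag.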