Skip to content

Commit

Permalink
Support per-core state using #[thread_local]
Browse files Browse the repository at this point in the history
- Controlled by `thread_local` feature
- Does not require nightly features, but using it does
- Intercepts `__pre_init` to copy .tdata into the per-core state
- Implements `__aeabi_read_tp` to return the per-core state for the code the
  compiler generates when accessing `thread_local` variables
- Needs linker script support to set up the layout and symbols
  • Loading branch information
jsgf committed Apr 15, 2024
1 parent 840e90b commit a8768cb
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 0 deletions.
34 changes: 34 additions & 0 deletions memory.x
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,37 @@ SECTIONS {
KEEP(*(.boot2));
} > BOOT2
} INSERT BEFORE .text;

/*
 * Per-core (thread-local) initialised data.
 *
 * Gathers every `.tdata` / `.tdata.*` input section into a single `.tdata`
 * output section assigned to the FLASH region. This is the initial-value
 * image; `__pre_init` in rp2040-hal's `multicore` module reads it through the
 * symbols below.
 *
 *   __tdata_start / __tdata_end  - 4-byte aligned bounds of the image
 *   __tdata_len                  - size of the image in bytes
 *
 * All symbols use PROVIDE, so a definition elsewhere takes precedence.
 */
SECTIONS {
.tdata : ALIGN(4)
{
. = ALIGN(4);
PROVIDE(__tdata_start = .);
*(.tdata .tdata.*);
. = ALIGN(4);
/* End is aligned too, so __tdata_len is always a multiple of 4. */
PROVIDE(__tdata_end = .);
} > FLASH
PROVIDE(__tdata_len = __tdata_end - __tdata_start);
} INSERT AFTER .data;

/*
 * Size the per-core state and reserve RAM for one copy per core.
 *
 * `.tbss` collects the zero-initialised thread-local input sections
 * (`.tbss`, `.tbss.*`, `.tcommon`) so their total size can be measured:
 *
 *   __tbss_start / __tbss_end  - 4-byte aligned bounds
 *   __tbss_len                 - size in bytes
 *
 * `.tls_state` then reserves two blocks of `__tdata_len + __tbss_len` bytes,
 * one starting at TLS_CORE_0 and one at TLS_CORE_1. Both sections are NOLOAD,
 * so nothing is loaded into them from the image.
 *
 * NOTE(review): `.tdata` is assigned to FLASH while `.tbss` is assigned to
 * RAM. The per-core block layout assumes the linker numbers `.tbss`
 * thread-local offsets as immediately following `.tdata` - confirm this holds
 * for the linker in use, and whether the `.tbss` output section itself
 * consumes RAM in addition to the two per-core blocks.
 */
SECTIONS {
.tbss (NOLOAD) : ALIGN(4)
{
. = ALIGN(4);
PROVIDE(__tbss_start = .);
*(.tbss .tbss.*);
*(.tcommon);
. = ALIGN(4);
PROVIDE(__tbss_end = .);
} > RAM
PROVIDE(__tbss_len = __tbss_end - __tbss_start);

.tls_state (NOLOAD) : ALIGN(4) {
/* ALIGN(4) with one argument yields the aligned location counter without moving it. */
PROVIDE(TLS_CORE_0 = ALIGN(4));
/* Reserve core 0's block: initialised data followed by zero-initialised data. */
. += __tdata_len + __tbss_len;
PROVIDE(TLS_CORE_1 = ALIGN(4));
/* Reserve core 1's block, same size and layout. */
. += __tdata_len + __tbss_len;
} > RAM
} INSERT AFTER .bss;
3 changes: 3 additions & 0 deletions rp2040-hal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ rtic-monotonic = ["dep:rtic-monotonic"]
# Implement `i2c-write-iter` traits
i2c-write-iter = ["dep:i2c-write-iter"]

# Enable per-core (multicore) state through `#[thread_local]` statics.
# Enabling this feature does not itself need nightly, but using
# `#[thread_local]` does. Also requires the linker script (memory.x) to
# provide the TLS layout and symbols.
thread_local = []

[[example]]
# irq example uses cortex-m-rt::interrupt, need rt feature for that
name = "gpio_irq_example"
Expand Down
65 changes: 65 additions & 0 deletions rp2040-hal/src/multicore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,3 +290,68 @@ impl<'p> Core<'p> {
}
}
}

#[cfg(all(target_arch = "arm", feature = "thread_local"))]
mod thread_local {
    //! Per-core storage backing `#[thread_local]` statics.
    //!
    //! The linker script reserves one block per core (`TLS_CORE_0`,
    //! `TLS_CORE_1`), each `__tdata_len + __tbss_len` bytes long. This module
    //! provides the two symbols that tie those blocks into the program:
    //!
    //! - `__pre_init`, run by the startup code, gives each block its initial
    //!   contents: the `.tdata` image followed by zeroes for `.tbss`.
    //! - `__aeabi_read_tp`, called by compiler-generated code on every
    //!   thread-local access, returns the thread pointer for the current core.

    use core::arch::global_asm;
    use core::ptr::addr_of;

    extern "C" {
        // Start of each core's block; defined by the linker script.
        static TLS_CORE_0: u8;
        static TLS_CORE_1: u8;
    }

    // Table of per-core block addresses, indexed by CPUID. Declared as
    // `*const u8` only because it is never written from Rust; the memory it
    // points at is mutable and is accessed through the thread pointer handed
    // out by `__aeabi_read_tp`. `static mut` is used because raw pointers are
    // not `Sync`; the table itself is read-only after link time.
    static mut TLS_STATE: [*const u8; 2] = [
        // SAFETY: only the address of the linker-provided symbol is taken;
        // the extern static is never read through this expression.
        unsafe { addr_of!(TLS_CORE_0) },
        // SAFETY: as above.
        unsafe { addr_of!(TLS_CORE_1) },
    ];

    // Define `__aeabi_read_tp` called by the compiler to get access to
    // thread-local storage.
    //
    // Returns the thread pointer in r0. r1 is used as scratch and is saved
    // and restored so that only r0 (and the flags) change across the call.
    //
    // Under the ARM EABI TLS layout the thread pointer addresses a two-word
    // (8 byte) thread control block that sits immediately *before* the TLS
    // data, and the offsets the linker resolves for thread-local variables
    // are relative to that pointer. The per-core blocks hold TLS data only,
    // so the value returned is the block address minus 8. The TCB words
    // themselves are never accessed.
    // NOTE(review): this assumes the TLS segment alignment is at most 8
    // bytes; a larger alignment would increase the gap - confirm no
    // thread-local in the image is over-aligned.
    global_asm! {
        ".pushsection .text.__aeabi_read_tp",
        ".align 4",
        ".p2align 4,,15",
        ".global __aeabi_read_tp",
        ".type __aeabi_read_tp,%function",

        "__aeabi_read_tp:",
        "    push {{r1, lr}}",
        "    ldr r1, =0xd0000000",      // Load SIO CPUID addr
        "    ldr r1, [r1]",             // Get current CPUID
        "    lsls r1, r1, #2",          // Scale by 4 (size of a table entry)
        "    ldr r0, ={tls_state}",     // Load TLS_STATE base addr
        "    ldr r0, [r0, r1]",         // Load this core's block address
        "    subs r0, r0, #8",          // Bias by the TCB size to form the thread pointer
        "    pop {{r1, pc}}",

        ".popsection",
        tls_state = sym TLS_STATE,
    }

    // Intercept __pre_init to hook into the startup code and initialise
    // TLS_CORE_[01]. For each core's block:
    //
    //   1. copy the `.tdata` image (`__tdata_start`, `__tdata_len` bytes) to
    //      the start of the block;
    //   2. zero the remaining `__tbss_len` bytes. The blocks live in a NOLOAD
    //      section outside `.bss`, so nothing else clears them and
    //      zero-initialised thread-locals would otherwise start with whatever
    //      happened to be in RAM.
    //
    // r4 is pushed alongside lr purely to keep the stack 8-byte aligned across
    // the calls, as the procedure call standard requires.
    global_asm! {
        ".pushsection .text.__pre_init",
        ".align 4",
        ".p2align 4,,15",
        ".global __pre_init",
        ".type __pre_init,%function",

        "__pre_init:",
        "    push {{r4, lr}}",

        // Core 0: initialised data, then zero the .tbss tail.
        "    ldr r0, ={tls_core_0}",
        "    ldr r1, =__tdata_start",
        "    ldr r2, =__tdata_len",
        "    bl __aeabi_memcpy",
        "    ldr r0, ={tls_core_0}",
        "    ldr r1, =__tdata_len",
        "    adds r0, r0, r1",          // r0 = start of the .tbss part of the block
        "    ldr r1, =__tbss_len",
        "    bl __aeabi_memclr",

        // Core 1: same again.
        "    ldr r0, ={tls_core_1}",
        "    ldr r1, =__tdata_start",
        "    ldr r2, =__tdata_len",
        "    bl __aeabi_memcpy",
        "    ldr r0, ={tls_core_1}",
        "    ldr r1, =__tdata_len",
        "    adds r0, r0, r1",
        "    ldr r1, =__tbss_len",
        "    bl __aeabi_memclr",

        "    pop {{r4, pc}}",

        ".popsection",
        tls_core_0 = sym TLS_CORE_0,
        tls_core_1 = sym TLS_CORE_1,
    }
}

0 comments on commit a8768cb

Please sign in to comment.