Skip to content

Commit

Permalink
optimize 32-bit aligned mem{cpy,clr,set} intrinsics for ARM
Browse files Browse the repository at this point in the history
this reduces the execution time of all these routines by 40-70%
  • Loading branch information
japaric committed Jun 30, 2017
1 parent c311dee commit 75c6ccc
Show file tree
Hide file tree
Showing 6 changed files with 463 additions and 30 deletions.
82 changes: 57 additions & 25 deletions src/arm.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use core::intrinsics;
use core::{intrinsics, ptr};

#[cfg(feature = "mem")]
use mem::{memcpy, memmove, memset};
use mem;

// NOTE This function and the ones below are implemented using assembly because they use a custom
// calling convention which can't be implemented using a normal Rust function
Expand Down Expand Up @@ -60,65 +59,98 @@ pub unsafe fn __aeabi_ldivmod() {
intrinsics::unreachable();
}

// TODO: These aeabi_* functions should be defined as aliases
// Foreign declarations of the C memory routines, compiled only when the `mem` feature is
// disabled; each one returns a `*mut u8`.
// NOTE(review): in this rendered diff the block appears to be on the removed side (the
// `mem` module is no longer feature-gated in lib.rs) -- confirm against the real file.
#[cfg(not(feature = "mem"))]
extern "C" {
fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8;
}

// FIXME: The `*4` and `*8` variants should be defined as aliases.

// Weakly-linked `__aeabi_memcpy`: forwards (dest, src, n) unchanged to `memcpy` and drops
// the pointer it returns.
// NOTE(review): rendered diff without +/- markers -- the unqualified `memcpy` call is
// presumably the removed line and `mem::memcpy` its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
mem::memcpy(dest, src, n);
}

// `__aeabi_memcpy4`: copies `n` bytes from `src` to `dest`, moving one `u32` at a time while
// at least 4 bytes remain and delegating the leftover bytes to `__aeabi_memcpy`.
// Both pointers are accessed as `u32`, so callers are expected to pass 4-byte-aligned
// addresses.
// NOTE(review): rendered diff without +/- markers -- the first signature and the bare
// `memcpy` call are presumably the removed lines; the `mut n` signature and the loop are
// the replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) {
let mut dest = dest as *mut u32;
// NOTE(review): `src` is only read below; a `*const u32` cast would be sufficient.
let mut src = src as *mut u32;

// Word loop: copy 4 bytes per iteration and advance both cursors by one `u32`.
while n >= 4 {
ptr::write(dest, ptr::read(src));
dest = dest.offset(1);
src = src.offset(1);
n -= 4;
}

// Fewer than 4 bytes are left at this point; hand them to `__aeabi_memcpy`.
__aeabi_memcpy(dest as *mut u8, src as *const u8, n);
}

// Weakly-linked `__aeabi_memcpy8`: delegates to the word-wise `__aeabi_memcpy4` with the
// same arguments.
// NOTE(review): rendered diff without +/- markers -- the bare `memcpy` call is presumably
// the removed line and the `__aeabi_memcpy4` call its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
__aeabi_memcpy4(dest, src, n);
}

// Weakly-linked `__aeabi_memmove`: forwards (dest, src, n) unchanged to `memmove` and drops
// the pointer it returns.
// NOTE(review): rendered diff without +/- markers -- the unqualified `memmove` call is
// presumably the removed line and `mem::memmove` its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
mem::memmove(dest, src, n);
}

// Weakly-linked `__aeabi_memmove4`: no word-wise path here, it simply delegates to
// `__aeabi_memmove` with the same arguments.
// NOTE(review): rendered diff without +/- markers -- the bare `memmove` call is presumably
// the removed line and the `__aeabi_memmove` call its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
__aeabi_memmove(dest, src, n);
}

// Weakly-linked `__aeabi_memmove8`: delegates to `__aeabi_memmove` with the same arguments.
// NOTE(review): rendered diff without +/- markers -- the bare `memmove` call is presumably
// the removed line and the `__aeabi_memmove` call its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
__aeabi_memmove(dest, src, n);
}

// Note the different argument order: `__aeabi_memset` takes (dest, n, c) whereas `memset`
// takes (dest, c, n), so the last two arguments are swapped when forwarding.
// The pointer returned by `memset` is dropped.
// NOTE(review): rendered diff without +/- markers -- the unqualified `memset` call is
// presumably the removed line and `mem::memset` its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
mem::memset(dest, c, n);
}

// `__aeabi_memset4`: fills `n` bytes at `dest` with the low byte of `c`, storing one `u32`
// at a time while at least 4 bytes remain and delegating the leftover bytes to
// `__aeabi_memset`. `dest` is written through a `*mut u32`, so callers are expected to pass
// a 4-byte-aligned address.
// NOTE(review): rendered diff without +/- markers -- the first signature and the bare
// `memset` call are presumably the removed lines; the `mut n` signature and the loop are
// the replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) {
let mut dest = dest as *mut u32;

// Keep only the low 8 bits of `c`, then replicate that byte into all four byte lanes of
// a 32-bit word so a single store writes four fill bytes.
let byte = (c as u32) & 0xff;
let c = (byte << 24) | (byte << 16) | (byte << 8) | byte;

// Word loop: store 4 fill bytes per iteration.
while n >= 4 {
ptr::write(dest, c);
dest = dest.offset(1);
n -= 4;
}

// Fewer than 4 bytes are left; `__aeabi_memset` takes (dest, n, c) in that order.
__aeabi_memset(dest as *mut u8, n, byte as i32);
}

// Weakly-linked `__aeabi_memset8`: delegates to the word-wise `__aeabi_memset4` with the
// same (dest, n, c) arguments.
// NOTE(review): rendered diff without +/- markers -- the bare `memset` call is presumably
// the removed line and the `__aeabi_memset4` call its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
__aeabi_memset4(dest, n, c);
}

// Weakly-linked `__aeabi_memclr`: zeroes `n` bytes at `dest` by calling `__aeabi_memset`
// with a fill value of 0.
// NOTE(review): rendered diff without +/- markers -- the bare `memset(dest, 0, n)` call is
// presumably the removed line and the `__aeabi_memset` call its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset(dest, n, 0);
}

// Weakly-linked `__aeabi_memclr4`: zeroes `n` bytes at `dest` through the word-wise
// `__aeabi_memset4` with a fill value of 0.
// NOTE(review): rendered diff without +/- markers -- the bare `memset(dest, 0, n)` call is
// presumably the removed line and the `__aeabi_memset4` call its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset4(dest, n, 0);
}

// Weakly-linked `__aeabi_memclr8`: zeroes `n` bytes at `dest` through the word-wise
// `__aeabi_memset4` (not a separate 8-byte path) with a fill value of 0.
// NOTE(review): rendered diff without +/- markers -- the bare `memset(dest, 0, n)` call is
// presumably the removed line and the `__aeabi_memset4` call its replacement.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset4(dest, n, 0);
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#![feature(i128_type)]
#![feature(repr_simd)]
#![feature(abi_unadjusted)]
#![feature(linkage)]
#![allow(unused_features)]
#![no_builtins]
#![unstable(feature = "compiler_builtins_lib",
Expand Down Expand Up @@ -45,7 +46,6 @@ mod macros;
pub mod int;
pub mod float;

#[cfg(feature = "mem")]
pub mod mem;

#[cfg(target_arch = "arm")]
Expand Down
8 changes: 4 additions & 4 deletions src/mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ type c_int = i16;
#[cfg(not(target_pointer_width = "16"))]
type c_int = i32;

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memcpy(dest: *mut u8,
src: *const u8,
n: usize)
Expand All @@ -18,7 +18,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8,
dest
}

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memmove(dest: *mut u8,
src: *const u8,
n: usize)
Expand All @@ -41,7 +41,7 @@ pub unsafe extern "C" fn memmove(dest: *mut u8,
dest
}

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
let mut i = 0;
while i < n {
Expand All @@ -51,7 +51,7 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
s
}

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
let mut i = 0;
while i < n {
Expand Down
58 changes: 58 additions & 0 deletions tests/aeabi_memclr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]

extern crate compiler_builtins;

// test runner
extern crate utest_cortex_m_qemu;

// overrides `panic!`
#[macro_use]
extern crate utest_macros;

use core::mem;

// Local `panic!` that forwards every token it receives to `upanic!`, so macros expanding
// to `panic!` inside this file end up in the test runner's handler.
macro_rules! panic {
    ($($args:tt)*) => {
        upanic!($($args)*);
    };
}

// Routines under test, resolved at link time.
extern "C" {
    /// Zeroes `n` bytes starting at `dest`.
    fn __aeabi_memclr4(dest: *mut u8, n: usize);
    /// Fills `n` bytes starting at `dest` with the low byte of `c`.
    /// `c` is declared `i32` so the declaration agrees with the definition's signature
    /// `(dest: *mut u8, n: usize, c: i32)`.
    fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32);
}

// 8-byte scratch buffer with raised alignment: the zero-length `[u32; 0]` field stores no
// data but gives the struct the alignment of `u32` (the test asserts this is 4).
struct Aligned {
// Bytes the routines under test operate on.
array: [u8; 8],
// Zero-sized marker used only for its alignment requirement.
_alignment: [u32; 0],
}

impl Aligned {
    // Builds an `Aligned` whose 8-byte buffer is zero-filled.
    fn new() -> Self {
        let zeroed = [0; 8];
        let marker = [];

        Aligned {
            _alignment: marker,
            array: zeroed,
        }
    }
}

// Exercises `__aeabi_memclr4` for every length from 0 through 8 on a 4-byte-aligned buffer.
#[test]
fn memclr4() {
    let mut aligned = Aligned::new();
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;

    for n in 0..9 {
        // Dirty the first `n` bytes so the clear below has something to undo.
        unsafe {
            __aeabi_memset4(xs.as_mut_ptr(), n, 0xff);
        }
        // The buffer starts out zeroed, so without this check the test would pass even
        // if both routines did nothing at all.
        assert!(xs[0..n].iter().all(|x| *x == 0xff));

        unsafe {
            __aeabi_memclr4(xs.as_mut_ptr(), n);
        }
        assert!(xs[0..n].iter().all(|x| *x == 0));
    }
}
69 changes: 69 additions & 0 deletions tests/aeabi_memcpy.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]

extern crate compiler_builtins;

// test runner
extern crate utest_cortex_m_qemu;

// overrides `panic!`
#[macro_use]
extern crate utest_macros;

// Local `panic!` that forwards every token it receives to `upanic!`, so macros expanding
// to `panic!` inside this file end up in the test runner's handler.
macro_rules! panic {
    ($($args:tt)*) => {
        upanic!($($args)*);
    };
}

// Routines under test, resolved at link time. Both take (dest, src, n) and return nothing.
extern "C" {
// General copy of `n` bytes from `src` to `dest`.
fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
// Same signature as `__aeabi_memcpy`; the `4` variant is the one the tests call on a
// 4-byte-aligned destination buffer.
fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
}

// 8-byte scratch buffer with raised alignment: the zero-length `[u32; 0]` field stores no
// data but gives the struct the alignment of `u32`.
struct Aligned {
// Bytes the routines under test operate on.
array: [u8; 8],
// Zero-sized marker used only for its alignment requirement.
_alignment: [u32; 0],
}

impl Aligned {
    // Wraps the given 8 bytes in an `Aligned`, leaving the contents untouched.
    fn new(array: [u8; 8]) -> Self {
        let marker = [];

        Aligned {
            _alignment: marker,
            array: array,
        }
    }
}

// Exercises `__aeabi_memcpy` for every length from 0 through the full buffer size.
#[test]
fn memcpy() {
    let mut dest = [0; 4];
    let src = [0xde, 0xad, 0xbe, 0xef];

    // `+ 1` makes the range include `n == dest.len()`; a plain `0..dest.len()` would
    // never test copying the whole buffer.
    for n in 0..dest.len() + 1 {
        // Reset the destination so bytes from an earlier iteration cannot mask a failure.
        dest.copy_from_slice(&[0; 4]);

        unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }

        assert_eq!(&dest[0..n], &src[0..n])
    }
}

// Exercises `__aeabi_memcpy4` for every length from 0 through the full buffer size, with
// both source and destination 4-byte aligned.
#[test]
fn memcpy4() {
    let mut aligned = Aligned::new([0; 8]);
    let dest = &mut aligned.array;
    // The source goes through `Aligned` as well: `__aeabi_memcpy4` reads it as `u32`
    // words, and a bare `[u8; 8]` local carries no 4-byte alignment guarantee.
    let aligned_src = Aligned::new([0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d]);
    let src = &aligned_src.array;

    // `+ 1` makes the range include `n == dest.len()`, the only length at which two
    // whole words are copied and no tail bytes remain.
    for n in 0..dest.len() + 1 {
        // Reset the destination so bytes from an earlier iteration cannot mask a failure.
        dest.copy_from_slice(&[0; 8]);

        unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }

        assert_eq!(&dest[0..n], &src[0..n])
    }
}
Loading

0 comments on commit 75c6ccc

Please sign in to comment.