From e3cb49b574daa1c95fc6a2c8d1cd058bf1495450 Mon Sep 17 00:00:00 2001 From: Paul Sajna Date: Tue, 18 Aug 2020 14:25:13 -0700 Subject: [PATCH] kernel memcpy, updated example --- examples/memcpy_bench/src/main.rs | 123 +++++++++++++++++++----------- psp/src/alloc_impl.rs | 45 ++++++----- psp/src/sys/kernel/mod.rs | 3 + 3 files changed, 109 insertions(+), 62 deletions(-) diff --git a/examples/memcpy_bench/src/main.rs b/examples/memcpy_bench/src/main.rs index b1f7a1d2..439a4ac8 100644 --- a/examples/memcpy_bench/src/main.rs +++ b/examples/memcpy_bench/src/main.rs @@ -1,10 +1,14 @@ #![no_std] #![no_main] -psp::module!("sample_module", 1, 1); extern crate alloc; use alloc::alloc::Layout; +use alloc::format; use core::time::Duration; +use core::ffi::c_void; +use psp::sys::SceUid; + +psp::module!("sample_module", 1, 1); fn psp_main() { psp::enable_home_button(); @@ -15,49 +19,80 @@ fn psp_main() { sys::sceKernelChangeCurrentThreadAttr(0, ThreadAttributes::VFPU); } - let size = 16; - let iterations = 1000; - let cpu_dur: Duration; - let cpu32_dur: Duration; - let dmac_dur: Duration; - let vfpu_dur: Duration; - - let src = unsafe { alloc::alloc::alloc(Layout::from_size_align_unchecked(size, 16)) }; - let dst = unsafe { alloc::alloc::alloc(Layout::from_size_align_unchecked(size, 16)) }; - cpu_dur = psp::benchmark(|| { - for _ in 0..iterations { - unsafe { memcpy(dst, src as *const u8, size); } - } - }, 10); - - cpu32_dur = psp::benchmark(|| { - for _ in 0..iterations { - unsafe { memcpy32(dst, src as *const u8, size); } - } - }, 10); - - - dmac_dur = psp::benchmark(|| { - for _ in 0..iterations { - unsafe { psp::sys::sceDmacMemcpy(dst, src as *const u8, size); } - } - }, 10); - - vfpu_dur = psp::benchmark(|| { - for _ in 0..iterations { - unsafe { psp::sys::sceVfpuMemcpy(dst, src as *const u8, size); } - } - }, 10); - - unsafe { alloc::alloc::dealloc(src, Layout::from_size_align_unchecked(size, 16)); } - unsafe { alloc::alloc::dealloc(dst, 
Layout::from_size_align_unchecked(size, 16)); } - - psp::dprintln!("size: {} bytes", size); - psp::dprintln!("iterations: {}", iterations); - psp::dprintln!("cpu: {} microseconds", cpu_dur.as_micros()); - psp::dprintln!("cpu32: {} microseconds", cpu32_dur.as_micros()); - psp::dprintln!("dmac: {} microseconds", dmac_dur.as_micros()); - psp::dprintln!("vfpu: {} microseconds", vfpu_dur.as_micros()); + let iters: [usize; 11] = [16, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1]; + let sizes: [usize; 11] = [32,64,512,1024,2048,16384,32768,65536,131072,524288,1048576]; + + let mut cpu_dur: Duration; + let mut cpu32_dur: Duration; + let mut kernel_dur: Duration; + let mut dmac_dur: Duration; + let mut vfpu_dur: Duration; + + let fd = unsafe { psp::sys::sceIoOpen(b"host0:/results.txt\0".as_ptr(), psp::sys::IoOpenFlags::CREAT | psp::sys::IoOpenFlags::RD_WR, 0o777) }; + + for i in 0..11 { + let size = sizes[i]; + let iterations = iters[i]; + let src = unsafe { alloc::alloc::alloc(Layout::from_size_align_unchecked(size, 16)) }; + let dst = unsafe { alloc::alloc::alloc(Layout::from_size_align_unchecked(size, 16)) }; + cpu_dur = psp::benchmark(|| { + for _ in 0..iterations { + unsafe { memcpy(dst, src as *const u8, size); } + } + }, 10); + + cpu32_dur = psp::benchmark(|| { + for _ in 0..iterations { + unsafe { memcpy32(dst, src as *const u8, size); } + } + }, 10); + + kernel_dur = psp::benchmark(|| { + for _ in 0..iterations { + unsafe { psp::sys::sceKernelMemcpy(dst, src as *const u8, size); } + } + }, 10); + + dmac_dur = psp::benchmark(|| { + for _ in 0..iterations { + unsafe { psp::sys::sceDmacMemcpy(dst, src as *const u8, size); } + } + }, 10); + + vfpu_dur = psp::benchmark(|| { + for _ in 0..iterations { + unsafe { psp::sys::sceVfpuMemcpy(dst, src as *const u8, size); } + } + }, 10); + + unsafe { alloc::alloc::dealloc(src, Layout::from_size_align_unchecked(size, 16)); } + unsafe { alloc::alloc::dealloc(dst, Layout::from_size_align_unchecked(size, 16)); } + + let output = format!( + "size: 
{} bytes +iterations: {} +cpu: {} microseconds +cpu32: {} microseconds +kernel: {} microseconds +dmac: {} microseconds +vfpu: {} microseconds\n\n", + size, iterations, cpu_dur.as_micros(), cpu32_dur.as_micros(), + kernel_dur.as_micros(), dmac_dur.as_micros(), + vfpu_dur.as_micros() + ); + write_to_fd(fd, output); + } + unsafe { psp::sys::sceIoClose(fd) }; +} + +fn write_to_fd(fd: SceUid, msg: alloc::string::String) { + unsafe { + psp::sys::sceIoWrite( + fd, + msg.as_str().as_bytes().as_ptr() as *const u8 as *const c_void, + msg.len() + ) + }; } unsafe fn memcpy(dst: *mut u8, src: *const u8, num: usize) -> *mut u8 { diff --git a/psp/src/alloc_impl.rs b/psp/src/alloc_impl.rs index b9324fcf..5d6d3533 100644 --- a/psp/src/alloc_impl.rs +++ b/psp/src/alloc_impl.rs @@ -77,30 +77,39 @@ unsafe extern fn memset(ptr: *mut u8, value: u32, num: usize) -> *mut u8 { ptr } - #[no_mangle] #[cfg(not(feature = "stub-only"))] -unsafe extern fn memcpy(dst: *mut u8, src: *const u8, num: isize) -> *mut u8 { - let mut size = num as usize; - let mut dst32 = dst as *mut u32; - let mut src32 = src as *const u32; - while size > 3 { - *dst32 = *src32; - dst32 = dst32.add(1); - src32 = src32.add(1); - size = size.saturating_sub(4); - } - let mut dst_new = dst32 as *mut u8; - let mut src_new = src32 as *const u8; - while size > 0 { - *dst_new = *src_new; - dst_new = dst_new.add(1); - src_new = src_new.add(1); - size = size.saturating_sub(1); +unsafe extern fn memcpy(dst: *mut u8, src: *const u8, num: usize) -> *mut u8 { + for i in 0..num { + *dst.add(i) = *src.add(i); } dst } +// broke format macro somehow +//#[no_mangle] +//#[cfg(not(feature = "stub-only"))] +//unsafe extern fn memcpy(dst: *mut u8, src: *const u8, num: isize) -> *mut u8 { + //let mut size = num as usize; + //let mut dst32 = dst as *mut u32; + //let mut src32 = src as *const u32; + //while size > 3 { + //*dst32 = *src32; + //dst32 = dst32.add(1); + //src32 = src32.add(1); + //size = size.saturating_sub(4); + //} + //let mut 
dst_new = dst32 as *mut u8; + //let mut src_new = src32 as *const u8; + //while size > 0 { + //*dst_new = *src_new; + //dst_new = dst_new.add(1); + //src_new = src_new.add(1); + //size = size.saturating_sub(1); + //} + //dst +//} + #[no_mangle] #[cfg(not(feature = "stub-only"))] unsafe extern fn memcmp(ptr1: *mut u8, ptr2: *mut u8, num: isize) -> i32 { diff --git a/psp/src/sys/kernel/mod.rs b/psp/src/sys/kernel/mod.rs index 9e1574ae..167c2ffd 100644 --- a/psp/src/sys/kernel/mod.rs +++ b/psp/src/sys/kernel/mod.rs @@ -691,6 +691,9 @@ psp_extern! { /// /// 1 if interrupts are currently enabled. pub fn sceKernelIsCpuIntrEnable() -> i32; + + #[psp(0x1839852A)] + pub fn sceKernelMemcpy(dst: *mut u8, src: *const u8, num: usize) -> *mut u8; } #[repr(C)]