Skip to content

Commit

Permalink
Add a lazily loaded PmByteString that loads one char at a time.
Browse files Browse the repository at this point in the history
See #3
  • Loading branch information
Cryptjar committed Apr 4, 2022
1 parent c748179 commit 9d13169
Show file tree
Hide file tree
Showing 4 changed files with 345 additions and 0 deletions.
112 changes: 112 additions & 0 deletions examples/uno-pm-string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
//
// This file provides an example of how to use strings on an Arduino Uno.
//


// Define no_std only for AVR
#![cfg_attr(target_arch = "avr", no_std)]
#![no_main]
//
// To unwrap the Option in const context
#![feature(const_option)]


use avr_progmem::string::PmByteString; // A progmem wrapper for strings
#[cfg(target_arch = "avr")]
use panic_halt as _; // halting panic implementation for AVR


/// A string with many multi-byte UTF-8 characters, stored directly in progmem.
///
/// The const parameter of `PmByteString` is the size of the wrapped byte
/// array (not the number of `char`s). `PmByteString::new` returns an
/// `Option`, which is unwrapped here in const context (hence the
/// `const_option` feature enabled at the top of this file).
#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
// `PmByteString::new` is an `unsafe fn`: it requires the value to end up in a
// static placed in progmem, which the `link_section` attribute above arranges
// on AVR.
static UNICODE_TEXT: PmByteString<137> = unsafe {
PmByteString::new(
"dai 大賢者 kenja, Völlerei lässt grüßen, le garçon de théâtre, Ελληνική Δημοκρατία, \
Слава Україні",
)
.unwrap()
};

/// A multi-line string stored directly in progmem.
///
/// It is printed `char` by `char` via `PmByteString::chars` in `main`.
/// The const parameter is the size of the wrapped byte array.
#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
// `PmByteString::new` is an `unsafe fn`: it requires the value to end up in a
// static placed in progmem, which the `link_section` attribute above arranges
// on AVR.
static LONG_TEXT: PmByteString<242> = unsafe {
PmByteString::new(
"
A long test string literal, that is stored in progmem instead of DRAM.
Of course, it needs to be temporarily load into DRAM.
However, unlike a `ByteString`, it will be only read a char at a time,
thus a `PmByteString` can never be too long.
",
)
.unwrap()
};

/// A single string that is over 2 KiB in size
///
/// The content is embedded at compile time from `./test_text.txt` via
/// `include_str!`. The const parameter is the size of the wrapped byte array.
#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
static MUCH_LONGER_TEXT: PmByteString<2177> =
// `PmByteString::new` is an `unsafe fn`: it requires the value to end up in a
// static placed in progmem, which the `link_section` attribute above arranges
// on AVR.
unsafe { PmByteString::new(include_str!("./test_text.txt")).unwrap() };


// Include a fancy printer supporting Arduino Uno's USB-Serial output as well
// as stdout on non-AVR targets.
mod printer;
use printer::Printer;

/// Entry point of the example.
///
/// Sets up a `Printer` (USB-Serial on AVR, the unit-like `Printer` on other
/// targets), prints the three progmem strings defined above and then either
/// exits the process (non-AVR) or spins forever (AVR). Never returns.
#[no_mangle]
fn main() -> ! {
// Exactly one of the two cfg-gated blocks below survives compilation and
// becomes the value of `printer`.
let mut printer = {
#[cfg(target_arch = "avr")]
{
// Initialize the USB-Serial output on the Arduino Uno

let dp = arduino_hal::Peripherals::take().unwrap();
let pins = arduino_hal::pins!(dp);
// NOTE(review): 9600 is presumably the baud rate of the serial link -- confirm
// against the `arduino_hal::default_serial!` documentation.
let serial = arduino_hal::default_serial!(dp, pins, 9600);

Printer(serial)
}
#[cfg(not(target_arch = "avr"))]
{
// Just use stdout for non-AVR targets
Printer
}
};

// Print some introduction text
printer.println("Hello from Arduino!");
printer.println("");
printer.println("--------------------------");
printer.println("");

// Read string from progmem char-by-char
for c in LONG_TEXT.chars() {
printer.print(c);
}

printer.println("");

// Or just use the `ufmt::uDisplay` impl
ufmt::uwrite!(&mut printer, "{}", &UNICODE_TEXT).unwrap();

printer.println("");

// Thus loading 2 KiB with ease
ufmt::uwrite!(&mut printer, "{}", MUCH_LONGER_TEXT).unwrap();

// Print some final lines
printer.println("");
printer.println("--------------------------");
printer.println("");
printer.println("DONE");

// It is very convenient to just exit on non-AVR platforms, otherwise users
// might get the impression that the program hangs, whereas it already
// succeeded.
#[cfg(not(target_arch = "avr"))]
std::process::exit(0);

// Otherwise, that is on AVR, just go into an infinite loop, because on AVR
// we just can't exit!
loop {
// Done, just do nothing
}
}
113 changes: 113 additions & 0 deletions src/string.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use core::fmt;
use core::ops::Deref;

use crate::wrapper::PmIter;
use crate::ProgMem;


mod from_slice;
mod validations;



Expand Down Expand Up @@ -56,6 +59,11 @@ impl<const N: usize> ByteString<N> {
Err(_e) => None,
}
}

/// Returns the underlying byte array.
///
/// This only borrows the wrapped `[u8; N]`; nothing is copied.
pub fn as_bytes(&self) -> &[u8; N] {
&self.0
}
}

impl<const N: usize> Deref for ByteString<N> {
Expand Down Expand Up @@ -123,3 +131,108 @@ macro_rules! progmem_str {
&*TEXT.load()
}};
}



/// A byte string in progmem
///
/// Not to be confused with a [`ByteString`].
/// A `ByteString` is just a wrapper around a byte array (`[u8;N]`) that can
/// be put into a [`ProgMem`].
/// A `PmByteString` on the other hand, is a wrapper around a
/// `ProgMem<[u8;N]>`.
///
/// The const parameter `N` is the length of the wrapped byte array, i.e. the
/// size of the string in bytes (not in `char`s).
///
/// Use [`PmByteString::chars`] to lazily iterate over the `char`s of the
/// string.
///
// `repr(transparent)` gives this wrapper the same layout as its single field.
#[repr(transparent)]
pub struct PmByteString<const N: usize>(pub ProgMem<[u8; N]>);

impl<const N: usize> PmByteString<N> {
/// Creates a new byte array from the given string
///
/// # Safety
///
/// This function is only sound to call, if the value is
/// stored in a static that is for instance attributed with
/// `#[link_section = ".progmem.data"]`.
pub const unsafe fn new(s: &str) -> Option<Self> {
Self::from_bytes(s.as_bytes())
}

/// Wraps the given byte slice
///
/// # Safety
///
/// This function is only sound to call, if the value is
/// stored in a static that is for instance attributed with
/// `#[link_section = ".progmem.data"]`.
pub const unsafe fn from_bytes(bytes: &[u8]) -> Option<Self> {
let res = from_slice::array_ref_try_from_slice(bytes);

match res {
Ok(array) => {
let array = *array;
let pm = unsafe { ProgMem::new(array) };
Some(Self(pm))
},
Err(_e) => None,
}
}

/// Returns the underlying progmem byte array.
pub fn as_bytes(&self) -> &ProgMem<[u8; N]> {
&self.0
}

/// Lazily iterate over the `char`s of the string.
///
/// This function is analog to [`ProgMem::iter`], except it is over the
/// `char`s of this string.
pub fn chars(&self) -> PmChars<N> {
PmChars::new(self)
}
}

impl<const N: usize> fmt::Display for PmByteString<N> {
    /// Writes the string to the formatter, one lazily loaded `char` at a
    /// time, stopping at the first write error.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        self.chars().try_for_each(|c| write!(fmt, "{}", c))
    }
}

#[cfg(any(feature = "ufmt", doc))]
#[doc(cfg(feature = "ufmt"))]
impl<const N: usize> ufmt::uDisplay for PmByteString<N> {
    /// Writes the string to the `ufmt` formatter, one lazily loaded `char`
    /// at a time, stopping at the first write error.
    fn fmt<W: ?Sized>(&self, fmt: &mut ufmt::Formatter<W>) -> Result<(), W::Error>
    where
        W: ufmt::uWrite,
    {
        self.chars().try_for_each(|c| ufmt::uwrite!(fmt, "{}", c))
    }
}


/// An iterator over a [`PmByteString`]
///
/// Yields the `char`s of the string, decoding them on the fly from the
/// wrapped byte iterator.
pub struct PmChars<'a, const N: usize> {
/// The underlying lazy iterator over the bytes of the string.
bytes: PmIter<'a, u8, N>,
}

impl<'a, const N: usize> PmChars<'a, N> {
pub fn new(pm: &'a PmByteString<N>) -> Self {
PmChars {
bytes: pm.0.iter(),
}
}
}

impl<'a, const N: usize> Iterator for PmChars<'a, N> {
    type Item = char;

    /// Decodes and returns the next `char`, or `None` once the underlying
    /// bytes are exhausted.
    ///
    /// # Panics
    ///
    /// Panics if the decoded code point is not a valid `char`.
    fn next(&mut self) -> Option<Self::Item> {
        let code_point = unsafe { validations::next_code_point(&mut self.bytes) }?;
        Some(core::char::from_u32(code_point).unwrap())
    }
}
69 changes: 69 additions & 0 deletions src/string/validations.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// This file is a partial copy of the str/validations.rs of the Rust core lib.
//
// A copy was needed, because the original `next_code_point` takes an iterator
// of `&u8`, which is not an option for us, because we only have `u8` by-value.
//
// Source:
// https://github.com/rust-lang/rust/blob/03b17b181af4945fa24e0df79676e89454546440/library/core/src/str/validations.rs


/// Mask of the value bits of a continuation byte.
///
/// It selects the lower six bits of a byte.
const CONT_MASK: u8 = 0b0011_1111;

/// Computes the initial code point accumulator from the first byte of a
/// multi-byte sequence.
///
/// The first byte is special: only its bottom 5 bits are payload for
/// width 2, 4 bits for width 3, and 3 bits for width 4. The payload mask is
/// obtained by shifting `0x7F` right by `width`.
#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask = 0x7F_u8 >> width;
    (byte & payload_mask) as u32
}

/// Appends the value bits of the continuation byte `byte` to the
/// accumulator `ch` and returns the result.
///
/// The accumulator is shifted left by six bits to make room for the six
/// value bits selected by `CONT_MASK`.
#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    let payload = (byte & CONT_MASK) as u32;
    (ch << 6) | payload
}


/// Reads the next code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Returns `None` if the iterator is already exhausted when the first byte
/// of the next code point is requested.
///
/// # Panics
///
/// Panics if the iterator ends in the middle of a multi-byte sequence,
/// because the continuation bytes are fetched with a checked `unwrap`.
///
/// # Safety
///
/// `bytes` must produce a valid UTF-8-like (UTF-8 or WTF-8) string
#[inline]
pub(super) unsafe fn next_code_point<I: Iterator<Item = u8>>(bytes: &mut I) -> Option<u32> {
    // Decode UTF-8
    let x = bytes.next()?;
    if x < 128 {
        // Single-byte (ASCII) case
        return Some(x as u32);
    }

    // Multibyte case follows
    // Decode from a byte combination out of: [[[x y] z] w]
    // NOTE: Performance is sensitive to the exact formulation here
    let init = utf8_first_byte(x, 2);
    // `bytes` is required to produce an UTF-8-like string, so the iterator
    // must produce a value here; a violation is caught by the checked
    // `unwrap` (no `unsafe` block is needed for that).
    let y = bytes.next().unwrap();
    let mut ch = utf8_acc_cont_byte(init, y);
    if x >= 0xE0 {
        // [[x y z] w] case
        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
        let z = bytes.next().unwrap();
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        ch = init << 12 | y_z;
        if x >= 0xF0 {
            // [x y z w] case
            // use only the lower 3 bits of `init`
            let w = bytes.next().unwrap();
            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
        }
    }

    Some(ch)
}
51 changes: 51 additions & 0 deletions src/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,59 @@ impl<T: Copy, const N: usize> ProgMem<[T; N]> {
//
unsafe { read_value(array) }
}

/// Lazily iterate over all elements
///
/// Returns an iterator which lazily loads the elements one at a time
/// from progmem.
/// This means this iterator can be used to access huge arrays while
/// only requiring `size_of::<T>()` amount of stack memory.
///
/// # Panics
///
/// This method panics, if the size of an element (i.e. `size_of::<T>()`)
/// is beyond 255 bytes.
/// However, this is currently just a implementation limitation, which may
/// be lifted in the future.
///
pub fn iter(&self) -> PmIter<T, N> {
PmIter::new(self)
}
}


/// An iterator over an array in progmem.
///
/// The elements are loaded one at a time via `ProgMem::load_at` when the
/// iterator is advanced.
pub struct PmIter<'a, T, const N: usize> {
/// The progmem array that is iterated over.
progmem: &'a ProgMem<[T; N]>,
/// Index of the element that the next call to `next` will load.
current_idx: usize,
}

impl<'a, T, const N: usize> PmIter<'a, T, N> {
    /// Builds an iterator over `pm` that is positioned at its first element.
    pub const fn new(pm: &'a ProgMem<[T; N]>) -> Self {
        Self {
            current_idx: 0,
            progmem: pm,
        }
    }
}

impl<'a, T: Copy, const N: usize> Iterator for PmIter<'a, T, N> {
    type Item = T;

    /// Loads the next element from progmem, or returns `None` once all `N`
    /// elements have been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        // Check for iterator end
        if self.current_idx >= N {
            return None;
        }

        // Load next item from progmem
        let item = self.progmem.load_at(self.current_idx);
        self.current_idx += 1;

        Some(item)
    }

    /// Reports the exact number of elements that are still to be yielded.
    ///
    /// The array length `N` is known statically, so the default hint of
    /// `(0, None)` would needlessly discard that information.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `current_idx` never exceeds `N` (see `next`), so this cannot
        // underflow.
        let remaining = N - self.current_idx;
        (remaining, Some(remaining))
    }
}


/// Define a static in progmem.
Expand Down

0 comments on commit 9d13169

Please sign in to comment.