From fa420a1930a35778ff305e33ac4841b5d3dc186c Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 12 Jul 2023 12:23:16 +1000 Subject: [PATCH] Add support for encoding/decoding bech32 addresses Add support for: - Converting bytes to field elements using two extension traits (and iterator adaptors). - Checksumming a stream of field elements. - Decoding bech32 hrpstrings (as well as segwit addresses). - Encoding hrpstrings by way of an `Encoder` and a bunch of iterator adaptors. --- src/lib.rs | 2 + src/primitives/checksum.rs | 68 ++++- src/primitives/decode/mod.rs | 394 +++++++++++++++++++++++++ src/primitives/decode/segwit.rs | 219 ++++++++++++++ src/primitives/encode.rs | 311 ++++++++++++++++++++ src/primitives/gf32.rs | 2 + src/primitives/iter.rs | 496 ++++++++++++++++++++++++++++++++ src/primitives/mod.rs | 3 + tests/bip_173_test_vectors.rs | 107 +++++++ tests/bip_350_test_vectors.rs | 122 ++++++++ 10 files changed, 1718 insertions(+), 6 deletions(-) create mode 100644 src/primitives/decode/mod.rs create mode 100644 src/primitives/decode/segwit.rs create mode 100644 src/primitives/encode.rs create mode 100644 src/primitives/iter.rs create mode 100644 tests/bip_173_test_vectors.rs create mode 100644 tests/bip_350_test_vectors.rs diff --git a/src/lib.rs b/src/lib.rs index a201b772b..4d775015f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,8 +35,10 @@ use core::{fmt, mem}; pub use crate::primitives::checksum::Checksum; use crate::primitives::checksum::{self, PackedFe32}; +pub use crate::primitives::gf32::Fe32; use crate::primitives::hrp; pub use crate::primitives::hrp::Hrp; +pub use crate::primitives::iter::{ByteIterExt, Fe32IterExt}; pub use crate::primitives::{Bech32, Bech32m}; mod error; diff --git a/src/primitives/checksum.rs b/src/primitives/checksum.rs index da6f57c44..4598f41fb 100644 --- a/src/primitives/checksum.rs +++ b/src/primitives/checksum.rs @@ -97,12 +97,8 @@ impl Engine { /// Feeds `hrp` into the checksum engine. 
pub fn input_hrp(&mut self, hrp: &Hrp) { - for b in hrp.lowercase_byte_iter() { - self.input_fe(Fe32(b >> 5)); - } - self.input_fe(Fe32::Q); - for b in hrp.lowercase_byte_iter() { - self.input_fe(Fe32(b & 0x1f)); + for fe in HrpFe32Iter::new(hrp) { + self.input_fe(fe) } } @@ -200,3 +196,63 @@ macro_rules! impl_packed_fe32 { impl_packed_fe32!(u32); impl_packed_fe32!(u64); impl_packed_fe32!(u128); + +/// Iterator that yields the field elements that are input into a checksum algorithm for an [`Hrp`]. +pub struct HrpFe32Iter<'hrp> { + /// `None` once the hrp high fes have been yielded. + high_iter: Option>, + /// `None` once the hrp low fes have been yielded. + low_iter: Option>, +} + +impl<'hrp> HrpFe32Iter<'hrp> { + /// Creates an iterator that yields the field elements of `hrp` as they are input into the + /// checksum algorithm. + pub fn new(hrp: &'hrp Hrp) -> Self { + let high_iter = hrp.lowercase_byte_iter(); + let low_iter = hrp.lowercase_byte_iter(); + + Self { high_iter: Some(high_iter), low_iter: Some(low_iter) } + } +} + +impl<'hrp> Iterator for HrpFe32Iter<'hrp> { + type Item = Fe32; + fn next(&mut self) -> Option { + if let Some(ref mut high_iter) = &mut self.high_iter { + match high_iter.next() { + Some(high) => return Some(Fe32(high >> 5)), + None => { + self.high_iter = None; + return Some(Fe32::Q); + } + } + } + if let Some(ref mut low_iter) = &mut self.low_iter { + match low_iter.next() { + Some(low) => return Some(Fe32(low & 0x1f)), + None => self.low_iter = None, + } + } + None + } + + fn size_hint(&self) -> (usize, Option) { + let high = match &self.high_iter { + Some(high_iter) => { + let (min, max) = high_iter.size_hint(); + (min + 1, max.map(|max| max + 1)) // +1 for the extra Q + } + None => (0, Some(0)), + }; + let low = match &self.low_iter { + Some(low_iter) => low_iter.size_hint(), + None => (0, Some(0)), + }; + + let min = high.0 + 1 + low.0; + let max = high.1.zip(low.1).map(|(high, low)| high + 1 + low); + + (min, max) + } +} diff --git 
a/src/primitives/decode/mod.rs b/src/primitives/decode/mod.rs new file mode 100644 index 000000000..ce1a4d130 --- /dev/null +++ b/src/primitives/decode/mod.rs @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: MIT + +//! Decoding of bech32 encoded strings as specified by [BIP-173] (and [BIP-350]). +//! +//! A Bech32 string is at most 90 characters long and consists of: +//! +//! - The human-readable part, which is intended to convey the type of data, or anything else that +//! is relevant to the reader. This part MUST contain 1 to 83 US-ASCII characters. +//! - The separator, which is always "1". +//! - The data part, which is at least 6 characters long and only consists of alphanumeric +//! characters excluding "1", "b", "i", and "o". +//! +//! ## Modules +//! +//! > We first describe the general checksummed base32 format called Bech32 and then define +//! > Segregated Witness addresses using it. +//! +//! - mod.rs: Handles parsing the general checksummed base32 format. +//! - segwit.rs: Handles parsing Segregated Witness addresses. +//! +//! [BIP-173]: https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki +//! [BIP-350]: https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki + +pub mod segwit; + +use core::{fmt, iter, slice, str}; + +use crate::primitives::checksum::{self, Checksum}; +use crate::primitives::gf32::Fe32; +use crate::primitives::hrp::{self, Hrp}; +use crate::primitives::iter::{Fe32IterExt, FesToBytes}; +use crate::write_err; + +/// Separator between the hrp and payload (as defined by BIP-173). +const SEP: char = '1'; + +/// An HRP string that has been parsed and had the checksum validated. +/// +/// Pre-parsing an HRP string only checks validity of the characters, it does not validate the +/// checksum in any way - to validate convert to [`Hrpstring`]. +#[derive(Debug)] +pub struct Hrpstring<'s> { + /// The human-readable part, guaranteed to be lowercase ASCII characters. + hrp: Hrp, + /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters, + /// with the checksum removed. 
+ data: &'s [u8], +} + +impl<'s> Hrpstring<'s> { + /// Parses and validates an HRP string, without treating the first data character specially. + /// + /// This is equivalent to `Hrpstring::new_unvalidated().validate_checksum::()`. + pub fn new(s: &'s str) -> Result { + let unvalidated = Hrpstring::new_unvalidated(s)?; + let ret = unvalidated.validate_checksum::()?; + Ok(ret) + } + + /// Parses a bech32 encoded string and constructs a [`Hrpstring`] object. + /// + /// Checks for valid ASCII values, does not validate the checksum. + /// + /// The object returned by this function must have `validate_checksum` called on it to + /// validate the checksum and maintain data invariant (remove checksum from parsed data). + pub fn new_unvalidated(s: &'s str) -> Result { + let sep_pos = check_characters(s)?; + let (hrp, data) = s.split_at(sep_pos); + + let ret = Hrpstring { + hrp: Hrp::parse(hrp)?, + data: data[1..].as_bytes(), // Skip the separator. + }; + + Ok(ret) + } + + /// Returns the human-readable part. + pub fn hrp(&self) -> Hrp { self.hrp } + + /// Returns an iterator over the byte data encoded by the HRP string (excluding the HRP and + /// checksum). + pub fn byte_iter(&self) -> ByteIter { + ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() } + } + + /// Validates that a [`Hrpstring`] returned by `new_unvalidated` has a valid checksum. + /// + /// # Returns + /// + /// Returns `self` with the checksum removed from the inner data slice. + pub fn validate_checksum(mut self) -> Result { + if Ck::CHECKSUM_LENGTH == 0 { + return Ok(self); // Called with NoChecksum. + } + + if self.data.len() < Ck::CHECKSUM_LENGTH { + return Err(Error::InvalidChecksumLength); + } + + let mut checksum_eng = checksum::Engine::::new(); + checksum_eng.input_hrp(&self.hrp()); + + // Unwrap ok since we checked all characters in our constructor. 
+ for fe in self.data.iter().map(|&b| Fe32::from_char_unchecked(b)) { + checksum_eng.input_fe(fe); + } + + if checksum_eng.residue() != &Ck::TARGET_RESIDUE { + return Err(Error::InvalidChecksum); + } + + let data_len = self.data.len() - Ck::CHECKSUM_LENGTH; + self.data = &self.data[..data_len]; + + Ok(self) + } +} + +/// An iterator over a parsed HRP string data as bytes. +pub struct ByteIter<'s> { + iter: FesToBytes>>>, +} + +impl<'s> Iterator for ByteIter<'s> { + type Item = u8; + fn next(&mut self) -> Option { self.iter.next() } + fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } +} + +impl<'s> ExactSizeIterator for ByteIter<'s> { + fn len(&self) -> usize { self.iter.len() } +} + +/// An iterator over a parsed HRP string data as field elements. +pub struct Fe32Iter<'s> { + iter: AsciiToFe32Iter>>, +} + +impl<'s> Iterator for Fe32Iter<'s> { + type Item = Fe32; + fn next(&mut self) -> Option { self.iter.next() } + fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } +} + +/// Helper iterator adaptor that maps an iterator of valid bech32 character ASCII bytes to an +/// iterator of field elements. +/// +/// # Panics +/// +/// If any `u8` in the input iterator is out of range for an [`Fe32`]. Should only be used on data +/// that has already been checked for validity (eg, by using `check_characters`). +struct AsciiToFe32Iter> { + iter: I, +} + +impl Iterator for AsciiToFe32Iter +where + I: Iterator, +{ + type Item = Fe32; + fn next(&mut self) -> Option { self.iter.next().map(Fe32::from_char_unchecked) } + fn size_hint(&self) -> (usize, Option) { + // Each ASCII character is an fe32 so iterators are the same size. + self.iter.size_hint() + } +} + +impl ExactSizeIterator for AsciiToFe32Iter +where + I: Iterator + ExactSizeIterator, +{ + fn len(&self) -> usize { self.iter.len() } +} + +/// Checks whether a given HRP string has data characters in the bech32 alphabet (incl. 
checksum +/// characters), and that the whole string has consistent casing (hrp, data, and checksum). +/// +/// # Returns +/// +/// The byte-index into the string where the '1' separator occurs, or an error if it does not. +fn check_characters(s: &str) -> Result { + let mut has_upper = false; + let mut has_lower = false; + let mut req_bech32 = true; + let mut sep_pos = None; + for (n, ch) in s.char_indices().rev() { + if ch == SEP && sep_pos.is_none() { + req_bech32 = false; + sep_pos = Some(n); + } + if req_bech32 { + Fe32::from_char(ch).map_err(|_| Error::InvalidChar(ch))?; + } + if ch.is_ascii_uppercase() { + has_upper = true; + } else if ch.is_ascii_lowercase() { + has_lower = true; + } + } + if has_upper && has_lower { + Err(Error::MixedCase) + } else if let Some(pos) = sep_pos { + Ok(pos) + } else { + Err(Error::MissingSeparator) + } +} + +/// Errors types for Bech32 encoding/decoding. +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum Error { + /// Human-readable part is invalid. + InvalidHrp(hrp::Error), + /// String does not contain the separator character. + MissingSeparator, + /// No characters after the separator. + NothingAfterSeparator, + /// The checksum does not match the rest of the data. + InvalidChecksum, + /// The checksum is not a valid length. + InvalidChecksumLength, + /// Some part of the string contains an invalid character. + InvalidChar(char), + /// The whole string must be of one case. 
+ MixedCase, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + + match *self { + InvalidHrp(ref e) => write_err!(f, "invalid human-readable part"; e), + MissingSeparator => write!(f, "missing human-readable separator, \"{}\"", SEP), + NothingAfterSeparator => write!(f, "invalid data - no characters after the separator"), + InvalidChecksum => write!(f, "invalid checksum"), + InvalidChecksumLength => write!(f, "the checksum is not a valid length"), + InvalidChar(n) => write!(f, "invalid character (code={})", n), + MixedCase => write!(f, "mixed-case strings not allowed"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use Error::*; + + match *self { + InvalidHrp(ref e) => Some(e), + MissingSeparator + | NothingAfterSeparator + | InvalidChecksum + | InvalidChecksumLength + | InvalidChar(_) + | MixedCase => None, + } + } +} + +impl From for Error { + fn from(e: hrp::Error) -> Self { Error::InvalidHrp(e) } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Bech32, Bech32m}; + + #[test] + #[allow(unused_variables)] // Triggered by matches macro. + fn bip_173_invalid_parsing_fails() { + let invalid: Vec<(&str, Error)> = vec!( + ("\u{20}1nwldj5", + Error::InvalidChar('\u{20}')), + ("\u{7F}1axkwrx", + Error::InvalidChar('\u{7F}')), + ("\u{80}1eym55h", + Error::InvalidChar('\u{80}')), + ("an84characterslonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11d6pts4", + Error::InvalidHrp(hrp::Error::TooLong(84))), + ("pzry9x0s0muk", + Error::MissingSeparator), + ("1pzry9x0s0muk", + Error::InvalidHrp(hrp::Error::Empty)), + ("x1b4n0q5v", + Error::InvalidChar('b')), + ("li1dgmt3", + Error::InvalidChecksum), + ("de1lg7wt\u{ff}", + Error::InvalidChar('\u{ff}')), + ("A1G7SGD8", + Error::MissingSeparator), // TODO: fix error type. 
+ ("10a06t8", + Error::InvalidHrp(hrp::Error::Empty)), + ("1qzzfhee", + Error::InvalidHrp(hrp::Error::Empty)), + ); + + for (s, expected_error) in invalid { + assert!(matches!(Hrpstring::new::(s), Err(expected_error))); + } + } + + #[test] + #[allow(unused_variables)] // Triggered by matches macro. + fn bip_173_invalid_because_of_invalid_checksum() { + assert!(matches!(Hrpstring::new::("li1dgmt3"), Err(Error::InvalidChecksumLength))) + } + + #[test] + #[allow(unused_variables)] // Triggered by matches macro. + fn bip_350_invalid_parsing_fails() { + let invalid: Vec<(&str, Error)> = vec!( + ("\u{20}1xj0phk", + Error::InvalidChar('\u{20}')), + ("\u{7F}1g6xzxy", + Error::InvalidChar('\u{7F}')), + ("\u{80}1g6xzxy", + Error::InvalidChar('\u{7F}')), + ("an84characterslonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1569pvx", + Error::InvalidHrp(hrp::Error::TooLong(84))), + ("qyrz8wqd2c9m", + Error::MissingSeparator), + ("1qyrz8wqd2c9m", + Error::InvalidHrp(hrp::Error::Empty)), + ("y1b0jsk6g", + Error::InvalidChar('b')), + ("lt1igcx5c0", + Error::InvalidChar('i')), + ("mm1crxm3i", + Error::InvalidChar('i')), + ("au1s5cgom", + Error::InvalidChar('o')), + ("M1VUXWEZ", + Error::InvalidChecksum), + ("16plkw9", + Error::InvalidHrp(hrp::Error::Empty)), + ("1p2gdwpf", + Error::InvalidHrp(hrp::Error::Empty)), + + ); + + for (s, expected_error) in invalid { + assert!(matches!(Hrpstring::new::(s), Err(expected_error))); + } + } + + #[test] + #[allow(unused_variables)] // Triggered by matches macro. + fn bip_350_invalid_because_of_invalid_checksum() { + // Note the "bc1p2" test case is not from the bip test vectors. 
+ let invalid: Vec<&str> = vec!["in1muywd", "bc1p2"]; + + for s in invalid { + assert!(matches!(Hrpstring::new::(s), Err(Error::InvalidChecksumLength))) + } + } + + #[test] + fn check_hrp_uppercase_returns_lower() { + let addr = "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"; + let parsed = Hrpstring::new::(addr).expect("failed to parse address"); + assert_eq!(parsed.hrp(), Hrp::parse_unchecked("bc")); + } + + #[test] + #[cfg(feature = "alloc")] + fn check_hrp_max_length() { + let hrps = + "an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio"; + + let hrp = Hrp::parse_unchecked(hrps); + let s = + crate::encode(hrp, [], crate::Variant::Bech32).expect("failed to encode empty buffer"); + + let parsed = Hrpstring::new::(&s).expect("failed to parse address"); + assert_eq!(parsed.hrp(), hrp); + } + + #[test] + fn exclude_strings_that_are_not_valid_bech32_length_0() { + let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq"; + assert!(Hrpstring::new::(addr).is_ok()) + } + + #[test] + fn exclude_strings_that_are_not_valid_bech32_length_1() { + let addr = "23451QAR0SRRR7XFKVY5L643LYDNW9RE59GTZZLKULZK"; + assert!(Hrpstring::new::(addr).is_ok()) + } +} diff --git a/src/primitives/decode/segwit.rs b/src/primitives/decode/segwit.rs new file mode 100644 index 000000000..56ad2f7d9 --- /dev/null +++ b/src/primitives/decode/segwit.rs @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: MIT + +//! Decoding of Segregated Witness address encoded as bech32 strings as specified by [BIP-173] (and [BIP-350]). +//! +//! [BIP-173]: https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki +//! 
[BIP-350]: https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki + +use core::{fmt, str}; + +// use crate::primitives::checksum::Checksum; +use crate::primitives::decode::ByteIter; +use crate::primitives::gf32::Fe32; +use crate::primitives::hrp::Hrp; +use crate::{write_err, Bech32, Bech32m}; + +/// An HRP string that has been parsed and had the checksum validated. +/// +/// Pre-parsing an HRP string only checks validity of the characters, it does not validate the +/// checksum in any way - to validate convert to [`Hrpstring`]. +#[derive(Debug)] +pub struct Hrpstring<'s> { + /// The parsed HRP and data. + inner: super::Hrpstring<'s>, + /// The first byte of `self.inner.data`. + witness_version: Fe32, +} + +impl<'s> Hrpstring<'s> { + /// Parses an HRP string, treating the first data character as a witness version. + /// + /// This version byte does not appear in the extracted binary data, but is covered + /// by the checksum. It can be accessed with [`Self::witness_version`]. + pub fn new(s: &'s str) -> Result { + let unvalidated = super::Hrpstring::new_unvalidated(s)?; + + let hrp = unvalidated.hrp(); + if !is_known_hrp(hrp) { + return Err(Error::UnknownHrp); + } + + // Unwrap ok since check_characters (in `Self::new`) checked the bech32-ness of this char. + let witness_version = Fe32::from_char(unvalidated.data[0].into()).unwrap(); + if witness_version.to_u8() > 16 { + return Err(Error::InvalidWitnessVersion); + } + + let checksum_validated = match witness_version { + Fe32::Q => unvalidated.validate_checksum::()?, + _ => unvalidated.validate_checksum::()?, + }; + + Hrpstring::validate_witness_lengths(checksum_validated) + } + + /// Parses an HRP string, treating the first data character as a witness version. + /// + /// ## WARNING + /// + /// You almost certainly do not want to use this function. 
+ /// + /// It is provided for backwards compatibility to parse addresses that have a non-zero witness + /// version because [BIP-173] explicitly allows using the bech32 checksum with any witness + /// version however [BIP-350] specifies all witness versions > 0 now MUST use bech32m. + /// + /// [BIP-173]: https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki + /// [BIP-350]: https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki + pub fn new_bech32(s: &'s str) -> Result { + let unvalidated = super::Hrpstring::new_unvalidated(s)?; + + let hrp = unvalidated.hrp(); + if !is_known_hrp(hrp) { + return Err(Error::UnknownHrp); + } + + // Unwrap ok since check_characters (in `Self::new`) checked the bech32-ness of this char. + let witness_version = Fe32::from_char(unvalidated.data[0].into()).unwrap(); + if witness_version.to_u8() > 16 { + return Err(Error::InvalidWitnessVersion); + } + + let checksum_validated = unvalidated.validate_checksum::()?; + Hrpstring::validate_witness_lengths(checksum_validated) + } + + #[allow(clippy::manual_range_contains)] // For witness length range check. + fn validate_witness_lengths( + mut checksum_validated: super::Hrpstring<'s>, + ) -> Result { + // Unwrap ok since check_characters (in `super::Hrpstring::new`) checked the bech32-ness of this char. + let witness_version = Fe32::from_char(checksum_validated.data[0].into()).unwrap(); + + // From BIP-173: + // > Re-arrange those bits into groups of 8 bits. Any incomplete group at the + // > end MUST be 4 bits or less, MUST be all zeroes, and is discarded. + + // TODO: We need to check for non-zero padding. + + let data_len = checksum_validated.data.len() - 1; // -1 for witness version. + if data_len * 5 % 8 > 4 { + return Err(Error::InvalidDataLength); + } + + // QUESTION: For all the test vectors to pass the witness length checks include the witness + // byte even though BIP-173 seems to read like the witness byte should be excluded? 
+ let witness_len = checksum_validated.byte_iter().len(); + + if witness_len < 2 || witness_len > 40 { + return Err(Error::InvalidWitnessLength); + } + + if witness_version == Fe32::Q && witness_len != 20 && witness_len != 32 { + return Err(Error::InvalidSegwitV0WitnessLength); + } + + checksum_validated.data = &checksum_validated.data[1..]; + + Ok(Self { inner: checksum_validated, witness_version }) + } + + /// Returns the witness version if present. + pub fn witness_version(&self) -> Fe32 { self.witness_version } + + /// Returns the human-readable part. + pub fn hrp(&self) -> Hrp { self.inner.hrp() } + + /// Returns an iterator over the byte data encoded by the HRP string (excluding the HRP and + /// checksum). + pub fn byte_iter(&self) -> ByteIter { self.inner.byte_iter() } +} + +fn is_known_hrp(hrp: Hrp) -> bool { + if hrp.lowercase_char_iter().eq("bc".chars()) { + return true; + } + if hrp.lowercase_char_iter().eq("tb".chars()) { + return true; + } + if hrp.lowercase_char_iter().eq("bcrt".chars()) { + return true; + } + false +} + +/// Errors types for segwit encoding/decoding. +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum Error { + /// General bech32 decoding error. + Decode(super::Error), + /// Unknown human-readable part. + UnknownHrp, + /// Invalid witness version - not between 0 and 16. + InvalidWitnessVersion, + /// The data payload is not a valid length. + InvalidDataLength, + /// The witness is not between 2 and 40 bytes long. + InvalidWitnessLength, + /// The segwit v0 witness is not 20 or 32 bytes long. 
+ InvalidSegwitV0WitnessLength, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + + match *self { + Decode(ref e) => write_err!(f, "decode failed"; e), + UnknownHrp => write!(f, "unknown human-readable part"), + InvalidWitnessVersion => write!(f, "invalid witness version - not between 0 and 16"), + InvalidDataLength => write!(f, "invalid data - payload is not a valid length"), + InvalidWitnessLength => write!(f, "the witness is not between 2 and 40 bytes long"), + InvalidSegwitV0WitnessLength => + write!(f, "the segwit v0 witness is not 20 or 32 bytes long"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use Error::*; + + match *self { + Decode(ref e) => Some(e), + UnknownHrp + | InvalidWitnessVersion + | InvalidDataLength + | InvalidWitnessLength + | InvalidSegwitV0WitnessLength => None, + } + } +} + +impl From for Error { + fn from(e: super::Error) -> Self { Error::Decode(e) } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn known_hrps() { + assert!(is_known_hrp(Hrp::parse_unchecked("bc"))); + assert!(is_known_hrp(Hrp::parse_unchecked("tb"))); + assert!(is_known_hrp(Hrp::parse_unchecked("bcrt"))); + + assert!(is_known_hrp(Hrp::parse_unchecked("BC"))); + assert!(is_known_hrp(Hrp::parse_unchecked("TB"))); + assert!(is_known_hrp(Hrp::parse_unchecked("BCRT"))); + } + + #[test] + fn unknown_hrps() { + assert!(!is_known_hrp(Hrp::parse_unchecked("abc"))); + assert!(!is_known_hrp(Hrp::parse_unchecked("b"))); + assert!(!is_known_hrp(Hrp::parse_unchecked("bc1"))); + } +} diff --git a/src/primitives/encode.rs b/src/primitives/encode.rs new file mode 100644 index 000000000..c0d2f5020 --- /dev/null +++ b/src/primitives/encode.rs @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: MIT + +//! Bech32 address encoding. +//! +//! 
This module provides types and iterators that can be used to encode data as a bech32 address in +//! a variety of ways without any allocations, generating, verifying, and appending checksums, +//! prepending HRP strings etc. +//! +//! In general, directly using these adaptors is not very ergonomic, and users are recommended to +//! instead use the higher-level functions at the root of this crate. +//! +//! # Examples +//! +//! ``` +//! use bech32::{Bech32, ByteIterExt, Fe32IterExt, Fe32, Hrp}; +//! +//! let witness_prog = [ +//! 0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4, +//! 0x54, 0x94, 0x1c, 0x45, 0xd1, 0xb3, 0xa3, 0x23, +//! 0xf1, 0x43, 0x3b, 0xd6, +//! ]; +//! +//! // Get a stream of characters representing the bech32 encoded address +//! // use "bc" for the human-readable part. +//! let hrp = Hrp::parse("bc").expect("bc is valid hrp string"); +//! let chars = witness_prog +//! .iter() +//! .copied() +//! .bytes_to_fes() +//! .with_checksum::(&hrp) +//! .with_witness_version(Fe32::Q) // Optionally add witness version. +//! .chars(); +//! +//! #[cfg(feature = "alloc")] +//! { +//! let addr = chars.collect::(); +//! assert_eq!(addr.to_uppercase(), "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"); +//! } +//! ``` + +use core::iter::Iterator; +use core::marker::PhantomData; + +use crate::primitives::checksum::HrpFe32Iter; +use crate::primitives::hrp::{self, Hrp}; +use crate::primitives::iter::Checksummed; +use crate::{Checksum, Fe32}; + +/// The `Encoder` builds iterators that can be used to encode field elements into a bech32 address. +#[derive(Clone, PartialEq, Eq)] +pub struct Encoder<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// The field elements to encode. + data: I, + /// The human-readable part used at the front of the address encoding. + hrp: &'hrp Hrp, + /// The witness version, if present. + witness_version: Option, + /// Checksum marker. 
+ marker: PhantomData, +} + +impl<'hrp, I, Ck> Encoder<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// Constructs a new bech32 encoder. + pub fn new(data: I, hrp: &'hrp Hrp) -> Self { + Self { data, hrp, witness_version: None, marker: PhantomData:: } + } + + /// Add `witness_version` to the encoder (as first byte of encoded data). + /// + /// Note, caller to guarantee that witness version is within valid range (0-16). + pub fn with_witness_version(mut self, witness_version: Fe32) -> Self { + self.witness_version = Some(witness_version); + self + } + + /// Returns an iterator that yields the bech32 encoded address as field ASCII characters. + pub fn chars(self) -> CharIter<'hrp, I, Ck> { + let witver_iter = WitnessVersionIter::new(self.witness_version, self.data); + CharIter::new(self.hrp, witver_iter) + } + + /// Returns an iterator that yields the field elements that go into the checksum, as well as the checksum at the end. + /// + /// Each field element yielded has been input into the checksum algorithm (including the HRP as it is fed into the algorithm). + pub fn fes(self) -> Fe32Iter<'hrp, I, Ck> { + let witver_iter = WitnessVersionIter::new(self.witness_version, self.data); + Fe32Iter::new(self.hrp, witver_iter) + } +} + +/// Iterator adaptor that just prepends a single character to a field element stream. +/// +/// More ergonomic to use than `std::iter::once(fe).chain(iter)`. +pub struct WitnessVersionIter +where + I: Iterator, +{ + witness_version: Option, + iter: I, +} + +impl WitnessVersionIter +where + I: Iterator, +{ + /// Creates a [`WitnessVersionIter`]. 
+ pub fn new(witness_version: Option, iter: I) -> Self { Self { witness_version, iter } } +} + +impl Iterator for WitnessVersionIter +where + I: Iterator, +{ + type Item = Fe32; + + fn next(&mut self) -> Option { self.witness_version.take().or_else(|| self.iter.next()) } + + fn size_hint(&self) -> (usize, Option) { + let (min, max) = self.iter.size_hint(); + match self.witness_version { + Some(_) => (min + 1, max.map(|max| max + 1)), + None => (min, max), + } + } +} + +/// Iterator adaptor which takes a stream of field elements, converts it to characters prefixed by +/// an HRP (and separator), and suffixed by the checksum i.e., converts the data in a stream of +/// field elements into stream of characters representing the encoded bech32 string. +pub struct CharIter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// `None` once the hrp has been yielded. + hrp_iter: Option>, + /// Iterator over field elements made up of the optional witness version, the data to be + /// encoded, plus the checksum. + checksummed: Checksummed, Ck>, +} + +impl<'hrp, I, Ck> CharIter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// Adapts the `Fe32Iter` iterator to yield characters representing the bech32 encoding. + pub fn new(hrp: &'hrp Hrp, data: WitnessVersionIter) -> Self { + let checksummed = Checksummed::new_hrp(hrp, data); + Self { hrp_iter: Some(hrp.lowercase_char_iter()), checksummed } + } +} + +impl<'a, I, Ck> Iterator for CharIter<'a, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + type Item = char; + + fn next(&mut self) -> Option { + if let Some(ref mut hrp_iter) = self.hrp_iter { + match hrp_iter.next() { + Some(c) => return Some(c), + None => { + self.hrp_iter = None; + return Some('1'); + } + } + } + + self.checksummed.next().map(|fe| fe.to_char()) + } + + fn size_hint(&self) -> (usize, Option) { + match &self.hrp_iter { + // We have yielded the hrp and separator already. 
+ None => self.checksummed.size_hint(), + // Yet to finish yielding the hrp (and the separator). + Some(hrp_iter) => { + let (hrp_min, hrp_max) = hrp_iter.size_hint(); + let (chk_min, chk_max) = self.checksummed.size_hint(); + + let min = hrp_min + 1 + chk_min; // +1 for the separator. + + // To provide a max boundary we need to have gotten a value from the hrp iter as well as the + // checksummed iter, otherwise we have to return None since we cannot know the maximum. + let max = match (hrp_max, chk_max) { + (Some(hrp_max), Some(chk_max)) => Some(hrp_max + 1 + chk_max), + (_, _) => None, + }; + + (min, max) + } + } + } +} + +/// Iterator adaptor for a checksummed iterator that inputs the HRP into the checksum algorithm +/// before yielding the HRP as field elements followed by the data then checksum. +pub struct Fe32Iter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// `None` once the hrp field elements have been yielded. + hrp_iter: Option>, + /// Iterator over field elements made up of the optional witness version, the data to be + /// encoded, plus the checksum. + checksummed: Checksummed, Ck>, +} + +impl<'hrp, I, Ck> Fe32Iter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// Creates a [`Fe32Iter`] which yields all the field elements which go into the checksum algorithm. 
+ pub fn new(hrp: &'hrp Hrp, data: WitnessVersionIter) -> Self { + let hrp_iter = HrpFe32Iter::new(hrp); + let checksummed = Checksummed::new_hrp(hrp, data); + Self { hrp_iter: Some(hrp_iter), checksummed } + } +} + +impl<'hrp, I, Ck> Iterator for Fe32Iter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + type Item = Fe32; + fn next(&mut self) -> Option { + if let Some(ref mut hrp_iter) = &mut self.hrp_iter { + match hrp_iter.next() { + Some(fe) => return Some(fe), + None => self.hrp_iter = None, + } + } + self.checksummed.next() + } + + fn size_hint(&self) -> (usize, Option) { + let hrp = match &self.hrp_iter { + Some(hrp_iter) => hrp_iter.size_hint(), + None => (0, Some(0)), + }; + + let data = self.checksummed.size_hint(); + + let min = hrp.0 + data.0; + let max = hrp.1.zip(data.1).map(|(hrp, data)| hrp + data); + + (min, max) + } +} + +#[cfg(test)] +mod tests { + use crate::{Bech32, ByteIterExt, Fe32, Fe32IterExt, Hrp}; + + // Tests below using this data, are based on the test vector (from BIP-173): + // BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4: 0014751e76e8199196d454941c45d1b3a323f1433bd6 + #[rustfmt::skip] + const DATA: [u8; 20] = [ + 0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4, + 0x54, 0x94, 0x1c, 0x45, 0xd1, 0xb3, 0xa3, 0x23, + 0xf1, 0x43, 0x3b, 0xd6, + ]; + + #[test] + fn hrpstring_iter() { + let iter = DATA.iter().copied().bytes_to_fes(); + + let hrp = Hrp::parse_unchecked("bc"); + let iter = iter.with_checksum::(&hrp).with_witness_version(Fe32::Q).chars(); + + assert!(iter.eq("bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4".chars())); + } + + #[test] + #[cfg(feature = "alloc")] + fn hrpstring_iter_collect() { + let iter = DATA.iter().copied().bytes_to_fes(); + + let hrp = Hrp::parse_unchecked("bc"); + let iter = iter.with_checksum::(&hrp).with_witness_version(Fe32::Q).chars(); + + let encoded = iter.collect::(); + assert_eq!(encoded, "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4"); + } + + #[test] + fn hrpstring_iter_size_hint() { + let char_len = 
"w508d6qejxtdg4y5r3zarvary0c5xw7k".len(); + let iter = DATA.iter().copied().bytes_to_fes(); + + let hrp = Hrp::parse_unchecked("bc"); + let iter = iter.with_checksum::(&hrp).with_witness_version(Fe32::Q).chars(); + + let checksummed_len = 2 + 1 + 1 + char_len + 6; // bc + SEP + Q + chars + checksum + assert_eq!(iter.size_hint().0, checksummed_len); + } +} diff --git a/src/primitives/gf32.rs b/src/primitives/gf32.rs index 882348a0d..da42c5b0e 100644 --- a/src/primitives/gf32.rs +++ b/src/primitives/gf32.rs @@ -185,6 +185,8 @@ impl Fe32 { Ok(Fe32(u5)) } + pub(crate) fn from_char_unchecked(c: u8) -> Fe32 { Fe32(CHARS_INV[usize::from(c)] as u8) } + /// Converts the field element to a lowercase bech32 character. pub fn to_char(self) -> char { // Indexing fine as we have self.0 in [0, 32) as an invariant. diff --git a/src/primitives/iter.rs b/src/primitives/iter.rs new file mode 100644 index 000000000..2e5e1be6d --- /dev/null +++ b/src/primitives/iter.rs @@ -0,0 +1,496 @@ +// SPDX-License-Identifier: MIT + +//! Iterator Adaptors. +//! +//! Iterator extension traits and blanket implementations to convert: +//! +//! - `BytesToFes`: An iterator over bytes to an iterator over field elements. +//! - `FesToBytes`: An iterator over field elements to an iterator over bytes. +//! - `Checksummed`: An iterator over field elements that appends the checksum. +//! +//! # Examples +//! +//! ``` +//! use bech32::{Bech32, ByteIterExt, Fe32IterExt, Fe32, Hrp}; +//! +//! let data = [ +//! 0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4, +//! 0x54, 0x94, 0x1c, 0x45, 0xd1, 0xb3, 0xa3, 0x23, +//! 0xf1, 0x43, 0x3b, 0xd6, +//! ]; +//! +//! // Convert byte data to GF32 field elements. +//! let fe_iter = data.iter().copied().bytes_to_fes(); +//! +//! // Convert field elements back to bytes. +//! let byte_iter = fe_iter.fes_to_bytes(); +//! +//! # assert!(data.iter().copied().eq(byte_iter)); +//! 
``` + +use crate::primitives::checksum::{self, Checksum, PackedFe32}; +use crate::primitives::encode::Encoder; +use crate::primitives::gf32::Fe32; +use crate::primitives::hrp::Hrp; + +/// Extension trait for byte iterators which provides an adaptor to GF32 elements. +pub trait ByteIterExt: Sized + Iterator { + /// Adapts the byte iterator to output GF32 field elements instead. + /// + /// If the total number of bits is not a multiple of 5 we pad with 0s + fn bytes_to_fes(mut self) -> BytesToFes { + BytesToFes { last_byte: self.next(), bit_offset: 0, iter: self } + } + + /// Adapts the byte iterator to encode the field elements into a bech32 address (after first + /// converting it to a field element iterator) with checksum `Ck`. + fn with_checksum(self, hrp: &Hrp) -> Encoder, Ck> + where + I: Iterator, + Ck: Checksum, + { + self.bytes_to_fes().with_checksum(hrp) + } +} + +impl ByteIterExt for I where I: Iterator {} + +/// Extension trait for field element iterators. +pub trait Fe32IterExt: Sized + Iterator { + /// Adapts the `Fe32` iterator to output bytes instead. + /// + /// If the total number of bits is not a multiple of 8, any trailing bits + /// are simply dropped. + fn fes_to_bytes(mut self) -> FesToBytes { + FesToBytes { last_fe: self.next(), bit_offset: 0, iter: self } + } + + /// Adapts the Fe32 iterator to encode the field elements into a bech32 address. + fn with_checksum(self, hrp: &Hrp) -> Encoder { Encoder::new(self, hrp) } +} + +impl Fe32IterExt for I where I: Iterator {} + +/// Iterator adaptor that converts bytes to GF32 elements. +/// +/// If the total number of bits is not a multiple of 5, it right-pads with 0 bits. 
+#[derive(Clone, PartialEq, Eq)]
+pub struct BytesToFes<I: Iterator<Item = u8>> {
+    /// The byte currently being split into field elements, `None` once the input is exhausted.
+    last_byte: Option<u8>,
+    /// Bit position within `last_byte` (0 = most significant bit) at which the next field
+    /// element starts.
+    bit_offset: usize,
+    /// The bytes that have not been read yet.
+    iter: I,
+}
+
+impl<I> BytesToFes<I>
+where
+    I: Iterator<Item = u8>,
+{
+    /// Returns how many field elements are still to be yielded if `unread` more bytes can be
+    /// pulled from the inner iterator, or `None` if that number of bits overflows a `usize`.
+    fn remaining_fes(&self, unread: usize) -> Option<usize> {
+        // `next` never touches the inner iterator again once `last_byte` is `None`.
+        if self.last_byte.is_none() {
+            return Some(0);
+        }
+        // `last_byte` has already been taken out of `iter` (hence the +1) and its first
+        // `bit_offset` bits have already been yielded. `bit_offset` is always less than 8 so
+        // the subtraction cannot underflow.
+        let bits = unread.checked_add(1)?.checked_mul(8)? - self.bit_offset;
+        // Round up because the final field element is right-padded with zero bits.
+        Some(bits / 5 + usize::from(bits % 5 != 0))
+    }
+}
+
+impl<I> Iterator for BytesToFes<I>
+where
+    I: Iterator<Item = u8>,
+{
+    type Item = Fe32;
+
+    fn next(&mut self) -> Option<Fe32> {
+        use core::cmp::Ordering::*;
+
+        // Offset of the field element yielded by this call; the stored offset is advanced to
+        // where the following field element will start.
+        let bit_offset = {
+            let ret = self.bit_offset;
+            self.bit_offset = (self.bit_offset + 5) % 8;
+            ret
+        };
+
+        if let Some(last) = self.last_byte {
+            match bit_offset.cmp(&3) {
+                // All 5 bits are in the current byte and some bits are left over afterwards.
+                Less => Some(Fe32((last >> (3 - bit_offset)) & 0x1f)),
+                // The 5 bits are exactly the low bits of the current byte.
+                Equal => {
+                    self.last_byte = self.iter.next();
+                    Some(Fe32(last & 0x1f))
+                }
+                // The 5 bits straddle the current and the next byte, a missing next byte is
+                // treated as zero padding.
+                Greater => {
+                    self.last_byte = self.iter.next();
+                    let next = self.last_byte.unwrap_or(0);
+                    Some(Fe32(((last << (bit_offset - 3)) | (next >> (11 - bit_offset))) & 0x1f))
+                }
+            }
+        } else {
+            None
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (min, max) = self.iter.size_hint();
+
+        // On overflow more than `usize::MAX` bits remain, so at least `usize::MAX / 5` field
+        // elements do; the upper bound is unknown in that case.
+        let min = self.remaining_fes(min).unwrap_or(usize::MAX / 5);
+        let max = max.and_then(|max| self.remaining_fes(max));
+
+        (min, max)
+    }
+}
+
+impl<I> ExactSizeIterator for BytesToFes<I>
+where
+    I: Iterator<Item = u8> + ExactSizeIterator,
+{
+    /// The exact number of field elements left, including those still to be produced from the
+    /// byte that was already read out of the inner iterator (saturates at `usize::MAX`).
+    fn len(&self) -> usize { self.remaining_fes(self.iter.len()).unwrap_or(usize::MAX) }
+}
+
+/// Iterator adaptor that converts GF32 elements to bytes.
+///
+/// If the total number of bits is not a multiple of 8, any trailing bits are dropped.
+///
+/// Note that if there are 5 or more trailing bits, the result will be that an entire field element
+/// is dropped. If this occurs, the input was an invalid length for a bech32 string, but this
+/// iterator does not do any checks for this.
+#[derive(Clone, PartialEq, Eq)]
+pub struct FesToBytes<I: Iterator<Item = Fe32>> {
+    /// The field element currently being consumed, `None` once the input is exhausted.
+    last_fe: Option<Fe32>,
+    /// Bit position within `last_fe` (0 = most significant of its 5 bits) at which the next
+    /// byte starts.
+    bit_offset: usize,
+    /// The field elements that have not been read yet.
+    iter: I,
+}
+
+impl<I> FesToBytes<I>
+where
+    I: Iterator<Item = Fe32>,
+{
+    /// Returns how many bytes are still to be yielded if `unread` more field elements can be
+    /// pulled from the inner iterator, or `None` if that number of bits overflows a `usize`.
+    fn remaining_bytes(&self, unread: usize) -> Option<usize> {
+        // `next` never touches the inner iterator again once `last_fe` is `None`.
+        if self.last_fe.is_none() {
+            return Some(0);
+        }
+        // `last_fe` has already been taken out of `iter` (hence the +1) and its first
+        // `bit_offset` bits have already been yielded. `bit_offset` is always less than 5 so
+        // the subtraction cannot underflow.
+        let bits = unread.checked_add(1)?.checked_mul(5)? - self.bit_offset;
+        // Round down because trailing bits that do not fill a whole byte are dropped.
+        Some(bits / 8)
+    }
+}
+
+impl<I> Iterator for FesToBytes<I>
+where
+    I: Iterator<Item = Fe32>,
+{
+    type Item = u8;
+
+    fn next(&mut self) -> Option<u8> {
+        // Offset of the byte yielded by this call; the stored offset is advanced to where the
+        // following byte will start.
+        let bit_offset = {
+            let ret = self.bit_offset;
+            self.bit_offset = (self.bit_offset + 8) % 5;
+            ret
+        };
+
+        if let Some(last) = self.last_fe {
+            // The unread bits of the current field element become the top bits of the byte.
+            let mut ret = last.0 << (3 + bit_offset);
+
+            self.last_fe = self.iter.next();
+            let next1 = self.last_fe?;
+            if bit_offset > 2 {
+                // Fewer than 3 bits came from `last`, so the byte spans three field elements.
+                self.last_fe = self.iter.next();
+                let next2 = self.last_fe?;
+                ret |= next1.0 << (bit_offset - 2);
+                ret |= next2.0 >> (7 - bit_offset);
+            } else {
+                ret |= next1.0 >> (2 - bit_offset);
+                // The byte ended exactly on a field element boundary, move on to the next one.
+                if self.bit_offset == 0 {
+                    self.last_fe = self.iter.next();
+                }
+            }
+
+            Some(ret)
+        } else {
+            None
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (fes_min, fes_max) = self.iter.size_hint();
+
+        // On overflow more than `usize::MAX` bits remain, so at least `usize::MAX / 8` bytes
+        // do; the upper bound is unknown in that case.
+        let min = self.remaining_bytes(fes_min).unwrap_or(usize::MAX / 8);
+        let max = fes_max.and_then(|max| self.remaining_bytes(max));
+
+        (min, max)
+    }
+}
+
+impl<I> ExactSizeIterator for FesToBytes<I>
+where
+    I: Iterator<Item = Fe32> + ExactSizeIterator,
+{
+    /// The exact number of bytes left, taking into account the field element that was already
+    /// read out of the inner iterator (saturates at `usize::MAX`).
+    fn len(&self) -> usize { self.remaining_bytes(self.iter.len()).unwrap_or(usize::MAX) }
+}
+
+/// Iterator adaptor for field-element-yielding iterator, which tacks a checksum onto the end of the
+/// yielded data.
+#[derive(Clone, PartialEq, Eq)]
+pub struct Checksummed<I, Ck>
+where
+    I: Iterator<Item = Fe32>,
+    Ck: Checksum,
+{
+    /// The data field elements, yielded unchanged before the checksum.
+    iter: I,
+    /// Number of checksum field elements that still have to be yielded.
+    checksum_remaining: usize,
+    /// Checksum state, fed with every field element as it is yielded.
+    checksum_engine: checksum::Engine<Ck>,
+}
+
+impl<I, Ck> Checksummed<I, Ck>
+where
+    I: Iterator<Item = Fe32>,
+    Ck: Checksum,
+{
+    /// Creates a new checksummed iterator which adapts a data iterator of field elements by
+    /// appending a checksum.
+    pub fn new(data: I) -> Checksummed<I, Ck> {
+        Checksummed {
+            iter: data,
+            checksum_remaining: Ck::CHECKSUM_LENGTH,
+            checksum_engine: checksum::Engine::new(),
+        }
+    }
+
+    /// Creates a new checksummed iterator which adapts a data iterator of field elements by
+    /// first inputting the [`Hrp`] and then appending a checksum.
+    ///
+    /// The HRP is only fed into the checksum engine, it is not yielded by the iterator.
+    pub fn new_hrp(hrp: &Hrp, data: I) -> Checksummed<I, Ck> {
+        let mut ret = Self::new(data);
+        ret.checksum_engine.input_hrp(hrp);
+        ret
+    }
+}
+
+impl<I, Ck> Iterator for Checksummed<I, Ck>
+where
+    I: Iterator<Item = Fe32>,
+    Ck: Checksum,
+{
+    type Item = Fe32;
+
+    fn next(&mut self) -> Option<Fe32> {
+        match self.iter.next() {
+            // Still yielding data: pass it through and fold it into the checksum.
+            Some(fe) => {
+                self.checksum_engine.input_fe(fe);
+                Some(fe)
+            }
+            // Data is exhausted: yield the checksum one field element at a time.
+            None =>
+                if self.checksum_remaining == 0 {
+                    None
+                } else {
+                    // The target residue is input exactly once, right before the first
+                    // checksum field element is produced.
+                    if self.checksum_remaining == Ck::CHECKSUM_LENGTH {
+                        self.checksum_engine.input_target_residue();
+                    }
+                    self.checksum_remaining -= 1;
+                    Some(Fe32(self.checksum_engine.residue().unpack(self.checksum_remaining)))
+                },
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let add = self.checksum_remaining;
+        let (min, max) = self.iter.size_hint();
+
+        // The inner iterator may report `usize::MAX` (e.g. an unbounded iterator) so plain
+        // addition could overflow: saturate the lower bound and give up on the upper bound.
+        (min.saturating_add(add), max.and_then(|max| max.checked_add(add)))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Tests below using this data, are based on the test vector (from BIP-173):
+    // BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4: 0014751e76e8199196d454941c45d1b3a323f1433bd6
+    #[rustfmt::skip]
+    const DATA: [u8; 20] = [
+        0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4,
+        0x54, 0x94, 0x1c, 0x45, 0xd1, 0xb3, 0xa3, 0x23,
+        0xf1, 0x43, 0x3b, 0xd6,
+    ];
+
+    #[test]
+    fn byte_iter_ext() {
+        assert!(DATA
+            .iter()
+            .copied()
+            .bytes_to_fes()
+
.map(Fe32::to_char) + .eq("w508d6qejxtdg4y5r3zarvary0c5xw7k".chars())); + } + + #[test] + fn bytes_to_fes_size_hint() { + let char_len = "w508d6qejxtdg4y5r3zarvary0c5xw7k".len(); + assert_eq!(DATA.iter().copied().bytes_to_fes().size_hint(), (char_len, Some(char_len))); + } + + #[test] + fn fe32_iter_ext() { + let fe_iter = "w508d6qejxtdg4y5r3zarvary0c5xw7k" + .bytes() + .map(|b| Fe32::from_char(char::from(b)).unwrap()); + + assert!(fe_iter.clone().fes_to_bytes().eq(DATA.iter().copied())); + } + + #[test] + fn fes_to_bytes_size_hint() { + let fe_iter = "w508d6qejxtdg4y5r3zarvary0c5xw7k" + .bytes() + .map(|b| Fe32::from_char(char::from(b)).unwrap()); + + let got_hint = fe_iter.clone().fes_to_bytes().size_hint(); + let want_hint = DATA.iter().size_hint(); + + assert_eq!(got_hint, want_hint) + } + + #[test] + fn padding_bytes_trailing_0_bits_roundtrips() { + // 5 * 8 % 5 = 0 + const BYTES: [u8; 5] = [0x75, 0x1e, 0x76, 0xe8, 0x19]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_bytes_trailing_1_bit_roundtrips() { + // 2 * 8 % 5 = 1 + const BYTES: [u8; 2] = [0x75, 0x1e]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_bytes_trailing_2_bits_roundtrips() { + // 4 * 8 % 5 = 2 + const BYTES: [u8; 4] = [0x75, 0x1e, 0x76, 0xe8]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_bytes_trailing_3_bits_roundtrips() { + // 6 * 8 % 5 = 3 + const BYTES: [u8; 6] = [0x75, 0x1e, 0x76, 0xe8, 0x19, 0xab]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_bytes_trailing_4_bits_roundtrips() { + // 3 * 8 % 5 = 4 + const BYTES: [u8; 3] = [0x75, 0x1e, 0x76]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_0_bits_roundtrips() { + // 8 * 5 % 
8 = 0 + const FES: [Fe32; 8] = + [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Y, Fe32::X, Fe32::G, Fe32::F]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_1_bit_zero_roundtrips() { + // 5 * 5 % 8 = 1 + const FES: [Fe32; 5] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_1_bit_non_zero_does_not_roundtrip() { + // 5 * 5 % 8 = 1 + const FES: [Fe32; 5] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::L]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_2_bits_zeros_roundtrips() { + // 2 * 5 % 8 = 2 + const FES: [Fe32; 2] = [Fe32::P, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_2_bits_non_zero_does_not_roundtrip() { + // 2 * 5 % 8 = 2 + const FES: [Fe32; 2] = [Fe32::Q, Fe32::P]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_3_bits_zeros_roundtrips() { + // 7 * 5 % 8 = 3 + const FES: [Fe32; 7] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Y, Fe32::X, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_3_bits_non_zero_does_not_roundtrip() { + // 7 * 5 % 8 = 3 + const FES: [Fe32; 7] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Y, Fe32::X, Fe32::P]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_4_bits_zeros_roundtrips() { + // 4 * 5 % 8 = 4 + const FES: [Fe32; 4] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + 
#[should_panic] + fn padding_fes_trailing_4_bits_non_zero_does_not_roundtrip() { + // 4 * 5 % 8 = 4 + const FES: [Fe32; 4] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::P]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + // Padding is never more than 4 bits so any additional bits will always fail to roundtrip. + + #[test] + #[should_panic] + fn padding_fes_trailing_5_bits_zeros_does_not_roundtrip() { + // 1 * 5 % 8 = 5 + const FES: [Fe32; 1] = [Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_5_bits_non_zero_does_not_roundtrip() { + // 1 * 5 % 8 = 5 + const FES: [Fe32; 1] = [Fe32::P]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_6_bits_zeros_does_not_roundtrip() { + // 6 * 5 % 8 = 6 + const FES: [Fe32; 6] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Q, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_6_bits_non_zero_does_not_roundtrip() { + // 6 * 5 % 8 = 6 + const FES: [Fe32; 6] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Y, Fe32::X]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_7_bits_zeros_does_not_roundtrip() { + // 3 * 5 % 8 = 7 + const FES: [Fe32; 3] = [Fe32::P, Fe32::Q, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_7_bits_non_zero_does_not_roundtrip() { + // 3 * 5 % 8 = 7 + const FES: [Fe32; 3] = [Fe32::Q, Fe32::P, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } +} diff --git a/src/primitives/mod.rs b/src/primitives/mod.rs index bd08c3736..478775a1a 100644 --- 
a/src/primitives/mod.rs +++ b/src/primitives/mod.rs @@ -3,8 +3,11 @@ //! Provides the internal nuts and bolts that enable bech32 encoding/decoding. pub mod checksum; +pub mod decode; +pub mod encode; pub mod gf32; pub mod hrp; +pub mod iter; use checksum::{Checksum, PackedNull}; diff --git a/tests/bip_173_test_vectors.rs b/tests/bip_173_test_vectors.rs new file mode 100644 index 000000000..94f343547 --- /dev/null +++ b/tests/bip_173_test_vectors.rs @@ -0,0 +1,107 @@ +// BIP-173 test vectors. + +#![cfg(feature = "alloc")] + +use bech32::{Bech32, Bech32m, ByteIterExt, Fe32IterExt}; + +// This is a separate test because we correctly identify this string as invalid but not for the +// reason given in the bip. +#[test] +fn bip_173_checksum_calculated_with_uppercase_form() { + use bech32::primitives::decode::Hrpstring; + // BIP-173 states reason for error should be: "checksum calculated with uppercase form of HRP". + let s = "A1G7SGD8"; + assert!(Hrpstring::new::(s).is_err()); +} + +macro_rules! check_valid_bech32 { + ($($test_name:ident, $s:literal);* $(;)?) => { + $( + #[test] + fn $test_name() { + use bech32::primitives::decode::Hrpstring; + + let valid_bech32 = $s; + let hrps = Hrpstring::new_unvalidated(valid_bech32).unwrap(); + let _ = hrps.validate_checksum::().expect("valid bech32"); + + // Valid bech32 strings are by definition invalid bech32m. + let hrps = Hrpstring::new_unvalidated(valid_bech32).unwrap(); + assert!(hrps.validate_checksum::().is_err()); + } + )* + } +} +check_valid_bech32! 
{ + valid_bech32_hrp_string_0, "A12UEL5L"; + valid_bech32_hrp_string_a, "a12uel5l"; + valid_bech32_hrp_string_1, "an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1tt5tgs"; + valid_bech32_hrp_string_2, "abcdef1qpzry9x8gf2tvdw0s3jn54khce6mua7lmqqqxw"; + valid_bech32_hrp_string_3, "11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j"; + valid_bech32_hrp_string_4, "split1checkupstagehandshakeupstreamerranterredcaperred2y9e3w"; + valid_bech32_hrp_string_b, "?1ezyfcl"; +} + +macro_rules! check_valid_address_roundtrip { + ($($test_name:ident, $addr:literal);* $(;)?) => { + $( + #[test] + fn $test_name() { + use bech32::primitives::decode::segwit::Hrpstring; + + let hrpstring = Hrpstring::new_bech32($addr).expect("valid address"); + let hrp = hrpstring.hrp(); + let witness_version = hrpstring.witness_version(); + + let encoded = hrpstring.byte_iter().bytes_to_fes().with_checksum::(&hrp).with_witness_version(witness_version).chars().collect::(); + if encoded != $addr { + let got = encoded.to_uppercase(); + assert_eq!(got, $addr) + } + } + )* + } +} +// Note these test vectors include various witness versions. +check_valid_address_roundtrip! { + bip_173_valid_address_roundtrip_0, "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"; + bip_173_valid_address_roundtrip_1, "tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3q0sl5k7"; + bip_173_valid_address_roundtrip_2, "bc1pw508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7k7grplx"; + bip_173_valid_address_roundtrip_3, "BC1SW50QA3JX3S"; + bip_173_valid_address_roundtrip_4, "bc1zw508d6qejxtdg4y5r3zarvaryvg6kdaj"; + bip_173_valid_address_roundtrip_5, "tb1qqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesrxh6hy"; +} + +macro_rules! check_invalid_address { + ($($test_name:ident, $addr:literal);* $(;)?) 
=> { + $( + #[test] + #[cfg(feature = "alloc")] + fn $test_name() { + use bech32::primitives::decode::segwit::Hrpstring; + assert!(Hrpstring::new($addr).is_err()); + } + )* + } +} +check_invalid_address! { + // Invalid human-readable part + bip_173_invalid_address_0, "tc1qw508d6qejxtdg4y5r3zarvary0c5xw7kg3g4ty"; + // Invalid checksum + bip_173_invalid_address_1, "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t5"; + // Invalid witness version + bip_173_invalid_address_2, "BC13W508D6QEJXTDG4Y5R3ZARVARY0C5XW7KN40WF2"; + // Invalid program length + bip_173_invalid_address_3, "bc1rw5uspcuh"; + // Invalid program length + bip_173_invalid_address_4, "bc10w508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7kw5rljs90"; + // Invalid program length for witness version 0 (per BIP-141) + bip_173_invalid_address_5, "BC1QR508D6QEJXTDG4Y5R3ZARVARYV98GJ9P"; + // Mixed case + bip_173_invalid_address_6, "tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3q0sL5k7"; + // zero padding of more than 4 bits + bip_173_invalid_address_7, "bc1zw508d6qejxtdg4y5r3zarvaryvqyzf3du"; + // Non-zero padding in 8-to-5 conversion + // TODO: We just drop the padding, we need to check that its non-zero. + // bip_173_invalid_address_8, "tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3pjxtptv"; +} diff --git a/tests/bip_350_test_vectors.rs b/tests/bip_350_test_vectors.rs new file mode 100644 index 000000000..a5f7b1f87 --- /dev/null +++ b/tests/bip_350_test_vectors.rs @@ -0,0 +1,122 @@ +// BIP-350 test vectors. + +#![cfg(feature = "alloc")] + +use bech32::{Bech32, Bech32m, ByteIterExt, Fe32, Fe32IterExt}; + +// This is a separate test because we correctly identify this string as invalid but not for the +// reason given in the bip. +#[test] +fn bip_350_checksum_calculated_with_uppercase_form() { + use bech32::primitives::decode::Hrpstring; + // BIP-350 states reason for error should be: "checksum calculated with uppercase form of HRP". 
+ let s = "M1VUXWEZ"; + assert!(Hrpstring::new::(s).is_err()); +} + +macro_rules! check_valid_bech32m { + ($($test_name:ident, $s:literal);* $(;)?) => { + $( + #[test] + fn $test_name() { + use bech32::primitives::decode::Hrpstring; + + let valid_bech32m = $s; + let hrps = Hrpstring::new_unvalidated(valid_bech32m).unwrap(); + let _ = hrps.validate_checksum::().expect("valid bech32m"); + + // Valid bech32 strings are by definition invalid bech32m. + let hrps = Hrpstring::new_unvalidated(valid_bech32m).unwrap(); + assert!(hrps.validate_checksum::().is_err()); + } + )* + } +} +check_valid_bech32m! { + valid_bech32m_hrp_string_0, "A1LQFN3A"; + valid_bech32m_hrp_string_1, "a1lqfn3a"; + valid_bech32m_hrp_string_2, "an83characterlonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11sg7hg6"; + valid_bech32m_hrp_string_3, "abcdef1l7aum6echk45nj3s0wdvt2fg8x9yrzpqzd3ryx"; + valid_bech32m_hrp_string_4, "11llllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllludsr8"; + valid_bech32m_hrp_string_5, "split1checkupstagehandshakeupstreamerranterredcaperredlc445v"; + valid_bech32m_hrp_string_6, "?1v759aa"; +} + +macro_rules! check_valid_address_roundtrip { + ($($test_name:ident, $addr:literal);* $(;)?) => { + $( + #[test] + #[cfg(feature = "alloc")] + fn $test_name() { + use bech32::primitives::decode::segwit::Hrpstring; + + let hrpstring = Hrpstring::new($addr).expect("valid address"); + let hrp = hrpstring.hrp(); + let witness_version = hrpstring.witness_version(); + + let encoded = match witness_version { + Fe32::Q => hrpstring.byte_iter().bytes_to_fes().with_checksum::(&hrp).with_witness_version(witness_version).chars().collect::(), + _ => hrpstring.byte_iter().bytes_to_fes().with_checksum::(&hrp).with_witness_version(witness_version).chars().collect::(), + }; + + if encoded != $addr { + let got = encoded.to_uppercase(); + assert_eq!(got, $addr) + } + } + )* + } +} +// Note these test vectors include various witness versions. 
+check_valid_address_roundtrip! { + bip_350_valid_address_roundtrip_0, "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"; + bip_350_valid_address_roundtrip_1, "tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3q0sl5k7"; + bip_350_valid_address_roundtrip_2, "bc1pw508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7kt5nd6y"; + bip_350_valid_address_roundtrip_3, "BC1SW50QGDZ25J"; + bip_350_valid_address_roundtrip_4, "bc1zw508d6qejxtdg4y5r3zarvaryvaxxpcs"; + bip_350_valid_address_roundtrip_5, "tb1qqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesrxh6hy"; + bip_350_valid_address_roundtrip_6, "tb1pqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesf3hn0c"; + bip_350_valid_address_roundtrip_7, "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqzk5jj0"; +} + +macro_rules! check_invalid_address { + ($($test_name:ident, $addr:literal);* $(;)?) => { + $( + #[test] + #[cfg(feature = "alloc")] + fn $test_name() { + use bech32::primitives::decode::segwit::Hrpstring; + assert!(Hrpstring::new($addr).is_err()); + } + )* + } +} +check_invalid_address! 
{ + // Invalid human-readable part + bip_350_invalid_address_0, "tc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vq5zuyut"; + // Invalid checksums (Bech32 instead of Bech32m): + bip_350_invalid_address_1, "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqh2y7hd"; + bip_350_invalid_address_2, "tb1z0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqglt7rf"; + bip_350_invalid_address_3, "BC1S0XLXVLHEMJA6C4DQV22UAPCTQUPFHLXM9H8Z3K2E72Q4K9HCZ7VQ54WELL"; + bip_350_invalid_address_4, "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kemeawh"; + bip_350_invalid_address_5, "tb1q0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vq24jc47"; + // Invalid character in checksum + bip_350_invalid_address_6, "bc1p38j9r5y49hruaue7wxjce0updqjuyyx0kh56v8s25huc6995vvpql3jow4"; + // Invalid witness version + bip_350_invalid_address_7, "BC130XLXVLHEMJA6C4DQV22UAPCTQUPFHLXM9H8Z3K2E72Q4K9HCZ7VQ7ZWS8R"; + // Invalid program length (1 byte) + bip_350_invalid_address_8, "bc1pw5dgrnzv"; + // Invalid program length (41 bytes) + bip_350_invalid_address_9, "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7v8n0nx0muaewav253zgeav"; + // Invalid program length for witness version 0 (per BIP-141) + bip_350_invalid_address_10, "BC1QR508D6QEJXTDG4Y5R3ZARVARYV98GJ9P"; + // Mixed case + bip_350_invalid_address_11, "tb1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vq47Zagq"; + // zero padding of more than 4 bits + bip_350_invalid_address_12, "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7v07qwwzcrf"; + // Non-zero padding in 8-to-5 conversion + // TODO: We just drop the padding, we need to check that its non-zero. + // bip_350_invalid_address_13, "tb1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vpggkg4j"; + // Empty data section + bip_350_invalid_address_14, "bc1gmk9yu"; +}