From 5478e2bd307991557e006e294f528c94b52fd6ff Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 12 Jul 2023 12:23:16 +1000 Subject: [PATCH] Add support for encoding/decoding bech32 addresses Add support for: - Converting bytes to field elements using two extension traits (and iterator adaptors). - Checksumming a stream of field elements. - Decoding bech32 hrpstrings (as well as segwit addresses). - Encoding hrpstrings by way of an `Encoder` and a bunch of iterator adaptors. --- src/lib.rs | 2 + src/primitives/checksum.rs | 68 ++- src/primitives/decode.rs | 965 ++++++++++++++++++++++++++++++++++ src/primitives/encode.rs | 332 ++++++++++++ src/primitives/gf32.rs | 2 + src/primitives/hrp.rs | 51 ++ src/primitives/iter.rs | 492 +++++++++++++++++ src/primitives/mod.rs | 3 + tests/bip_173_test_vectors.rs | 120 +++++ tests/bip_350_test_vectors.rs | 131 +++++ 10 files changed, 2160 insertions(+), 6 deletions(-) create mode 100644 src/primitives/decode.rs create mode 100644 src/primitives/encode.rs create mode 100644 src/primitives/iter.rs create mode 100644 tests/bip_173_test_vectors.rs create mode 100644 tests/bip_350_test_vectors.rs diff --git a/src/lib.rs b/src/lib.rs index a201b772b..4d775015f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,8 +35,10 @@ use core::{fmt, mem}; pub use crate::primitives::checksum::Checksum; use crate::primitives::checksum::{self, PackedFe32}; +pub use crate::primitives::gf32::Fe32; use crate::primitives::hrp; pub use crate::primitives::hrp::Hrp; +pub use crate::primitives::iter::{ByteIterExt, Fe32IterExt}; pub use crate::primitives::{Bech32, Bech32m}; mod error; diff --git a/src/primitives/checksum.rs b/src/primitives/checksum.rs index da6f57c44..4598f41fb 100644 --- a/src/primitives/checksum.rs +++ b/src/primitives/checksum.rs @@ -97,12 +97,8 @@ impl Engine { /// Feeds `hrp` into the checksum engine. 
pub fn input_hrp(&mut self, hrp: &Hrp) { - for b in hrp.lowercase_byte_iter() { - self.input_fe(Fe32(b >> 5)); - } - self.input_fe(Fe32::Q); - for b in hrp.lowercase_byte_iter() { - self.input_fe(Fe32(b & 0x1f)); + for fe in HrpFe32Iter::new(hrp) { + self.input_fe(fe) } } @@ -200,3 +196,63 @@ macro_rules! impl_packed_fe32 { impl_packed_fe32!(u32); impl_packed_fe32!(u64); impl_packed_fe32!(u128); + +/// Iterator that yields the field elements that are input into a checksum algorithm for an [`Hrp`]. +pub struct HrpFe32Iter<'hrp> { + /// `None` once the hrp high fes have been yielded. + high_iter: Option>, + /// `None` once the hrp low fes have been yielded. + low_iter: Option>, +} + +impl<'hrp> HrpFe32Iter<'hrp> { + /// Creates an iterator that yields the field elements of `hrp` as they are input into the + /// checksum algorithm. + pub fn new(hrp: &'hrp Hrp) -> Self { + let high_iter = hrp.lowercase_byte_iter(); + let low_iter = hrp.lowercase_byte_iter(); + + Self { high_iter: Some(high_iter), low_iter: Some(low_iter) } + } +} + +impl<'hrp> Iterator for HrpFe32Iter<'hrp> { + type Item = Fe32; + fn next(&mut self) -> Option<Fe32> { + if let Some(ref mut high_iter) = &mut self.high_iter { + match high_iter.next() { + Some(high) => return Some(Fe32(high >> 5)), + None => { + self.high_iter = None; + return Some(Fe32::Q); + } + } + } + if let Some(ref mut low_iter) = &mut self.low_iter { + match low_iter.next() { + Some(low) => return Some(Fe32(low & 0x1f)), + None => self.low_iter = None, + } + } + None + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let high = match &self.high_iter { + Some(high_iter) => { + let (min, max) = high_iter.size_hint(); + (min + 1, max.map(|max| max + 1)) // +1 for the extra Q + } + None => (0, Some(0)), + }; + let low = match &self.low_iter { + Some(low_iter) => low_iter.size_hint(), + None => (0, Some(0)), + }; + // `high` already accounts for the separator `Q`, so the two halves are simply summed. + let min = high.0 + low.0; + let max = high.1.zip(low.1).map(|(high, low)| high + low); + + (min, max) + } +} diff --git 
a/src/primitives/decode.rs b/src/primitives/decode.rs new file mode 100644 index 000000000..e4dfa85c1 --- /dev/null +++ b/src/primitives/decode.rs @@ -0,0 +1,965 @@ +// SPDX-License-Identifier: MIT + +//! Decoding of bech32 encoded strings as specified by [BIP-173] and [BIP-350]. +//! +//! You should only need to use this module directly if you want control over exactly what is +//! checked and when it is checked (correct bech32 characters, valid checksum, valid checksum for +//! specific checksum algorithm, etc). If you are parsing/validating modern (post BIP-350) bitcoin +//! segwit addresses consider using the higher crate level API. +//! +//! If you do find yourself using this module directly then consider using the most general type +//! that serves your purposes, each type can be created by parsing an address string to `new`. You +//! likely do not want to arbitrarily transition from one type to the next even though possible. And +//! be prepared to spend some time with the bips - you have been warned :) +//! +//! # Details +//! +//! A Bech32 string is at most 90 characters long and consists of: +//! +//! - The human-readable part, which is intended to convey the type of data, or anything else that +//! is relevant to the reader. This part MUST contain 1 to 83 US-ASCII characters. +//! - The separator, which is always "1". +//! - The data part, which is at least 6 characters long and only consists of alphanumeric +//! characters excluding "1", "b", "i", and "o". +//! +//! The types in this module heavily lean on the wording in BIP-173: *We first +//! describe the general checksummed base32 format called Bech32 and then define Segregated Witness +//! addresses using it.* +//! +//! - `UncheckedHrpstring`: Parses the general checksummed base32 format and provides checksum validation. +//! - `CheckedHrpstring`: Provides access to the data encoded by a general checksummed base32 string and segwit checks. +//! 
- `SegwitHrpstring`: Provides access to the data encoded by a segwit address. +//! +//! # Examples +//! +//! ``` +//! use bech32::{Bech32, Bech32m, Fe32, Hrp}; +//! use bech32::primitives::decode::{CheckedHrpstring, SegwitHrpstring, UncheckedHrpstring}; +//! +//! // An arbitrary HRP and a string of valid bech32 characters. +//! let s = "abcd143hj65vxw49rts6kcw35u6r6tgzguyr03vvveeewjqpn05efzq444444"; +//! assert!(UncheckedHrpstring::new(s).is_ok()); +//! // But it has an invalid checksum. +//! assert!(CheckedHrpstring::new::<Bech32>(s).is_err()); +//! assert!(CheckedHrpstring::new::<Bech32m>(s).is_err()); +//! assert!(SegwitHrpstring::new(s).is_err()); +//! +//! // An arbitrary HRP, a string of valid bech32 characters, and a valid bech32 checksum. +//! let s = "abcd14g08d6qejxtdg4y5r3zarvary0c5xw7kxugcx9"; +//! assert!(UncheckedHrpstring::new(s).is_ok()); +//! assert!(CheckedHrpstring::new::<Bech32>(s).is_ok()); +//! // But not a valid segwit address. +//! assert!(SegwitHrpstring::new(s).is_err()); +//! // And not a valid bech32m checksum. +//! assert!(CheckedHrpstring::new::<Bech32m>(s).is_err()); +//! +//! // A valid Bitcoin taproot address. +//! let s = "bc1pdp43hj65vxw49rts6kcw35u6r6tgzguyr03vvveeewjqpn05efzq7un9w0"; +//! assert!(UncheckedHrpstring::new(s).is_ok()); +//! assert!(CheckedHrpstring::new::<Bech32m>(s).is_ok()); +//! assert!(SegwitHrpstring::new(s).is_ok()); +//! // But not a valid segwit v0 checksum. +//! assert!(CheckedHrpstring::new::<Bech32>(s).is_err()); +//! +//! // Get the HRP, witness version, and encoded data. +//! let address = "bc1pdp43hj65vxw49rts6kcw35u6r6tgzguyr03vvveeewjqpn05efzq7un9w0"; +//! let segwit = SegwitHrpstring::new(address).expect("valid segwit address"); +//! let _encoded_data = segwit.byte_iter(); +//! assert_eq!(segwit.hrp(), Hrp::parse("bc").unwrap()); +//! assert_eq!(segwit.witness_version(), Fe32::P); +//! ``` +//! +//! [BIP-173]: <https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki> +//! 
[BIP-350]: <https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki> + +use core::{fmt, iter, slice, str}; + +use crate::primitives::checksum::{self, Checksum}; +use crate::primitives::gf32::Fe32; +use crate::primitives::hrp::{self, Hrp}; +use crate::primitives::iter::{Fe32IterExt, FesToBytes}; +use crate::{write_err, Bech32, Bech32m}; + +/// Separator between the hrp and payload (as defined by BIP-173). +const SEP: char = '1'; + +/// An HRP string that has been parsed but not yet had the checksum checked. +/// +/// Parsing an HRP string only checks validity of the characters, it does not validate the +/// checksum in any way. +/// +/// Unless you are attempting to validate a string with multiple checksums then you likely do not +/// want to use this type directly, instead use [`CheckedHrpstring::new(s)`]. +/// +/// # Examples +/// +/// ``` +/// use bech32::{Bech32, Bech32m, primitives::decode::UncheckedHrpstring}; +/// +/// let addr = "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"; +/// let unchecked = UncheckedHrpstring::new(addr).expect("valid bech32 character encoded string"); +/// if unchecked.has_valid_checksum::<Bech32>() { +/// // Remove the checksum and do something with the data. +/// let checked = unchecked.remove_checksum::<Bech32>(); +/// let _ = checked.byte_iter(); +/// } else if unchecked.has_valid_checksum::<Bech32m>() { +/// // Remove the checksum and do something with the data as above. +/// } else { +/// // Checksum is not valid for either the bech32 or bech32m checksum algorithms. +/// } +/// ``` +#[derive(Debug)] +pub struct UncheckedHrpstring<'s> { + /// The human-readable part, guaranteed to be lowercase ASCII characters. + hrp: Hrp, + /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters. + /// + /// Contains the checksum if one was present in the parsed string. + data: &'s [u8], +} + +impl<'s> UncheckedHrpstring<'s> { + /// Parses a bech32 encoded string and constructs an [`UncheckedHrpstring`] object. + /// + /// Checks for valid ASCII values, does not validate the checksum. 
+ pub fn new(s: &'s str) -> Result { + let sep_pos = check_characters(s)?; + let (hrp, data) = s.split_at(sep_pos); + + let ret = UncheckedHrpstring { + hrp: Hrp::parse(hrp)?, + data: data[1..].as_bytes(), // Skip the separator. + }; + + Ok(ret) + } + + /// Returns the human-readable part. + pub fn hrp(&self) -> Hrp { self.hrp } + + /// Validates that data has a valid checksum for the `Ck` algorithm and returns a [`CheckedHrpstring`]. + pub fn validate_and_remove_checksum( + self, + ) -> Result, ChecksumError> { + self.validate_checksum::()?; + Ok(self.remove_checksum::()) + } + + /// Validates that data has a valid checksum for the `Ck` algorithm (this may mean an empty + /// checksum if `NoChecksum` is used). + /// + /// This is useful if you do not know which checksum algorithm was used and wish to validate + /// against multiple algorithms consecutively. If this function returns `true` then call + /// `remove_checksum` to get a [`CheckedHrpstring`]. + pub fn has_valid_checksum(&self) -> bool { + self.validate_checksum::().is_ok() + } + + /// Validates that data has a valid checksum for the `Ck` algorithm (this may mean an empty + /// checksum if `NoChecksum` is used). + pub fn validate_checksum(&self) -> Result<(), ChecksumError> { + use ChecksumError::*; + + if Ck::CHECKSUM_LENGTH == 0 { + // Called with NoChecksum + return Ok(()); + } + + if self.data.len() < Ck::CHECKSUM_LENGTH { + return Err(InvalidChecksumLength); + } + + let mut checksum_eng = checksum::Engine::::new(); + checksum_eng.input_hrp(&self.hrp()); + + // Unwrap ok since we checked all characters in our constructor. + for fe in self.data.iter().map(|&b| Fe32::from_char_unchecked(b)) { + checksum_eng.input_fe(fe); + } + + if checksum_eng.residue() != &Ck::TARGET_RESIDUE { + return Err(InvalidChecksum); + } + + Ok(()) + } + + /// Removes the checksum for the `Ck` algorithm and returns an [`CheckedHrpstring`]. 
+ /// + /// Data must be valid (ie, first call `has_valid_checksum` or `validate_checksum()`). This + /// function is typically paired with `has_valid_checksum` when validating against multiple + /// checksum algorithms consecutively. + /// + /// # Panics + /// + /// May panic if data is not valid. + pub fn remove_checksum(self) -> CheckedHrpstring<'s> { + let data_len = self.data.len() - Ck::CHECKSUM_LENGTH; + + CheckedHrpstring { hrp: self.hrp(), data: &self.data[..data_len] } + } +} + +/// An HRP string that has been parsed and had the checksum validated. +/// +/// This type does not treat the first byte of the data in any special way i.e., as the witness +/// version byte. If you are parsing Bitcoin segwit addresses you likely want to use [`SegwitHrpstring`]. +/// +/// > We first describe the general checksummed base32 format called Bech32 and then +/// > define Segregated Witness addresses using it. +/// +/// This type abstracts over the general checksummed base32 format called Bech32. +/// +/// # Examples +/// +/// ``` +/// use bech32::{Bech32, primitives::decode::CheckedHrpstring}; +/// +/// // Parse a general checksummed bech32 encoded string. +/// let s = "abcd14g08d6qejxtdg4y5r3zarvary0c5xw7kxugcx9"; +/// let checked = CheckedHrpstring::new::(s).expect("valid bech32 string with a valid checksum"); +/// +/// // Do something with the encoded data. +/// let _ = checked.byte_iter(); +/// ``` +#[derive(Debug)] +pub struct CheckedHrpstring<'s> { + /// The human-readable part, guaranteed to be lowercase ASCII characters. + hrp: Hrp, + /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters, + /// with the checksum removed. + data: &'s [u8], +} + +impl<'s> CheckedHrpstring<'s> { + /// Parses and validates an HRP string, without treating the first data character specially. + /// + /// If you are validating the checksum multiple times consider using [`UncheckedHrpstring`]. 
+ /// + /// This is equivalent to `UncheckedHrpstring::new().validate_and_remove_checksum::()`. + pub fn new(s: &'s str) -> Result { + let unchecked = UncheckedHrpstring::new(s)?; + let checked = unchecked.validate_and_remove_checksum::()?; + Ok(checked) + } + + /// Returns the human-readable part. + pub fn hrp(&self) -> Hrp { self.hrp } + + /// Returns an iterator that yields the data part of the parsed bech32 encoded string. + /// + /// Converts the ASCII bytes representing field elements to the respective field elements, then + /// converts the stream of field elements to a stream of bytes. + pub fn byte_iter(&self) -> ByteIter { + ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() } + } + + /// Converts this type to a [`SegwitHrpstring`] after validating the witness and HRP. + pub fn validate_segwit(mut self) -> Result, SegwitHrpstringError> { + if self.data.is_empty() { + return Err(SegwitHrpstringError::MissingWitnessVersion); + } + // Unwrap ok since check_characters checked the bech32-ness of this char. + let witness_version = Fe32::from_char(self.data[0].into()).unwrap(); + self.data = &self.data[1..]; // Remove the witness version byte from data. + + self.validate_padding()?; + self.validate_witness_length(witness_version)?; + + Ok(SegwitHrpstring { hrp: self.hrp(), witness_version, data: self.data }) + } + + /// Validates the segwit padding rules. + /// + /// Must be called after the witness version byte is removed from the data. + /// + /// From BIP-173: + /// > Re-arrange those bits into groups of 8 bits. Any incomplete group at the + /// > end MUST be 4 bits or less, MUST be all zeroes, and is discarded. + fn validate_padding(&self) -> Result<(), PaddingError> { + if self.data.is_empty() { + return Ok(()); // Empty data implies correct padding. 
+ } + + let fe_iter = AsciiToFe32Iter { iter: self.data.iter().copied() }; + let padding_len = fe_iter.len() * 5 % 8; + + if padding_len > 4 { + return Err(PaddingError::TooMuch)?; + } + + let last_fe = fe_iter.last().expect("checked above"); + let last_byte = last_fe.0; + + let padding_contains_non_zero_bits = match padding_len { + 0 => false, + 1 => last_byte & 0b0001 > 0, + 2 => last_byte & 0b0011 > 0, + 3 => last_byte & 0b0111 > 0, + 4 => last_byte & 0b1111 > 0, + _ => unreachable!("checked above"), + }; + if padding_contains_non_zero_bits { + Err(PaddingError::NonZero) + } else { + Ok(()) + } + } + + /// Validates the segwit witness length rules. + /// + /// Must be called after the witness version byte is removed from the data. + #[allow(clippy::manual_range_contains)] // For witness length range check. + fn validate_witness_length(&self, witness_version: Fe32) -> Result<(), WitnessLengthError> { + use WitnessLengthError::*; + + let witness_len = self.byte_iter().len(); + if witness_len < 2 { + return Err(TooShort); + } + if witness_len > 40 { + return Err(TooLong); + } + if witness_version == Fe32::Q && witness_len != 20 && witness_len != 32 { + return Err(InvalidSegwitV0); + } + Ok(()) + } +} + +/// An HRP string that has been parsed, had the checksum validated, had the witness version +/// validated, had the witness data length checked, and the had witness version and checksum +/// removed. +/// +/// # Examples +/// +/// ``` +/// use bech32::primitives::decode::SegwitHrpstring; +/// +/// // Parse a segwit V0 address. +/// let address = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq"; +/// let segwit = SegwitHrpstring::new(address).expect("valid segwit address"); +/// +/// // Do something with the encoded data. +/// let _ = segwit.byte_iter(); +/// ``` +#[derive(Debug)] +pub struct SegwitHrpstring<'s> { + /// The human-readable part, valid for segwit addresses. + hrp: Hrp, + /// The first byte of the parsed data. 
+ witness_version: Fe32, + /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters, + /// with the witness version and checksum removed. + data: &'s [u8], +} + +impl<'s> SegwitHrpstring<'s> { + /// Parses an HRP string, treating the first data character as a witness version. + /// + /// The version byte does not appear in the extracted binary data, but is covered by the + /// checksum. It can be accessed with [`Self::witness_version`]. + /// + /// NOTE: We do not enforce any restrictions on the HRP, use [`SegwitHrpstring::has_valid_hrp`] + /// to get strict BIP conformance (also [`Hrp::is_valid_on_mainnet`] and friends). + pub fn new(s: &'s str) -> Result<Self, SegwitHrpstringError> { + let unchecked = UncheckedHrpstring::new(s)?; + + let first = *unchecked.data.first().ok_or(SegwitHrpstringError::MissingWitnessVersion)?; + let witness_version = Fe32::from_char(first.into()).expect("checked by check_characters"); + if witness_version.to_u8() > 16 { + return Err(SegwitHrpstringError::InvalidWitnessVersion(witness_version)); + } + + let checked: CheckedHrpstring<'s> = match witness_version { + Fe32::Q => unchecked.validate_and_remove_checksum::<Bech32>()?, + _ => unchecked.validate_and_remove_checksum::<Bech32m>()?, + }; + + checked.validate_segwit() + } + + /// Parses an HRP string, treating the first data character as a witness version. + /// + /// ## WARNING + /// + /// You almost certainly do not want to use this function. + /// + /// It is provided for backwards compatibility to parse addresses that have a non-zero witness + /// version because [BIP-173] explicitly allows using the bech32 checksum with any witness + /// version however [BIP-350] specifies all witness versions > 0 now MUST use bech32m. 
+ /// + /// [BIP-173]: https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki + /// [BIP-350]: https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki + pub fn new_bech32(s: &'s str) -> Result<Self, SegwitHrpstringError> { + let unchecked = UncheckedHrpstring::new(s)?; + + let first = *unchecked.data.first().ok_or(SegwitHrpstringError::MissingWitnessVersion)?; + let witness_version = Fe32::from_char(first.into()).expect("checked by check_characters"); + if witness_version.to_u8() > 16 { + return Err(SegwitHrpstringError::InvalidWitnessVersion(witness_version)); + } + + let checked = unchecked.validate_and_remove_checksum::<Bech32>()?; + checked.validate_segwit() + } + + /// Returns `true` if the HRP is "bc" or "tb". + /// + /// BIP-173 requires that the HRP is "bc" or "tb" but software in the Bitcoin ecosystem uses + /// other HRPs, specifically "bcrt" for regtest addresses. We provide this function in order to + /// be BIP-173 compliant but there are no restrictions on the HRP of [`SegwitHrpstring`]. + pub fn has_valid_hrp(&self) -> bool { self.hrp().is_valid_segwit() } + + /// Returns the human-readable part. + pub fn hrp(&self) -> Hrp { self.hrp } + + /// Returns the witness version. + pub fn witness_version(&self) -> Fe32 { self.witness_version } + + /// Returns an iterator that yields the data part, excluding the witness version, of the parsed + /// bech32 encoded string. + /// + /// Converts the ASCII bytes representing field elements to the respective field elements, then + /// converts the stream of field elements to a stream of bytes. + /// + /// Use `self.witness_version()` to get the witness version. + pub fn byte_iter(&self) -> ByteIter { + ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() } + } +} + +/// Checks whether a given HRP string has data characters in the bech32 alphabet (incl. checksum +/// characters), and that the whole string has consistent casing (hrp, data, and checksum). 
+/// +/// # Returns +/// +/// The byte-index into the string where the '1' separator occurs, or an error if it does not. +fn check_characters(s: &str) -> Result { + use CharError::*; + + let mut has_upper = false; + let mut has_lower = false; + let mut req_bech32 = true; + let mut sep_pos = None; + for (n, ch) in s.char_indices().rev() { + if ch == SEP && sep_pos.is_none() { + req_bech32 = false; + sep_pos = Some(n); + } + if req_bech32 { + Fe32::from_char(ch).map_err(|_| InvalidChar(ch))?; + } + if ch.is_ascii_uppercase() { + has_upper = true; + } else if ch.is_ascii_lowercase() { + has_lower = true; + } + } + if has_upper && has_lower { + Err(MixedCase) + } else if let Some(pos) = sep_pos { + Ok(pos) + } else { + Err(MissingSeparator) + } +} + +/// An iterator over a parsed HRP string data as bytes. +pub struct ByteIter<'s> { + iter: FesToBytes>>>, +} + +impl<'s> Iterator for ByteIter<'s> { + type Item = u8; + fn next(&mut self) -> Option { self.iter.next() } + fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } +} + +impl<'s> ExactSizeIterator for ByteIter<'s> { + fn len(&self) -> usize { self.iter.len() } +} + +/// An iterator over a parsed HRP string data as field elements. +pub struct Fe32Iter<'s> { + iter: AsciiToFe32Iter>>, +} + +impl<'s> Iterator for Fe32Iter<'s> { + type Item = Fe32; + fn next(&mut self) -> Option { self.iter.next() } + fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } +} + +/// Helper iterator adaptor that maps an iterator of valid bech32 character ASCII bytes to an +/// iterator of field elements. +/// +/// # Panics +/// +/// If any `u8` in the input iterator is out of range for an [`Fe32`]. Should only be used on data +/// that has already been checked for validity (eg, by using `check_characters`). 
+struct AsciiToFe32Iter> { + iter: I, +} + +impl Iterator for AsciiToFe32Iter +where + I: Iterator, +{ + type Item = Fe32; + fn next(&mut self) -> Option { self.iter.next().map(Fe32::from_char_unchecked) } + fn size_hint(&self) -> (usize, Option) { + // Each ASCII character is an fe32 so iterators are the same size. + self.iter.size_hint() + } +} + +impl ExactSizeIterator for AsciiToFe32Iter +where + I: Iterator + ExactSizeIterator, +{ + fn len(&self) -> usize { self.iter.len() } +} + +/// An error while constructing a [`SegwitHrpstring`] type. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SegwitHrpstringError { + /// Error while parsing the encoded address string. + Unchecked(UncheckedHrpstringError), + /// The witness version byte is missing. + MissingWitnessVersion, + /// Invalid witness version (must be 0-16 inclusive). + InvalidWitnessVersion(Fe32), + /// Invalid padding on the witness data. + Padding(PaddingError), + /// Invalid witness length. + WitnessLength(WitnessLengthError), + /// Invalid checksum. 
+ Checksum(ChecksumError), +} + +impl fmt::Display for SegwitHrpstringError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use SegwitHrpstringError::*; + + match *self { + Unchecked(ref e) => write_err!(f, "parsing unchecked hrpstring failed"; e), + MissingWitnessVersion => write!(f, "the witness version byte is missing"), + InvalidWitnessVersion(fe) => write!(f, "invalid segwit witness version: {}", fe), + Padding(ref e) => write_err!(f, "invalid padding on the witness data"; e), + WitnessLength(ref e) => write_err!(f, "invalid witness length"; e), + Checksum(ref e) => write_err!(f, "invalid checksum"; e), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for SegwitHrpstringError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use SegwitHrpstringError::*; + + match *self { + Unchecked(ref e) => Some(e), + Padding(ref e) => Some(e), + WitnessLength(ref e) => Some(e), + Checksum(ref e) => Some(e), + MissingWitnessVersion | InvalidWitnessVersion(_) => None, + } + } +} + +impl From for SegwitHrpstringError { + fn from(e: UncheckedHrpstringError) -> Self { Self::Unchecked(e) } +} + +impl From for SegwitHrpstringError { + fn from(e: WitnessLengthError) -> Self { Self::WitnessLength(e) } +} + +impl From for SegwitHrpstringError { + fn from(e: PaddingError) -> Self { Self::Padding(e) } +} + +impl From for SegwitHrpstringError { + fn from(e: ChecksumError) -> Self { Self::Checksum(e) } +} + +/// An error while constructing a [`CheckedHrpstring`] type. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CheckedHrpstringError { + /// Error while parsing the encoded address string. + Parse(UncheckedHrpstringError), + /// Invalid checksum. 
+ Checksum(ChecksumError), +} + +impl fmt::Display for CheckedHrpstringError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use CheckedHrpstringError::*; + + match *self { + Parse(ref e) => write_err!(f, "parse failed"; e), + Checksum(ref e) => write_err!(f, "invalid checksum"; e), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for CheckedHrpstringError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use CheckedHrpstringError::*; + + match *self { + Parse(ref e) => Some(e), + Checksum(ref e) => Some(e), + } + } +} + +impl From for CheckedHrpstringError { + fn from(e: UncheckedHrpstringError) -> Self { Self::Parse(e) } +} + +impl From for CheckedHrpstringError { + fn from(e: ChecksumError) -> Self { Self::Checksum(e) } +} + +/// Errors when parsing a bech32 encoded string. +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum UncheckedHrpstringError { + /// An error with the characters of the input string. + Char(CharError), + /// The human-readable part is invalid. + Hrp(hrp::Error), +} + +impl fmt::Display for UncheckedHrpstringError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use UncheckedHrpstringError::*; + + match *self { + Char(ref e) => write_err!(f, "character error"; e), + Hrp(ref e) => write_err!(f, "invalid human-readable part"; e), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for UncheckedHrpstringError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use UncheckedHrpstringError::*; + + match *self { + Char(ref e) => Some(e), + Hrp(ref e) => Some(e), + } + } +} + +impl From for UncheckedHrpstringError { + fn from(e: CharError) -> Self { Self::Char(e) } +} + +impl From for UncheckedHrpstringError { + fn from(e: hrp::Error) -> Self { Self::Hrp(e) } +} + +/// Character errors in a bech32 encoded string. 
+#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum CharError { + /// String does not contain the separator character. + MissingSeparator, + /// No characters after the separator. + NothingAfterSeparator, + /// The checksum does not match the rest of the data. + InvalidChecksum, + /// The checksum is not a valid length. + InvalidChecksumLength, + /// Some part of the string contains an invalid character. + InvalidChar(char), + /// The whole string must be of one case. + MixedCase, +} + +impl fmt::Display for CharError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use CharError::*; + + match *self { + MissingSeparator => write!(f, "missing human-readable separator, \"{}\"", SEP), + NothingAfterSeparator => write!(f, "invalid data - no characters after the separator"), + InvalidChecksum => write!(f, "invalid checksum"), + InvalidChecksumLength => write!(f, "the checksum is not a valid length"), + InvalidChar(n) => write!(f, "invalid character (code={})", n), + MixedCase => write!(f, "mixed-case strings not allowed"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for CharError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use CharError::*; + + match *self { + MissingSeparator + | NothingAfterSeparator + | InvalidChecksum + | InvalidChecksumLength + | InvalidChar(_) + | MixedCase => None, + } + } +} + +/// Errors in the checksum of a bech32 encoded string. +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum ChecksumError { + /// The checksum does not match the rest of the data. + InvalidChecksum, + /// The checksum is not a valid length. 
+ InvalidChecksumLength, +} + +impl fmt::Display for ChecksumError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use ChecksumError::*; + + match *self { + InvalidChecksum => write!(f, "invalid checksum"), + InvalidChecksumLength => write!(f, "the checksum is not a valid length"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for ChecksumError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use ChecksumError::*; + + match *self { + InvalidChecksum | InvalidChecksumLength => None, + } + } +} + +/// Witness program invalid because of incorrect length. +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum WitnessLengthError { + /// The witness data is too short. + TooShort, + /// The witness data is too long. + TooLong, + /// The segwit v0 witness is not 20 or 32 bytes long. + InvalidSegwitV0, +} + +impl fmt::Display for WitnessLengthError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use WitnessLengthError::*; + + match *self { + TooShort => write!(f, "witness program is less than 2 bytes long"), + TooLong => write!(f, "witness program is more than 40 bytes long"), + InvalidSegwitV0 => write!(f, "the segwit v0 witness is not 20 or 32 bytes long"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for WitnessLengthError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use WitnessLengthError::*; + + match *self { + TooShort | TooLong | InvalidSegwitV0 => None, + } + } +} + +/// Error validating the padding bits on the witness data. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PaddingError { + /// The data payload has too many bits of padding. + TooMuch, + /// The data payload is padded with non-zero bits. 
+ NonZero, +} + +impl fmt::Display for PaddingError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use PaddingError::*; + + match *self { + TooMuch => write!(f, "the data payload has too many bits of padding"), + NonZero => write!(f, "the data payload is padded with non-zero bits"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for PaddingError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use PaddingError::*; + + match *self { + TooMuch | NonZero => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[cfg(feature = "alloc")] + use crate::Variant; + + #[test] + fn bip_173_invalid_parsing_fails() { + use UncheckedHrpstringError::*; + + let invalid: Vec<(&str, UncheckedHrpstringError)> = vec!( + ("\u{20}1nwldj5", + // TODO: Rust >= 1.59.0 use Hrp(hrp::Error::InvalidAsciiByte('\u{20}'.try_into().unwrap()))), + Hrp(hrp::Error::InvalidAsciiByte(32))), + ("\u{7F}1axkwrx", + Hrp(hrp::Error::InvalidAsciiByte(127))), + ("\u{80}1eym55h", + Hrp(hrp::Error::NonAsciiChar('\u{80}'))), + ("an84characterslonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11d6pts4", + Hrp(hrp::Error::TooLong(84))), + ("pzry9x0s0muk", + Char(CharError::MissingSeparator)), + ("1pzry9x0s0muk", + Hrp(hrp::Error::Empty)), + ("x1b4n0q5v", + Char(CharError::InvalidChar('b'))), + // "li1dgmt3" in separate test because error is a checksum error. + ("de1lg7wt\u{ff}", + Char(CharError::InvalidChar('\u{ff}'))), + // "A1G7SGD8" in separate test because error is a checksum error. 
+ ("10a06t8", + Hrp(hrp::Error::Empty)), + ("1qzzfhee", + Hrp(hrp::Error::Empty)), + ); + + for (s, want) in invalid { + let got = UncheckedHrpstring::new(s).unwrap_err(); + assert_eq!(got, want); + } + } + + #[test] + fn bip_173_invalid_parsing_fails_invalid_checksum() { + use ChecksumError::*; + + let err = UncheckedHrpstring::new("li1dgmt3") + .expect("string parses correctly") + .validate_checksum::() + .unwrap_err(); + assert_eq!(err, InvalidChecksumLength); + + let err = UncheckedHrpstring::new("A1G7SGD8") + .expect("string parses correctly") + .validate_checksum::() + .unwrap_err(); + assert_eq!(err, InvalidChecksum); + } + + #[test] + fn bip_350_invalid_parsing_fails() { + use UncheckedHrpstringError::*; + + let invalid: Vec<(&str, UncheckedHrpstringError)> = vec!( + ("\u{20}1xj0phk", + // TODO: Rust >= 1.59.0 use Hrp(hrp::Error::InvalidAsciiByte('\u{20}'.try_into().unwrap()))), + Hrp(hrp::Error::InvalidAsciiByte(32))), + ("\u{7F}1g6xzxy", + Hrp(hrp::Error::InvalidAsciiByte(127))), + ("\u{80}1g6xzxy", + Hrp(hrp::Error::NonAsciiChar('\u{80}'))), + ("an84characterslonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1569pvx", + Hrp(hrp::Error::TooLong(84))), + ("qyrz8wqd2c9m", + Char(CharError::MissingSeparator)), + ("1qyrz8wqd2c9m", + Hrp(hrp::Error::Empty)), + ("y1b0jsk6g", + Char(CharError::InvalidChar('b'))), + ("lt1igcx5c0", + Char(CharError::InvalidChar('i'))), + // "in1muywd" in separate test because error is a checksum error. + ("mm1crxm3i", + Char(CharError::InvalidChar('i'))), + ("au1s5cgom", + Char(CharError::InvalidChar('o'))), + // "M1VUXWEZ" in separate test because error is a checksum error. 
+ ("16plkw9", + Hrp(hrp::Error::Empty)), + ("1p2gdwpf", + Hrp(hrp::Error::Empty)), + + ); + + for (s, want) in invalid { + let got = UncheckedHrpstring::new(s).unwrap_err(); + assert_eq!(got, want); + } + } + + #[test] + fn bip_350_invalid_because_of_invalid_checksum() { + use ChecksumError::*; + + // Note the "bc1p2" test case is not from the bip test vectors. + let invalid: Vec<&str> = vec!["in1muywd", "bc1p2"]; + + for s in invalid { + let err = + UncheckedHrpstring::new(s).unwrap().validate_checksum::().unwrap_err(); + assert_eq!(err, InvalidChecksumLength); + } + + let err = UncheckedHrpstring::new("M1VUXWEZ") + .unwrap() + .validate_checksum::() + .unwrap_err(); + assert_eq!(err, InvalidChecksum); + } + + #[test] + fn check_hrp_uppercase_returns_lower() { + let addr = "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"; + let unchecked = UncheckedHrpstring::new(addr).expect("failed to parse address"); + assert_eq!(unchecked.hrp(), Hrp::parse_unchecked("bc")); + } + + #[test] + #[cfg(feature = "alloc")] + fn check_hrp_max_length() { + let hrps = + "an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio"; + + let hrp = Hrp::parse_unchecked(hrps); + let s = crate::encode(hrp, [], Variant::Bech32).expect("failed to encode empty buffer"); + + let unchecked = UncheckedHrpstring::new(&s).expect("failed to parse address"); + assert_eq!(unchecked.hrp(), hrp); + } + + #[test] + fn mainnet_valid_addresses() { + let addresses = vec![ + "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq", + "23451QAR0SRRR7XFKVY5L643LYDNW9RE59GTZZLKULZK", + ]; + for valid in addresses { + assert!(CheckedHrpstring::new::(valid).is_ok()) + } + } +} diff --git a/src/primitives/encode.rs b/src/primitives/encode.rs new file mode 100644 index 000000000..69394feb6 --- /dev/null +++ b/src/primitives/encode.rs @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: MIT + +//! Bech32 address encoding. +//! +//! 
This module provides types and iterators that can be used to encode data as a bech32 address in +//! a variety of ways without any allocations, generating, verifying, and appending checksums, +//! prepending HRP strings etc. +//! +//! In general, directly using these adaptors is not very ergonomic, and users are recommended to +//! instead use the higher-level functions at the root of this crate. +//! +//! # Examples +//! +//! ``` +//! use bech32::{Bech32, ByteIterExt, Fe32IterExt, Fe32, Hrp}; +//! +//! let witness_prog = [ +//! 0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4, +//! 0x54, 0x94, 0x1c, 0x45, 0xd1, 0xb3, 0xa3, 0x23, +//! 0xf1, 0x43, 0x3b, 0xd6, +//! ]; +//! +//! // Get a stream of characters representing the bech32 encoded +//! // address using "bc" for the human-readable part. +//! let hrp = Hrp::parse("bc").expect("bc is valid hrp string"); +//! let chars = witness_prog +//! .iter() +//! .copied() +//! .bytes_to_fes() +//! .with_checksum::(&hrp) +//! .with_witness_version(Fe32::Q) // Optionally add witness version. +//! .chars(); +//! +//! #[cfg(feature = "alloc")] +//! { +//! let addr = chars.collect::(); +//! assert_eq!(addr.to_uppercase(), "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"); +//! } +//! ``` + +use core::iter::Iterator; +use core::marker::PhantomData; + +use crate::primitives::checksum::HrpFe32Iter; +use crate::primitives::hrp::{self, Hrp}; +use crate::primitives::iter::Checksummed; +use crate::{Checksum, Fe32}; + +/// The `Encoder` builds iterators that can be used to encode field elements into a bech32 address. +/// +/// Construct the encoder by calling [`Fe32IterExt::with_checksum`] on an iterator of field +/// elements, optionally prefix the data with a witness version, and then get the encoding as either +/// a stream of characters ([`Encoder::chars`]) or a stream of field elements ([`Encoder::fes`]). 
+/// +/// # Examples +/// +/// ``` +/// use bech32::{Bech32, ByteIterExt, Fe32IterExt, Hrp}; +/// +/// let data = [0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4]; +/// +/// let hrp = Hrp::parse("abc").expect("abc is valid hrp string"); +/// let chars = data +/// .iter() +/// .copied() +/// .bytes_to_fes() +/// .with_checksum::(&hrp) +/// .chars(); +/// ``` +/// [`Fe32IterExt::with_checksum`]: crate::Fe32IterExt::with_checksum +#[derive(Clone, PartialEq, Eq)] +pub struct Encoder<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// The field elements to encode. + data: I, + /// The human-readable part used at the front of the address encoding. + hrp: &'hrp Hrp, + /// The witness version, if present. + witness_version: Option, + /// Checksum marker. + marker: PhantomData, +} + +impl<'hrp, I, Ck> Encoder<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// Constructs a new bech32 encoder. + pub fn new(data: I, hrp: &'hrp Hrp) -> Self { + Self { data, hrp, witness_version: None, marker: PhantomData:: } + } + + /// Adds `witness_version` to the encoder (as the first field element of the encoded data). + /// + /// Note, the caller must guarantee that the witness version is within the valid range (0-16). + pub fn with_witness_version(mut self, witness_version: Fe32) -> Self { + self.witness_version = Some(witness_version); + self + } + + /// Returns an iterator that yields the bech32 encoded address as ASCII characters. + pub fn chars(self) -> CharIter<'hrp, I, Ck> { + let witver_iter = WitnessVersionIter::new(self.witness_version, self.data); + CharIter::new(self.hrp, witver_iter) + } + + /// Returns an iterator that yields the field elements that go into the checksum, as well as the checksum at the end. + /// + /// Each field element yielded has been input into the checksum algorithm (including the HRP as it is fed into the algorithm). 
+ pub fn fes(self) -> Fe32Iter<'hrp, I, Ck> { + let witver_iter = WitnessVersionIter::new(self.witness_version, self.data); + Fe32Iter::new(self.hrp, witver_iter) + } +} + +/// Iterator adaptor that just prepends a single character to a field element stream. +/// +/// More ergonomic to use than `std::iter::once(fe).chain(iter)`. +pub struct WitnessVersionIter +where + I: Iterator, +{ + witness_version: Option, + iter: I, +} + +impl WitnessVersionIter +where + I: Iterator, +{ + /// Creates a [`WitnessVersionIter`]. + pub fn new(witness_version: Option, iter: I) -> Self { Self { witness_version, iter } } +} + +impl Iterator for WitnessVersionIter +where + I: Iterator, +{ + type Item = Fe32; + + fn next(&mut self) -> Option { self.witness_version.take().or_else(|| self.iter.next()) } + + fn size_hint(&self) -> (usize, Option) { + let (min, max) = self.iter.size_hint(); + match self.witness_version { + Some(_) => (min + 1, max.map(|max| max + 1)), + None => (min, max), + } + } +} + +/// Iterator adaptor which takes a stream of field elements, converts it to characters prefixed by +/// an HRP (and separator), and suffixed by the checksum i.e., converts the data in a stream of +/// field elements into stream of characters representing the encoded bech32 string. +pub struct CharIter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// `None` once the hrp has been yielded. + hrp_iter: Option>, + /// Iterator over field elements made up of the optional witness version, the data to be + /// encoded, plus the checksum. + checksummed: Checksummed, Ck>, +} + +impl<'hrp, I, Ck> CharIter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// Adapts the `Fe32Iter` iterator to yield characters representing the bech32 encoding. 
+ pub fn new(hrp: &'hrp Hrp, data: WitnessVersionIter) -> Self { + let checksummed = Checksummed::new_hrp(hrp, data); + Self { hrp_iter: Some(hrp.lowercase_char_iter()), checksummed } + } +} + +impl<'a, I, Ck> Iterator for CharIter<'a, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + type Item = char; + + fn next(&mut self) -> Option { + if let Some(ref mut hrp_iter) = self.hrp_iter { + match hrp_iter.next() { + Some(c) => return Some(c), + None => { + self.hrp_iter = None; + return Some('1'); + } + } + } + + self.checksummed.next().map(|fe| fe.to_char()) + } + + fn size_hint(&self) -> (usize, Option) { + match &self.hrp_iter { + // We have yielded the hrp and separator already. + None => self.checksummed.size_hint(), + // Yet to finish yielding the hrp (and the separator). + Some(hrp_iter) => { + let (hrp_min, hrp_max) = hrp_iter.size_hint(); + let (chk_min, chk_max) = self.checksummed.size_hint(); + + let min = hrp_min + 1 + chk_min; // +1 for the separator. + + // To provide a max boundary we need to have gotten a value from the hrp iter as well as the + // checksummed iter, otherwise we have to return None since we cannot know the maximum. + let max = match (hrp_max, chk_max) { + (Some(hrp_max), Some(chk_max)) => Some(hrp_max + 1 + chk_max), + (_, _) => None, + }; + + (min, max) + } + } + } +} + +/// Iterator adaptor for a checksummed iterator that inputs the HRP into the checksum algorithm +/// before yielding the HRP as field elements followed by the data then checksum. +pub struct Fe32Iter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// `None` once the hrp field elements have been yielded. + hrp_iter: Option>, + /// Iterator over field elements made up of the optional witness version, the data to be + /// encoded, plus the checksum. 
+ checksummed: Checksummed, Ck>, +} + +impl<'hrp, I, Ck> Fe32Iter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + /// Creates a [`Fe32Iter`] which yields all the field elements which go into the checksum algorithm. + pub fn new(hrp: &'hrp Hrp, data: WitnessVersionIter) -> Self { + let hrp_iter = HrpFe32Iter::new(hrp); + let checksummed = Checksummed::new_hrp(hrp, data); + Self { hrp_iter: Some(hrp_iter), checksummed } + } +} + +impl<'hrp, I, Ck> Iterator for Fe32Iter<'hrp, I, Ck> +where + I: Iterator, + Ck: Checksum, +{ + type Item = Fe32; + fn next(&mut self) -> Option { + if let Some(ref mut hrp_iter) = &mut self.hrp_iter { + match hrp_iter.next() { + Some(fe) => return Some(fe), + None => self.hrp_iter = None, + } + } + self.checksummed.next() + } + + fn size_hint(&self) -> (usize, Option) { + let hrp = match &self.hrp_iter { + Some(hrp_iter) => hrp_iter.size_hint(), + None => (0, Some(0)), + }; + + let data = self.checksummed.size_hint(); + + let min = hrp.0 + data.0; + let max = hrp.1.zip(data.1).map(|(hrp, data)| hrp + data); + + (min, max) + } +} + +#[cfg(test)] +mod tests { + use crate::{Bech32, ByteIterExt, Fe32, Fe32IterExt, Hrp}; + + // Tests below using this data, are based on the test vector (from BIP-173): + // BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4: 0014751e76e8199196d454941c45d1b3a323f1433bd6 + #[rustfmt::skip] + const DATA: [u8; 20] = [ + 0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4, + 0x54, 0x94, 0x1c, 0x45, 0xd1, 0xb3, 0xa3, 0x23, + 0xf1, 0x43, 0x3b, 0xd6, + ]; + + #[test] + fn hrpstring_iter() { + let iter = DATA.iter().copied().bytes_to_fes(); + + let hrp = Hrp::parse_unchecked("bc"); + let iter = iter.with_checksum::(&hrp).with_witness_version(Fe32::Q).chars(); + + assert!(iter.eq("bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4".chars())); + } + + #[test] + #[cfg(feature = "alloc")] + fn hrpstring_iter_collect() { + let iter = DATA.iter().copied().bytes_to_fes(); + + let hrp = Hrp::parse_unchecked("bc"); + let iter = 
iter.with_checksum::(&hrp).with_witness_version(Fe32::Q).chars(); + + let encoded = iter.collect::(); + assert_eq!(encoded, "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4"); + } + + #[test] + fn hrpstring_iter_size_hint() { + let char_len = "w508d6qejxtdg4y5r3zarvary0c5xw7k".len(); + let iter = DATA.iter().copied().bytes_to_fes(); + + let hrp = Hrp::parse_unchecked("bc"); + let iter = iter.with_checksum::(&hrp).with_witness_version(Fe32::Q).chars(); + + let checksummed_len = 2 + 1 + 1 + char_len + 6; // bc + SEP + Q + chars + checksum + assert_eq!(iter.size_hint().0, checksummed_len); + } +} diff --git a/src/primitives/gf32.rs b/src/primitives/gf32.rs index 882348a0d..da42c5b0e 100644 --- a/src/primitives/gf32.rs +++ b/src/primitives/gf32.rs @@ -185,6 +185,8 @@ impl Fe32 { Ok(Fe32(u5)) } + pub(crate) fn from_char_unchecked(c: u8) -> Fe32 { Fe32(CHARS_INV[usize::from(c)] as u8) } + /// Converts the field element to a lowercase bech32 character. pub fn to_char(self) -> char { // Indexing fine as we have self.0 in [0, 32) as an invariant. diff --git a/src/primitives/hrp.rs b/src/primitives/hrp.rs index 50852eaa9..9571e2e4c 100644 --- a/src/primitives/hrp.rs +++ b/src/primitives/hrp.rs @@ -21,6 +21,27 @@ use crate::Case; /// Maximum length of the human-readable part, as defined by BIP-173. const MAX_HRP_LEN: usize = 83; +macro_rules! define_hrp_const { + ($name:ident, $size:literal, $zero:literal, $one:literal, $two:literal, $three:literal, $network:literal) => { +/// "The human-readable part for the Bitcoin $network." + #[rustfmt::skip] + pub const $name: Hrp = Hrp { buf: [ + $zero, $one, $two, $three, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], size: $size }; + }; +} +define_hrp_const! {BC, 2, 98, 99, 0, 0, "network (mainnet)."} +define_hrp_const! 
{TB, 2, 116, 98, 0, 0, "testnet networks (testnet, signet)."} +define_hrp_const! {BCRT, 4, 98, 99, 114, 116, "regtest network."} + /// The human-readable part (human readable prefix before the '1' separator). #[derive(Clone, Copy, Debug)] pub struct Hrp { @@ -157,6 +178,27 @@ impl Hrp { /// Always false, the human-readable part is guaranteed to be between 1-83 characters. pub fn is_empty(&self) -> bool { false } + + /// Returns `true` if this [`Hrp`] is valid according to the bips. + /// + /// [BIP-173] states that the HRP must be either "bc" or "tb". + /// + /// [BIP-173]: https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#user-content-Segwit_address_format + pub fn is_valid_segwit(&self) -> bool { + self.is_valid_on_mainnet() || self.is_valid_on_testnet() + } + + /// Returns `true` if this hrpstring is valid on the Bitcoin network i.e., HRP is "bc". + pub fn is_valid_on_mainnet(&self) -> bool { *self == self::BC } + + /// Returns `true` if this hrpstring is valid on the Bitcoin testnet network i.e., HRP is "tb". + pub fn is_valid_on_testnet(&self) -> bool { *self == self::TB } + + /// Returns `true` if this hrpstring is valid on the Bitcoin signet network i.e., HRP is "tb". + pub fn is_valid_on_signet(&self) -> bool { *self == self::TB } + + /// Returns `true` if this hrpstring is valid on the Bitcoin regtest network i.e., HRP is "bcrt". + pub fn is_valid_on_regtest(&self) -> bool { *self == self::BCRT } } /// Displays the human-readable part. 
@@ -459,4 +501,13 @@ mod tests { char_4, "abc123def", 9; char_5, "ABC123DEF", 9; } + + #[cfg(feature = "alloc")] + #[test] + fn hrp_consts() { + use crate::primitives::hrp::{BC, BCRT, TB}; + assert_eq!(BC, Hrp::parse_unchecked("bc")); + assert_eq!(TB, Hrp::parse_unchecked("tb")); + assert_eq!(BCRT, Hrp::parse_unchecked("bcrt")); + } } diff --git a/src/primitives/iter.rs b/src/primitives/iter.rs new file mode 100644 index 000000000..afd78e5ba --- /dev/null +++ b/src/primitives/iter.rs @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: MIT + +//! Iterator Adaptors. +//! +//! Iterator extension traits and blanket implementations to convert: +//! +//! - `BytesToFes`: An iterator over bytes to an iterator over field elements. +//! - `FesToBytes`: An iterator over field elements to an iterator over bytes. +//! - `Checksummed`: An iterator over field elements that appends the checksum. +//! +//! # Examples +//! +//! ``` +//! use bech32::{Bech32, ByteIterExt, Fe32IterExt, Fe32, Hrp}; +//! +//! let data = [ +//! 0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4, +//! 0x54, 0x94, 0x1c, 0x45, 0xd1, 0xb3, 0xa3, 0x23, +//! 0xf1, 0x43, 0x3b, 0xd6, +//! ]; +//! +//! // Convert byte data to GF32 field elements. +//! let fe_iter = data.iter().copied().bytes_to_fes(); +//! +//! // Convert field elements back to bytes. +//! let byte_iter = fe_iter.fes_to_bytes(); +//! +//! # assert!(data.iter().copied().eq(byte_iter)); +//! ``` + +use crate::primitives::checksum::{self, Checksum, PackedFe32}; +use crate::primitives::encode::Encoder; +use crate::primitives::gf32::Fe32; +use crate::primitives::hrp::Hrp; + +/// Extension trait for byte iterators which provides an adaptor to GF32 elements. +pub trait ByteIterExt: Sized + Iterator { + /// Adapts the byte iterator to output GF32 field elements instead. 
+ /// + /// If the total number of bits is not a multiple of 5 we pad with 0s + fn bytes_to_fes(mut self) -> BytesToFes { + BytesToFes { last_byte: self.next(), bit_offset: 0, iter: self } + } +} + +impl ByteIterExt for I where I: Iterator {} + +/// Extension trait for field element iterators. +pub trait Fe32IterExt: Sized + Iterator { + /// Adapts the `Fe32` iterator to output bytes instead. + /// + /// If the total number of bits is not a multiple of 8, any trailing bits + /// are simply dropped. + fn fes_to_bytes(mut self) -> FesToBytes { + FesToBytes { last_fe: self.next(), bit_offset: 0, iter: self } + } + + /// Adapts the Fe32 iterator to encode the field elements into a bech32 address. + fn with_checksum(self, hrp: &Hrp) -> Encoder { Encoder::new(self, hrp) } +} + +impl Fe32IterExt for I where I: Iterator {} + +/// Iterator adaptor that converts bytes to GF32 elements. +/// +/// If the total number of bits is not a multiple of 5, it right-pads with 0 bits. +#[derive(Clone, PartialEq, Eq)] +pub struct BytesToFes> { + last_byte: Option, + bit_offset: usize, + iter: I, +} + +impl Iterator for BytesToFes +where + I: Iterator, +{ + type Item = Fe32; + + fn next(&mut self) -> Option { + use core::cmp::Ordering::*; + + let bit_offset = { + let ret = self.bit_offset; + self.bit_offset = (self.bit_offset + 5) % 8; + ret + }; + + if let Some(last) = self.last_byte { + match bit_offset.cmp(&3) { + Less => Some(Fe32((last >> (3 - bit_offset)) & 0x1f)), + Equal => { + self.last_byte = self.iter.next(); + Some(Fe32(last & 0x1f)) + } + Greater => { + self.last_byte = self.iter.next(); + let next = self.last_byte.unwrap_or(0); + Some(Fe32(((last << (bit_offset - 3)) | (next >> (11 - bit_offset))) & 0x1f)) + } + } + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let (min, max) = self.iter.size_hint(); + let (min, max) = match self.last_byte { + // +1 because we set last_byte with call to `next`. 
+ Some(_) => (min + 1, max.map(|max| max + 1)), + None => (min, max), + }; + + let min = bytes_len_to_fes_len(min); + let max = max.map(bytes_len_to_fes_len); + + (min, max) + } +} + +/// The number of fes encoded by n bytes, rounded up because we pad the fes. +fn bytes_len_to_fes_len(bytes: usize) -> usize { + let bits = bytes * 8; + (bits + 4) / 5 +} + +impl ExactSizeIterator for BytesToFes +where + I: Iterator + ExactSizeIterator, +{ + fn len(&self) -> usize { + let len = match self.last_byte { + Some(_) => self.iter.len() + 1, + None => self.iter.len(), + }; + bytes_len_to_fes_len(len) + } +} + +/// Iterator adaptor that converts GF32 elements to bytes. +/// +/// If the total number of bits is not a multiple of 8, any trailing bits are dropped. +/// +/// Note that if there are 5 or more trailing bits, the result will be that an entire field element +/// is dropped. If this occurs, the input was an invalid length for a bech32 string, but this +/// iterator does not do any checks for this. +#[derive(Clone, PartialEq, Eq)] +pub struct FesToBytes> { + last_fe: Option, + bit_offset: usize, + iter: I, +} + +impl Iterator for FesToBytes +where + I: Iterator, +{ + type Item = u8; + + fn next(&mut self) -> Option { + let bit_offset = { + let ret = self.bit_offset; + self.bit_offset = (self.bit_offset + 8) % 5; + ret + }; + + if let Some(last) = self.last_fe { + let mut ret = last.0 << (3 + bit_offset); + + self.last_fe = self.iter.next(); + let next1 = self.last_fe?; + if bit_offset > 2 { + self.last_fe = self.iter.next(); + let next2 = self.last_fe?; + ret |= next1.0 << (bit_offset - 2); + ret |= next2.0 >> (7 - bit_offset); + } else { + ret |= next1.0 >> (2 - bit_offset); + if self.bit_offset == 0 { + self.last_fe = self.iter.next(); + } + } + + Some(ret) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + // If the total number of bits is not a multiple of 8, any trailing bits are dropped. 
+ let fes_len_to_bytes_len = |n| n * 5 / 8; + + let (fes_min, fes_max) = self.iter.size_hint(); + // +1 because we set last_fe with call to `next`. + let min = fes_len_to_bytes_len(fes_min + 1); + let max = fes_max.map(|max| fes_len_to_bytes_len(max + 1)); + (min, max) + } +} + +// If the total number of bits is not a multiple of 8, any trailing bits are dropped. +fn fes_len_to_bytes_len(n: usize) -> usize { n * 5 / 8 } + +impl ExactSizeIterator for FesToBytes +where + I: Iterator + ExactSizeIterator, +{ + fn len(&self) -> usize { + let len = match self.last_fe { + Some(_) => self.iter.len() + 1, + None => self.iter.len(), + }; + fes_len_to_bytes_len(len) + } +} + +/// Iterator adaptor for field-element-yielding iterator, which tacks a checksum onto the end of the +/// yielded data. +#[derive(Clone, PartialEq, Eq)] +pub struct Checksummed +where + I: Iterator, + Ck: Checksum, +{ + iter: I, + checksum_remaining: usize, + checksum_engine: checksum::Engine, +} + +impl Checksummed +where + I: Iterator, + Ck: Checksum, +{ + /// Creates a new checksummed iterator which adapts a data iterator of field elements by + /// appending a checksum. + pub fn new(data: I) -> Checksummed { + Checksummed { + iter: data, + checksum_remaining: Ck::CHECKSUM_LENGTH, + checksum_engine: checksum::Engine::new(), + } + } + + /// Creates a new checksummed iterator which adapts a data iterator of field elements by + /// first inputting the [`Hrp`] and then appending a checksum. 
+ pub fn new_hrp(hrp: &Hrp, data: I) -> Checksummed { + let mut ret = Self::new(data); + ret.checksum_engine.input_hrp(hrp); + ret + } +} + +impl Iterator for Checksummed +where + I: Iterator, + Ck: Checksum, +{ + type Item = Fe32; + + fn next(&mut self) -> Option { + match self.iter.next() { + Some(fe) => { + self.checksum_engine.input_fe(fe); + Some(fe) + } + None => + if self.checksum_remaining == 0 { + None + } else { + if self.checksum_remaining == Ck::CHECKSUM_LENGTH { + self.checksum_engine.input_target_residue(); + } + self.checksum_remaining -= 1; + Some(Fe32(self.checksum_engine.residue().unpack(self.checksum_remaining))) + }, + } + } + + fn size_hint(&self) -> (usize, Option) { + let add = self.checksum_remaining; + let (min, max) = self.iter.size_hint(); + + (min + add, max.map(|max| max + add)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Tests below using this data, are based on the test vector (from BIP-173): + // BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4: 0014751e76e8199196d454941c45d1b3a323f1433bd6 + #[rustfmt::skip] + const DATA: [u8; 20] = [ + 0x75, 0x1e, 0x76, 0xe8, 0x19, 0x91, 0x96, 0xd4, + 0x54, 0x94, 0x1c, 0x45, 0xd1, 0xb3, 0xa3, 0x23, + 0xf1, 0x43, 0x3b, 0xd6, + ]; + + #[test] + fn byte_iter_ext() { + assert!(DATA + .iter() + .copied() + .bytes_to_fes() + .map(Fe32::to_char) + .eq("w508d6qejxtdg4y5r3zarvary0c5xw7k".chars())); + } + + #[test] + fn bytes_to_fes_size_hint() { + let char_len = "w508d6qejxtdg4y5r3zarvary0c5xw7k".len(); + assert_eq!(DATA.iter().copied().bytes_to_fes().size_hint(), (char_len, Some(char_len))); + } + + #[test] + fn fe32_iter_ext() { + let fe_iter = "w508d6qejxtdg4y5r3zarvary0c5xw7k" + .bytes() + .map(|b| Fe32::from_char(char::from(b)).unwrap()); + + assert!(fe_iter.clone().fes_to_bytes().eq(DATA.iter().copied())); + } + + #[test] + fn fes_to_bytes_size_hint() { + let fe_iter = "w508d6qejxtdg4y5r3zarvary0c5xw7k" + .bytes() + .map(|b| Fe32::from_char(char::from(b)).unwrap()); + + let got_hint = 
fe_iter.clone().fes_to_bytes().size_hint(); + let want_hint = DATA.iter().size_hint(); + + assert_eq!(got_hint, want_hint) + } + + #[test] + fn padding_bytes_trailing_0_bits_roundtrips() { + // 5 * 8 % 5 = 0 + const BYTES: [u8; 5] = [0x75, 0x1e, 0x76, 0xe8, 0x19]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_bytes_trailing_1_bit_roundtrips() { + // 2 * 8 % 5 = 1 + const BYTES: [u8; 2] = [0x75, 0x1e]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_bytes_trailing_2_bits_roundtrips() { + // 4 * 8 % 5 = 2 + const BYTES: [u8; 4] = [0x75, 0x1e, 0x76, 0xe8]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_bytes_trailing_3_bits_roundtrips() { + // 6 * 8 % 5 = 3 + const BYTES: [u8; 6] = [0x75, 0x1e, 0x76, 0xe8, 0x19, 0xab]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_bytes_trailing_4_bits_roundtrips() { + // 3 * 8 % 5 = 4 + const BYTES: [u8; 3] = [0x75, 0x1e, 0x76]; + assert!(BYTES.iter().copied().bytes_to_fes().fes_to_bytes().eq(BYTES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_0_bits_roundtrips() { + // 8 * 5 % 8 = 0 + const FES: [Fe32; 8] = + [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Y, Fe32::X, Fe32::G, Fe32::F]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_1_bit_zero_roundtrips() { + // 5 * 5 % 8 = 1 + const FES: [Fe32; 5] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_1_bit_non_zero_does_not_roundtrip() { + // 5 * 5 % 8 = 1 + const FES: [Fe32; 5] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::L]; + 
assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_2_bits_zeros_roundtrips() { + // 2 * 5 % 8 = 2 + const FES: [Fe32; 2] = [Fe32::P, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_2_bits_non_zero_does_not_roundtrip() { + // 2 * 5 % 8 = 2 + const FES: [Fe32; 2] = [Fe32::Q, Fe32::P]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_3_bits_zeros_roundtrips() { + // 7 * 5 % 8 = 3 + const FES: [Fe32; 7] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Y, Fe32::X, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_3_bits_non_zero_does_not_roundtrip() { + // 7 * 5 % 8 = 3 + const FES: [Fe32; 7] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Y, Fe32::X, Fe32::P]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + fn padding_fes_trailing_4_bits_zeros_roundtrips() { + // 4 * 5 % 8 = 4 + const FES: [Fe32; 4] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_4_bits_non_zero_does_not_roundtrip() { + // 4 * 5 % 8 = 4 + const FES: [Fe32; 4] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::P]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + // Padding is never more than 4 bits so any additional bits will always fail to roundtrip. 
+ + #[test] + #[should_panic] + fn padding_fes_trailing_5_bits_zeros_does_not_roundtrip() { + // 1 * 5 % 8 = 5 + const FES: [Fe32; 1] = [Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_5_bits_non_zero_does_not_roundtrip() { + // 1 * 5 % 8 = 5 + const FES: [Fe32; 1] = [Fe32::P]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_6_bits_zeros_does_not_roundtrip() { + // 6 * 5 % 8 = 6 + const FES: [Fe32; 6] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Q, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_6_bits_non_zero_does_not_roundtrip() { + // 6 * 5 % 8 = 6 + const FES: [Fe32; 6] = [Fe32::Q, Fe32::P, Fe32::Z, Fe32::R, Fe32::Y, Fe32::X]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_7_bits_zeros_does_not_roundtrip() { + // 3 * 5 % 8 = 7 + const FES: [Fe32; 3] = [Fe32::P, Fe32::Q, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } + + #[test] + #[should_panic] + fn padding_fes_trailing_7_bits_non_zero_does_not_roundtrip() { + // 3 * 5 % 8 = 7 + const FES: [Fe32; 3] = [Fe32::Q, Fe32::P, Fe32::Q]; + assert!(FES.iter().copied().fes_to_bytes().bytes_to_fes().eq(FES.iter().copied())) + } +} diff --git a/src/primitives/mod.rs b/src/primitives/mod.rs index bd08c3736..478775a1a 100644 --- a/src/primitives/mod.rs +++ b/src/primitives/mod.rs @@ -3,8 +3,11 @@ //! Provides the internal nuts and bolts that enable bech32 encoding/decoding. 
pub mod checksum; +pub mod decode; +pub mod encode; pub mod gf32; pub mod hrp; +pub mod iter; use checksum::{Checksum, PackedNull}; diff --git a/tests/bip_173_test_vectors.rs b/tests/bip_173_test_vectors.rs new file mode 100644 index 000000000..bb645cf60 --- /dev/null +++ b/tests/bip_173_test_vectors.rs @@ -0,0 +1,120 @@ +// BIP-173 test vectors. + +#![cfg(feature = "alloc")] + +use bech32::primitives::decode::{ + CheckedHrpstring, ChecksumError, SegwitHrpstring, UncheckedHrpstring, +}; +use bech32::{Bech32, Bech32m, ByteIterExt, Fe32IterExt}; + +// This is a separate test because we correctly identify this string as invalid but not for the +// reason given in the bip. +#[test] +fn bip_173_checksum_calculated_with_uppercase_form() { + use bech32::primitives::decode::{CheckedHrpstringError, ChecksumError, SegwitHrpstringError}; + + // BIP-173 states reason for error should be: "checksum calculated with uppercase form of HRP". + let s = "A1G7SGD8"; + + assert_eq!( + CheckedHrpstring::new::(s).unwrap_err(), + CheckedHrpstringError::Checksum(ChecksumError::InvalidChecksum) + ); + + assert_eq!( + SegwitHrpstring::new(s).unwrap_err(), + SegwitHrpstringError::Checksum(ChecksumError::InvalidChecksum) + ); +} + +macro_rules! check_valid_bech32 { + ($($test_name:ident, $valid_bech32:literal);* $(;)?) => { + $( + #[test] + fn $test_name() { + let p = UncheckedHrpstring::new($valid_bech32).unwrap(); + p.validate_checksum::().expect("valid bech32"); + // Valid bech32 strings are by definition invalid bech32m. + assert_eq!(p.validate_checksum::().unwrap_err(), ChecksumError::InvalidChecksum); + } + )* + } +} +check_valid_bech32! 
{ + valid_bech32_hrp_string_0, "A12UEL5L"; + valid_bech32_hrp_string_a, "a12uel5l"; + valid_bech32_hrp_string_1, "an83characterlonghumanreadablepartthatcontainsthenumber1andtheexcludedcharactersbio1tt5tgs"; + valid_bech32_hrp_string_2, "abcdef1qpzry9x8gf2tvdw0s3jn54khce6mua7lmqqqxw"; + valid_bech32_hrp_string_3, "11qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqc8247j"; + valid_bech32_hrp_string_4, "split1checkupstagehandshakeupstreamerranterredcaperred2y9e3w"; + valid_bech32_hrp_string_b, "?1ezyfcl"; +} + +macro_rules! check_valid_address_roundtrip { + ($($test_name:ident, $addr:literal);* $(;)?) => { + $( + #[test] + fn $test_name() { + let hrpstring = SegwitHrpstring::new_bech32($addr).expect("valid address"); + let hrp = hrpstring.hrp(); + let witness_version = hrpstring.witness_version(); + + let encoded = hrpstring.byte_iter().bytes_to_fes().with_checksum::(&hrp.into()).with_witness_version(witness_version).chars().collect::(); + + // The bips specifically say that encoder should output lowercase characters so we uppercase manually. + if encoded != $addr { + let got = encoded.to_uppercase(); + assert_eq!(got, $addr) + } + } + )* + } +} +// Note these test vectors include various witness versions. +check_valid_address_roundtrip! { + bip_173_valid_address_roundtrip_0, "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"; + bip_173_valid_address_roundtrip_1, "tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3q0sl5k7"; + bip_173_valid_address_roundtrip_2, "bc1pw508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7k7grplx"; + bip_173_valid_address_roundtrip_3, "BC1SW50QA3JX3S"; + bip_173_valid_address_roundtrip_4, "bc1zw508d6qejxtdg4y5r3zarvaryvg6kdaj"; + bip_173_valid_address_roundtrip_5, "tb1qqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesrxh6hy"; +} + +macro_rules! check_invalid_address { + ($($test_name:ident, $addr:literal);* $(;)?) 
=> { + $( + #[test] + #[cfg(feature = "alloc")] + fn $test_name() { + match SegwitHrpstring::new($addr) { + Err(_) => {}, + // We do not enforce the bip specified restrictions when constructing + // SegwitHrpstring so we must do the check explicitly. + Ok(segwit) => assert!(!segwit.has_valid_hrp()), + } + } + )* + } +} +check_invalid_address! { + // Invalid human-readable part + bip_173_invalid_address_0, "tc1qw508d6qejxtdg4y5r3zarvary0c5xw7kg3g4ty"; + // Invalid checksum + bip_173_invalid_address_1, "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t5"; + // Invalid witness version + bip_173_invalid_address_2, "BC13W508D6QEJXTDG4Y5R3ZARVARY0C5XW7KN40WF2"; + // Invalid program length + bip_173_invalid_address_3, "bc1rw5uspcuh"; + // Invalid program length + bip_173_invalid_address_4, "bc10w508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7kw5rljs90"; + // Invalid program length for witness version 0 (per BIP-141) + bip_173_invalid_address_5, "BC1QR508D6QEJXTDG4Y5R3ZARVARYV98GJ9P"; + // Mixed case + bip_173_invalid_address_6, "tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3q0sL5k7"; + // zero padding of more than 4 bits + bip_173_invalid_address_7, "bc1zw508d6qejxtdg4y5r3zarvaryvqyzf3du"; + // Non-zero padding in 8-to-5 conversion + bip_173_invalid_address_8, "tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3pjxtptv"; + // Empty data section + bip_173_invalid_address_14, "bc1gmk9yu"; +} diff --git a/tests/bip_350_test_vectors.rs b/tests/bip_350_test_vectors.rs new file mode 100644 index 000000000..fb5d870cc --- /dev/null +++ b/tests/bip_350_test_vectors.rs @@ -0,0 +1,131 @@ +// BIP-350 test vectors.
+ +#![cfg(feature = "alloc")] + +use bech32::primitives::decode::{ + CheckedHrpstring, CheckedHrpstringError, ChecksumError, SegwitHrpstring, SegwitHrpstringError, + UncheckedHrpstring, +}; +use bech32::{Bech32, Bech32m, ByteIterExt, Fe32, Fe32IterExt}; + +// This is a separate test because we correctly identify this string as invalid but not for the +// reason given in the bip. +#[test] +fn bip_350_checksum_calculated_with_uppercase_form() { + // BIP-350 states reason for error should be: "checksum calculated with uppercase form of HRP". + let s = "M1VUXWEZ"; + + assert_eq!( + CheckedHrpstring::new::(s).unwrap_err(), + CheckedHrpstringError::Checksum(ChecksumError::InvalidChecksum) + ); + + assert_eq!( + SegwitHrpstring::new(s).unwrap_err(), + SegwitHrpstringError::Checksum(ChecksumError::InvalidChecksum) + ); +} + +macro_rules! check_valid_bech32m { + ($($test_name:ident, $valid_bech32m:literal);* $(;)?) => { + $( + #[test] + fn $test_name() { + let p = UncheckedHrpstring::new($valid_bech32m).unwrap(); + p.validate_checksum::().expect("valid bech32m"); + // Valid bech32m strings are by definition invalid bech32. + assert_eq!(p.validate_checksum::().unwrap_err(), ChecksumError::InvalidChecksum); + } + )* + } +} +check_valid_bech32m! { + valid_bech32m_hrp_string_0, "A1LQFN3A"; + valid_bech32m_hrp_string_1, "a1lqfn3a"; + valid_bech32m_hrp_string_2, "an83characterlonghumanreadablepartthatcontainsthetheexcludedcharactersbioandnumber11sg7hg6"; + valid_bech32m_hrp_string_3, "abcdef1l7aum6echk45nj3s0wdvt2fg8x9yrzpqzd3ryx"; + valid_bech32m_hrp_string_4, "11llllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllludsr8"; + valid_bech32m_hrp_string_5, "split1checkupstagehandshakeupstreamerranterredcaperredlc445v"; + valid_bech32m_hrp_string_6, "?1v759aa"; +} + +macro_rules! check_valid_address_roundtrip { + ($($test_name:ident, $addr:literal);* $(;)?) 
=> { + $( + #[test] + #[cfg(feature = "alloc")] + fn $test_name() { + let hrpstring = SegwitHrpstring::new($addr).expect("valid address"); + let hrp = hrpstring.hrp(); + let witness_version = hrpstring.witness_version(); + + let encoded = match witness_version { + Fe32::Q => hrpstring.byte_iter().bytes_to_fes().with_checksum::(&hrp.into()).with_witness_version(witness_version).chars().collect::(), + _ => hrpstring.byte_iter().bytes_to_fes().with_checksum::(&hrp.into()).with_witness_version(witness_version).chars().collect::(), + }; + + // The bips specifically say that encoder should output lowercase characters so we uppercase manually. + if encoded != $addr { + let got = encoded.to_uppercase(); + assert_eq!(got, $addr) + } + } + )* + } +} +// Note these test vectors include various witness versions. +check_valid_address_roundtrip! { + bip_350_valid_address_roundtrip_0, "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4"; + bip_350_valid_address_roundtrip_1, "tb1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3q0sl5k7"; + bip_350_valid_address_roundtrip_2, "bc1pw508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7kt5nd6y"; + bip_350_valid_address_roundtrip_3, "BC1SW50QGDZ25J"; + bip_350_valid_address_roundtrip_4, "bc1zw508d6qejxtdg4y5r3zarvaryvaxxpcs"; + bip_350_valid_address_roundtrip_5, "tb1qqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesrxh6hy"; + bip_350_valid_address_roundtrip_6, "tb1pqqqqp399et2xygdj5xreqhjjvcmzhxw4aywxecjdzew6hylgvsesf3hn0c"; + bip_350_valid_address_roundtrip_7, "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqzk5jj0"; +} + +macro_rules! check_invalid_address { + ($($test_name:ident, $addr:literal);* $(;)?) => { + $( + #[test] + #[cfg(feature = "alloc")] + fn $test_name() { + match SegwitHrpstring::new($addr) { + Err(_) => {}, + // We do not enforce the bip specified restrictions when constructing + // SegwitHrpstring so we must do the check explicitly.
+ Ok(segwit) => assert!(!segwit.has_valid_hrp()), + } + } + )* + } +} +check_invalid_address! { + // Invalid human-readable part + bip_350_invalid_address_0, "tc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vq5zuyut"; + // Invalid checksums (wrong variant: Bech32 on v1+ programs, Bech32m on v0 programs): + bip_350_invalid_address_1, "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqh2y7hd"; + bip_350_invalid_address_2, "tb1z0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqglt7rf"; + bip_350_invalid_address_3, "BC1S0XLXVLHEMJA6C4DQV22UAPCTQUPFHLXM9H8Z3K2E72Q4K9HCZ7VQ54WELL"; + bip_350_invalid_address_4, "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kemeawh"; + bip_350_invalid_address_5, "tb1q0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vq24jc47"; + // Invalid character in checksum + bip_350_invalid_address_6, "bc1p38j9r5y49hruaue7wxjce0updqjuyyx0kh56v8s25huc6995vvpql3jow4"; + // Invalid witness version + bip_350_invalid_address_7, "BC130XLXVLHEMJA6C4DQV22UAPCTQUPFHLXM9H8Z3K2E72Q4K9HCZ7VQ7ZWS8R"; + // Invalid program length (1 byte) + bip_350_invalid_address_8, "bc1pw5dgrnzv"; + // Invalid program length (41 bytes) + bip_350_invalid_address_9, "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7v8n0nx0muaewav253zgeav"; + // Invalid program length for witness version 0 (per BIP-141) + bip_350_invalid_address_10, "BC1QR508D6QEJXTDG4Y5R3ZARVARYV98GJ9P"; + // Mixed case + bip_350_invalid_address_11, "tb1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vq47Zagq"; + // zero padding of more than 4 bits + bip_350_invalid_address_12, "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7v07qwwzcrf"; + // Non-zero padding in 8-to-5 conversion + bip_350_invalid_address_13, "tb1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vpggkg4j"; + // Empty data section + bip_350_invalid_address_14, "bc1gmk9yu"; +}