Skip to content

Commit

Permalink
Merge pull request #136 from Jules-Bertholet/std-tables
Browse files Browse the repository at this point in the history
Use stdlib alphabetic and numeric character tables
  • Loading branch information
Manishearth committed Jun 26, 2024
2 parents 592ce00 + e96ec2e commit 2081c29
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 16 deletions.
28 changes: 20 additions & 8 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,19 +232,27 @@ def emit_util_mod(f):
#[inline]
fn is_alphabetic(c: char) -> bool {
match c {
'a' ..= 'z' | 'A' ..= 'Z' => true,
c if c > '\x7f' => super::derived_property::Alphabetic(c),
_ => false,
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
c.is_alphabetic()
} else {
match c {
'a' ..= 'z' | 'A' ..= 'Z' => true,
c if c > '\\x7f' => super::derived_property::Alphabetic(c),
_ => false,
}
}
}
#[inline]
fn is_numeric(c: char) -> bool {
match c {
'0' ..= '9' => true,
c if c > '\x7f' => super::general_category::N(c),
_ => false,
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
c.is_numeric()
} else {
match c {
'0' ..= '9' => true,
c if c > '\\x7f' => super::general_category::N(c),
_ => false,
}
}
}
Expand Down Expand Up @@ -388,6 +396,10 @@ def emit_break_module(f, break_table, break_cats, name):
/// The version of [Unicode](http://www.unicode.org/)
/// that this version of unicode-segmentation is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
""" % UNICODE_VERSION)

rf.write("""
const UNICODE_VERSION_U8: (u8, u8, u8) = (%s, %s, %s);
""" % UNICODE_VERSION)

# download and parse all the data
Expand Down
26 changes: 18 additions & 8 deletions src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
/// that this version of unicode-segmentation is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = (15, 1, 0);

const UNICODE_VERSION_U8: (u8, u8, u8) = (15, 1, 0);

pub mod util {
#[inline]
pub fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool {
Expand All @@ -29,19 +31,27 @@ pub mod util {

#[inline]
fn is_alphabetic(c: char) -> bool {
match c {
'a' ..= 'z' | 'A' ..= 'Z' => true,
c if c > '\x7f' => super::derived_property::Alphabetic(c),
_ => false,
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
c.is_alphabetic()
} else {
match c {
'a' ..= 'z' | 'A' ..= 'Z' => true,
c if c > '\x7f' => super::derived_property::Alphabetic(c),
_ => false,
}
}
}

#[inline]
fn is_numeric(c: char) -> bool {
match c {
'0' ..= '9' => true,
c if c > '\x7f' => super::general_category::N(c),
_ => false,
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
c.is_numeric()
} else {
match c {
'0' ..= '9' => true,
c if c > '\x7f' => super::general_category::N(c),
_ => false,
}
}
}

Expand Down

0 comments on commit 2081c29

Please sign in to comment.