diff --git a/generate/src/parse.rs b/generate/src/parse.rs index 22fb413..31deb56 100644 --- a/generate/src/parse.rs +++ b/generate/src/parse.rs @@ -50,16 +50,6 @@ pub fn parse_id_properties(ucd_dir: &Path) -> Properties { set.extend(lo..=hi); } - // - // Unicode 4.1 through Unicode 15 omitted these two characters from ID_Continue - // by accident. However, this accident was corrected in Unicode 15.1. Any JS VM - // that supports ES6+ but that uses a version of Unicode earlier than 15.1 will - // consider these to be a syntax error, so we deliberately omit these characters - // from the set of identifiers that are valid in both ES5 and ES6+. For more info - // see 2.2 in https://www.unicode.org/L2/L2023/23160-utc176-properties-recs.pdf - properties.id_continue.remove(&0x30FB); - properties.id_continue.remove(&0xFF65); - properties } diff --git a/src/tables.rs b/src/tables.rs index bcca496..0f4c265 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -327,7 +327,7 @@ pub(crate) static LEAF: Align64<[u8; 7872]> = Align64([ 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x00, 0xFE, 0xFF, 0x3E, 0x1F, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x7F, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF7, + 0xFF, 0xFF, 0x7F, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -374,7 +374,7 @@ pub(crate) static LEAF: Align64<[u8; 7872]> = Align64([ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFC, 0xFC, 0xFC, 0x1C, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 
0x00, 0xFF, 0xFF, 0x18, 0x00, 0x00, 0xE0, 0x00, 0x00, 0x00, 0x00, 0xDF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, - 0x00, 0x00, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, 0xC0, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFC, 0xFC, 0xFC, 0x1C, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xEF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xB7, 0xFF, 0x3F, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, diff --git a/tests/compare.rs b/tests/compare.rs index de34de4..c42962e 100644 --- a/tests/compare.rs +++ b/tests/compare.rs @@ -14,10 +14,6 @@ fn compare_all_implementations() { let id_continue_roaring = roaring::id_continue_bitmap(); for ch in '\0'..=char::MAX { - // See test legacy_katakana_middle_dot_patch in tests/patch.rs - if matches!(ch, '・' | '･') { - continue; - } let thought_to_be_start = unicode_id_start::is_id_start(ch); let thought_to_be_continue = unicode_id_start::is_id_continue(ch); diff --git a/tests/patch.rs b/tests/patch.rs deleted file mode 100644 index 98c933c..0000000 --- a/tests/patch.rs +++ /dev/null @@ -1,11 +0,0 @@ -use unicode_id_start::is_id_continue_unicode; - -#[test] -fn legacy_katakana_middle_dot_patch() { - // U+30FB KATAKANA MIDDLE DOT - // https://util.unicode.org/UnicodeJsps/character.jsp?a=30FB - assert!(!is_id_continue_unicode('・')); - // U+FF65 HALFWIDTH KATAKANA MIDDLE DOT - // https://util.unicode.org/UnicodeJsps/character.jsp?a=FF65 - assert!(!is_id_continue_unicode('･')); -}