From fa10dd35b272eb16d6d1505537046a6aba08cbe3 Mon Sep 17 00:00:00 2001 From: Tom Cumming Date: Tue, 2 May 2017 21:02:29 +0100 Subject: [PATCH 1/4] Fetch and generate sentence tests, property table --- scripts/unicode.py | 7 + scripts/unicode_gen_breaktests.py | 15 + src/tables.rs | 968 ++++++++++++++++++++++++++++++ src/testdata.rs | 333 ++++++++++ 4 files changed, 1323 insertions(+) diff --git a/scripts/unicode.py b/scripts/unicode.py index 189832b..d032195 100755 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -351,3 +351,10 @@ def emit_break_module(f, break_table, break_cats, name): word_table.extend([(x, y, cat) for (x, y) in word_cats[cat]]) word_table.sort(key=lambda w: w[0]) emit_break_module(rf, word_table, word_cats.keys(), "word") + + sentence_cats = load_properties("auxiliary/SentenceBreakProperty.txt", []) + sentence_table = [] + for cat in sentence_cats: + sentence_table.extend([(x, y, cat) for (x, y) in sentence_cats[cat]]) + sentence_table.sort(key=lambda w: w[0]) + emit_break_module(rf, sentence_table, sentence_cats.keys(), "sentence") diff --git a/scripts/unicode_gen_breaktests.py b/scripts/unicode_gen_breaktests.py index 476aa93..21cb1fa 100755 --- a/scripts/unicode_gen_breaktests.py +++ b/scripts/unicode_gen_breaktests.py @@ -190,8 +190,23 @@ def create_words_data(f): f.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt\n") unicode.emit_table(f, "TEST_WORD", test, wtype, True, showfun, True) +def create_sentence_data(f): + d = load_test_data("auxiliary/SentenceBreakTest.txt") + + test = [] + + for (c, i) in d: + allchars = [cn for s in c for cn in s] + test.append((allchars, c)) + + wtype = "&'static [(&'static str, &'static [&'static str])]" + f.write(" // official Unicode test data\n") + f.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/SentenceBreakTest.txt\n") + unicode.emit_table(f, "TEST_SENTENCE", test, wtype, True, showfun, True) + if __name__ == "__main__": with open("testdata.rs", "w") as rf: 
rf.write(unicode.preamble) create_grapheme_data(rf) create_words_data(rf) + create_sentence_data(rf) diff --git a/src/tables.rs b/src/tables.rs index 625a588..242f8b5 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -1323,3 +1323,971 @@ pub mod word { ]; } +pub mod sentence { + use core::result::Result::{Ok, Err}; + + pub use self::SentenceCat::*; + + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, PartialEq, Eq)] + pub enum SentenceCat { + SC_ATerm, + SC_Any, + SC_CR, + SC_Close, + SC_Extend, + SC_Format, + SC_LF, + SC_Lower, + SC_Numeric, + SC_OLetter, + SC_SContinue, + SC_STerm, + SC_Sep, + SC_Sp, + SC_Upper, + } + + fn bsearch_range_value_table(c: char, r: &'static [(char, char, SentenceCat)]) -> SentenceCat { + use core::cmp::Ordering::{Equal, Less, Greater}; + match r.binary_search_by(|&(lo, hi, _)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) { + Ok(idx) => { + let (_, _, cat) = r[idx]; + cat + } + Err(_) => SC_Any + } + } + + pub fn sentence_category(c: char) -> SentenceCat { + bsearch_range_value_table(c, sentence_cat_table) + } + + const sentence_cat_table: &'static [(char, char, SentenceCat)] = &[ + ('\u{9}', '\u{9}', SC_Sp), ('\u{a}', '\u{a}', SC_LF), ('\u{b}', '\u{c}', SC_Sp), ('\u{d}', + '\u{d}', SC_CR), ('\u{20}', '\u{20}', SC_Sp), ('\u{21}', '\u{21}', SC_STerm), ('\u{22}', + '\u{22}', SC_Close), ('\u{27}', '\u{29}', SC_Close), ('\u{2c}', '\u{2d}', SC_SContinue), + ('\u{2e}', '\u{2e}', SC_ATerm), ('\u{30}', '\u{39}', SC_Numeric), ('\u{3a}', '\u{3a}', + SC_SContinue), ('\u{3f}', '\u{3f}', SC_STerm), ('\u{41}', '\u{5a}', SC_Upper), ('\u{5b}', + '\u{5b}', SC_Close), ('\u{5d}', '\u{5d}', SC_Close), ('\u{61}', '\u{7a}', SC_Lower), + ('\u{7b}', '\u{7b}', SC_Close), ('\u{7d}', '\u{7d}', SC_Close), ('\u{85}', '\u{85}', + SC_Sep), ('\u{a0}', '\u{a0}', SC_Sp), ('\u{aa}', '\u{aa}', SC_Lower), ('\u{ab}', '\u{ab}', + SC_Close), ('\u{ad}', '\u{ad}', SC_Format), ('\u{b5}', '\u{b5}', SC_Lower), ('\u{ba}', + '\u{ba}', 
SC_Lower), ('\u{bb}', '\u{bb}', SC_Close), ('\u{c0}', '\u{d6}', SC_Upper), + ('\u{d8}', '\u{de}', SC_Upper), ('\u{df}', '\u{f6}', SC_Lower), ('\u{f8}', '\u{ff}', + SC_Lower), ('\u{100}', '\u{100}', SC_Upper), ('\u{101}', '\u{101}', SC_Lower), ('\u{102}', + '\u{102}', SC_Upper), ('\u{103}', '\u{103}', SC_Lower), ('\u{104}', '\u{104}', SC_Upper), + ('\u{105}', '\u{105}', SC_Lower), ('\u{106}', '\u{106}', SC_Upper), ('\u{107}', '\u{107}', + SC_Lower), ('\u{108}', '\u{108}', SC_Upper), ('\u{109}', '\u{109}', SC_Lower), ('\u{10a}', + '\u{10a}', SC_Upper), ('\u{10b}', '\u{10b}', SC_Lower), ('\u{10c}', '\u{10c}', SC_Upper), + ('\u{10d}', '\u{10d}', SC_Lower), ('\u{10e}', '\u{10e}', SC_Upper), ('\u{10f}', '\u{10f}', + SC_Lower), ('\u{110}', '\u{110}', SC_Upper), ('\u{111}', '\u{111}', SC_Lower), ('\u{112}', + '\u{112}', SC_Upper), ('\u{113}', '\u{113}', SC_Lower), ('\u{114}', '\u{114}', SC_Upper), + ('\u{115}', '\u{115}', SC_Lower), ('\u{116}', '\u{116}', SC_Upper), ('\u{117}', '\u{117}', + SC_Lower), ('\u{118}', '\u{118}', SC_Upper), ('\u{119}', '\u{119}', SC_Lower), ('\u{11a}', + '\u{11a}', SC_Upper), ('\u{11b}', '\u{11b}', SC_Lower), ('\u{11c}', '\u{11c}', SC_Upper), + ('\u{11d}', '\u{11d}', SC_Lower), ('\u{11e}', '\u{11e}', SC_Upper), ('\u{11f}', '\u{11f}', + SC_Lower), ('\u{120}', '\u{120}', SC_Upper), ('\u{121}', '\u{121}', SC_Lower), ('\u{122}', + '\u{122}', SC_Upper), ('\u{123}', '\u{123}', SC_Lower), ('\u{124}', '\u{124}', SC_Upper), + ('\u{125}', '\u{125}', SC_Lower), ('\u{126}', '\u{126}', SC_Upper), ('\u{127}', '\u{127}', + SC_Lower), ('\u{128}', '\u{128}', SC_Upper), ('\u{129}', '\u{129}', SC_Lower), ('\u{12a}', + '\u{12a}', SC_Upper), ('\u{12b}', '\u{12b}', SC_Lower), ('\u{12c}', '\u{12c}', SC_Upper), + ('\u{12d}', '\u{12d}', SC_Lower), ('\u{12e}', '\u{12e}', SC_Upper), ('\u{12f}', '\u{12f}', + SC_Lower), ('\u{130}', '\u{130}', SC_Upper), ('\u{131}', '\u{131}', SC_Lower), ('\u{132}', + '\u{132}', SC_Upper), ('\u{133}', '\u{133}', SC_Lower), ('\u{134}', 
'\u{134}', SC_Upper), + ('\u{135}', '\u{135}', SC_Lower), ('\u{136}', '\u{136}', SC_Upper), ('\u{137}', '\u{138}', + SC_Lower), ('\u{139}', '\u{139}', SC_Upper), ('\u{13a}', '\u{13a}', SC_Lower), ('\u{13b}', + '\u{13b}', SC_Upper), ('\u{13c}', '\u{13c}', SC_Lower), ('\u{13d}', '\u{13d}', SC_Upper), + ('\u{13e}', '\u{13e}', SC_Lower), ('\u{13f}', '\u{13f}', SC_Upper), ('\u{140}', '\u{140}', + SC_Lower), ('\u{141}', '\u{141}', SC_Upper), ('\u{142}', '\u{142}', SC_Lower), ('\u{143}', + '\u{143}', SC_Upper), ('\u{144}', '\u{144}', SC_Lower), ('\u{145}', '\u{145}', SC_Upper), + ('\u{146}', '\u{146}', SC_Lower), ('\u{147}', '\u{147}', SC_Upper), ('\u{148}', '\u{149}', + SC_Lower), ('\u{14a}', '\u{14a}', SC_Upper), ('\u{14b}', '\u{14b}', SC_Lower), ('\u{14c}', + '\u{14c}', SC_Upper), ('\u{14d}', '\u{14d}', SC_Lower), ('\u{14e}', '\u{14e}', SC_Upper), + ('\u{14f}', '\u{14f}', SC_Lower), ('\u{150}', '\u{150}', SC_Upper), ('\u{151}', '\u{151}', + SC_Lower), ('\u{152}', '\u{152}', SC_Upper), ('\u{153}', '\u{153}', SC_Lower), ('\u{154}', + '\u{154}', SC_Upper), ('\u{155}', '\u{155}', SC_Lower), ('\u{156}', '\u{156}', SC_Upper), + ('\u{157}', '\u{157}', SC_Lower), ('\u{158}', '\u{158}', SC_Upper), ('\u{159}', '\u{159}', + SC_Lower), ('\u{15a}', '\u{15a}', SC_Upper), ('\u{15b}', '\u{15b}', SC_Lower), ('\u{15c}', + '\u{15c}', SC_Upper), ('\u{15d}', '\u{15d}', SC_Lower), ('\u{15e}', '\u{15e}', SC_Upper), + ('\u{15f}', '\u{15f}', SC_Lower), ('\u{160}', '\u{160}', SC_Upper), ('\u{161}', '\u{161}', + SC_Lower), ('\u{162}', '\u{162}', SC_Upper), ('\u{163}', '\u{163}', SC_Lower), ('\u{164}', + '\u{164}', SC_Upper), ('\u{165}', '\u{165}', SC_Lower), ('\u{166}', '\u{166}', SC_Upper), + ('\u{167}', '\u{167}', SC_Lower), ('\u{168}', '\u{168}', SC_Upper), ('\u{169}', '\u{169}', + SC_Lower), ('\u{16a}', '\u{16a}', SC_Upper), ('\u{16b}', '\u{16b}', SC_Lower), ('\u{16c}', + '\u{16c}', SC_Upper), ('\u{16d}', '\u{16d}', SC_Lower), ('\u{16e}', '\u{16e}', SC_Upper), + ('\u{16f}', '\u{16f}', 
SC_Lower), ('\u{170}', '\u{170}', SC_Upper), ('\u{171}', '\u{171}', + SC_Lower), ('\u{172}', '\u{172}', SC_Upper), ('\u{173}', '\u{173}', SC_Lower), ('\u{174}', + '\u{174}', SC_Upper), ('\u{175}', '\u{175}', SC_Lower), ('\u{176}', '\u{176}', SC_Upper), + ('\u{177}', '\u{177}', SC_Lower), ('\u{178}', '\u{179}', SC_Upper), ('\u{17a}', '\u{17a}', + SC_Lower), ('\u{17b}', '\u{17b}', SC_Upper), ('\u{17c}', '\u{17c}', SC_Lower), ('\u{17d}', + '\u{17d}', SC_Upper), ('\u{17e}', '\u{180}', SC_Lower), ('\u{181}', '\u{182}', SC_Upper), + ('\u{183}', '\u{183}', SC_Lower), ('\u{184}', '\u{184}', SC_Upper), ('\u{185}', '\u{185}', + SC_Lower), ('\u{186}', '\u{187}', SC_Upper), ('\u{188}', '\u{188}', SC_Lower), ('\u{189}', + '\u{18b}', SC_Upper), ('\u{18c}', '\u{18d}', SC_Lower), ('\u{18e}', '\u{191}', SC_Upper), + ('\u{192}', '\u{192}', SC_Lower), ('\u{193}', '\u{194}', SC_Upper), ('\u{195}', '\u{195}', + SC_Lower), ('\u{196}', '\u{198}', SC_Upper), ('\u{199}', '\u{19b}', SC_Lower), ('\u{19c}', + '\u{19d}', SC_Upper), ('\u{19e}', '\u{19e}', SC_Lower), ('\u{19f}', '\u{1a0}', SC_Upper), + ('\u{1a1}', '\u{1a1}', SC_Lower), ('\u{1a2}', '\u{1a2}', SC_Upper), ('\u{1a3}', '\u{1a3}', + SC_Lower), ('\u{1a4}', '\u{1a4}', SC_Upper), ('\u{1a5}', '\u{1a5}', SC_Lower), ('\u{1a6}', + '\u{1a7}', SC_Upper), ('\u{1a8}', '\u{1a8}', SC_Lower), ('\u{1a9}', '\u{1a9}', SC_Upper), + ('\u{1aa}', '\u{1ab}', SC_Lower), ('\u{1ac}', '\u{1ac}', SC_Upper), ('\u{1ad}', '\u{1ad}', + SC_Lower), ('\u{1ae}', '\u{1af}', SC_Upper), ('\u{1b0}', '\u{1b0}', SC_Lower), ('\u{1b1}', + '\u{1b3}', SC_Upper), ('\u{1b4}', '\u{1b4}', SC_Lower), ('\u{1b5}', '\u{1b5}', SC_Upper), + ('\u{1b6}', '\u{1b6}', SC_Lower), ('\u{1b7}', '\u{1b8}', SC_Upper), ('\u{1b9}', '\u{1ba}', + SC_Lower), ('\u{1bb}', '\u{1bb}', SC_OLetter), ('\u{1bc}', '\u{1bc}', SC_Upper), ('\u{1bd}', + '\u{1bf}', SC_Lower), ('\u{1c0}', '\u{1c3}', SC_OLetter), ('\u{1c4}', '\u{1c5}', SC_Upper), + ('\u{1c6}', '\u{1c6}', SC_Lower), ('\u{1c7}', '\u{1c8}', SC_Upper), 
('\u{1c9}', '\u{1c9}', + SC_Lower), ('\u{1ca}', '\u{1cb}', SC_Upper), ('\u{1cc}', '\u{1cc}', SC_Lower), ('\u{1cd}', + '\u{1cd}', SC_Upper), ('\u{1ce}', '\u{1ce}', SC_Lower), ('\u{1cf}', '\u{1cf}', SC_Upper), + ('\u{1d0}', '\u{1d0}', SC_Lower), ('\u{1d1}', '\u{1d1}', SC_Upper), ('\u{1d2}', '\u{1d2}', + SC_Lower), ('\u{1d3}', '\u{1d3}', SC_Upper), ('\u{1d4}', '\u{1d4}', SC_Lower), ('\u{1d5}', + '\u{1d5}', SC_Upper), ('\u{1d6}', '\u{1d6}', SC_Lower), ('\u{1d7}', '\u{1d7}', SC_Upper), + ('\u{1d8}', '\u{1d8}', SC_Lower), ('\u{1d9}', '\u{1d9}', SC_Upper), ('\u{1da}', '\u{1da}', + SC_Lower), ('\u{1db}', '\u{1db}', SC_Upper), ('\u{1dc}', '\u{1dd}', SC_Lower), ('\u{1de}', + '\u{1de}', SC_Upper), ('\u{1df}', '\u{1df}', SC_Lower), ('\u{1e0}', '\u{1e0}', SC_Upper), + ('\u{1e1}', '\u{1e1}', SC_Lower), ('\u{1e2}', '\u{1e2}', SC_Upper), ('\u{1e3}', '\u{1e3}', + SC_Lower), ('\u{1e4}', '\u{1e4}', SC_Upper), ('\u{1e5}', '\u{1e5}', SC_Lower), ('\u{1e6}', + '\u{1e6}', SC_Upper), ('\u{1e7}', '\u{1e7}', SC_Lower), ('\u{1e8}', '\u{1e8}', SC_Upper), + ('\u{1e9}', '\u{1e9}', SC_Lower), ('\u{1ea}', '\u{1ea}', SC_Upper), ('\u{1eb}', '\u{1eb}', + SC_Lower), ('\u{1ec}', '\u{1ec}', SC_Upper), ('\u{1ed}', '\u{1ed}', SC_Lower), ('\u{1ee}', + '\u{1ee}', SC_Upper), ('\u{1ef}', '\u{1f0}', SC_Lower), ('\u{1f1}', '\u{1f2}', SC_Upper), + ('\u{1f3}', '\u{1f3}', SC_Lower), ('\u{1f4}', '\u{1f4}', SC_Upper), ('\u{1f5}', '\u{1f5}', + SC_Lower), ('\u{1f6}', '\u{1f8}', SC_Upper), ('\u{1f9}', '\u{1f9}', SC_Lower), ('\u{1fa}', + '\u{1fa}', SC_Upper), ('\u{1fb}', '\u{1fb}', SC_Lower), ('\u{1fc}', '\u{1fc}', SC_Upper), + ('\u{1fd}', '\u{1fd}', SC_Lower), ('\u{1fe}', '\u{1fe}', SC_Upper), ('\u{1ff}', '\u{1ff}', + SC_Lower), ('\u{200}', '\u{200}', SC_Upper), ('\u{201}', '\u{201}', SC_Lower), ('\u{202}', + '\u{202}', SC_Upper), ('\u{203}', '\u{203}', SC_Lower), ('\u{204}', '\u{204}', SC_Upper), + ('\u{205}', '\u{205}', SC_Lower), ('\u{206}', '\u{206}', SC_Upper), ('\u{207}', '\u{207}', + SC_Lower), ('\u{208}', 
'\u{208}', SC_Upper), ('\u{209}', '\u{209}', SC_Lower), ('\u{20a}', + '\u{20a}', SC_Upper), ('\u{20b}', '\u{20b}', SC_Lower), ('\u{20c}', '\u{20c}', SC_Upper), + ('\u{20d}', '\u{20d}', SC_Lower), ('\u{20e}', '\u{20e}', SC_Upper), ('\u{20f}', '\u{20f}', + SC_Lower), ('\u{210}', '\u{210}', SC_Upper), ('\u{211}', '\u{211}', SC_Lower), ('\u{212}', + '\u{212}', SC_Upper), ('\u{213}', '\u{213}', SC_Lower), ('\u{214}', '\u{214}', SC_Upper), + ('\u{215}', '\u{215}', SC_Lower), ('\u{216}', '\u{216}', SC_Upper), ('\u{217}', '\u{217}', + SC_Lower), ('\u{218}', '\u{218}', SC_Upper), ('\u{219}', '\u{219}', SC_Lower), ('\u{21a}', + '\u{21a}', SC_Upper), ('\u{21b}', '\u{21b}', SC_Lower), ('\u{21c}', '\u{21c}', SC_Upper), + ('\u{21d}', '\u{21d}', SC_Lower), ('\u{21e}', '\u{21e}', SC_Upper), ('\u{21f}', '\u{21f}', + SC_Lower), ('\u{220}', '\u{220}', SC_Upper), ('\u{221}', '\u{221}', SC_Lower), ('\u{222}', + '\u{222}', SC_Upper), ('\u{223}', '\u{223}', SC_Lower), ('\u{224}', '\u{224}', SC_Upper), + ('\u{225}', '\u{225}', SC_Lower), ('\u{226}', '\u{226}', SC_Upper), ('\u{227}', '\u{227}', + SC_Lower), ('\u{228}', '\u{228}', SC_Upper), ('\u{229}', '\u{229}', SC_Lower), ('\u{22a}', + '\u{22a}', SC_Upper), ('\u{22b}', '\u{22b}', SC_Lower), ('\u{22c}', '\u{22c}', SC_Upper), + ('\u{22d}', '\u{22d}', SC_Lower), ('\u{22e}', '\u{22e}', SC_Upper), ('\u{22f}', '\u{22f}', + SC_Lower), ('\u{230}', '\u{230}', SC_Upper), ('\u{231}', '\u{231}', SC_Lower), ('\u{232}', + '\u{232}', SC_Upper), ('\u{233}', '\u{239}', SC_Lower), ('\u{23a}', '\u{23b}', SC_Upper), + ('\u{23c}', '\u{23c}', SC_Lower), ('\u{23d}', '\u{23e}', SC_Upper), ('\u{23f}', '\u{240}', + SC_Lower), ('\u{241}', '\u{241}', SC_Upper), ('\u{242}', '\u{242}', SC_Lower), ('\u{243}', + '\u{246}', SC_Upper), ('\u{247}', '\u{247}', SC_Lower), ('\u{248}', '\u{248}', SC_Upper), + ('\u{249}', '\u{249}', SC_Lower), ('\u{24a}', '\u{24a}', SC_Upper), ('\u{24b}', '\u{24b}', + SC_Lower), ('\u{24c}', '\u{24c}', SC_Upper), ('\u{24d}', '\u{24d}', 
SC_Lower), ('\u{24e}', + '\u{24e}', SC_Upper), ('\u{24f}', '\u{293}', SC_Lower), ('\u{294}', '\u{294}', SC_OLetter), + ('\u{295}', '\u{2b8}', SC_Lower), ('\u{2b9}', '\u{2bf}', SC_OLetter), ('\u{2c0}', '\u{2c1}', + SC_Lower), ('\u{2c6}', '\u{2d1}', SC_OLetter), ('\u{2e0}', '\u{2e4}', SC_Lower), ('\u{2ec}', + '\u{2ec}', SC_OLetter), ('\u{2ee}', '\u{2ee}', SC_OLetter), ('\u{300}', '\u{36f}', + SC_Extend), ('\u{370}', '\u{370}', SC_Upper), ('\u{371}', '\u{371}', SC_Lower), ('\u{372}', + '\u{372}', SC_Upper), ('\u{373}', '\u{373}', SC_Lower), ('\u{374}', '\u{374}', SC_OLetter), + ('\u{376}', '\u{376}', SC_Upper), ('\u{377}', '\u{377}', SC_Lower), ('\u{37a}', '\u{37d}', + SC_Lower), ('\u{37f}', '\u{37f}', SC_Upper), ('\u{386}', '\u{386}', SC_Upper), ('\u{388}', + '\u{38a}', SC_Upper), ('\u{38c}', '\u{38c}', SC_Upper), ('\u{38e}', '\u{38f}', SC_Upper), + ('\u{390}', '\u{390}', SC_Lower), ('\u{391}', '\u{3a1}', SC_Upper), ('\u{3a3}', '\u{3ab}', + SC_Upper), ('\u{3ac}', '\u{3ce}', SC_Lower), ('\u{3cf}', '\u{3cf}', SC_Upper), ('\u{3d0}', + '\u{3d1}', SC_Lower), ('\u{3d2}', '\u{3d4}', SC_Upper), ('\u{3d5}', '\u{3d7}', SC_Lower), + ('\u{3d8}', '\u{3d8}', SC_Upper), ('\u{3d9}', '\u{3d9}', SC_Lower), ('\u{3da}', '\u{3da}', + SC_Upper), ('\u{3db}', '\u{3db}', SC_Lower), ('\u{3dc}', '\u{3dc}', SC_Upper), ('\u{3dd}', + '\u{3dd}', SC_Lower), ('\u{3de}', '\u{3de}', SC_Upper), ('\u{3df}', '\u{3df}', SC_Lower), + ('\u{3e0}', '\u{3e0}', SC_Upper), ('\u{3e1}', '\u{3e1}', SC_Lower), ('\u{3e2}', '\u{3e2}', + SC_Upper), ('\u{3e3}', '\u{3e3}', SC_Lower), ('\u{3e4}', '\u{3e4}', SC_Upper), ('\u{3e5}', + '\u{3e5}', SC_Lower), ('\u{3e6}', '\u{3e6}', SC_Upper), ('\u{3e7}', '\u{3e7}', SC_Lower), + ('\u{3e8}', '\u{3e8}', SC_Upper), ('\u{3e9}', '\u{3e9}', SC_Lower), ('\u{3ea}', '\u{3ea}', + SC_Upper), ('\u{3eb}', '\u{3eb}', SC_Lower), ('\u{3ec}', '\u{3ec}', SC_Upper), ('\u{3ed}', + '\u{3ed}', SC_Lower), ('\u{3ee}', '\u{3ee}', SC_Upper), ('\u{3ef}', '\u{3f3}', SC_Lower), + ('\u{3f4}', '\u{3f4}', 
SC_Upper), ('\u{3f5}', '\u{3f5}', SC_Lower), ('\u{3f7}', '\u{3f7}', + SC_Upper), ('\u{3f8}', '\u{3f8}', SC_Lower), ('\u{3f9}', '\u{3fa}', SC_Upper), ('\u{3fb}', + '\u{3fc}', SC_Lower), ('\u{3fd}', '\u{42f}', SC_Upper), ('\u{430}', '\u{45f}', SC_Lower), + ('\u{460}', '\u{460}', SC_Upper), ('\u{461}', '\u{461}', SC_Lower), ('\u{462}', '\u{462}', + SC_Upper), ('\u{463}', '\u{463}', SC_Lower), ('\u{464}', '\u{464}', SC_Upper), ('\u{465}', + '\u{465}', SC_Lower), ('\u{466}', '\u{466}', SC_Upper), ('\u{467}', '\u{467}', SC_Lower), + ('\u{468}', '\u{468}', SC_Upper), ('\u{469}', '\u{469}', SC_Lower), ('\u{46a}', '\u{46a}', + SC_Upper), ('\u{46b}', '\u{46b}', SC_Lower), ('\u{46c}', '\u{46c}', SC_Upper), ('\u{46d}', + '\u{46d}', SC_Lower), ('\u{46e}', '\u{46e}', SC_Upper), ('\u{46f}', '\u{46f}', SC_Lower), + ('\u{470}', '\u{470}', SC_Upper), ('\u{471}', '\u{471}', SC_Lower), ('\u{472}', '\u{472}', + SC_Upper), ('\u{473}', '\u{473}', SC_Lower), ('\u{474}', '\u{474}', SC_Upper), ('\u{475}', + '\u{475}', SC_Lower), ('\u{476}', '\u{476}', SC_Upper), ('\u{477}', '\u{477}', SC_Lower), + ('\u{478}', '\u{478}', SC_Upper), ('\u{479}', '\u{479}', SC_Lower), ('\u{47a}', '\u{47a}', + SC_Upper), ('\u{47b}', '\u{47b}', SC_Lower), ('\u{47c}', '\u{47c}', SC_Upper), ('\u{47d}', + '\u{47d}', SC_Lower), ('\u{47e}', '\u{47e}', SC_Upper), ('\u{47f}', '\u{47f}', SC_Lower), + ('\u{480}', '\u{480}', SC_Upper), ('\u{481}', '\u{481}', SC_Lower), ('\u{483}', '\u{489}', + SC_Extend), ('\u{48a}', '\u{48a}', SC_Upper), ('\u{48b}', '\u{48b}', SC_Lower), ('\u{48c}', + '\u{48c}', SC_Upper), ('\u{48d}', '\u{48d}', SC_Lower), ('\u{48e}', '\u{48e}', SC_Upper), + ('\u{48f}', '\u{48f}', SC_Lower), ('\u{490}', '\u{490}', SC_Upper), ('\u{491}', '\u{491}', + SC_Lower), ('\u{492}', '\u{492}', SC_Upper), ('\u{493}', '\u{493}', SC_Lower), ('\u{494}', + '\u{494}', SC_Upper), ('\u{495}', '\u{495}', SC_Lower), ('\u{496}', '\u{496}', SC_Upper), + ('\u{497}', '\u{497}', SC_Lower), ('\u{498}', '\u{498}', SC_Upper), 
('\u{499}', '\u{499}', + SC_Lower), ('\u{49a}', '\u{49a}', SC_Upper), ('\u{49b}', '\u{49b}', SC_Lower), ('\u{49c}', + '\u{49c}', SC_Upper), ('\u{49d}', '\u{49d}', SC_Lower), ('\u{49e}', '\u{49e}', SC_Upper), + ('\u{49f}', '\u{49f}', SC_Lower), ('\u{4a0}', '\u{4a0}', SC_Upper), ('\u{4a1}', '\u{4a1}', + SC_Lower), ('\u{4a2}', '\u{4a2}', SC_Upper), ('\u{4a3}', '\u{4a3}', SC_Lower), ('\u{4a4}', + '\u{4a4}', SC_Upper), ('\u{4a5}', '\u{4a5}', SC_Lower), ('\u{4a6}', '\u{4a6}', SC_Upper), + ('\u{4a7}', '\u{4a7}', SC_Lower), ('\u{4a8}', '\u{4a8}', SC_Upper), ('\u{4a9}', '\u{4a9}', + SC_Lower), ('\u{4aa}', '\u{4aa}', SC_Upper), ('\u{4ab}', '\u{4ab}', SC_Lower), ('\u{4ac}', + '\u{4ac}', SC_Upper), ('\u{4ad}', '\u{4ad}', SC_Lower), ('\u{4ae}', '\u{4ae}', SC_Upper), + ('\u{4af}', '\u{4af}', SC_Lower), ('\u{4b0}', '\u{4b0}', SC_Upper), ('\u{4b1}', '\u{4b1}', + SC_Lower), ('\u{4b2}', '\u{4b2}', SC_Upper), ('\u{4b3}', '\u{4b3}', SC_Lower), ('\u{4b4}', + '\u{4b4}', SC_Upper), ('\u{4b5}', '\u{4b5}', SC_Lower), ('\u{4b6}', '\u{4b6}', SC_Upper), + ('\u{4b7}', '\u{4b7}', SC_Lower), ('\u{4b8}', '\u{4b8}', SC_Upper), ('\u{4b9}', '\u{4b9}', + SC_Lower), ('\u{4ba}', '\u{4ba}', SC_Upper), ('\u{4bb}', '\u{4bb}', SC_Lower), ('\u{4bc}', + '\u{4bc}', SC_Upper), ('\u{4bd}', '\u{4bd}', SC_Lower), ('\u{4be}', '\u{4be}', SC_Upper), + ('\u{4bf}', '\u{4bf}', SC_Lower), ('\u{4c0}', '\u{4c1}', SC_Upper), ('\u{4c2}', '\u{4c2}', + SC_Lower), ('\u{4c3}', '\u{4c3}', SC_Upper), ('\u{4c4}', '\u{4c4}', SC_Lower), ('\u{4c5}', + '\u{4c5}', SC_Upper), ('\u{4c6}', '\u{4c6}', SC_Lower), ('\u{4c7}', '\u{4c7}', SC_Upper), + ('\u{4c8}', '\u{4c8}', SC_Lower), ('\u{4c9}', '\u{4c9}', SC_Upper), ('\u{4ca}', '\u{4ca}', + SC_Lower), ('\u{4cb}', '\u{4cb}', SC_Upper), ('\u{4cc}', '\u{4cc}', SC_Lower), ('\u{4cd}', + '\u{4cd}', SC_Upper), ('\u{4ce}', '\u{4cf}', SC_Lower), ('\u{4d0}', '\u{4d0}', SC_Upper), + ('\u{4d1}', '\u{4d1}', SC_Lower), ('\u{4d2}', '\u{4d2}', SC_Upper), ('\u{4d3}', '\u{4d3}', + SC_Lower), ('\u{4d4}', 
'\u{4d4}', SC_Upper), ('\u{4d5}', '\u{4d5}', SC_Lower), ('\u{4d6}', + '\u{4d6}', SC_Upper), ('\u{4d7}', '\u{4d7}', SC_Lower), ('\u{4d8}', '\u{4d8}', SC_Upper), + ('\u{4d9}', '\u{4d9}', SC_Lower), ('\u{4da}', '\u{4da}', SC_Upper), ('\u{4db}', '\u{4db}', + SC_Lower), ('\u{4dc}', '\u{4dc}', SC_Upper), ('\u{4dd}', '\u{4dd}', SC_Lower), ('\u{4de}', + '\u{4de}', SC_Upper), ('\u{4df}', '\u{4df}', SC_Lower), ('\u{4e0}', '\u{4e0}', SC_Upper), + ('\u{4e1}', '\u{4e1}', SC_Lower), ('\u{4e2}', '\u{4e2}', SC_Upper), ('\u{4e3}', '\u{4e3}', + SC_Lower), ('\u{4e4}', '\u{4e4}', SC_Upper), ('\u{4e5}', '\u{4e5}', SC_Lower), ('\u{4e6}', + '\u{4e6}', SC_Upper), ('\u{4e7}', '\u{4e7}', SC_Lower), ('\u{4e8}', '\u{4e8}', SC_Upper), + ('\u{4e9}', '\u{4e9}', SC_Lower), ('\u{4ea}', '\u{4ea}', SC_Upper), ('\u{4eb}', '\u{4eb}', + SC_Lower), ('\u{4ec}', '\u{4ec}', SC_Upper), ('\u{4ed}', '\u{4ed}', SC_Lower), ('\u{4ee}', + '\u{4ee}', SC_Upper), ('\u{4ef}', '\u{4ef}', SC_Lower), ('\u{4f0}', '\u{4f0}', SC_Upper), + ('\u{4f1}', '\u{4f1}', SC_Lower), ('\u{4f2}', '\u{4f2}', SC_Upper), ('\u{4f3}', '\u{4f3}', + SC_Lower), ('\u{4f4}', '\u{4f4}', SC_Upper), ('\u{4f5}', '\u{4f5}', SC_Lower), ('\u{4f6}', + '\u{4f6}', SC_Upper), ('\u{4f7}', '\u{4f7}', SC_Lower), ('\u{4f8}', '\u{4f8}', SC_Upper), + ('\u{4f9}', '\u{4f9}', SC_Lower), ('\u{4fa}', '\u{4fa}', SC_Upper), ('\u{4fb}', '\u{4fb}', + SC_Lower), ('\u{4fc}', '\u{4fc}', SC_Upper), ('\u{4fd}', '\u{4fd}', SC_Lower), ('\u{4fe}', + '\u{4fe}', SC_Upper), ('\u{4ff}', '\u{4ff}', SC_Lower), ('\u{500}', '\u{500}', SC_Upper), + ('\u{501}', '\u{501}', SC_Lower), ('\u{502}', '\u{502}', SC_Upper), ('\u{503}', '\u{503}', + SC_Lower), ('\u{504}', '\u{504}', SC_Upper), ('\u{505}', '\u{505}', SC_Lower), ('\u{506}', + '\u{506}', SC_Upper), ('\u{507}', '\u{507}', SC_Lower), ('\u{508}', '\u{508}', SC_Upper), + ('\u{509}', '\u{509}', SC_Lower), ('\u{50a}', '\u{50a}', SC_Upper), ('\u{50b}', '\u{50b}', + SC_Lower), ('\u{50c}', '\u{50c}', SC_Upper), ('\u{50d}', '\u{50d}', 
SC_Lower), ('\u{50e}', + '\u{50e}', SC_Upper), ('\u{50f}', '\u{50f}', SC_Lower), ('\u{510}', '\u{510}', SC_Upper), + ('\u{511}', '\u{511}', SC_Lower), ('\u{512}', '\u{512}', SC_Upper), ('\u{513}', '\u{513}', + SC_Lower), ('\u{514}', '\u{514}', SC_Upper), ('\u{515}', '\u{515}', SC_Lower), ('\u{516}', + '\u{516}', SC_Upper), ('\u{517}', '\u{517}', SC_Lower), ('\u{518}', '\u{518}', SC_Upper), + ('\u{519}', '\u{519}', SC_Lower), ('\u{51a}', '\u{51a}', SC_Upper), ('\u{51b}', '\u{51b}', + SC_Lower), ('\u{51c}', '\u{51c}', SC_Upper), ('\u{51d}', '\u{51d}', SC_Lower), ('\u{51e}', + '\u{51e}', SC_Upper), ('\u{51f}', '\u{51f}', SC_Lower), ('\u{520}', '\u{520}', SC_Upper), + ('\u{521}', '\u{521}', SC_Lower), ('\u{522}', '\u{522}', SC_Upper), ('\u{523}', '\u{523}', + SC_Lower), ('\u{524}', '\u{524}', SC_Upper), ('\u{525}', '\u{525}', SC_Lower), ('\u{526}', + '\u{526}', SC_Upper), ('\u{527}', '\u{527}', SC_Lower), ('\u{528}', '\u{528}', SC_Upper), + ('\u{529}', '\u{529}', SC_Lower), ('\u{52a}', '\u{52a}', SC_Upper), ('\u{52b}', '\u{52b}', + SC_Lower), ('\u{52c}', '\u{52c}', SC_Upper), ('\u{52d}', '\u{52d}', SC_Lower), ('\u{52e}', + '\u{52e}', SC_Upper), ('\u{52f}', '\u{52f}', SC_Lower), ('\u{531}', '\u{556}', SC_Upper), + ('\u{559}', '\u{559}', SC_OLetter), ('\u{55d}', '\u{55d}', SC_SContinue), ('\u{561}', + '\u{587}', SC_Lower), ('\u{589}', '\u{589}', SC_STerm), ('\u{591}', '\u{5bd}', SC_Extend), + ('\u{5bf}', '\u{5bf}', SC_Extend), ('\u{5c1}', '\u{5c2}', SC_Extend), ('\u{5c4}', '\u{5c5}', + SC_Extend), ('\u{5c7}', '\u{5c7}', SC_Extend), ('\u{5d0}', '\u{5ea}', SC_OLetter), + ('\u{5f0}', '\u{5f3}', SC_OLetter), ('\u{600}', '\u{605}', SC_Format), ('\u{60c}', + '\u{60d}', SC_SContinue), ('\u{610}', '\u{61a}', SC_Extend), ('\u{61c}', '\u{61c}', + SC_Format), ('\u{61f}', '\u{61f}', SC_STerm), ('\u{620}', '\u{64a}', SC_OLetter), + ('\u{64b}', '\u{65f}', SC_Extend), ('\u{660}', '\u{669}', SC_Numeric), ('\u{66b}', + '\u{66c}', SC_Numeric), ('\u{66e}', '\u{66f}', SC_OLetter), 
('\u{670}', '\u{670}', + SC_Extend), ('\u{671}', '\u{6d3}', SC_OLetter), ('\u{6d4}', '\u{6d4}', SC_STerm), + ('\u{6d5}', '\u{6d5}', SC_OLetter), ('\u{6d6}', '\u{6dc}', SC_Extend), ('\u{6dd}', + '\u{6dd}', SC_Format), ('\u{6df}', '\u{6e4}', SC_Extend), ('\u{6e5}', '\u{6e6}', + SC_OLetter), ('\u{6e7}', '\u{6e8}', SC_Extend), ('\u{6ea}', '\u{6ed}', SC_Extend), + ('\u{6ee}', '\u{6ef}', SC_OLetter), ('\u{6f0}', '\u{6f9}', SC_Numeric), ('\u{6fa}', + '\u{6fc}', SC_OLetter), ('\u{6ff}', '\u{6ff}', SC_OLetter), ('\u{700}', '\u{702}', + SC_STerm), ('\u{70f}', '\u{70f}', SC_Format), ('\u{710}', '\u{710}', SC_OLetter), + ('\u{711}', '\u{711}', SC_Extend), ('\u{712}', '\u{72f}', SC_OLetter), ('\u{730}', + '\u{74a}', SC_Extend), ('\u{74d}', '\u{7a5}', SC_OLetter), ('\u{7a6}', '\u{7b0}', + SC_Extend), ('\u{7b1}', '\u{7b1}', SC_OLetter), ('\u{7c0}', '\u{7c9}', SC_Numeric), + ('\u{7ca}', '\u{7ea}', SC_OLetter), ('\u{7eb}', '\u{7f3}', SC_Extend), ('\u{7f4}', + '\u{7f5}', SC_OLetter), ('\u{7f8}', '\u{7f8}', SC_SContinue), ('\u{7f9}', '\u{7f9}', + SC_STerm), ('\u{7fa}', '\u{7fa}', SC_OLetter), ('\u{800}', '\u{815}', SC_OLetter), + ('\u{816}', '\u{819}', SC_Extend), ('\u{81a}', '\u{81a}', SC_OLetter), ('\u{81b}', + '\u{823}', SC_Extend), ('\u{824}', '\u{824}', SC_OLetter), ('\u{825}', '\u{827}', + SC_Extend), ('\u{828}', '\u{828}', SC_OLetter), ('\u{829}', '\u{82d}', SC_Extend), + ('\u{840}', '\u{858}', SC_OLetter), ('\u{859}', '\u{85b}', SC_Extend), ('\u{8a0}', + '\u{8b4}', SC_OLetter), ('\u{8b6}', '\u{8bd}', SC_OLetter), ('\u{8d4}', '\u{8e1}', + SC_Extend), ('\u{8e2}', '\u{8e2}', SC_Format), ('\u{8e3}', '\u{903}', SC_Extend), + ('\u{904}', '\u{939}', SC_OLetter), ('\u{93a}', '\u{93c}', SC_Extend), ('\u{93d}', + '\u{93d}', SC_OLetter), ('\u{93e}', '\u{94f}', SC_Extend), ('\u{950}', '\u{950}', + SC_OLetter), ('\u{951}', '\u{957}', SC_Extend), ('\u{958}', '\u{961}', SC_OLetter), + ('\u{962}', '\u{963}', SC_Extend), ('\u{964}', '\u{965}', SC_STerm), ('\u{966}', '\u{96f}', + SC_Numeric), 
('\u{971}', '\u{980}', SC_OLetter), ('\u{981}', '\u{983}', SC_Extend), + ('\u{985}', '\u{98c}', SC_OLetter), ('\u{98f}', '\u{990}', SC_OLetter), ('\u{993}', + '\u{9a8}', SC_OLetter), ('\u{9aa}', '\u{9b0}', SC_OLetter), ('\u{9b2}', '\u{9b2}', + SC_OLetter), ('\u{9b6}', '\u{9b9}', SC_OLetter), ('\u{9bc}', '\u{9bc}', SC_Extend), + ('\u{9bd}', '\u{9bd}', SC_OLetter), ('\u{9be}', '\u{9c4}', SC_Extend), ('\u{9c7}', + '\u{9c8}', SC_Extend), ('\u{9cb}', '\u{9cd}', SC_Extend), ('\u{9ce}', '\u{9ce}', + SC_OLetter), ('\u{9d7}', '\u{9d7}', SC_Extend), ('\u{9dc}', '\u{9dd}', SC_OLetter), + ('\u{9df}', '\u{9e1}', SC_OLetter), ('\u{9e2}', '\u{9e3}', SC_Extend), ('\u{9e6}', + '\u{9ef}', SC_Numeric), ('\u{9f0}', '\u{9f1}', SC_OLetter), ('\u{a01}', '\u{a03}', + SC_Extend), ('\u{a05}', '\u{a0a}', SC_OLetter), ('\u{a0f}', '\u{a10}', SC_OLetter), + ('\u{a13}', '\u{a28}', SC_OLetter), ('\u{a2a}', '\u{a30}', SC_OLetter), ('\u{a32}', + '\u{a33}', SC_OLetter), ('\u{a35}', '\u{a36}', SC_OLetter), ('\u{a38}', '\u{a39}', + SC_OLetter), ('\u{a3c}', '\u{a3c}', SC_Extend), ('\u{a3e}', '\u{a42}', SC_Extend), + ('\u{a47}', '\u{a48}', SC_Extend), ('\u{a4b}', '\u{a4d}', SC_Extend), ('\u{a51}', '\u{a51}', + SC_Extend), ('\u{a59}', '\u{a5c}', SC_OLetter), ('\u{a5e}', '\u{a5e}', SC_OLetter), + ('\u{a66}', '\u{a6f}', SC_Numeric), ('\u{a70}', '\u{a71}', SC_Extend), ('\u{a72}', + '\u{a74}', SC_OLetter), ('\u{a75}', '\u{a75}', SC_Extend), ('\u{a81}', '\u{a83}', + SC_Extend), ('\u{a85}', '\u{a8d}', SC_OLetter), ('\u{a8f}', '\u{a91}', SC_OLetter), + ('\u{a93}', '\u{aa8}', SC_OLetter), ('\u{aaa}', '\u{ab0}', SC_OLetter), ('\u{ab2}', + '\u{ab3}', SC_OLetter), ('\u{ab5}', '\u{ab9}', SC_OLetter), ('\u{abc}', '\u{abc}', + SC_Extend), ('\u{abd}', '\u{abd}', SC_OLetter), ('\u{abe}', '\u{ac5}', SC_Extend), + ('\u{ac7}', '\u{ac9}', SC_Extend), ('\u{acb}', '\u{acd}', SC_Extend), ('\u{ad0}', '\u{ad0}', + SC_OLetter), ('\u{ae0}', '\u{ae1}', SC_OLetter), ('\u{ae2}', '\u{ae3}', SC_Extend), + ('\u{ae6}', '\u{aef}', 
SC_Numeric), ('\u{af9}', '\u{af9}', SC_OLetter), ('\u{b01}', + '\u{b03}', SC_Extend), ('\u{b05}', '\u{b0c}', SC_OLetter), ('\u{b0f}', '\u{b10}', + SC_OLetter), ('\u{b13}', '\u{b28}', SC_OLetter), ('\u{b2a}', '\u{b30}', SC_OLetter), + ('\u{b32}', '\u{b33}', SC_OLetter), ('\u{b35}', '\u{b39}', SC_OLetter), ('\u{b3c}', + '\u{b3c}', SC_Extend), ('\u{b3d}', '\u{b3d}', SC_OLetter), ('\u{b3e}', '\u{b44}', + SC_Extend), ('\u{b47}', '\u{b48}', SC_Extend), ('\u{b4b}', '\u{b4d}', SC_Extend), + ('\u{b56}', '\u{b57}', SC_Extend), ('\u{b5c}', '\u{b5d}', SC_OLetter), ('\u{b5f}', + '\u{b61}', SC_OLetter), ('\u{b62}', '\u{b63}', SC_Extend), ('\u{b66}', '\u{b6f}', + SC_Numeric), ('\u{b71}', '\u{b71}', SC_OLetter), ('\u{b82}', '\u{b82}', SC_Extend), + ('\u{b83}', '\u{b83}', SC_OLetter), ('\u{b85}', '\u{b8a}', SC_OLetter), ('\u{b8e}', + '\u{b90}', SC_OLetter), ('\u{b92}', '\u{b95}', SC_OLetter), ('\u{b99}', '\u{b9a}', + SC_OLetter), ('\u{b9c}', '\u{b9c}', SC_OLetter), ('\u{b9e}', '\u{b9f}', SC_OLetter), + ('\u{ba3}', '\u{ba4}', SC_OLetter), ('\u{ba8}', '\u{baa}', SC_OLetter), ('\u{bae}', + '\u{bb9}', SC_OLetter), ('\u{bbe}', '\u{bc2}', SC_Extend), ('\u{bc6}', '\u{bc8}', + SC_Extend), ('\u{bca}', '\u{bcd}', SC_Extend), ('\u{bd0}', '\u{bd0}', SC_OLetter), + ('\u{bd7}', '\u{bd7}', SC_Extend), ('\u{be6}', '\u{bef}', SC_Numeric), ('\u{c00}', + '\u{c03}', SC_Extend), ('\u{c05}', '\u{c0c}', SC_OLetter), ('\u{c0e}', '\u{c10}', + SC_OLetter), ('\u{c12}', '\u{c28}', SC_OLetter), ('\u{c2a}', '\u{c39}', SC_OLetter), + ('\u{c3d}', '\u{c3d}', SC_OLetter), ('\u{c3e}', '\u{c44}', SC_Extend), ('\u{c46}', + '\u{c48}', SC_Extend), ('\u{c4a}', '\u{c4d}', SC_Extend), ('\u{c55}', '\u{c56}', SC_Extend), + ('\u{c58}', '\u{c5a}', SC_OLetter), ('\u{c60}', '\u{c61}', SC_OLetter), ('\u{c62}', + '\u{c63}', SC_Extend), ('\u{c66}', '\u{c6f}', SC_Numeric), ('\u{c80}', '\u{c80}', + SC_OLetter), ('\u{c81}', '\u{c83}', SC_Extend), ('\u{c85}', '\u{c8c}', SC_OLetter), + ('\u{c8e}', '\u{c90}', SC_OLetter), ('\u{c92}', 
'\u{ca8}', SC_OLetter), ('\u{caa}', + '\u{cb3}', SC_OLetter), ('\u{cb5}', '\u{cb9}', SC_OLetter), ('\u{cbc}', '\u{cbc}', + SC_Extend), ('\u{cbd}', '\u{cbd}', SC_OLetter), ('\u{cbe}', '\u{cc4}', SC_Extend), + ('\u{cc6}', '\u{cc8}', SC_Extend), ('\u{cca}', '\u{ccd}', SC_Extend), ('\u{cd5}', '\u{cd6}', + SC_Extend), ('\u{cde}', '\u{cde}', SC_OLetter), ('\u{ce0}', '\u{ce1}', SC_OLetter), + ('\u{ce2}', '\u{ce3}', SC_Extend), ('\u{ce6}', '\u{cef}', SC_Numeric), ('\u{cf1}', + '\u{cf2}', SC_OLetter), ('\u{d01}', '\u{d03}', SC_Extend), ('\u{d05}', '\u{d0c}', + SC_OLetter), ('\u{d0e}', '\u{d10}', SC_OLetter), ('\u{d12}', '\u{d3a}', SC_OLetter), + ('\u{d3d}', '\u{d3d}', SC_OLetter), ('\u{d3e}', '\u{d44}', SC_Extend), ('\u{d46}', + '\u{d48}', SC_Extend), ('\u{d4a}', '\u{d4d}', SC_Extend), ('\u{d4e}', '\u{d4e}', + SC_OLetter), ('\u{d54}', '\u{d56}', SC_OLetter), ('\u{d57}', '\u{d57}', SC_Extend), + ('\u{d5f}', '\u{d61}', SC_OLetter), ('\u{d62}', '\u{d63}', SC_Extend), ('\u{d66}', + '\u{d6f}', SC_Numeric), ('\u{d7a}', '\u{d7f}', SC_OLetter), ('\u{d82}', '\u{d83}', + SC_Extend), ('\u{d85}', '\u{d96}', SC_OLetter), ('\u{d9a}', '\u{db1}', SC_OLetter), + ('\u{db3}', '\u{dbb}', SC_OLetter), ('\u{dbd}', '\u{dbd}', SC_OLetter), ('\u{dc0}', + '\u{dc6}', SC_OLetter), ('\u{dca}', '\u{dca}', SC_Extend), ('\u{dcf}', '\u{dd4}', + SC_Extend), ('\u{dd6}', '\u{dd6}', SC_Extend), ('\u{dd8}', '\u{ddf}', SC_Extend), + ('\u{de6}', '\u{def}', SC_Numeric), ('\u{df2}', '\u{df3}', SC_Extend), ('\u{e01}', + '\u{e30}', SC_OLetter), ('\u{e31}', '\u{e31}', SC_Extend), ('\u{e32}', '\u{e33}', + SC_OLetter), ('\u{e34}', '\u{e3a}', SC_Extend), ('\u{e40}', '\u{e46}', SC_OLetter), + ('\u{e47}', '\u{e4e}', SC_Extend), ('\u{e50}', '\u{e59}', SC_Numeric), ('\u{e81}', + '\u{e82}', SC_OLetter), ('\u{e84}', '\u{e84}', SC_OLetter), ('\u{e87}', '\u{e88}', + SC_OLetter), ('\u{e8a}', '\u{e8a}', SC_OLetter), ('\u{e8d}', '\u{e8d}', SC_OLetter), + ('\u{e94}', '\u{e97}', SC_OLetter), ('\u{e99}', '\u{e9f}', SC_OLetter), 
('\u{ea1}', + '\u{ea3}', SC_OLetter), ('\u{ea5}', '\u{ea5}', SC_OLetter), ('\u{ea7}', '\u{ea7}', + SC_OLetter), ('\u{eaa}', '\u{eab}', SC_OLetter), ('\u{ead}', '\u{eb0}', SC_OLetter), + ('\u{eb1}', '\u{eb1}', SC_Extend), ('\u{eb2}', '\u{eb3}', SC_OLetter), ('\u{eb4}', + '\u{eb9}', SC_Extend), ('\u{ebb}', '\u{ebc}', SC_Extend), ('\u{ebd}', '\u{ebd}', + SC_OLetter), ('\u{ec0}', '\u{ec4}', SC_OLetter), ('\u{ec6}', '\u{ec6}', SC_OLetter), + ('\u{ec8}', '\u{ecd}', SC_Extend), ('\u{ed0}', '\u{ed9}', SC_Numeric), ('\u{edc}', + '\u{edf}', SC_OLetter), ('\u{f00}', '\u{f00}', SC_OLetter), ('\u{f18}', '\u{f19}', + SC_Extend), ('\u{f20}', '\u{f29}', SC_Numeric), ('\u{f35}', '\u{f35}', SC_Extend), + ('\u{f37}', '\u{f37}', SC_Extend), ('\u{f39}', '\u{f39}', SC_Extend), ('\u{f3a}', '\u{f3d}', + SC_Close), ('\u{f3e}', '\u{f3f}', SC_Extend), ('\u{f40}', '\u{f47}', SC_OLetter), + ('\u{f49}', '\u{f6c}', SC_OLetter), ('\u{f71}', '\u{f84}', SC_Extend), ('\u{f86}', + '\u{f87}', SC_Extend), ('\u{f88}', '\u{f8c}', SC_OLetter), ('\u{f8d}', '\u{f97}', + SC_Extend), ('\u{f99}', '\u{fbc}', SC_Extend), ('\u{fc6}', '\u{fc6}', SC_Extend), + ('\u{1000}', '\u{102a}', SC_OLetter), ('\u{102b}', '\u{103e}', SC_Extend), ('\u{103f}', + '\u{103f}', SC_OLetter), ('\u{1040}', '\u{1049}', SC_Numeric), ('\u{104a}', '\u{104b}', + SC_STerm), ('\u{1050}', '\u{1055}', SC_OLetter), ('\u{1056}', '\u{1059}', SC_Extend), + ('\u{105a}', '\u{105d}', SC_OLetter), ('\u{105e}', '\u{1060}', SC_Extend), ('\u{1061}', + '\u{1061}', SC_OLetter), ('\u{1062}', '\u{1064}', SC_Extend), ('\u{1065}', '\u{1066}', + SC_OLetter), ('\u{1067}', '\u{106d}', SC_Extend), ('\u{106e}', '\u{1070}', SC_OLetter), + ('\u{1071}', '\u{1074}', SC_Extend), ('\u{1075}', '\u{1081}', SC_OLetter), ('\u{1082}', + '\u{108d}', SC_Extend), ('\u{108e}', '\u{108e}', SC_OLetter), ('\u{108f}', '\u{108f}', + SC_Extend), ('\u{1090}', '\u{1099}', SC_Numeric), ('\u{109a}', '\u{109d}', SC_Extend), + ('\u{10a0}', '\u{10c5}', SC_Upper), ('\u{10c7}', '\u{10c7}', 
SC_Upper), ('\u{10cd}', + '\u{10cd}', SC_Upper), ('\u{10d0}', '\u{10fa}', SC_OLetter), ('\u{10fc}', '\u{1248}', + SC_OLetter), ('\u{124a}', '\u{124d}', SC_OLetter), ('\u{1250}', '\u{1256}', SC_OLetter), + ('\u{1258}', '\u{1258}', SC_OLetter), ('\u{125a}', '\u{125d}', SC_OLetter), ('\u{1260}', + '\u{1288}', SC_OLetter), ('\u{128a}', '\u{128d}', SC_OLetter), ('\u{1290}', '\u{12b0}', + SC_OLetter), ('\u{12b2}', '\u{12b5}', SC_OLetter), ('\u{12b8}', '\u{12be}', SC_OLetter), + ('\u{12c0}', '\u{12c0}', SC_OLetter), ('\u{12c2}', '\u{12c5}', SC_OLetter), ('\u{12c8}', + '\u{12d6}', SC_OLetter), ('\u{12d8}', '\u{1310}', SC_OLetter), ('\u{1312}', '\u{1315}', + SC_OLetter), ('\u{1318}', '\u{135a}', SC_OLetter), ('\u{135d}', '\u{135f}', SC_Extend), + ('\u{1362}', '\u{1362}', SC_STerm), ('\u{1367}', '\u{1368}', SC_STerm), ('\u{1380}', + '\u{138f}', SC_OLetter), ('\u{13a0}', '\u{13f5}', SC_Upper), ('\u{13f8}', '\u{13fd}', + SC_Lower), ('\u{1401}', '\u{166c}', SC_OLetter), ('\u{166e}', '\u{166e}', SC_STerm), + ('\u{166f}', '\u{167f}', SC_OLetter), ('\u{1680}', '\u{1680}', SC_Sp), ('\u{1681}', + '\u{169a}', SC_OLetter), ('\u{169b}', '\u{169c}', SC_Close), ('\u{16a0}', '\u{16ea}', + SC_OLetter), ('\u{16ee}', '\u{16f8}', SC_OLetter), ('\u{1700}', '\u{170c}', SC_OLetter), + ('\u{170e}', '\u{1711}', SC_OLetter), ('\u{1712}', '\u{1714}', SC_Extend), ('\u{1720}', + '\u{1731}', SC_OLetter), ('\u{1732}', '\u{1734}', SC_Extend), ('\u{1735}', '\u{1736}', + SC_STerm), ('\u{1740}', '\u{1751}', SC_OLetter), ('\u{1752}', '\u{1753}', SC_Extend), + ('\u{1760}', '\u{176c}', SC_OLetter), ('\u{176e}', '\u{1770}', SC_OLetter), ('\u{1772}', + '\u{1773}', SC_Extend), ('\u{1780}', '\u{17b3}', SC_OLetter), ('\u{17b4}', '\u{17d3}', + SC_Extend), ('\u{17d7}', '\u{17d7}', SC_OLetter), ('\u{17dc}', '\u{17dc}', SC_OLetter), + ('\u{17dd}', '\u{17dd}', SC_Extend), ('\u{17e0}', '\u{17e9}', SC_Numeric), ('\u{1802}', + '\u{1802}', SC_SContinue), ('\u{1803}', '\u{1803}', SC_STerm), ('\u{1808}', '\u{1808}', + 
SC_SContinue), ('\u{1809}', '\u{1809}', SC_STerm), ('\u{180b}', '\u{180d}', SC_Extend), + ('\u{180e}', '\u{180e}', SC_Format), ('\u{1810}', '\u{1819}', SC_Numeric), ('\u{1820}', + '\u{1877}', SC_OLetter), ('\u{1880}', '\u{1884}', SC_OLetter), ('\u{1885}', '\u{1886}', + SC_Extend), ('\u{1887}', '\u{18a8}', SC_OLetter), ('\u{18a9}', '\u{18a9}', SC_Extend), + ('\u{18aa}', '\u{18aa}', SC_OLetter), ('\u{18b0}', '\u{18f5}', SC_OLetter), ('\u{1900}', + '\u{191e}', SC_OLetter), ('\u{1920}', '\u{192b}', SC_Extend), ('\u{1930}', '\u{193b}', + SC_Extend), ('\u{1944}', '\u{1945}', SC_STerm), ('\u{1946}', '\u{194f}', SC_Numeric), + ('\u{1950}', '\u{196d}', SC_OLetter), ('\u{1970}', '\u{1974}', SC_OLetter), ('\u{1980}', + '\u{19ab}', SC_OLetter), ('\u{19b0}', '\u{19c9}', SC_OLetter), ('\u{19d0}', '\u{19d9}', + SC_Numeric), ('\u{1a00}', '\u{1a16}', SC_OLetter), ('\u{1a17}', '\u{1a1b}', SC_Extend), + ('\u{1a20}', '\u{1a54}', SC_OLetter), ('\u{1a55}', '\u{1a5e}', SC_Extend), ('\u{1a60}', + '\u{1a7c}', SC_Extend), ('\u{1a7f}', '\u{1a7f}', SC_Extend), ('\u{1a80}', '\u{1a89}', + SC_Numeric), ('\u{1a90}', '\u{1a99}', SC_Numeric), ('\u{1aa7}', '\u{1aa7}', SC_OLetter), + ('\u{1aa8}', '\u{1aab}', SC_STerm), ('\u{1ab0}', '\u{1abe}', SC_Extend), ('\u{1b00}', + '\u{1b04}', SC_Extend), ('\u{1b05}', '\u{1b33}', SC_OLetter), ('\u{1b34}', '\u{1b44}', + SC_Extend), ('\u{1b45}', '\u{1b4b}', SC_OLetter), ('\u{1b50}', '\u{1b59}', SC_Numeric), + ('\u{1b5a}', '\u{1b5b}', SC_STerm), ('\u{1b5e}', '\u{1b5f}', SC_STerm), ('\u{1b6b}', + '\u{1b73}', SC_Extend), ('\u{1b80}', '\u{1b82}', SC_Extend), ('\u{1b83}', '\u{1ba0}', + SC_OLetter), ('\u{1ba1}', '\u{1bad}', SC_Extend), ('\u{1bae}', '\u{1baf}', SC_OLetter), + ('\u{1bb0}', '\u{1bb9}', SC_Numeric), ('\u{1bba}', '\u{1be5}', SC_OLetter), ('\u{1be6}', + '\u{1bf3}', SC_Extend), ('\u{1c00}', '\u{1c23}', SC_OLetter), ('\u{1c24}', '\u{1c37}', + SC_Extend), ('\u{1c3b}', '\u{1c3c}', SC_STerm), ('\u{1c40}', '\u{1c49}', SC_Numeric), + ('\u{1c4d}', '\u{1c4f}', 
SC_OLetter), ('\u{1c50}', '\u{1c59}', SC_Numeric), ('\u{1c5a}', + '\u{1c7d}', SC_OLetter), ('\u{1c7e}', '\u{1c7f}', SC_STerm), ('\u{1c80}', '\u{1c88}', + SC_Lower), ('\u{1cd0}', '\u{1cd2}', SC_Extend), ('\u{1cd4}', '\u{1ce8}', SC_Extend), + ('\u{1ce9}', '\u{1cec}', SC_OLetter), ('\u{1ced}', '\u{1ced}', SC_Extend), ('\u{1cee}', + '\u{1cf1}', SC_OLetter), ('\u{1cf2}', '\u{1cf4}', SC_Extend), ('\u{1cf5}', '\u{1cf6}', + SC_OLetter), ('\u{1cf8}', '\u{1cf9}', SC_Extend), ('\u{1d00}', '\u{1dbf}', SC_Lower), + ('\u{1dc0}', '\u{1df5}', SC_Extend), ('\u{1dfb}', '\u{1dff}', SC_Extend), ('\u{1e00}', + '\u{1e00}', SC_Upper), ('\u{1e01}', '\u{1e01}', SC_Lower), ('\u{1e02}', '\u{1e02}', + SC_Upper), ('\u{1e03}', '\u{1e03}', SC_Lower), ('\u{1e04}', '\u{1e04}', SC_Upper), + ('\u{1e05}', '\u{1e05}', SC_Lower), ('\u{1e06}', '\u{1e06}', SC_Upper), ('\u{1e07}', + '\u{1e07}', SC_Lower), ('\u{1e08}', '\u{1e08}', SC_Upper), ('\u{1e09}', '\u{1e09}', + SC_Lower), ('\u{1e0a}', '\u{1e0a}', SC_Upper), ('\u{1e0b}', '\u{1e0b}', SC_Lower), + ('\u{1e0c}', '\u{1e0c}', SC_Upper), ('\u{1e0d}', '\u{1e0d}', SC_Lower), ('\u{1e0e}', + '\u{1e0e}', SC_Upper), ('\u{1e0f}', '\u{1e0f}', SC_Lower), ('\u{1e10}', '\u{1e10}', + SC_Upper), ('\u{1e11}', '\u{1e11}', SC_Lower), ('\u{1e12}', '\u{1e12}', SC_Upper), + ('\u{1e13}', '\u{1e13}', SC_Lower), ('\u{1e14}', '\u{1e14}', SC_Upper), ('\u{1e15}', + '\u{1e15}', SC_Lower), ('\u{1e16}', '\u{1e16}', SC_Upper), ('\u{1e17}', '\u{1e17}', + SC_Lower), ('\u{1e18}', '\u{1e18}', SC_Upper), ('\u{1e19}', '\u{1e19}', SC_Lower), + ('\u{1e1a}', '\u{1e1a}', SC_Upper), ('\u{1e1b}', '\u{1e1b}', SC_Lower), ('\u{1e1c}', + '\u{1e1c}', SC_Upper), ('\u{1e1d}', '\u{1e1d}', SC_Lower), ('\u{1e1e}', '\u{1e1e}', + SC_Upper), ('\u{1e1f}', '\u{1e1f}', SC_Lower), ('\u{1e20}', '\u{1e20}', SC_Upper), + ('\u{1e21}', '\u{1e21}', SC_Lower), ('\u{1e22}', '\u{1e22}', SC_Upper), ('\u{1e23}', + '\u{1e23}', SC_Lower), ('\u{1e24}', '\u{1e24}', SC_Upper), ('\u{1e25}', '\u{1e25}', + SC_Lower), ('\u{1e26}', 
'\u{1e26}', SC_Upper), ('\u{1e27}', '\u{1e27}', SC_Lower), + ('\u{1e28}', '\u{1e28}', SC_Upper), ('\u{1e29}', '\u{1e29}', SC_Lower), ('\u{1e2a}', + '\u{1e2a}', SC_Upper), ('\u{1e2b}', '\u{1e2b}', SC_Lower), ('\u{1e2c}', '\u{1e2c}', + SC_Upper), ('\u{1e2d}', '\u{1e2d}', SC_Lower), ('\u{1e2e}', '\u{1e2e}', SC_Upper), + ('\u{1e2f}', '\u{1e2f}', SC_Lower), ('\u{1e30}', '\u{1e30}', SC_Upper), ('\u{1e31}', + '\u{1e31}', SC_Lower), ('\u{1e32}', '\u{1e32}', SC_Upper), ('\u{1e33}', '\u{1e33}', + SC_Lower), ('\u{1e34}', '\u{1e34}', SC_Upper), ('\u{1e35}', '\u{1e35}', SC_Lower), + ('\u{1e36}', '\u{1e36}', SC_Upper), ('\u{1e37}', '\u{1e37}', SC_Lower), ('\u{1e38}', + '\u{1e38}', SC_Upper), ('\u{1e39}', '\u{1e39}', SC_Lower), ('\u{1e3a}', '\u{1e3a}', + SC_Upper), ('\u{1e3b}', '\u{1e3b}', SC_Lower), ('\u{1e3c}', '\u{1e3c}', SC_Upper), + ('\u{1e3d}', '\u{1e3d}', SC_Lower), ('\u{1e3e}', '\u{1e3e}', SC_Upper), ('\u{1e3f}', + '\u{1e3f}', SC_Lower), ('\u{1e40}', '\u{1e40}', SC_Upper), ('\u{1e41}', '\u{1e41}', + SC_Lower), ('\u{1e42}', '\u{1e42}', SC_Upper), ('\u{1e43}', '\u{1e43}', SC_Lower), + ('\u{1e44}', '\u{1e44}', SC_Upper), ('\u{1e45}', '\u{1e45}', SC_Lower), ('\u{1e46}', + '\u{1e46}', SC_Upper), ('\u{1e47}', '\u{1e47}', SC_Lower), ('\u{1e48}', '\u{1e48}', + SC_Upper), ('\u{1e49}', '\u{1e49}', SC_Lower), ('\u{1e4a}', '\u{1e4a}', SC_Upper), + ('\u{1e4b}', '\u{1e4b}', SC_Lower), ('\u{1e4c}', '\u{1e4c}', SC_Upper), ('\u{1e4d}', + '\u{1e4d}', SC_Lower), ('\u{1e4e}', '\u{1e4e}', SC_Upper), ('\u{1e4f}', '\u{1e4f}', + SC_Lower), ('\u{1e50}', '\u{1e50}', SC_Upper), ('\u{1e51}', '\u{1e51}', SC_Lower), + ('\u{1e52}', '\u{1e52}', SC_Upper), ('\u{1e53}', '\u{1e53}', SC_Lower), ('\u{1e54}', + '\u{1e54}', SC_Upper), ('\u{1e55}', '\u{1e55}', SC_Lower), ('\u{1e56}', '\u{1e56}', + SC_Upper), ('\u{1e57}', '\u{1e57}', SC_Lower), ('\u{1e58}', '\u{1e58}', SC_Upper), + ('\u{1e59}', '\u{1e59}', SC_Lower), ('\u{1e5a}', '\u{1e5a}', SC_Upper), ('\u{1e5b}', + '\u{1e5b}', SC_Lower), ('\u{1e5c}', 
'\u{1e5c}', SC_Upper), ('\u{1e5d}', '\u{1e5d}', + SC_Lower), ('\u{1e5e}', '\u{1e5e}', SC_Upper), ('\u{1e5f}', '\u{1e5f}', SC_Lower), + ('\u{1e60}', '\u{1e60}', SC_Upper), ('\u{1e61}', '\u{1e61}', SC_Lower), ('\u{1e62}', + '\u{1e62}', SC_Upper), ('\u{1e63}', '\u{1e63}', SC_Lower), ('\u{1e64}', '\u{1e64}', + SC_Upper), ('\u{1e65}', '\u{1e65}', SC_Lower), ('\u{1e66}', '\u{1e66}', SC_Upper), + ('\u{1e67}', '\u{1e67}', SC_Lower), ('\u{1e68}', '\u{1e68}', SC_Upper), ('\u{1e69}', + '\u{1e69}', SC_Lower), ('\u{1e6a}', '\u{1e6a}', SC_Upper), ('\u{1e6b}', '\u{1e6b}', + SC_Lower), ('\u{1e6c}', '\u{1e6c}', SC_Upper), ('\u{1e6d}', '\u{1e6d}', SC_Lower), + ('\u{1e6e}', '\u{1e6e}', SC_Upper), ('\u{1e6f}', '\u{1e6f}', SC_Lower), ('\u{1e70}', + '\u{1e70}', SC_Upper), ('\u{1e71}', '\u{1e71}', SC_Lower), ('\u{1e72}', '\u{1e72}', + SC_Upper), ('\u{1e73}', '\u{1e73}', SC_Lower), ('\u{1e74}', '\u{1e74}', SC_Upper), + ('\u{1e75}', '\u{1e75}', SC_Lower), ('\u{1e76}', '\u{1e76}', SC_Upper), ('\u{1e77}', + '\u{1e77}', SC_Lower), ('\u{1e78}', '\u{1e78}', SC_Upper), ('\u{1e79}', '\u{1e79}', + SC_Lower), ('\u{1e7a}', '\u{1e7a}', SC_Upper), ('\u{1e7b}', '\u{1e7b}', SC_Lower), + ('\u{1e7c}', '\u{1e7c}', SC_Upper), ('\u{1e7d}', '\u{1e7d}', SC_Lower), ('\u{1e7e}', + '\u{1e7e}', SC_Upper), ('\u{1e7f}', '\u{1e7f}', SC_Lower), ('\u{1e80}', '\u{1e80}', + SC_Upper), ('\u{1e81}', '\u{1e81}', SC_Lower), ('\u{1e82}', '\u{1e82}', SC_Upper), + ('\u{1e83}', '\u{1e83}', SC_Lower), ('\u{1e84}', '\u{1e84}', SC_Upper), ('\u{1e85}', + '\u{1e85}', SC_Lower), ('\u{1e86}', '\u{1e86}', SC_Upper), ('\u{1e87}', '\u{1e87}', + SC_Lower), ('\u{1e88}', '\u{1e88}', SC_Upper), ('\u{1e89}', '\u{1e89}', SC_Lower), + ('\u{1e8a}', '\u{1e8a}', SC_Upper), ('\u{1e8b}', '\u{1e8b}', SC_Lower), ('\u{1e8c}', + '\u{1e8c}', SC_Upper), ('\u{1e8d}', '\u{1e8d}', SC_Lower), ('\u{1e8e}', '\u{1e8e}', + SC_Upper), ('\u{1e8f}', '\u{1e8f}', SC_Lower), ('\u{1e90}', '\u{1e90}', SC_Upper), + ('\u{1e91}', '\u{1e91}', SC_Lower), ('\u{1e92}', 
'\u{1e92}', SC_Upper), ('\u{1e93}', + '\u{1e93}', SC_Lower), ('\u{1e94}', '\u{1e94}', SC_Upper), ('\u{1e95}', '\u{1e9d}', + SC_Lower), ('\u{1e9e}', '\u{1e9e}', SC_Upper), ('\u{1e9f}', '\u{1e9f}', SC_Lower), + ('\u{1ea0}', '\u{1ea0}', SC_Upper), ('\u{1ea1}', '\u{1ea1}', SC_Lower), ('\u{1ea2}', + '\u{1ea2}', SC_Upper), ('\u{1ea3}', '\u{1ea3}', SC_Lower), ('\u{1ea4}', '\u{1ea4}', + SC_Upper), ('\u{1ea5}', '\u{1ea5}', SC_Lower), ('\u{1ea6}', '\u{1ea6}', SC_Upper), + ('\u{1ea7}', '\u{1ea7}', SC_Lower), ('\u{1ea8}', '\u{1ea8}', SC_Upper), ('\u{1ea9}', + '\u{1ea9}', SC_Lower), ('\u{1eaa}', '\u{1eaa}', SC_Upper), ('\u{1eab}', '\u{1eab}', + SC_Lower), ('\u{1eac}', '\u{1eac}', SC_Upper), ('\u{1ead}', '\u{1ead}', SC_Lower), + ('\u{1eae}', '\u{1eae}', SC_Upper), ('\u{1eaf}', '\u{1eaf}', SC_Lower), ('\u{1eb0}', + '\u{1eb0}', SC_Upper), ('\u{1eb1}', '\u{1eb1}', SC_Lower), ('\u{1eb2}', '\u{1eb2}', + SC_Upper), ('\u{1eb3}', '\u{1eb3}', SC_Lower), ('\u{1eb4}', '\u{1eb4}', SC_Upper), + ('\u{1eb5}', '\u{1eb5}', SC_Lower), ('\u{1eb6}', '\u{1eb6}', SC_Upper), ('\u{1eb7}', + '\u{1eb7}', SC_Lower), ('\u{1eb8}', '\u{1eb8}', SC_Upper), ('\u{1eb9}', '\u{1eb9}', + SC_Lower), ('\u{1eba}', '\u{1eba}', SC_Upper), ('\u{1ebb}', '\u{1ebb}', SC_Lower), + ('\u{1ebc}', '\u{1ebc}', SC_Upper), ('\u{1ebd}', '\u{1ebd}', SC_Lower), ('\u{1ebe}', + '\u{1ebe}', SC_Upper), ('\u{1ebf}', '\u{1ebf}', SC_Lower), ('\u{1ec0}', '\u{1ec0}', + SC_Upper), ('\u{1ec1}', '\u{1ec1}', SC_Lower), ('\u{1ec2}', '\u{1ec2}', SC_Upper), + ('\u{1ec3}', '\u{1ec3}', SC_Lower), ('\u{1ec4}', '\u{1ec4}', SC_Upper), ('\u{1ec5}', + '\u{1ec5}', SC_Lower), ('\u{1ec6}', '\u{1ec6}', SC_Upper), ('\u{1ec7}', '\u{1ec7}', + SC_Lower), ('\u{1ec8}', '\u{1ec8}', SC_Upper), ('\u{1ec9}', '\u{1ec9}', SC_Lower), + ('\u{1eca}', '\u{1eca}', SC_Upper), ('\u{1ecb}', '\u{1ecb}', SC_Lower), ('\u{1ecc}', + '\u{1ecc}', SC_Upper), ('\u{1ecd}', '\u{1ecd}', SC_Lower), ('\u{1ece}', '\u{1ece}', + SC_Upper), ('\u{1ecf}', '\u{1ecf}', SC_Lower), ('\u{1ed0}', 
'\u{1ed0}', SC_Upper), + ('\u{1ed1}', '\u{1ed1}', SC_Lower), ('\u{1ed2}', '\u{1ed2}', SC_Upper), ('\u{1ed3}', + '\u{1ed3}', SC_Lower), ('\u{1ed4}', '\u{1ed4}', SC_Upper), ('\u{1ed5}', '\u{1ed5}', + SC_Lower), ('\u{1ed6}', '\u{1ed6}', SC_Upper), ('\u{1ed7}', '\u{1ed7}', SC_Lower), + ('\u{1ed8}', '\u{1ed8}', SC_Upper), ('\u{1ed9}', '\u{1ed9}', SC_Lower), ('\u{1eda}', + '\u{1eda}', SC_Upper), ('\u{1edb}', '\u{1edb}', SC_Lower), ('\u{1edc}', '\u{1edc}', + SC_Upper), ('\u{1edd}', '\u{1edd}', SC_Lower), ('\u{1ede}', '\u{1ede}', SC_Upper), + ('\u{1edf}', '\u{1edf}', SC_Lower), ('\u{1ee0}', '\u{1ee0}', SC_Upper), ('\u{1ee1}', + '\u{1ee1}', SC_Lower), ('\u{1ee2}', '\u{1ee2}', SC_Upper), ('\u{1ee3}', '\u{1ee3}', + SC_Lower), ('\u{1ee4}', '\u{1ee4}', SC_Upper), ('\u{1ee5}', '\u{1ee5}', SC_Lower), + ('\u{1ee6}', '\u{1ee6}', SC_Upper), ('\u{1ee7}', '\u{1ee7}', SC_Lower), ('\u{1ee8}', + '\u{1ee8}', SC_Upper), ('\u{1ee9}', '\u{1ee9}', SC_Lower), ('\u{1eea}', '\u{1eea}', + SC_Upper), ('\u{1eeb}', '\u{1eeb}', SC_Lower), ('\u{1eec}', '\u{1eec}', SC_Upper), + ('\u{1eed}', '\u{1eed}', SC_Lower), ('\u{1eee}', '\u{1eee}', SC_Upper), ('\u{1eef}', + '\u{1eef}', SC_Lower), ('\u{1ef0}', '\u{1ef0}', SC_Upper), ('\u{1ef1}', '\u{1ef1}', + SC_Lower), ('\u{1ef2}', '\u{1ef2}', SC_Upper), ('\u{1ef3}', '\u{1ef3}', SC_Lower), + ('\u{1ef4}', '\u{1ef4}', SC_Upper), ('\u{1ef5}', '\u{1ef5}', SC_Lower), ('\u{1ef6}', + '\u{1ef6}', SC_Upper), ('\u{1ef7}', '\u{1ef7}', SC_Lower), ('\u{1ef8}', '\u{1ef8}', + SC_Upper), ('\u{1ef9}', '\u{1ef9}', SC_Lower), ('\u{1efa}', '\u{1efa}', SC_Upper), + ('\u{1efb}', '\u{1efb}', SC_Lower), ('\u{1efc}', '\u{1efc}', SC_Upper), ('\u{1efd}', + '\u{1efd}', SC_Lower), ('\u{1efe}', '\u{1efe}', SC_Upper), ('\u{1eff}', '\u{1f07}', + SC_Lower), ('\u{1f08}', '\u{1f0f}', SC_Upper), ('\u{1f10}', '\u{1f15}', SC_Lower), + ('\u{1f18}', '\u{1f1d}', SC_Upper), ('\u{1f20}', '\u{1f27}', SC_Lower), ('\u{1f28}', + '\u{1f2f}', SC_Upper), ('\u{1f30}', '\u{1f37}', SC_Lower), ('\u{1f38}', 
'\u{1f3f}', + SC_Upper), ('\u{1f40}', '\u{1f45}', SC_Lower), ('\u{1f48}', '\u{1f4d}', SC_Upper), + ('\u{1f50}', '\u{1f57}', SC_Lower), ('\u{1f59}', '\u{1f59}', SC_Upper), ('\u{1f5b}', + '\u{1f5b}', SC_Upper), ('\u{1f5d}', '\u{1f5d}', SC_Upper), ('\u{1f5f}', '\u{1f5f}', + SC_Upper), ('\u{1f60}', '\u{1f67}', SC_Lower), ('\u{1f68}', '\u{1f6f}', SC_Upper), + ('\u{1f70}', '\u{1f7d}', SC_Lower), ('\u{1f80}', '\u{1f87}', SC_Lower), ('\u{1f88}', + '\u{1f8f}', SC_Upper), ('\u{1f90}', '\u{1f97}', SC_Lower), ('\u{1f98}', '\u{1f9f}', + SC_Upper), ('\u{1fa0}', '\u{1fa7}', SC_Lower), ('\u{1fa8}', '\u{1faf}', SC_Upper), + ('\u{1fb0}', '\u{1fb4}', SC_Lower), ('\u{1fb6}', '\u{1fb7}', SC_Lower), ('\u{1fb8}', + '\u{1fbc}', SC_Upper), ('\u{1fbe}', '\u{1fbe}', SC_Lower), ('\u{1fc2}', '\u{1fc4}', + SC_Lower), ('\u{1fc6}', '\u{1fc7}', SC_Lower), ('\u{1fc8}', '\u{1fcc}', SC_Upper), + ('\u{1fd0}', '\u{1fd3}', SC_Lower), ('\u{1fd6}', '\u{1fd7}', SC_Lower), ('\u{1fd8}', + '\u{1fdb}', SC_Upper), ('\u{1fe0}', '\u{1fe7}', SC_Lower), ('\u{1fe8}', '\u{1fec}', + SC_Upper), ('\u{1ff2}', '\u{1ff4}', SC_Lower), ('\u{1ff6}', '\u{1ff7}', SC_Lower), + ('\u{1ff8}', '\u{1ffc}', SC_Upper), ('\u{2000}', '\u{200a}', SC_Sp), ('\u{200b}', + '\u{200b}', SC_Format), ('\u{200c}', '\u{200d}', SC_Extend), ('\u{200e}', '\u{200f}', + SC_Format), ('\u{2013}', '\u{2014}', SC_SContinue), ('\u{2018}', '\u{201f}', SC_Close), + ('\u{2024}', '\u{2024}', SC_ATerm), ('\u{2028}', '\u{2029}', SC_Sep), ('\u{202a}', + '\u{202e}', SC_Format), ('\u{202f}', '\u{202f}', SC_Sp), ('\u{2039}', '\u{203a}', SC_Close), + ('\u{203c}', '\u{203d}', SC_STerm), ('\u{2045}', '\u{2046}', SC_Close), ('\u{2047}', + '\u{2049}', SC_STerm), ('\u{205f}', '\u{205f}', SC_Sp), ('\u{2060}', '\u{2064}', SC_Format), + ('\u{2066}', '\u{206f}', SC_Format), ('\u{2071}', '\u{2071}', SC_Lower), ('\u{207d}', + '\u{207e}', SC_Close), ('\u{207f}', '\u{207f}', SC_Lower), ('\u{208d}', '\u{208e}', + SC_Close), ('\u{2090}', '\u{209c}', SC_Lower), ('\u{20d0}', 
'\u{20f0}', SC_Extend), + ('\u{2102}', '\u{2102}', SC_Upper), ('\u{2107}', '\u{2107}', SC_Upper), ('\u{210a}', + '\u{210a}', SC_Lower), ('\u{210b}', '\u{210d}', SC_Upper), ('\u{210e}', '\u{210f}', + SC_Lower), ('\u{2110}', '\u{2112}', SC_Upper), ('\u{2113}', '\u{2113}', SC_Lower), + ('\u{2115}', '\u{2115}', SC_Upper), ('\u{2119}', '\u{211d}', SC_Upper), ('\u{2124}', + '\u{2124}', SC_Upper), ('\u{2126}', '\u{2126}', SC_Upper), ('\u{2128}', '\u{2128}', + SC_Upper), ('\u{212a}', '\u{212d}', SC_Upper), ('\u{212f}', '\u{212f}', SC_Lower), + ('\u{2130}', '\u{2133}', SC_Upper), ('\u{2134}', '\u{2134}', SC_Lower), ('\u{2135}', + '\u{2138}', SC_OLetter), ('\u{2139}', '\u{2139}', SC_Lower), ('\u{213c}', '\u{213d}', + SC_Lower), ('\u{213e}', '\u{213f}', SC_Upper), ('\u{2145}', '\u{2145}', SC_Upper), + ('\u{2146}', '\u{2149}', SC_Lower), ('\u{214e}', '\u{214e}', SC_Lower), ('\u{2160}', + '\u{216f}', SC_Upper), ('\u{2170}', '\u{217f}', SC_Lower), ('\u{2180}', '\u{2182}', + SC_OLetter), ('\u{2183}', '\u{2183}', SC_Upper), ('\u{2184}', '\u{2184}', SC_Lower), + ('\u{2185}', '\u{2188}', SC_OLetter), ('\u{2308}', '\u{230b}', SC_Close), ('\u{2329}', + '\u{232a}', SC_Close), ('\u{24b6}', '\u{24cf}', SC_Upper), ('\u{24d0}', '\u{24e9}', + SC_Lower), ('\u{275b}', '\u{2760}', SC_Close), ('\u{2768}', '\u{2775}', SC_Close), + ('\u{27c5}', '\u{27c6}', SC_Close), ('\u{27e6}', '\u{27ef}', SC_Close), ('\u{2983}', + '\u{2998}', SC_Close), ('\u{29d8}', '\u{29db}', SC_Close), ('\u{29fc}', '\u{29fd}', + SC_Close), ('\u{2c00}', '\u{2c2e}', SC_Upper), ('\u{2c30}', '\u{2c5e}', SC_Lower), + ('\u{2c60}', '\u{2c60}', SC_Upper), ('\u{2c61}', '\u{2c61}', SC_Lower), ('\u{2c62}', + '\u{2c64}', SC_Upper), ('\u{2c65}', '\u{2c66}', SC_Lower), ('\u{2c67}', '\u{2c67}', + SC_Upper), ('\u{2c68}', '\u{2c68}', SC_Lower), ('\u{2c69}', '\u{2c69}', SC_Upper), + ('\u{2c6a}', '\u{2c6a}', SC_Lower), ('\u{2c6b}', '\u{2c6b}', SC_Upper), ('\u{2c6c}', + '\u{2c6c}', SC_Lower), ('\u{2c6d}', '\u{2c70}', SC_Upper), ('\u{2c71}', 
'\u{2c71}', + SC_Lower), ('\u{2c72}', '\u{2c72}', SC_Upper), ('\u{2c73}', '\u{2c74}', SC_Lower), + ('\u{2c75}', '\u{2c75}', SC_Upper), ('\u{2c76}', '\u{2c7d}', SC_Lower), ('\u{2c7e}', + '\u{2c80}', SC_Upper), ('\u{2c81}', '\u{2c81}', SC_Lower), ('\u{2c82}', '\u{2c82}', + SC_Upper), ('\u{2c83}', '\u{2c83}', SC_Lower), ('\u{2c84}', '\u{2c84}', SC_Upper), + ('\u{2c85}', '\u{2c85}', SC_Lower), ('\u{2c86}', '\u{2c86}', SC_Upper), ('\u{2c87}', + '\u{2c87}', SC_Lower), ('\u{2c88}', '\u{2c88}', SC_Upper), ('\u{2c89}', '\u{2c89}', + SC_Lower), ('\u{2c8a}', '\u{2c8a}', SC_Upper), ('\u{2c8b}', '\u{2c8b}', SC_Lower), + ('\u{2c8c}', '\u{2c8c}', SC_Upper), ('\u{2c8d}', '\u{2c8d}', SC_Lower), ('\u{2c8e}', + '\u{2c8e}', SC_Upper), ('\u{2c8f}', '\u{2c8f}', SC_Lower), ('\u{2c90}', '\u{2c90}', + SC_Upper), ('\u{2c91}', '\u{2c91}', SC_Lower), ('\u{2c92}', '\u{2c92}', SC_Upper), + ('\u{2c93}', '\u{2c93}', SC_Lower), ('\u{2c94}', '\u{2c94}', SC_Upper), ('\u{2c95}', + '\u{2c95}', SC_Lower), ('\u{2c96}', '\u{2c96}', SC_Upper), ('\u{2c97}', '\u{2c97}', + SC_Lower), ('\u{2c98}', '\u{2c98}', SC_Upper), ('\u{2c99}', '\u{2c99}', SC_Lower), + ('\u{2c9a}', '\u{2c9a}', SC_Upper), ('\u{2c9b}', '\u{2c9b}', SC_Lower), ('\u{2c9c}', + '\u{2c9c}', SC_Upper), ('\u{2c9d}', '\u{2c9d}', SC_Lower), ('\u{2c9e}', '\u{2c9e}', + SC_Upper), ('\u{2c9f}', '\u{2c9f}', SC_Lower), ('\u{2ca0}', '\u{2ca0}', SC_Upper), + ('\u{2ca1}', '\u{2ca1}', SC_Lower), ('\u{2ca2}', '\u{2ca2}', SC_Upper), ('\u{2ca3}', + '\u{2ca3}', SC_Lower), ('\u{2ca4}', '\u{2ca4}', SC_Upper), ('\u{2ca5}', '\u{2ca5}', + SC_Lower), ('\u{2ca6}', '\u{2ca6}', SC_Upper), ('\u{2ca7}', '\u{2ca7}', SC_Lower), + ('\u{2ca8}', '\u{2ca8}', SC_Upper), ('\u{2ca9}', '\u{2ca9}', SC_Lower), ('\u{2caa}', + '\u{2caa}', SC_Upper), ('\u{2cab}', '\u{2cab}', SC_Lower), ('\u{2cac}', '\u{2cac}', + SC_Upper), ('\u{2cad}', '\u{2cad}', SC_Lower), ('\u{2cae}', '\u{2cae}', SC_Upper), + ('\u{2caf}', '\u{2caf}', SC_Lower), ('\u{2cb0}', '\u{2cb0}', SC_Upper), ('\u{2cb1}', + 
'\u{2cb1}', SC_Lower), ('\u{2cb2}', '\u{2cb2}', SC_Upper), ('\u{2cb3}', '\u{2cb3}', + SC_Lower), ('\u{2cb4}', '\u{2cb4}', SC_Upper), ('\u{2cb5}', '\u{2cb5}', SC_Lower), + ('\u{2cb6}', '\u{2cb6}', SC_Upper), ('\u{2cb7}', '\u{2cb7}', SC_Lower), ('\u{2cb8}', + '\u{2cb8}', SC_Upper), ('\u{2cb9}', '\u{2cb9}', SC_Lower), ('\u{2cba}', '\u{2cba}', + SC_Upper), ('\u{2cbb}', '\u{2cbb}', SC_Lower), ('\u{2cbc}', '\u{2cbc}', SC_Upper), + ('\u{2cbd}', '\u{2cbd}', SC_Lower), ('\u{2cbe}', '\u{2cbe}', SC_Upper), ('\u{2cbf}', + '\u{2cbf}', SC_Lower), ('\u{2cc0}', '\u{2cc0}', SC_Upper), ('\u{2cc1}', '\u{2cc1}', + SC_Lower), ('\u{2cc2}', '\u{2cc2}', SC_Upper), ('\u{2cc3}', '\u{2cc3}', SC_Lower), + ('\u{2cc4}', '\u{2cc4}', SC_Upper), ('\u{2cc5}', '\u{2cc5}', SC_Lower), ('\u{2cc6}', + '\u{2cc6}', SC_Upper), ('\u{2cc7}', '\u{2cc7}', SC_Lower), ('\u{2cc8}', '\u{2cc8}', + SC_Upper), ('\u{2cc9}', '\u{2cc9}', SC_Lower), ('\u{2cca}', '\u{2cca}', SC_Upper), + ('\u{2ccb}', '\u{2ccb}', SC_Lower), ('\u{2ccc}', '\u{2ccc}', SC_Upper), ('\u{2ccd}', + '\u{2ccd}', SC_Lower), ('\u{2cce}', '\u{2cce}', SC_Upper), ('\u{2ccf}', '\u{2ccf}', + SC_Lower), ('\u{2cd0}', '\u{2cd0}', SC_Upper), ('\u{2cd1}', '\u{2cd1}', SC_Lower), + ('\u{2cd2}', '\u{2cd2}', SC_Upper), ('\u{2cd3}', '\u{2cd3}', SC_Lower), ('\u{2cd4}', + '\u{2cd4}', SC_Upper), ('\u{2cd5}', '\u{2cd5}', SC_Lower), ('\u{2cd6}', '\u{2cd6}', + SC_Upper), ('\u{2cd7}', '\u{2cd7}', SC_Lower), ('\u{2cd8}', '\u{2cd8}', SC_Upper), + ('\u{2cd9}', '\u{2cd9}', SC_Lower), ('\u{2cda}', '\u{2cda}', SC_Upper), ('\u{2cdb}', + '\u{2cdb}', SC_Lower), ('\u{2cdc}', '\u{2cdc}', SC_Upper), ('\u{2cdd}', '\u{2cdd}', + SC_Lower), ('\u{2cde}', '\u{2cde}', SC_Upper), ('\u{2cdf}', '\u{2cdf}', SC_Lower), + ('\u{2ce0}', '\u{2ce0}', SC_Upper), ('\u{2ce1}', '\u{2ce1}', SC_Lower), ('\u{2ce2}', + '\u{2ce2}', SC_Upper), ('\u{2ce3}', '\u{2ce4}', SC_Lower), ('\u{2ceb}', '\u{2ceb}', + SC_Upper), ('\u{2cec}', '\u{2cec}', SC_Lower), ('\u{2ced}', '\u{2ced}', SC_Upper), + ('\u{2cee}', 
'\u{2cee}', SC_Lower), ('\u{2cef}', '\u{2cf1}', SC_Extend), ('\u{2cf2}', + '\u{2cf2}', SC_Upper), ('\u{2cf3}', '\u{2cf3}', SC_Lower), ('\u{2d00}', '\u{2d25}', + SC_Lower), ('\u{2d27}', '\u{2d27}', SC_Lower), ('\u{2d2d}', '\u{2d2d}', SC_Lower), + ('\u{2d30}', '\u{2d67}', SC_OLetter), ('\u{2d6f}', '\u{2d6f}', SC_OLetter), ('\u{2d7f}', + '\u{2d7f}', SC_Extend), ('\u{2d80}', '\u{2d96}', SC_OLetter), ('\u{2da0}', '\u{2da6}', + SC_OLetter), ('\u{2da8}', '\u{2dae}', SC_OLetter), ('\u{2db0}', '\u{2db6}', SC_OLetter), + ('\u{2db8}', '\u{2dbe}', SC_OLetter), ('\u{2dc0}', '\u{2dc6}', SC_OLetter), ('\u{2dc8}', + '\u{2dce}', SC_OLetter), ('\u{2dd0}', '\u{2dd6}', SC_OLetter), ('\u{2dd8}', '\u{2dde}', + SC_OLetter), ('\u{2de0}', '\u{2dff}', SC_Extend), ('\u{2e00}', '\u{2e0d}', SC_Close), + ('\u{2e1c}', '\u{2e1d}', SC_Close), ('\u{2e20}', '\u{2e29}', SC_Close), ('\u{2e2e}', + '\u{2e2e}', SC_STerm), ('\u{2e2f}', '\u{2e2f}', SC_OLetter), ('\u{2e3c}', '\u{2e3c}', + SC_STerm), ('\u{2e42}', '\u{2e42}', SC_Close), ('\u{3000}', '\u{3000}', SC_Sp), ('\u{3001}', + '\u{3001}', SC_SContinue), ('\u{3002}', '\u{3002}', SC_STerm), ('\u{3005}', '\u{3007}', + SC_OLetter), ('\u{3008}', '\u{3011}', SC_Close), ('\u{3014}', '\u{301b}', SC_Close), + ('\u{301d}', '\u{301f}', SC_Close), ('\u{3021}', '\u{3029}', SC_OLetter), ('\u{302a}', + '\u{302f}', SC_Extend), ('\u{3031}', '\u{3035}', SC_OLetter), ('\u{3038}', '\u{303c}', + SC_OLetter), ('\u{3041}', '\u{3096}', SC_OLetter), ('\u{3099}', '\u{309a}', SC_Extend), + ('\u{309d}', '\u{309f}', SC_OLetter), ('\u{30a1}', '\u{30fa}', SC_OLetter), ('\u{30fc}', + '\u{30ff}', SC_OLetter), ('\u{3105}', '\u{312d}', SC_OLetter), ('\u{3131}', '\u{318e}', + SC_OLetter), ('\u{31a0}', '\u{31ba}', SC_OLetter), ('\u{31f0}', '\u{31ff}', SC_OLetter), + ('\u{3400}', '\u{4db5}', SC_OLetter), ('\u{4e00}', '\u{9fd5}', SC_OLetter), ('\u{a000}', + '\u{a48c}', SC_OLetter), ('\u{a4d0}', '\u{a4fd}', SC_OLetter), ('\u{a4ff}', '\u{a4ff}', + SC_STerm), ('\u{a500}', '\u{a60c}', 
SC_OLetter), ('\u{a60e}', '\u{a60f}', SC_STerm), + ('\u{a610}', '\u{a61f}', SC_OLetter), ('\u{a620}', '\u{a629}', SC_Numeric), ('\u{a62a}', + '\u{a62b}', SC_OLetter), ('\u{a640}', '\u{a640}', SC_Upper), ('\u{a641}', '\u{a641}', + SC_Lower), ('\u{a642}', '\u{a642}', SC_Upper), ('\u{a643}', '\u{a643}', SC_Lower), + ('\u{a644}', '\u{a644}', SC_Upper), ('\u{a645}', '\u{a645}', SC_Lower), ('\u{a646}', + '\u{a646}', SC_Upper), ('\u{a647}', '\u{a647}', SC_Lower), ('\u{a648}', '\u{a648}', + SC_Upper), ('\u{a649}', '\u{a649}', SC_Lower), ('\u{a64a}', '\u{a64a}', SC_Upper), + ('\u{a64b}', '\u{a64b}', SC_Lower), ('\u{a64c}', '\u{a64c}', SC_Upper), ('\u{a64d}', + '\u{a64d}', SC_Lower), ('\u{a64e}', '\u{a64e}', SC_Upper), ('\u{a64f}', '\u{a64f}', + SC_Lower), ('\u{a650}', '\u{a650}', SC_Upper), ('\u{a651}', '\u{a651}', SC_Lower), + ('\u{a652}', '\u{a652}', SC_Upper), ('\u{a653}', '\u{a653}', SC_Lower), ('\u{a654}', + '\u{a654}', SC_Upper), ('\u{a655}', '\u{a655}', SC_Lower), ('\u{a656}', '\u{a656}', + SC_Upper), ('\u{a657}', '\u{a657}', SC_Lower), ('\u{a658}', '\u{a658}', SC_Upper), + ('\u{a659}', '\u{a659}', SC_Lower), ('\u{a65a}', '\u{a65a}', SC_Upper), ('\u{a65b}', + '\u{a65b}', SC_Lower), ('\u{a65c}', '\u{a65c}', SC_Upper), ('\u{a65d}', '\u{a65d}', + SC_Lower), ('\u{a65e}', '\u{a65e}', SC_Upper), ('\u{a65f}', '\u{a65f}', SC_Lower), + ('\u{a660}', '\u{a660}', SC_Upper), ('\u{a661}', '\u{a661}', SC_Lower), ('\u{a662}', + '\u{a662}', SC_Upper), ('\u{a663}', '\u{a663}', SC_Lower), ('\u{a664}', '\u{a664}', + SC_Upper), ('\u{a665}', '\u{a665}', SC_Lower), ('\u{a666}', '\u{a666}', SC_Upper), + ('\u{a667}', '\u{a667}', SC_Lower), ('\u{a668}', '\u{a668}', SC_Upper), ('\u{a669}', + '\u{a669}', SC_Lower), ('\u{a66a}', '\u{a66a}', SC_Upper), ('\u{a66b}', '\u{a66b}', + SC_Lower), ('\u{a66c}', '\u{a66c}', SC_Upper), ('\u{a66d}', '\u{a66d}', SC_Lower), + ('\u{a66e}', '\u{a66e}', SC_OLetter), ('\u{a66f}', '\u{a672}', SC_Extend), ('\u{a674}', + '\u{a67d}', SC_Extend), ('\u{a67f}', 
'\u{a67f}', SC_OLetter), ('\u{a680}', '\u{a680}', + SC_Upper), ('\u{a681}', '\u{a681}', SC_Lower), ('\u{a682}', '\u{a682}', SC_Upper), + ('\u{a683}', '\u{a683}', SC_Lower), ('\u{a684}', '\u{a684}', SC_Upper), ('\u{a685}', + '\u{a685}', SC_Lower), ('\u{a686}', '\u{a686}', SC_Upper), ('\u{a687}', '\u{a687}', + SC_Lower), ('\u{a688}', '\u{a688}', SC_Upper), ('\u{a689}', '\u{a689}', SC_Lower), + ('\u{a68a}', '\u{a68a}', SC_Upper), ('\u{a68b}', '\u{a68b}', SC_Lower), ('\u{a68c}', + '\u{a68c}', SC_Upper), ('\u{a68d}', '\u{a68d}', SC_Lower), ('\u{a68e}', '\u{a68e}', + SC_Upper), ('\u{a68f}', '\u{a68f}', SC_Lower), ('\u{a690}', '\u{a690}', SC_Upper), + ('\u{a691}', '\u{a691}', SC_Lower), ('\u{a692}', '\u{a692}', SC_Upper), ('\u{a693}', + '\u{a693}', SC_Lower), ('\u{a694}', '\u{a694}', SC_Upper), ('\u{a695}', '\u{a695}', + SC_Lower), ('\u{a696}', '\u{a696}', SC_Upper), ('\u{a697}', '\u{a697}', SC_Lower), + ('\u{a698}', '\u{a698}', SC_Upper), ('\u{a699}', '\u{a699}', SC_Lower), ('\u{a69a}', + '\u{a69a}', SC_Upper), ('\u{a69b}', '\u{a69d}', SC_Lower), ('\u{a69e}', '\u{a69f}', + SC_Extend), ('\u{a6a0}', '\u{a6ef}', SC_OLetter), ('\u{a6f0}', '\u{a6f1}', SC_Extend), + ('\u{a6f3}', '\u{a6f3}', SC_STerm), ('\u{a6f7}', '\u{a6f7}', SC_STerm), ('\u{a717}', + '\u{a71f}', SC_OLetter), ('\u{a722}', '\u{a722}', SC_Upper), ('\u{a723}', '\u{a723}', + SC_Lower), ('\u{a724}', '\u{a724}', SC_Upper), ('\u{a725}', '\u{a725}', SC_Lower), + ('\u{a726}', '\u{a726}', SC_Upper), ('\u{a727}', '\u{a727}', SC_Lower), ('\u{a728}', + '\u{a728}', SC_Upper), ('\u{a729}', '\u{a729}', SC_Lower), ('\u{a72a}', '\u{a72a}', + SC_Upper), ('\u{a72b}', '\u{a72b}', SC_Lower), ('\u{a72c}', '\u{a72c}', SC_Upper), + ('\u{a72d}', '\u{a72d}', SC_Lower), ('\u{a72e}', '\u{a72e}', SC_Upper), ('\u{a72f}', + '\u{a731}', SC_Lower), ('\u{a732}', '\u{a732}', SC_Upper), ('\u{a733}', '\u{a733}', + SC_Lower), ('\u{a734}', '\u{a734}', SC_Upper), ('\u{a735}', '\u{a735}', SC_Lower), + ('\u{a736}', '\u{a736}', SC_Upper), ('\u{a737}', 
'\u{a737}', SC_Lower), ('\u{a738}', + '\u{a738}', SC_Upper), ('\u{a739}', '\u{a739}', SC_Lower), ('\u{a73a}', '\u{a73a}', + SC_Upper), ('\u{a73b}', '\u{a73b}', SC_Lower), ('\u{a73c}', '\u{a73c}', SC_Upper), + ('\u{a73d}', '\u{a73d}', SC_Lower), ('\u{a73e}', '\u{a73e}', SC_Upper), ('\u{a73f}', + '\u{a73f}', SC_Lower), ('\u{a740}', '\u{a740}', SC_Upper), ('\u{a741}', '\u{a741}', + SC_Lower), ('\u{a742}', '\u{a742}', SC_Upper), ('\u{a743}', '\u{a743}', SC_Lower), + ('\u{a744}', '\u{a744}', SC_Upper), ('\u{a745}', '\u{a745}', SC_Lower), ('\u{a746}', + '\u{a746}', SC_Upper), ('\u{a747}', '\u{a747}', SC_Lower), ('\u{a748}', '\u{a748}', + SC_Upper), ('\u{a749}', '\u{a749}', SC_Lower), ('\u{a74a}', '\u{a74a}', SC_Upper), + ('\u{a74b}', '\u{a74b}', SC_Lower), ('\u{a74c}', '\u{a74c}', SC_Upper), ('\u{a74d}', + '\u{a74d}', SC_Lower), ('\u{a74e}', '\u{a74e}', SC_Upper), ('\u{a74f}', '\u{a74f}', + SC_Lower), ('\u{a750}', '\u{a750}', SC_Upper), ('\u{a751}', '\u{a751}', SC_Lower), + ('\u{a752}', '\u{a752}', SC_Upper), ('\u{a753}', '\u{a753}', SC_Lower), ('\u{a754}', + '\u{a754}', SC_Upper), ('\u{a755}', '\u{a755}', SC_Lower), ('\u{a756}', '\u{a756}', + SC_Upper), ('\u{a757}', '\u{a757}', SC_Lower), ('\u{a758}', '\u{a758}', SC_Upper), + ('\u{a759}', '\u{a759}', SC_Lower), ('\u{a75a}', '\u{a75a}', SC_Upper), ('\u{a75b}', + '\u{a75b}', SC_Lower), ('\u{a75c}', '\u{a75c}', SC_Upper), ('\u{a75d}', '\u{a75d}', + SC_Lower), ('\u{a75e}', '\u{a75e}', SC_Upper), ('\u{a75f}', '\u{a75f}', SC_Lower), + ('\u{a760}', '\u{a760}', SC_Upper), ('\u{a761}', '\u{a761}', SC_Lower), ('\u{a762}', + '\u{a762}', SC_Upper), ('\u{a763}', '\u{a763}', SC_Lower), ('\u{a764}', '\u{a764}', + SC_Upper), ('\u{a765}', '\u{a765}', SC_Lower), ('\u{a766}', '\u{a766}', SC_Upper), + ('\u{a767}', '\u{a767}', SC_Lower), ('\u{a768}', '\u{a768}', SC_Upper), ('\u{a769}', + '\u{a769}', SC_Lower), ('\u{a76a}', '\u{a76a}', SC_Upper), ('\u{a76b}', '\u{a76b}', + SC_Lower), ('\u{a76c}', '\u{a76c}', SC_Upper), ('\u{a76d}', 
'\u{a76d}', SC_Lower), + ('\u{a76e}', '\u{a76e}', SC_Upper), ('\u{a76f}', '\u{a778}', SC_Lower), ('\u{a779}', + '\u{a779}', SC_Upper), ('\u{a77a}', '\u{a77a}', SC_Lower), ('\u{a77b}', '\u{a77b}', + SC_Upper), ('\u{a77c}', '\u{a77c}', SC_Lower), ('\u{a77d}', '\u{a77e}', SC_Upper), + ('\u{a77f}', '\u{a77f}', SC_Lower), ('\u{a780}', '\u{a780}', SC_Upper), ('\u{a781}', + '\u{a781}', SC_Lower), ('\u{a782}', '\u{a782}', SC_Upper), ('\u{a783}', '\u{a783}', + SC_Lower), ('\u{a784}', '\u{a784}', SC_Upper), ('\u{a785}', '\u{a785}', SC_Lower), + ('\u{a786}', '\u{a786}', SC_Upper), ('\u{a787}', '\u{a787}', SC_Lower), ('\u{a788}', + '\u{a788}', SC_OLetter), ('\u{a78b}', '\u{a78b}', SC_Upper), ('\u{a78c}', '\u{a78c}', + SC_Lower), ('\u{a78d}', '\u{a78d}', SC_Upper), ('\u{a78e}', '\u{a78e}', SC_Lower), + ('\u{a78f}', '\u{a78f}', SC_OLetter), ('\u{a790}', '\u{a790}', SC_Upper), ('\u{a791}', + '\u{a791}', SC_Lower), ('\u{a792}', '\u{a792}', SC_Upper), ('\u{a793}', '\u{a795}', + SC_Lower), ('\u{a796}', '\u{a796}', SC_Upper), ('\u{a797}', '\u{a797}', SC_Lower), + ('\u{a798}', '\u{a798}', SC_Upper), ('\u{a799}', '\u{a799}', SC_Lower), ('\u{a79a}', + '\u{a79a}', SC_Upper), ('\u{a79b}', '\u{a79b}', SC_Lower), ('\u{a79c}', '\u{a79c}', + SC_Upper), ('\u{a79d}', '\u{a79d}', SC_Lower), ('\u{a79e}', '\u{a79e}', SC_Upper), + ('\u{a79f}', '\u{a79f}', SC_Lower), ('\u{a7a0}', '\u{a7a0}', SC_Upper), ('\u{a7a1}', + '\u{a7a1}', SC_Lower), ('\u{a7a2}', '\u{a7a2}', SC_Upper), ('\u{a7a3}', '\u{a7a3}', + SC_Lower), ('\u{a7a4}', '\u{a7a4}', SC_Upper), ('\u{a7a5}', '\u{a7a5}', SC_Lower), + ('\u{a7a6}', '\u{a7a6}', SC_Upper), ('\u{a7a7}', '\u{a7a7}', SC_Lower), ('\u{a7a8}', + '\u{a7a8}', SC_Upper), ('\u{a7a9}', '\u{a7a9}', SC_Lower), ('\u{a7aa}', '\u{a7ae}', + SC_Upper), ('\u{a7b0}', '\u{a7b4}', SC_Upper), ('\u{a7b5}', '\u{a7b5}', SC_Lower), + ('\u{a7b6}', '\u{a7b6}', SC_Upper), ('\u{a7b7}', '\u{a7b7}', SC_Lower), ('\u{a7f7}', + '\u{a7f7}', SC_OLetter), ('\u{a7f8}', '\u{a7fa}', SC_Lower), ('\u{a7fb}', 
'\u{a801}', + SC_OLetter), ('\u{a802}', '\u{a802}', SC_Extend), ('\u{a803}', '\u{a805}', SC_OLetter), + ('\u{a806}', '\u{a806}', SC_Extend), ('\u{a807}', '\u{a80a}', SC_OLetter), ('\u{a80b}', + '\u{a80b}', SC_Extend), ('\u{a80c}', '\u{a822}', SC_OLetter), ('\u{a823}', '\u{a827}', + SC_Extend), ('\u{a840}', '\u{a873}', SC_OLetter), ('\u{a876}', '\u{a877}', SC_STerm), + ('\u{a880}', '\u{a881}', SC_Extend), ('\u{a882}', '\u{a8b3}', SC_OLetter), ('\u{a8b4}', + '\u{a8c5}', SC_Extend), ('\u{a8ce}', '\u{a8cf}', SC_STerm), ('\u{a8d0}', '\u{a8d9}', + SC_Numeric), ('\u{a8e0}', '\u{a8f1}', SC_Extend), ('\u{a8f2}', '\u{a8f7}', SC_OLetter), + ('\u{a8fb}', '\u{a8fb}', SC_OLetter), ('\u{a8fd}', '\u{a8fd}', SC_OLetter), ('\u{a900}', + '\u{a909}', SC_Numeric), ('\u{a90a}', '\u{a925}', SC_OLetter), ('\u{a926}', '\u{a92d}', + SC_Extend), ('\u{a92f}', '\u{a92f}', SC_STerm), ('\u{a930}', '\u{a946}', SC_OLetter), + ('\u{a947}', '\u{a953}', SC_Extend), ('\u{a960}', '\u{a97c}', SC_OLetter), ('\u{a980}', + '\u{a983}', SC_Extend), ('\u{a984}', '\u{a9b2}', SC_OLetter), ('\u{a9b3}', '\u{a9c0}', + SC_Extend), ('\u{a9c8}', '\u{a9c9}', SC_STerm), ('\u{a9cf}', '\u{a9cf}', SC_OLetter), + ('\u{a9d0}', '\u{a9d9}', SC_Numeric), ('\u{a9e0}', '\u{a9e4}', SC_OLetter), ('\u{a9e5}', + '\u{a9e5}', SC_Extend), ('\u{a9e6}', '\u{a9ef}', SC_OLetter), ('\u{a9f0}', '\u{a9f9}', + SC_Numeric), ('\u{a9fa}', '\u{a9fe}', SC_OLetter), ('\u{aa00}', '\u{aa28}', SC_OLetter), + ('\u{aa29}', '\u{aa36}', SC_Extend), ('\u{aa40}', '\u{aa42}', SC_OLetter), ('\u{aa43}', + '\u{aa43}', SC_Extend), ('\u{aa44}', '\u{aa4b}', SC_OLetter), ('\u{aa4c}', '\u{aa4d}', + SC_Extend), ('\u{aa50}', '\u{aa59}', SC_Numeric), ('\u{aa5d}', '\u{aa5f}', SC_STerm), + ('\u{aa60}', '\u{aa76}', SC_OLetter), ('\u{aa7a}', '\u{aa7a}', SC_OLetter), ('\u{aa7b}', + '\u{aa7d}', SC_Extend), ('\u{aa7e}', '\u{aaaf}', SC_OLetter), ('\u{aab0}', '\u{aab0}', + SC_Extend), ('\u{aab1}', '\u{aab1}', SC_OLetter), ('\u{aab2}', '\u{aab4}', SC_Extend), + ('\u{aab5}', 
'\u{aab6}', SC_OLetter), ('\u{aab7}', '\u{aab8}', SC_Extend), ('\u{aab9}', + '\u{aabd}', SC_OLetter), ('\u{aabe}', '\u{aabf}', SC_Extend), ('\u{aac0}', '\u{aac0}', + SC_OLetter), ('\u{aac1}', '\u{aac1}', SC_Extend), ('\u{aac2}', '\u{aac2}', SC_OLetter), + ('\u{aadb}', '\u{aadd}', SC_OLetter), ('\u{aae0}', '\u{aaea}', SC_OLetter), ('\u{aaeb}', + '\u{aaef}', SC_Extend), ('\u{aaf0}', '\u{aaf1}', SC_STerm), ('\u{aaf2}', '\u{aaf4}', + SC_OLetter), ('\u{aaf5}', '\u{aaf6}', SC_Extend), ('\u{ab01}', '\u{ab06}', SC_OLetter), + ('\u{ab09}', '\u{ab0e}', SC_OLetter), ('\u{ab11}', '\u{ab16}', SC_OLetter), ('\u{ab20}', + '\u{ab26}', SC_OLetter), ('\u{ab28}', '\u{ab2e}', SC_OLetter), ('\u{ab30}', '\u{ab5a}', + SC_Lower), ('\u{ab5c}', '\u{ab65}', SC_Lower), ('\u{ab70}', '\u{abbf}', SC_Lower), + ('\u{abc0}', '\u{abe2}', SC_OLetter), ('\u{abe3}', '\u{abea}', SC_Extend), ('\u{abeb}', + '\u{abeb}', SC_STerm), ('\u{abec}', '\u{abed}', SC_Extend), ('\u{abf0}', '\u{abf9}', + SC_Numeric), ('\u{ac00}', '\u{d7a3}', SC_OLetter), ('\u{d7b0}', '\u{d7c6}', SC_OLetter), + ('\u{d7cb}', '\u{d7fb}', SC_OLetter), ('\u{f900}', '\u{fa6d}', SC_OLetter), ('\u{fa70}', + '\u{fad9}', SC_OLetter), ('\u{fb00}', '\u{fb06}', SC_Lower), ('\u{fb13}', '\u{fb17}', + SC_Lower), ('\u{fb1d}', '\u{fb1d}', SC_OLetter), ('\u{fb1e}', '\u{fb1e}', SC_Extend), + ('\u{fb1f}', '\u{fb28}', SC_OLetter), ('\u{fb2a}', '\u{fb36}', SC_OLetter), ('\u{fb38}', + '\u{fb3c}', SC_OLetter), ('\u{fb3e}', '\u{fb3e}', SC_OLetter), ('\u{fb40}', '\u{fb41}', + SC_OLetter), ('\u{fb43}', '\u{fb44}', SC_OLetter), ('\u{fb46}', '\u{fbb1}', SC_OLetter), + ('\u{fbd3}', '\u{fd3d}', SC_OLetter), ('\u{fd3e}', '\u{fd3f}', SC_Close), ('\u{fd50}', + '\u{fd8f}', SC_OLetter), ('\u{fd92}', '\u{fdc7}', SC_OLetter), ('\u{fdf0}', '\u{fdfb}', + SC_OLetter), ('\u{fe00}', '\u{fe0f}', SC_Extend), ('\u{fe10}', '\u{fe11}', SC_SContinue), + ('\u{fe13}', '\u{fe13}', SC_SContinue), ('\u{fe17}', '\u{fe18}', SC_Close), ('\u{fe20}', + '\u{fe2f}', SC_Extend), ('\u{fe31}', 
'\u{fe32}', SC_SContinue), ('\u{fe35}', '\u{fe44}', + SC_Close), ('\u{fe47}', '\u{fe48}', SC_Close), ('\u{fe50}', '\u{fe51}', SC_SContinue), + ('\u{fe52}', '\u{fe52}', SC_ATerm), ('\u{fe55}', '\u{fe55}', SC_SContinue), ('\u{fe56}', + '\u{fe57}', SC_STerm), ('\u{fe58}', '\u{fe58}', SC_SContinue), ('\u{fe59}', '\u{fe5e}', + SC_Close), ('\u{fe63}', '\u{fe63}', SC_SContinue), ('\u{fe70}', '\u{fe74}', SC_OLetter), + ('\u{fe76}', '\u{fefc}', SC_OLetter), ('\u{feff}', '\u{feff}', SC_Format), ('\u{ff01}', + '\u{ff01}', SC_STerm), ('\u{ff08}', '\u{ff09}', SC_Close), ('\u{ff0c}', '\u{ff0d}', + SC_SContinue), ('\u{ff0e}', '\u{ff0e}', SC_ATerm), ('\u{ff1a}', '\u{ff1a}', SC_SContinue), + ('\u{ff1f}', '\u{ff1f}', SC_STerm), ('\u{ff21}', '\u{ff3a}', SC_Upper), ('\u{ff3b}', + '\u{ff3b}', SC_Close), ('\u{ff3d}', '\u{ff3d}', SC_Close), ('\u{ff41}', '\u{ff5a}', + SC_Lower), ('\u{ff5b}', '\u{ff5b}', SC_Close), ('\u{ff5d}', '\u{ff5d}', SC_Close), + ('\u{ff5f}', '\u{ff60}', SC_Close), ('\u{ff61}', '\u{ff61}', SC_STerm), ('\u{ff62}', + '\u{ff63}', SC_Close), ('\u{ff64}', '\u{ff64}', SC_SContinue), ('\u{ff66}', '\u{ff9d}', + SC_OLetter), ('\u{ff9e}', '\u{ff9f}', SC_Extend), ('\u{ffa0}', '\u{ffbe}', SC_OLetter), + ('\u{ffc2}', '\u{ffc7}', SC_OLetter), ('\u{ffca}', '\u{ffcf}', SC_OLetter), ('\u{ffd2}', + '\u{ffd7}', SC_OLetter), ('\u{ffda}', '\u{ffdc}', SC_OLetter), ('\u{fff9}', '\u{fffb}', + SC_Format), ('\u{10000}', '\u{1000b}', SC_OLetter), ('\u{1000d}', '\u{10026}', SC_OLetter), + ('\u{10028}', '\u{1003a}', SC_OLetter), ('\u{1003c}', '\u{1003d}', SC_OLetter), + ('\u{1003f}', '\u{1004d}', SC_OLetter), ('\u{10050}', '\u{1005d}', SC_OLetter), + ('\u{10080}', '\u{100fa}', SC_OLetter), ('\u{10140}', '\u{10174}', SC_OLetter), + ('\u{101fd}', '\u{101fd}', SC_Extend), ('\u{10280}', '\u{1029c}', SC_OLetter), ('\u{102a0}', + '\u{102d0}', SC_OLetter), ('\u{102e0}', '\u{102e0}', SC_Extend), ('\u{10300}', '\u{1031f}', + SC_OLetter), ('\u{10330}', '\u{1034a}', SC_OLetter), ('\u{10350}', '\u{10375}', 
SC_OLetter), + ('\u{10376}', '\u{1037a}', SC_Extend), ('\u{10380}', '\u{1039d}', SC_OLetter), ('\u{103a0}', + '\u{103c3}', SC_OLetter), ('\u{103c8}', '\u{103cf}', SC_OLetter), ('\u{103d1}', '\u{103d5}', + SC_OLetter), ('\u{10400}', '\u{10427}', SC_Upper), ('\u{10428}', '\u{1044f}', SC_Lower), + ('\u{10450}', '\u{1049d}', SC_OLetter), ('\u{104a0}', '\u{104a9}', SC_Numeric), + ('\u{104b0}', '\u{104d3}', SC_Upper), ('\u{104d8}', '\u{104fb}', SC_Lower), ('\u{10500}', + '\u{10527}', SC_OLetter), ('\u{10530}', '\u{10563}', SC_OLetter), ('\u{10600}', '\u{10736}', + SC_OLetter), ('\u{10740}', '\u{10755}', SC_OLetter), ('\u{10760}', '\u{10767}', SC_OLetter), + ('\u{10800}', '\u{10805}', SC_OLetter), ('\u{10808}', '\u{10808}', SC_OLetter), + ('\u{1080a}', '\u{10835}', SC_OLetter), ('\u{10837}', '\u{10838}', SC_OLetter), + ('\u{1083c}', '\u{1083c}', SC_OLetter), ('\u{1083f}', '\u{10855}', SC_OLetter), + ('\u{10860}', '\u{10876}', SC_OLetter), ('\u{10880}', '\u{1089e}', SC_OLetter), + ('\u{108e0}', '\u{108f2}', SC_OLetter), ('\u{108f4}', '\u{108f5}', SC_OLetter), + ('\u{10900}', '\u{10915}', SC_OLetter), ('\u{10920}', '\u{10939}', SC_OLetter), + ('\u{10980}', '\u{109b7}', SC_OLetter), ('\u{109be}', '\u{109bf}', SC_OLetter), + ('\u{10a00}', '\u{10a00}', SC_OLetter), ('\u{10a01}', '\u{10a03}', SC_Extend), ('\u{10a05}', + '\u{10a06}', SC_Extend), ('\u{10a0c}', '\u{10a0f}', SC_Extend), ('\u{10a10}', '\u{10a13}', + SC_OLetter), ('\u{10a15}', '\u{10a17}', SC_OLetter), ('\u{10a19}', '\u{10a33}', SC_OLetter), + ('\u{10a38}', '\u{10a3a}', SC_Extend), ('\u{10a3f}', '\u{10a3f}', SC_Extend), ('\u{10a56}', + '\u{10a57}', SC_STerm), ('\u{10a60}', '\u{10a7c}', SC_OLetter), ('\u{10a80}', '\u{10a9c}', + SC_OLetter), ('\u{10ac0}', '\u{10ac7}', SC_OLetter), ('\u{10ac9}', '\u{10ae4}', SC_OLetter), + ('\u{10ae5}', '\u{10ae6}', SC_Extend), ('\u{10b00}', '\u{10b35}', SC_OLetter), ('\u{10b40}', + '\u{10b55}', SC_OLetter), ('\u{10b60}', '\u{10b72}', SC_OLetter), ('\u{10b80}', '\u{10b91}', + 
SC_OLetter), ('\u{10c00}', '\u{10c48}', SC_OLetter), ('\u{10c80}', '\u{10cb2}', SC_Upper), + ('\u{10cc0}', '\u{10cf2}', SC_Lower), ('\u{11000}', '\u{11002}', SC_Extend), ('\u{11003}', + '\u{11037}', SC_OLetter), ('\u{11038}', '\u{11046}', SC_Extend), ('\u{11047}', '\u{11048}', + SC_STerm), ('\u{11066}', '\u{1106f}', SC_Numeric), ('\u{1107f}', '\u{11082}', SC_Extend), + ('\u{11083}', '\u{110af}', SC_OLetter), ('\u{110b0}', '\u{110ba}', SC_Extend), ('\u{110bd}', + '\u{110bd}', SC_Format), ('\u{110be}', '\u{110c1}', SC_STerm), ('\u{110d0}', '\u{110e8}', + SC_OLetter), ('\u{110f0}', '\u{110f9}', SC_Numeric), ('\u{11100}', '\u{11102}', SC_Extend), + ('\u{11103}', '\u{11126}', SC_OLetter), ('\u{11127}', '\u{11134}', SC_Extend), ('\u{11136}', + '\u{1113f}', SC_Numeric), ('\u{11141}', '\u{11143}', SC_STerm), ('\u{11150}', '\u{11172}', + SC_OLetter), ('\u{11173}', '\u{11173}', SC_Extend), ('\u{11176}', '\u{11176}', SC_OLetter), + ('\u{11180}', '\u{11182}', SC_Extend), ('\u{11183}', '\u{111b2}', SC_OLetter), ('\u{111b3}', + '\u{111c0}', SC_Extend), ('\u{111c1}', '\u{111c4}', SC_OLetter), ('\u{111c5}', '\u{111c6}', + SC_STerm), ('\u{111ca}', '\u{111cc}', SC_Extend), ('\u{111cd}', '\u{111cd}', SC_STerm), + ('\u{111d0}', '\u{111d9}', SC_Numeric), ('\u{111da}', '\u{111da}', SC_OLetter), + ('\u{111dc}', '\u{111dc}', SC_OLetter), ('\u{111de}', '\u{111df}', SC_STerm), ('\u{11200}', + '\u{11211}', SC_OLetter), ('\u{11213}', '\u{1122b}', SC_OLetter), ('\u{1122c}', '\u{11237}', + SC_Extend), ('\u{11238}', '\u{11239}', SC_STerm), ('\u{1123b}', '\u{1123c}', SC_STerm), + ('\u{1123e}', '\u{1123e}', SC_Extend), ('\u{11280}', '\u{11286}', SC_OLetter), ('\u{11288}', + '\u{11288}', SC_OLetter), ('\u{1128a}', '\u{1128d}', SC_OLetter), ('\u{1128f}', '\u{1129d}', + SC_OLetter), ('\u{1129f}', '\u{112a8}', SC_OLetter), ('\u{112a9}', '\u{112a9}', SC_STerm), + ('\u{112b0}', '\u{112de}', SC_OLetter), ('\u{112df}', '\u{112ea}', SC_Extend), ('\u{112f0}', + '\u{112f9}', SC_Numeric), ('\u{11300}', 
'\u{11303}', SC_Extend), ('\u{11305}', '\u{1130c}', + SC_OLetter), ('\u{1130f}', '\u{11310}', SC_OLetter), ('\u{11313}', '\u{11328}', SC_OLetter), + ('\u{1132a}', '\u{11330}', SC_OLetter), ('\u{11332}', '\u{11333}', SC_OLetter), + ('\u{11335}', '\u{11339}', SC_OLetter), ('\u{1133c}', '\u{1133c}', SC_Extend), ('\u{1133d}', + '\u{1133d}', SC_OLetter), ('\u{1133e}', '\u{11344}', SC_Extend), ('\u{11347}', '\u{11348}', + SC_Extend), ('\u{1134b}', '\u{1134d}', SC_Extend), ('\u{11350}', '\u{11350}', SC_OLetter), + ('\u{11357}', '\u{11357}', SC_Extend), ('\u{1135d}', '\u{11361}', SC_OLetter), ('\u{11362}', + '\u{11363}', SC_Extend), ('\u{11366}', '\u{1136c}', SC_Extend), ('\u{11370}', '\u{11374}', + SC_Extend), ('\u{11400}', '\u{11434}', SC_OLetter), ('\u{11435}', '\u{11446}', SC_Extend), + ('\u{11447}', '\u{1144a}', SC_OLetter), ('\u{1144b}', '\u{1144c}', SC_STerm), ('\u{11450}', + '\u{11459}', SC_Numeric), ('\u{11480}', '\u{114af}', SC_OLetter), ('\u{114b0}', '\u{114c3}', + SC_Extend), ('\u{114c4}', '\u{114c5}', SC_OLetter), ('\u{114c7}', '\u{114c7}', SC_OLetter), + ('\u{114d0}', '\u{114d9}', SC_Numeric), ('\u{11580}', '\u{115ae}', SC_OLetter), + ('\u{115af}', '\u{115b5}', SC_Extend), ('\u{115b8}', '\u{115c0}', SC_Extend), ('\u{115c2}', + '\u{115c3}', SC_STerm), ('\u{115c9}', '\u{115d7}', SC_STerm), ('\u{115d8}', '\u{115db}', + SC_OLetter), ('\u{115dc}', '\u{115dd}', SC_Extend), ('\u{11600}', '\u{1162f}', SC_OLetter), + ('\u{11630}', '\u{11640}', SC_Extend), ('\u{11641}', '\u{11642}', SC_STerm), ('\u{11644}', + '\u{11644}', SC_OLetter), ('\u{11650}', '\u{11659}', SC_Numeric), ('\u{11680}', '\u{116aa}', + SC_OLetter), ('\u{116ab}', '\u{116b7}', SC_Extend), ('\u{116c0}', '\u{116c9}', SC_Numeric), + ('\u{11700}', '\u{11719}', SC_OLetter), ('\u{1171d}', '\u{1172b}', SC_Extend), ('\u{11730}', + '\u{11739}', SC_Numeric), ('\u{1173c}', '\u{1173e}', SC_STerm), ('\u{118a0}', '\u{118bf}', + SC_Upper), ('\u{118c0}', '\u{118df}', SC_Lower), ('\u{118e0}', '\u{118e9}', SC_Numeric), + 
('\u{118ff}', '\u{118ff}', SC_OLetter), ('\u{11ac0}', '\u{11af8}', SC_OLetter), + ('\u{11c00}', '\u{11c08}', SC_OLetter), ('\u{11c0a}', '\u{11c2e}', SC_OLetter), + ('\u{11c2f}', '\u{11c36}', SC_Extend), ('\u{11c38}', '\u{11c3f}', SC_Extend), ('\u{11c40}', + '\u{11c40}', SC_OLetter), ('\u{11c41}', '\u{11c42}', SC_STerm), ('\u{11c50}', '\u{11c59}', + SC_Numeric), ('\u{11c72}', '\u{11c8f}', SC_OLetter), ('\u{11c92}', '\u{11ca7}', SC_Extend), + ('\u{11ca9}', '\u{11cb6}', SC_Extend), ('\u{12000}', '\u{12399}', SC_OLetter), ('\u{12400}', + '\u{1246e}', SC_OLetter), ('\u{12480}', '\u{12543}', SC_OLetter), ('\u{13000}', '\u{1342e}', + SC_OLetter), ('\u{14400}', '\u{14646}', SC_OLetter), ('\u{16800}', '\u{16a38}', SC_OLetter), + ('\u{16a40}', '\u{16a5e}', SC_OLetter), ('\u{16a60}', '\u{16a69}', SC_Numeric), + ('\u{16a6e}', '\u{16a6f}', SC_STerm), ('\u{16ad0}', '\u{16aed}', SC_OLetter), ('\u{16af0}', + '\u{16af4}', SC_Extend), ('\u{16af5}', '\u{16af5}', SC_STerm), ('\u{16b00}', '\u{16b2f}', + SC_OLetter), ('\u{16b30}', '\u{16b36}', SC_Extend), ('\u{16b37}', '\u{16b38}', SC_STerm), + ('\u{16b40}', '\u{16b43}', SC_OLetter), ('\u{16b44}', '\u{16b44}', SC_STerm), ('\u{16b50}', + '\u{16b59}', SC_Numeric), ('\u{16b63}', '\u{16b77}', SC_OLetter), ('\u{16b7d}', '\u{16b8f}', + SC_OLetter), ('\u{16f00}', '\u{16f44}', SC_OLetter), ('\u{16f50}', '\u{16f50}', SC_OLetter), + ('\u{16f51}', '\u{16f7e}', SC_Extend), ('\u{16f8f}', '\u{16f92}', SC_Extend), ('\u{16f93}', + '\u{16f9f}', SC_OLetter), ('\u{16fe0}', '\u{16fe0}', SC_OLetter), ('\u{17000}', '\u{187ec}', + SC_OLetter), ('\u{18800}', '\u{18af2}', SC_OLetter), ('\u{1b000}', '\u{1b001}', SC_OLetter), + ('\u{1bc00}', '\u{1bc6a}', SC_OLetter), ('\u{1bc70}', '\u{1bc7c}', SC_OLetter), + ('\u{1bc80}', '\u{1bc88}', SC_OLetter), ('\u{1bc90}', '\u{1bc99}', SC_OLetter), + ('\u{1bc9d}', '\u{1bc9e}', SC_Extend), ('\u{1bc9f}', '\u{1bc9f}', SC_STerm), ('\u{1bca0}', + '\u{1bca3}', SC_Format), ('\u{1d165}', '\u{1d169}', SC_Extend), ('\u{1d16d}', 
'\u{1d172}', + SC_Extend), ('\u{1d173}', '\u{1d17a}', SC_Format), ('\u{1d17b}', '\u{1d182}', SC_Extend), + ('\u{1d185}', '\u{1d18b}', SC_Extend), ('\u{1d1aa}', '\u{1d1ad}', SC_Extend), ('\u{1d242}', + '\u{1d244}', SC_Extend), ('\u{1d400}', '\u{1d419}', SC_Upper), ('\u{1d41a}', '\u{1d433}', + SC_Lower), ('\u{1d434}', '\u{1d44d}', SC_Upper), ('\u{1d44e}', '\u{1d454}', SC_Lower), + ('\u{1d456}', '\u{1d467}', SC_Lower), ('\u{1d468}', '\u{1d481}', SC_Upper), ('\u{1d482}', + '\u{1d49b}', SC_Lower), ('\u{1d49c}', '\u{1d49c}', SC_Upper), ('\u{1d49e}', '\u{1d49f}', + SC_Upper), ('\u{1d4a2}', '\u{1d4a2}', SC_Upper), ('\u{1d4a5}', '\u{1d4a6}', SC_Upper), + ('\u{1d4a9}', '\u{1d4ac}', SC_Upper), ('\u{1d4ae}', '\u{1d4b5}', SC_Upper), ('\u{1d4b6}', + '\u{1d4b9}', SC_Lower), ('\u{1d4bb}', '\u{1d4bb}', SC_Lower), ('\u{1d4bd}', '\u{1d4c3}', + SC_Lower), ('\u{1d4c5}', '\u{1d4cf}', SC_Lower), ('\u{1d4d0}', '\u{1d4e9}', SC_Upper), + ('\u{1d4ea}', '\u{1d503}', SC_Lower), ('\u{1d504}', '\u{1d505}', SC_Upper), ('\u{1d507}', + '\u{1d50a}', SC_Upper), ('\u{1d50d}', '\u{1d514}', SC_Upper), ('\u{1d516}', '\u{1d51c}', + SC_Upper), ('\u{1d51e}', '\u{1d537}', SC_Lower), ('\u{1d538}', '\u{1d539}', SC_Upper), + ('\u{1d53b}', '\u{1d53e}', SC_Upper), ('\u{1d540}', '\u{1d544}', SC_Upper), ('\u{1d546}', + '\u{1d546}', SC_Upper), ('\u{1d54a}', '\u{1d550}', SC_Upper), ('\u{1d552}', '\u{1d56b}', + SC_Lower), ('\u{1d56c}', '\u{1d585}', SC_Upper), ('\u{1d586}', '\u{1d59f}', SC_Lower), + ('\u{1d5a0}', '\u{1d5b9}', SC_Upper), ('\u{1d5ba}', '\u{1d5d3}', SC_Lower), ('\u{1d5d4}', + '\u{1d5ed}', SC_Upper), ('\u{1d5ee}', '\u{1d607}', SC_Lower), ('\u{1d608}', '\u{1d621}', + SC_Upper), ('\u{1d622}', '\u{1d63b}', SC_Lower), ('\u{1d63c}', '\u{1d655}', SC_Upper), + ('\u{1d656}', '\u{1d66f}', SC_Lower), ('\u{1d670}', '\u{1d689}', SC_Upper), ('\u{1d68a}', + '\u{1d6a5}', SC_Lower), ('\u{1d6a8}', '\u{1d6c0}', SC_Upper), ('\u{1d6c2}', '\u{1d6da}', + SC_Lower), ('\u{1d6dc}', '\u{1d6e1}', SC_Lower), ('\u{1d6e2}', 
'\u{1d6fa}', SC_Upper), + ('\u{1d6fc}', '\u{1d714}', SC_Lower), ('\u{1d716}', '\u{1d71b}', SC_Lower), ('\u{1d71c}', + '\u{1d734}', SC_Upper), ('\u{1d736}', '\u{1d74e}', SC_Lower), ('\u{1d750}', '\u{1d755}', + SC_Lower), ('\u{1d756}', '\u{1d76e}', SC_Upper), ('\u{1d770}', '\u{1d788}', SC_Lower), + ('\u{1d78a}', '\u{1d78f}', SC_Lower), ('\u{1d790}', '\u{1d7a8}', SC_Upper), ('\u{1d7aa}', + '\u{1d7c2}', SC_Lower), ('\u{1d7c4}', '\u{1d7c9}', SC_Lower), ('\u{1d7ca}', '\u{1d7ca}', + SC_Upper), ('\u{1d7cb}', '\u{1d7cb}', SC_Lower), ('\u{1d7ce}', '\u{1d7ff}', SC_Numeric), + ('\u{1da00}', '\u{1da36}', SC_Extend), ('\u{1da3b}', '\u{1da6c}', SC_Extend), ('\u{1da75}', + '\u{1da75}', SC_Extend), ('\u{1da84}', '\u{1da84}', SC_Extend), ('\u{1da88}', '\u{1da88}', + SC_STerm), ('\u{1da9b}', '\u{1da9f}', SC_Extend), ('\u{1daa1}', '\u{1daaf}', SC_Extend), + ('\u{1e000}', '\u{1e006}', SC_Extend), ('\u{1e008}', '\u{1e018}', SC_Extend), ('\u{1e01b}', + '\u{1e021}', SC_Extend), ('\u{1e023}', '\u{1e024}', SC_Extend), ('\u{1e026}', '\u{1e02a}', + SC_Extend), ('\u{1e800}', '\u{1e8c4}', SC_OLetter), ('\u{1e8d0}', '\u{1e8d6}', SC_Extend), + ('\u{1e900}', '\u{1e921}', SC_Upper), ('\u{1e922}', '\u{1e943}', SC_Lower), ('\u{1e944}', + '\u{1e94a}', SC_Extend), ('\u{1e950}', '\u{1e959}', SC_Numeric), ('\u{1ee00}', '\u{1ee03}', + SC_OLetter), ('\u{1ee05}', '\u{1ee1f}', SC_OLetter), ('\u{1ee21}', '\u{1ee22}', SC_OLetter), + ('\u{1ee24}', '\u{1ee24}', SC_OLetter), ('\u{1ee27}', '\u{1ee27}', SC_OLetter), + ('\u{1ee29}', '\u{1ee32}', SC_OLetter), ('\u{1ee34}', '\u{1ee37}', SC_OLetter), + ('\u{1ee39}', '\u{1ee39}', SC_OLetter), ('\u{1ee3b}', '\u{1ee3b}', SC_OLetter), + ('\u{1ee42}', '\u{1ee42}', SC_OLetter), ('\u{1ee47}', '\u{1ee47}', SC_OLetter), + ('\u{1ee49}', '\u{1ee49}', SC_OLetter), ('\u{1ee4b}', '\u{1ee4b}', SC_OLetter), + ('\u{1ee4d}', '\u{1ee4f}', SC_OLetter), ('\u{1ee51}', '\u{1ee52}', SC_OLetter), + ('\u{1ee54}', '\u{1ee54}', SC_OLetter), ('\u{1ee57}', '\u{1ee57}', SC_OLetter), + ('\u{1ee59}', 
'\u{1ee59}', SC_OLetter), ('\u{1ee5b}', '\u{1ee5b}', SC_OLetter), + ('\u{1ee5d}', '\u{1ee5d}', SC_OLetter), ('\u{1ee5f}', '\u{1ee5f}', SC_OLetter), + ('\u{1ee61}', '\u{1ee62}', SC_OLetter), ('\u{1ee64}', '\u{1ee64}', SC_OLetter), + ('\u{1ee67}', '\u{1ee6a}', SC_OLetter), ('\u{1ee6c}', '\u{1ee72}', SC_OLetter), + ('\u{1ee74}', '\u{1ee77}', SC_OLetter), ('\u{1ee79}', '\u{1ee7c}', SC_OLetter), + ('\u{1ee7e}', '\u{1ee7e}', SC_OLetter), ('\u{1ee80}', '\u{1ee89}', SC_OLetter), + ('\u{1ee8b}', '\u{1ee9b}', SC_OLetter), ('\u{1eea1}', '\u{1eea3}', SC_OLetter), + ('\u{1eea5}', '\u{1eea9}', SC_OLetter), ('\u{1eeab}', '\u{1eebb}', SC_OLetter), + ('\u{1f130}', '\u{1f149}', SC_Upper), ('\u{1f150}', '\u{1f169}', SC_Upper), ('\u{1f170}', + '\u{1f189}', SC_Upper), ('\u{1f676}', '\u{1f678}', SC_Close), ('\u{20000}', '\u{2a6d6}', + SC_OLetter), ('\u{2a700}', '\u{2b734}', SC_OLetter), ('\u{2b740}', '\u{2b81d}', SC_OLetter), + ('\u{2b820}', '\u{2cea1}', SC_OLetter), ('\u{2f800}', '\u{2fa1d}', SC_OLetter), + ('\u{e0001}', '\u{e0001}', SC_Format), ('\u{e0020}', '\u{e007f}', SC_Extend), ('\u{e0100}', + '\u{e01ef}', SC_Extend) + ]; + +} diff --git a/src/testdata.rs b/src/testdata.rs index 185489e..7ce6b82 100644 --- a/src/testdata.rs +++ b/src/testdata.rs @@ -2056,3 +2056,336 @@ "\u{2c}", "\u{2c}", "\u{61}"]) ]; + // official Unicode test data + // http://www.unicode.org/Public/UNIDATA/auxiliary/SentenceBreakTest.txt + pub const TEST_SENTENCE: &'static [(&'static str, &'static [&'static str])] = &[ + ("\u{1}\u{1}", &["\u{1}\u{1}"]), ("\u{1}\u{308}\u{1}", &["\u{1}\u{308}\u{1}"]), + ("\u{1}\u{d}", &["\u{1}\u{d}"]), ("\u{1}\u{308}\u{d}", &["\u{1}\u{308}\u{d}"]), + ("\u{1}\u{a}", &["\u{1}\u{a}"]), ("\u{1}\u{308}\u{a}", &["\u{1}\u{308}\u{a}"]), + ("\u{1}\u{85}", &["\u{1}\u{85}"]), ("\u{1}\u{308}\u{85}", &["\u{1}\u{308}\u{85}"]), + ("\u{1}\u{9}", &["\u{1}\u{9}"]), ("\u{1}\u{308}\u{9}", &["\u{1}\u{308}\u{9}"]), + ("\u{1}\u{61}", &["\u{1}\u{61}"]), ("\u{1}\u{308}\u{61}", &["\u{1}\u{308}\u{61}"]), 
+ ("\u{1}\u{41}", &["\u{1}\u{41}"]), ("\u{1}\u{308}\u{41}", &["\u{1}\u{308}\u{41}"]), + ("\u{1}\u{1bb}", &["\u{1}\u{1bb}"]), ("\u{1}\u{308}\u{1bb}", &["\u{1}\u{308}\u{1bb}"]), + ("\u{1}\u{30}", &["\u{1}\u{30}"]), ("\u{1}\u{308}\u{30}", &["\u{1}\u{308}\u{30}"]), + ("\u{1}\u{2e}", &["\u{1}\u{2e}"]), ("\u{1}\u{308}\u{2e}", &["\u{1}\u{308}\u{2e}"]), + ("\u{1}\u{21}", &["\u{1}\u{21}"]), ("\u{1}\u{308}\u{21}", &["\u{1}\u{308}\u{21}"]), + ("\u{1}\u{22}", &["\u{1}\u{22}"]), ("\u{1}\u{308}\u{22}", &["\u{1}\u{308}\u{22}"]), + ("\u{1}\u{2c}", &["\u{1}\u{2c}"]), ("\u{1}\u{308}\u{2c}", &["\u{1}\u{308}\u{2c}"]), + ("\u{1}\u{ad}", &["\u{1}\u{ad}"]), ("\u{1}\u{308}\u{ad}", &["\u{1}\u{308}\u{ad}"]), + ("\u{1}\u{300}", &["\u{1}\u{300}"]), ("\u{1}\u{308}\u{300}", &["\u{1}\u{308}\u{300}"]), + ("\u{d}\u{1}", &["\u{d}", "\u{1}"]), ("\u{d}\u{308}\u{1}", &["\u{d}", "\u{308}\u{1}"]), + ("\u{d}\u{d}", &["\u{d}", "\u{d}"]), ("\u{d}\u{308}\u{d}", &["\u{d}", "\u{308}\u{d}"]), + ("\u{d}\u{a}", &["\u{d}\u{a}"]), ("\u{d}\u{308}\u{a}", &["\u{d}", "\u{308}\u{a}"]), + ("\u{d}\u{85}", &["\u{d}", "\u{85}"]), ("\u{d}\u{308}\u{85}", &["\u{d}", "\u{308}\u{85}"]), + ("\u{d}\u{9}", &["\u{d}", "\u{9}"]), ("\u{d}\u{308}\u{9}", &["\u{d}", "\u{308}\u{9}"]), + ("\u{d}\u{61}", &["\u{d}", "\u{61}"]), ("\u{d}\u{308}\u{61}", &["\u{d}", "\u{308}\u{61}"]), + ("\u{d}\u{41}", &["\u{d}", "\u{41}"]), ("\u{d}\u{308}\u{41}", &["\u{d}", "\u{308}\u{41}"]), + ("\u{d}\u{1bb}", &["\u{d}", "\u{1bb}"]), ("\u{d}\u{308}\u{1bb}", &["\u{d}", + "\u{308}\u{1bb}"]), ("\u{d}\u{30}", &["\u{d}", "\u{30}"]), ("\u{d}\u{308}\u{30}", &["\u{d}", + "\u{308}\u{30}"]), ("\u{d}\u{2e}", &["\u{d}", "\u{2e}"]), ("\u{d}\u{308}\u{2e}", &["\u{d}", + "\u{308}\u{2e}"]), ("\u{d}\u{21}", &["\u{d}", "\u{21}"]), ("\u{d}\u{308}\u{21}", &["\u{d}", + "\u{308}\u{21}"]), ("\u{d}\u{22}", &["\u{d}", "\u{22}"]), ("\u{d}\u{308}\u{22}", &["\u{d}", + "\u{308}\u{22}"]), ("\u{d}\u{2c}", &["\u{d}", "\u{2c}"]), ("\u{d}\u{308}\u{2c}", &["\u{d}", + "\u{308}\u{2c}"]), 
("\u{d}\u{ad}", &["\u{d}", "\u{ad}"]), ("\u{d}\u{308}\u{ad}", &["\u{d}", + "\u{308}\u{ad}"]), ("\u{d}\u{300}", &["\u{d}", "\u{300}"]), ("\u{d}\u{308}\u{300}", + &["\u{d}", "\u{308}\u{300}"]), ("\u{a}\u{1}", &["\u{a}", "\u{1}"]), ("\u{a}\u{308}\u{1}", + &["\u{a}", "\u{308}\u{1}"]), ("\u{a}\u{d}", &["\u{a}", "\u{d}"]), ("\u{a}\u{308}\u{d}", + &["\u{a}", "\u{308}\u{d}"]), ("\u{a}\u{a}", &["\u{a}", "\u{a}"]), ("\u{a}\u{308}\u{a}", + &["\u{a}", "\u{308}\u{a}"]), ("\u{a}\u{85}", &["\u{a}", "\u{85}"]), ("\u{a}\u{308}\u{85}", + &["\u{a}", "\u{308}\u{85}"]), ("\u{a}\u{9}", &["\u{a}", "\u{9}"]), ("\u{a}\u{308}\u{9}", + &["\u{a}", "\u{308}\u{9}"]), ("\u{a}\u{61}", &["\u{a}", "\u{61}"]), ("\u{a}\u{308}\u{61}", + &["\u{a}", "\u{308}\u{61}"]), ("\u{a}\u{41}", &["\u{a}", "\u{41}"]), ("\u{a}\u{308}\u{41}", + &["\u{a}", "\u{308}\u{41}"]), ("\u{a}\u{1bb}", &["\u{a}", "\u{1bb}"]), + ("\u{a}\u{308}\u{1bb}", &["\u{a}", "\u{308}\u{1bb}"]), ("\u{a}\u{30}", &["\u{a}", + "\u{30}"]), ("\u{a}\u{308}\u{30}", &["\u{a}", "\u{308}\u{30}"]), ("\u{a}\u{2e}", &["\u{a}", + "\u{2e}"]), ("\u{a}\u{308}\u{2e}", &["\u{a}", "\u{308}\u{2e}"]), ("\u{a}\u{21}", &["\u{a}", + "\u{21}"]), ("\u{a}\u{308}\u{21}", &["\u{a}", "\u{308}\u{21}"]), ("\u{a}\u{22}", &["\u{a}", + "\u{22}"]), ("\u{a}\u{308}\u{22}", &["\u{a}", "\u{308}\u{22}"]), ("\u{a}\u{2c}", &["\u{a}", + "\u{2c}"]), ("\u{a}\u{308}\u{2c}", &["\u{a}", "\u{308}\u{2c}"]), ("\u{a}\u{ad}", &["\u{a}", + "\u{ad}"]), ("\u{a}\u{308}\u{ad}", &["\u{a}", "\u{308}\u{ad}"]), ("\u{a}\u{300}", &["\u{a}", + "\u{300}"]), ("\u{a}\u{308}\u{300}", &["\u{a}", "\u{308}\u{300}"]), ("\u{85}\u{1}", + &["\u{85}", "\u{1}"]), ("\u{85}\u{308}\u{1}", &["\u{85}", "\u{308}\u{1}"]), ("\u{85}\u{d}", + &["\u{85}", "\u{d}"]), ("\u{85}\u{308}\u{d}", &["\u{85}", "\u{308}\u{d}"]), ("\u{85}\u{a}", + &["\u{85}", "\u{a}"]), ("\u{85}\u{308}\u{a}", &["\u{85}", "\u{308}\u{a}"]), ("\u{85}\u{85}", + &["\u{85}", "\u{85}"]), ("\u{85}\u{308}\u{85}", &["\u{85}", "\u{308}\u{85}"]), + ("\u{85}\u{9}", 
&["\u{85}", "\u{9}"]), ("\u{85}\u{308}\u{9}", &["\u{85}", "\u{308}\u{9}"]), + ("\u{85}\u{61}", &["\u{85}", "\u{61}"]), ("\u{85}\u{308}\u{61}", &["\u{85}", + "\u{308}\u{61}"]), ("\u{85}\u{41}", &["\u{85}", "\u{41}"]), ("\u{85}\u{308}\u{41}", + &["\u{85}", "\u{308}\u{41}"]), ("\u{85}\u{1bb}", &["\u{85}", "\u{1bb}"]), + ("\u{85}\u{308}\u{1bb}", &["\u{85}", "\u{308}\u{1bb}"]), ("\u{85}\u{30}", &["\u{85}", + "\u{30}"]), ("\u{85}\u{308}\u{30}", &["\u{85}", "\u{308}\u{30}"]), ("\u{85}\u{2e}", + &["\u{85}", "\u{2e}"]), ("\u{85}\u{308}\u{2e}", &["\u{85}", "\u{308}\u{2e}"]), + ("\u{85}\u{21}", &["\u{85}", "\u{21}"]), ("\u{85}\u{308}\u{21}", &["\u{85}", + "\u{308}\u{21}"]), ("\u{85}\u{22}", &["\u{85}", "\u{22}"]), ("\u{85}\u{308}\u{22}", + &["\u{85}", "\u{308}\u{22}"]), ("\u{85}\u{2c}", &["\u{85}", "\u{2c}"]), + ("\u{85}\u{308}\u{2c}", &["\u{85}", "\u{308}\u{2c}"]), ("\u{85}\u{ad}", &["\u{85}", + "\u{ad}"]), ("\u{85}\u{308}\u{ad}", &["\u{85}", "\u{308}\u{ad}"]), ("\u{85}\u{300}", + &["\u{85}", "\u{300}"]), ("\u{85}\u{308}\u{300}", &["\u{85}", "\u{308}\u{300}"]), + ("\u{9}\u{1}", &["\u{9}\u{1}"]), ("\u{9}\u{308}\u{1}", &["\u{9}\u{308}\u{1}"]), + ("\u{9}\u{d}", &["\u{9}\u{d}"]), ("\u{9}\u{308}\u{d}", &["\u{9}\u{308}\u{d}"]), + ("\u{9}\u{a}", &["\u{9}\u{a}"]), ("\u{9}\u{308}\u{a}", &["\u{9}\u{308}\u{a}"]), + ("\u{9}\u{85}", &["\u{9}\u{85}"]), ("\u{9}\u{308}\u{85}", &["\u{9}\u{308}\u{85}"]), + ("\u{9}\u{9}", &["\u{9}\u{9}"]), ("\u{9}\u{308}\u{9}", &["\u{9}\u{308}\u{9}"]), + ("\u{9}\u{61}", &["\u{9}\u{61}"]), ("\u{9}\u{308}\u{61}", &["\u{9}\u{308}\u{61}"]), + ("\u{9}\u{41}", &["\u{9}\u{41}"]), ("\u{9}\u{308}\u{41}", &["\u{9}\u{308}\u{41}"]), + ("\u{9}\u{1bb}", &["\u{9}\u{1bb}"]), ("\u{9}\u{308}\u{1bb}", &["\u{9}\u{308}\u{1bb}"]), + ("\u{9}\u{30}", &["\u{9}\u{30}"]), ("\u{9}\u{308}\u{30}", &["\u{9}\u{308}\u{30}"]), + ("\u{9}\u{2e}", &["\u{9}\u{2e}"]), ("\u{9}\u{308}\u{2e}", &["\u{9}\u{308}\u{2e}"]), + ("\u{9}\u{21}", &["\u{9}\u{21}"]), ("\u{9}\u{308}\u{21}", 
&["\u{9}\u{308}\u{21}"]), + ("\u{9}\u{22}", &["\u{9}\u{22}"]), ("\u{9}\u{308}\u{22}", &["\u{9}\u{308}\u{22}"]), + ("\u{9}\u{2c}", &["\u{9}\u{2c}"]), ("\u{9}\u{308}\u{2c}", &["\u{9}\u{308}\u{2c}"]), + ("\u{9}\u{ad}", &["\u{9}\u{ad}"]), ("\u{9}\u{308}\u{ad}", &["\u{9}\u{308}\u{ad}"]), + ("\u{9}\u{300}", &["\u{9}\u{300}"]), ("\u{9}\u{308}\u{300}", &["\u{9}\u{308}\u{300}"]), + ("\u{61}\u{1}", &["\u{61}\u{1}"]), ("\u{61}\u{308}\u{1}", &["\u{61}\u{308}\u{1}"]), + ("\u{61}\u{d}", &["\u{61}\u{d}"]), ("\u{61}\u{308}\u{d}", &["\u{61}\u{308}\u{d}"]), + ("\u{61}\u{a}", &["\u{61}\u{a}"]), ("\u{61}\u{308}\u{a}", &["\u{61}\u{308}\u{a}"]), + ("\u{61}\u{85}", &["\u{61}\u{85}"]), ("\u{61}\u{308}\u{85}", &["\u{61}\u{308}\u{85}"]), + ("\u{61}\u{9}", &["\u{61}\u{9}"]), ("\u{61}\u{308}\u{9}", &["\u{61}\u{308}\u{9}"]), + ("\u{61}\u{61}", &["\u{61}\u{61}"]), ("\u{61}\u{308}\u{61}", &["\u{61}\u{308}\u{61}"]), + ("\u{61}\u{41}", &["\u{61}\u{41}"]), ("\u{61}\u{308}\u{41}", &["\u{61}\u{308}\u{41}"]), + ("\u{61}\u{1bb}", &["\u{61}\u{1bb}"]), ("\u{61}\u{308}\u{1bb}", &["\u{61}\u{308}\u{1bb}"]), + ("\u{61}\u{30}", &["\u{61}\u{30}"]), ("\u{61}\u{308}\u{30}", &["\u{61}\u{308}\u{30}"]), + ("\u{61}\u{2e}", &["\u{61}\u{2e}"]), ("\u{61}\u{308}\u{2e}", &["\u{61}\u{308}\u{2e}"]), + ("\u{61}\u{21}", &["\u{61}\u{21}"]), ("\u{61}\u{308}\u{21}", &["\u{61}\u{308}\u{21}"]), + ("\u{61}\u{22}", &["\u{61}\u{22}"]), ("\u{61}\u{308}\u{22}", &["\u{61}\u{308}\u{22}"]), + ("\u{61}\u{2c}", &["\u{61}\u{2c}"]), ("\u{61}\u{308}\u{2c}", &["\u{61}\u{308}\u{2c}"]), + ("\u{61}\u{ad}", &["\u{61}\u{ad}"]), ("\u{61}\u{308}\u{ad}", &["\u{61}\u{308}\u{ad}"]), + ("\u{61}\u{300}", &["\u{61}\u{300}"]), ("\u{61}\u{308}\u{300}", &["\u{61}\u{308}\u{300}"]), + ("\u{41}\u{1}", &["\u{41}\u{1}"]), ("\u{41}\u{308}\u{1}", &["\u{41}\u{308}\u{1}"]), + ("\u{41}\u{d}", &["\u{41}\u{d}"]), ("\u{41}\u{308}\u{d}", &["\u{41}\u{308}\u{d}"]), + ("\u{41}\u{a}", &["\u{41}\u{a}"]), ("\u{41}\u{308}\u{a}", &["\u{41}\u{308}\u{a}"]), + ("\u{41}\u{85}", 
&["\u{41}\u{85}"]), ("\u{41}\u{308}\u{85}", &["\u{41}\u{308}\u{85}"]), + ("\u{41}\u{9}", &["\u{41}\u{9}"]), ("\u{41}\u{308}\u{9}", &["\u{41}\u{308}\u{9}"]), + ("\u{41}\u{61}", &["\u{41}\u{61}"]), ("\u{41}\u{308}\u{61}", &["\u{41}\u{308}\u{61}"]), + ("\u{41}\u{41}", &["\u{41}\u{41}"]), ("\u{41}\u{308}\u{41}", &["\u{41}\u{308}\u{41}"]), + ("\u{41}\u{1bb}", &["\u{41}\u{1bb}"]), ("\u{41}\u{308}\u{1bb}", &["\u{41}\u{308}\u{1bb}"]), + ("\u{41}\u{30}", &["\u{41}\u{30}"]), ("\u{41}\u{308}\u{30}", &["\u{41}\u{308}\u{30}"]), + ("\u{41}\u{2e}", &["\u{41}\u{2e}"]), ("\u{41}\u{308}\u{2e}", &["\u{41}\u{308}\u{2e}"]), + ("\u{41}\u{21}", &["\u{41}\u{21}"]), ("\u{41}\u{308}\u{21}", &["\u{41}\u{308}\u{21}"]), + ("\u{41}\u{22}", &["\u{41}\u{22}"]), ("\u{41}\u{308}\u{22}", &["\u{41}\u{308}\u{22}"]), + ("\u{41}\u{2c}", &["\u{41}\u{2c}"]), ("\u{41}\u{308}\u{2c}", &["\u{41}\u{308}\u{2c}"]), + ("\u{41}\u{ad}", &["\u{41}\u{ad}"]), ("\u{41}\u{308}\u{ad}", &["\u{41}\u{308}\u{ad}"]), + ("\u{41}\u{300}", &["\u{41}\u{300}"]), ("\u{41}\u{308}\u{300}", &["\u{41}\u{308}\u{300}"]), + ("\u{1bb}\u{1}", &["\u{1bb}\u{1}"]), ("\u{1bb}\u{308}\u{1}", &["\u{1bb}\u{308}\u{1}"]), + ("\u{1bb}\u{d}", &["\u{1bb}\u{d}"]), ("\u{1bb}\u{308}\u{d}", &["\u{1bb}\u{308}\u{d}"]), + ("\u{1bb}\u{a}", &["\u{1bb}\u{a}"]), ("\u{1bb}\u{308}\u{a}", &["\u{1bb}\u{308}\u{a}"]), + ("\u{1bb}\u{85}", &["\u{1bb}\u{85}"]), ("\u{1bb}\u{308}\u{85}", &["\u{1bb}\u{308}\u{85}"]), + ("\u{1bb}\u{9}", &["\u{1bb}\u{9}"]), ("\u{1bb}\u{308}\u{9}", &["\u{1bb}\u{308}\u{9}"]), + ("\u{1bb}\u{61}", &["\u{1bb}\u{61}"]), ("\u{1bb}\u{308}\u{61}", &["\u{1bb}\u{308}\u{61}"]), + ("\u{1bb}\u{41}", &["\u{1bb}\u{41}"]), ("\u{1bb}\u{308}\u{41}", &["\u{1bb}\u{308}\u{41}"]), + ("\u{1bb}\u{1bb}", &["\u{1bb}\u{1bb}"]), ("\u{1bb}\u{308}\u{1bb}", + &["\u{1bb}\u{308}\u{1bb}"]), ("\u{1bb}\u{30}", &["\u{1bb}\u{30}"]), ("\u{1bb}\u{308}\u{30}", + &["\u{1bb}\u{308}\u{30}"]), ("\u{1bb}\u{2e}", &["\u{1bb}\u{2e}"]), ("\u{1bb}\u{308}\u{2e}", + &["\u{1bb}\u{308}\u{2e}"]), 
("\u{1bb}\u{21}", &["\u{1bb}\u{21}"]), ("\u{1bb}\u{308}\u{21}", + &["\u{1bb}\u{308}\u{21}"]), ("\u{1bb}\u{22}", &["\u{1bb}\u{22}"]), ("\u{1bb}\u{308}\u{22}", + &["\u{1bb}\u{308}\u{22}"]), ("\u{1bb}\u{2c}", &["\u{1bb}\u{2c}"]), ("\u{1bb}\u{308}\u{2c}", + &["\u{1bb}\u{308}\u{2c}"]), ("\u{1bb}\u{ad}", &["\u{1bb}\u{ad}"]), ("\u{1bb}\u{308}\u{ad}", + &["\u{1bb}\u{308}\u{ad}"]), ("\u{1bb}\u{300}", &["\u{1bb}\u{300}"]), + ("\u{1bb}\u{308}\u{300}", &["\u{1bb}\u{308}\u{300}"]), ("\u{30}\u{1}", &["\u{30}\u{1}"]), + ("\u{30}\u{308}\u{1}", &["\u{30}\u{308}\u{1}"]), ("\u{30}\u{d}", &["\u{30}\u{d}"]), + ("\u{30}\u{308}\u{d}", &["\u{30}\u{308}\u{d}"]), ("\u{30}\u{a}", &["\u{30}\u{a}"]), + ("\u{30}\u{308}\u{a}", &["\u{30}\u{308}\u{a}"]), ("\u{30}\u{85}", &["\u{30}\u{85}"]), + ("\u{30}\u{308}\u{85}", &["\u{30}\u{308}\u{85}"]), ("\u{30}\u{9}", &["\u{30}\u{9}"]), + ("\u{30}\u{308}\u{9}", &["\u{30}\u{308}\u{9}"]), ("\u{30}\u{61}", &["\u{30}\u{61}"]), + ("\u{30}\u{308}\u{61}", &["\u{30}\u{308}\u{61}"]), ("\u{30}\u{41}", &["\u{30}\u{41}"]), + ("\u{30}\u{308}\u{41}", &["\u{30}\u{308}\u{41}"]), ("\u{30}\u{1bb}", &["\u{30}\u{1bb}"]), + ("\u{30}\u{308}\u{1bb}", &["\u{30}\u{308}\u{1bb}"]), ("\u{30}\u{30}", &["\u{30}\u{30}"]), + ("\u{30}\u{308}\u{30}", &["\u{30}\u{308}\u{30}"]), ("\u{30}\u{2e}", &["\u{30}\u{2e}"]), + ("\u{30}\u{308}\u{2e}", &["\u{30}\u{308}\u{2e}"]), ("\u{30}\u{21}", &["\u{30}\u{21}"]), + ("\u{30}\u{308}\u{21}", &["\u{30}\u{308}\u{21}"]), ("\u{30}\u{22}", &["\u{30}\u{22}"]), + ("\u{30}\u{308}\u{22}", &["\u{30}\u{308}\u{22}"]), ("\u{30}\u{2c}", &["\u{30}\u{2c}"]), + ("\u{30}\u{308}\u{2c}", &["\u{30}\u{308}\u{2c}"]), ("\u{30}\u{ad}", &["\u{30}\u{ad}"]), + ("\u{30}\u{308}\u{ad}", &["\u{30}\u{308}\u{ad}"]), ("\u{30}\u{300}", &["\u{30}\u{300}"]), + ("\u{30}\u{308}\u{300}", &["\u{30}\u{308}\u{300}"]), ("\u{2e}\u{1}", &["\u{2e}", "\u{1}"]), + ("\u{2e}\u{308}\u{1}", &["\u{2e}\u{308}", "\u{1}"]), ("\u{2e}\u{d}", &["\u{2e}\u{d}"]), + ("\u{2e}\u{308}\u{d}", &["\u{2e}\u{308}\u{d}"]), 
("\u{2e}\u{a}", &["\u{2e}\u{a}"]), + ("\u{2e}\u{308}\u{a}", &["\u{2e}\u{308}\u{a}"]), ("\u{2e}\u{85}", &["\u{2e}\u{85}"]), + ("\u{2e}\u{308}\u{85}", &["\u{2e}\u{308}\u{85}"]), ("\u{2e}\u{9}", &["\u{2e}\u{9}"]), + ("\u{2e}\u{308}\u{9}", &["\u{2e}\u{308}\u{9}"]), ("\u{2e}\u{61}", &["\u{2e}\u{61}"]), + ("\u{2e}\u{308}\u{61}", &["\u{2e}\u{308}\u{61}"]), ("\u{2e}\u{41}", &["\u{2e}", "\u{41}"]), + ("\u{2e}\u{308}\u{41}", &["\u{2e}\u{308}", "\u{41}"]), ("\u{2e}\u{1bb}", &["\u{2e}", + "\u{1bb}"]), ("\u{2e}\u{308}\u{1bb}", &["\u{2e}\u{308}", "\u{1bb}"]), ("\u{2e}\u{30}", + &["\u{2e}\u{30}"]), ("\u{2e}\u{308}\u{30}", &["\u{2e}\u{308}\u{30}"]), ("\u{2e}\u{2e}", + &["\u{2e}\u{2e}"]), ("\u{2e}\u{308}\u{2e}", &["\u{2e}\u{308}\u{2e}"]), ("\u{2e}\u{21}", + &["\u{2e}\u{21}"]), ("\u{2e}\u{308}\u{21}", &["\u{2e}\u{308}\u{21}"]), ("\u{2e}\u{22}", + &["\u{2e}\u{22}"]), ("\u{2e}\u{308}\u{22}", &["\u{2e}\u{308}\u{22}"]), ("\u{2e}\u{2c}", + &["\u{2e}\u{2c}"]), ("\u{2e}\u{308}\u{2c}", &["\u{2e}\u{308}\u{2c}"]), ("\u{2e}\u{ad}", + &["\u{2e}\u{ad}"]), ("\u{2e}\u{308}\u{ad}", &["\u{2e}\u{308}\u{ad}"]), ("\u{2e}\u{300}", + &["\u{2e}\u{300}"]), ("\u{2e}\u{308}\u{300}", &["\u{2e}\u{308}\u{300}"]), ("\u{21}\u{1}", + &["\u{21}", "\u{1}"]), ("\u{21}\u{308}\u{1}", &["\u{21}\u{308}", "\u{1}"]), ("\u{21}\u{d}", + &["\u{21}\u{d}"]), ("\u{21}\u{308}\u{d}", &["\u{21}\u{308}\u{d}"]), ("\u{21}\u{a}", + &["\u{21}\u{a}"]), ("\u{21}\u{308}\u{a}", &["\u{21}\u{308}\u{a}"]), ("\u{21}\u{85}", + &["\u{21}\u{85}"]), ("\u{21}\u{308}\u{85}", &["\u{21}\u{308}\u{85}"]), ("\u{21}\u{9}", + &["\u{21}\u{9}"]), ("\u{21}\u{308}\u{9}", &["\u{21}\u{308}\u{9}"]), ("\u{21}\u{61}", + &["\u{21}", "\u{61}"]), ("\u{21}\u{308}\u{61}", &["\u{21}\u{308}", "\u{61}"]), + ("\u{21}\u{41}", &["\u{21}", "\u{41}"]), ("\u{21}\u{308}\u{41}", &["\u{21}\u{308}", + "\u{41}"]), ("\u{21}\u{1bb}", &["\u{21}", "\u{1bb}"]), ("\u{21}\u{308}\u{1bb}", + &["\u{21}\u{308}", "\u{1bb}"]), ("\u{21}\u{30}", &["\u{21}", "\u{30}"]), + ("\u{21}\u{308}\u{30}", 
&["\u{21}\u{308}", "\u{30}"]), ("\u{21}\u{2e}", &["\u{21}\u{2e}"]), + ("\u{21}\u{308}\u{2e}", &["\u{21}\u{308}\u{2e}"]), ("\u{21}\u{21}", &["\u{21}\u{21}"]), + ("\u{21}\u{308}\u{21}", &["\u{21}\u{308}\u{21}"]), ("\u{21}\u{22}", &["\u{21}\u{22}"]), + ("\u{21}\u{308}\u{22}", &["\u{21}\u{308}\u{22}"]), ("\u{21}\u{2c}", &["\u{21}\u{2c}"]), + ("\u{21}\u{308}\u{2c}", &["\u{21}\u{308}\u{2c}"]), ("\u{21}\u{ad}", &["\u{21}\u{ad}"]), + ("\u{21}\u{308}\u{ad}", &["\u{21}\u{308}\u{ad}"]), ("\u{21}\u{300}", &["\u{21}\u{300}"]), + ("\u{21}\u{308}\u{300}", &["\u{21}\u{308}\u{300}"]), ("\u{22}\u{1}", &["\u{22}\u{1}"]), + ("\u{22}\u{308}\u{1}", &["\u{22}\u{308}\u{1}"]), ("\u{22}\u{d}", &["\u{22}\u{d}"]), + ("\u{22}\u{308}\u{d}", &["\u{22}\u{308}\u{d}"]), ("\u{22}\u{a}", &["\u{22}\u{a}"]), + ("\u{22}\u{308}\u{a}", &["\u{22}\u{308}\u{a}"]), ("\u{22}\u{85}", &["\u{22}\u{85}"]), + ("\u{22}\u{308}\u{85}", &["\u{22}\u{308}\u{85}"]), ("\u{22}\u{9}", &["\u{22}\u{9}"]), + ("\u{22}\u{308}\u{9}", &["\u{22}\u{308}\u{9}"]), ("\u{22}\u{61}", &["\u{22}\u{61}"]), + ("\u{22}\u{308}\u{61}", &["\u{22}\u{308}\u{61}"]), ("\u{22}\u{41}", &["\u{22}\u{41}"]), + ("\u{22}\u{308}\u{41}", &["\u{22}\u{308}\u{41}"]), ("\u{22}\u{1bb}", &["\u{22}\u{1bb}"]), + ("\u{22}\u{308}\u{1bb}", &["\u{22}\u{308}\u{1bb}"]), ("\u{22}\u{30}", &["\u{22}\u{30}"]), + ("\u{22}\u{308}\u{30}", &["\u{22}\u{308}\u{30}"]), ("\u{22}\u{2e}", &["\u{22}\u{2e}"]), + ("\u{22}\u{308}\u{2e}", &["\u{22}\u{308}\u{2e}"]), ("\u{22}\u{21}", &["\u{22}\u{21}"]), + ("\u{22}\u{308}\u{21}", &["\u{22}\u{308}\u{21}"]), ("\u{22}\u{22}", &["\u{22}\u{22}"]), + ("\u{22}\u{308}\u{22}", &["\u{22}\u{308}\u{22}"]), ("\u{22}\u{2c}", &["\u{22}\u{2c}"]), + ("\u{22}\u{308}\u{2c}", &["\u{22}\u{308}\u{2c}"]), ("\u{22}\u{ad}", &["\u{22}\u{ad}"]), + ("\u{22}\u{308}\u{ad}", &["\u{22}\u{308}\u{ad}"]), ("\u{22}\u{300}", &["\u{22}\u{300}"]), + ("\u{22}\u{308}\u{300}", &["\u{22}\u{308}\u{300}"]), ("\u{2c}\u{1}", &["\u{2c}\u{1}"]), + ("\u{2c}\u{308}\u{1}", 
&["\u{2c}\u{308}\u{1}"]), ("\u{2c}\u{d}", &["\u{2c}\u{d}"]), + ("\u{2c}\u{308}\u{d}", &["\u{2c}\u{308}\u{d}"]), ("\u{2c}\u{a}", &["\u{2c}\u{a}"]), + ("\u{2c}\u{308}\u{a}", &["\u{2c}\u{308}\u{a}"]), ("\u{2c}\u{85}", &["\u{2c}\u{85}"]), + ("\u{2c}\u{308}\u{85}", &["\u{2c}\u{308}\u{85}"]), ("\u{2c}\u{9}", &["\u{2c}\u{9}"]), + ("\u{2c}\u{308}\u{9}", &["\u{2c}\u{308}\u{9}"]), ("\u{2c}\u{61}", &["\u{2c}\u{61}"]), + ("\u{2c}\u{308}\u{61}", &["\u{2c}\u{308}\u{61}"]), ("\u{2c}\u{41}", &["\u{2c}\u{41}"]), + ("\u{2c}\u{308}\u{41}", &["\u{2c}\u{308}\u{41}"]), ("\u{2c}\u{1bb}", &["\u{2c}\u{1bb}"]), + ("\u{2c}\u{308}\u{1bb}", &["\u{2c}\u{308}\u{1bb}"]), ("\u{2c}\u{30}", &["\u{2c}\u{30}"]), + ("\u{2c}\u{308}\u{30}", &["\u{2c}\u{308}\u{30}"]), ("\u{2c}\u{2e}", &["\u{2c}\u{2e}"]), + ("\u{2c}\u{308}\u{2e}", &["\u{2c}\u{308}\u{2e}"]), ("\u{2c}\u{21}", &["\u{2c}\u{21}"]), + ("\u{2c}\u{308}\u{21}", &["\u{2c}\u{308}\u{21}"]), ("\u{2c}\u{22}", &["\u{2c}\u{22}"]), + ("\u{2c}\u{308}\u{22}", &["\u{2c}\u{308}\u{22}"]), ("\u{2c}\u{2c}", &["\u{2c}\u{2c}"]), + ("\u{2c}\u{308}\u{2c}", &["\u{2c}\u{308}\u{2c}"]), ("\u{2c}\u{ad}", &["\u{2c}\u{ad}"]), + ("\u{2c}\u{308}\u{ad}", &["\u{2c}\u{308}\u{ad}"]), ("\u{2c}\u{300}", &["\u{2c}\u{300}"]), + ("\u{2c}\u{308}\u{300}", &["\u{2c}\u{308}\u{300}"]), ("\u{ad}\u{1}", &["\u{ad}\u{1}"]), + ("\u{ad}\u{308}\u{1}", &["\u{ad}\u{308}\u{1}"]), ("\u{ad}\u{d}", &["\u{ad}\u{d}"]), + ("\u{ad}\u{308}\u{d}", &["\u{ad}\u{308}\u{d}"]), ("\u{ad}\u{a}", &["\u{ad}\u{a}"]), + ("\u{ad}\u{308}\u{a}", &["\u{ad}\u{308}\u{a}"]), ("\u{ad}\u{85}", &["\u{ad}\u{85}"]), + ("\u{ad}\u{308}\u{85}", &["\u{ad}\u{308}\u{85}"]), ("\u{ad}\u{9}", &["\u{ad}\u{9}"]), + ("\u{ad}\u{308}\u{9}", &["\u{ad}\u{308}\u{9}"]), ("\u{ad}\u{61}", &["\u{ad}\u{61}"]), + ("\u{ad}\u{308}\u{61}", &["\u{ad}\u{308}\u{61}"]), ("\u{ad}\u{41}", &["\u{ad}\u{41}"]), + ("\u{ad}\u{308}\u{41}", &["\u{ad}\u{308}\u{41}"]), ("\u{ad}\u{1bb}", &["\u{ad}\u{1bb}"]), + ("\u{ad}\u{308}\u{1bb}", &["\u{ad}\u{308}\u{1bb}"]), 
("\u{ad}\u{30}", &["\u{ad}\u{30}"]), + ("\u{ad}\u{308}\u{30}", &["\u{ad}\u{308}\u{30}"]), ("\u{ad}\u{2e}", &["\u{ad}\u{2e}"]), + ("\u{ad}\u{308}\u{2e}", &["\u{ad}\u{308}\u{2e}"]), ("\u{ad}\u{21}", &["\u{ad}\u{21}"]), + ("\u{ad}\u{308}\u{21}", &["\u{ad}\u{308}\u{21}"]), ("\u{ad}\u{22}", &["\u{ad}\u{22}"]), + ("\u{ad}\u{308}\u{22}", &["\u{ad}\u{308}\u{22}"]), ("\u{ad}\u{2c}", &["\u{ad}\u{2c}"]), + ("\u{ad}\u{308}\u{2c}", &["\u{ad}\u{308}\u{2c}"]), ("\u{ad}\u{ad}", &["\u{ad}\u{ad}"]), + ("\u{ad}\u{308}\u{ad}", &["\u{ad}\u{308}\u{ad}"]), ("\u{ad}\u{300}", &["\u{ad}\u{300}"]), + ("\u{ad}\u{308}\u{300}", &["\u{ad}\u{308}\u{300}"]), ("\u{300}\u{1}", &["\u{300}\u{1}"]), + ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}\u{1}"]), ("\u{300}\u{d}", &["\u{300}\u{d}"]), + ("\u{300}\u{308}\u{d}", &["\u{300}\u{308}\u{d}"]), ("\u{300}\u{a}", &["\u{300}\u{a}"]), + ("\u{300}\u{308}\u{a}", &["\u{300}\u{308}\u{a}"]), ("\u{300}\u{85}", &["\u{300}\u{85}"]), + ("\u{300}\u{308}\u{85}", &["\u{300}\u{308}\u{85}"]), ("\u{300}\u{9}", &["\u{300}\u{9}"]), + ("\u{300}\u{308}\u{9}", &["\u{300}\u{308}\u{9}"]), ("\u{300}\u{61}", &["\u{300}\u{61}"]), + ("\u{300}\u{308}\u{61}", &["\u{300}\u{308}\u{61}"]), ("\u{300}\u{41}", &["\u{300}\u{41}"]), + ("\u{300}\u{308}\u{41}", &["\u{300}\u{308}\u{41}"]), ("\u{300}\u{1bb}", + &["\u{300}\u{1bb}"]), ("\u{300}\u{308}\u{1bb}", &["\u{300}\u{308}\u{1bb}"]), + ("\u{300}\u{30}", &["\u{300}\u{30}"]), ("\u{300}\u{308}\u{30}", &["\u{300}\u{308}\u{30}"]), + ("\u{300}\u{2e}", &["\u{300}\u{2e}"]), ("\u{300}\u{308}\u{2e}", &["\u{300}\u{308}\u{2e}"]), + ("\u{300}\u{21}", &["\u{300}\u{21}"]), ("\u{300}\u{308}\u{21}", &["\u{300}\u{308}\u{21}"]), + ("\u{300}\u{22}", &["\u{300}\u{22}"]), ("\u{300}\u{308}\u{22}", &["\u{300}\u{308}\u{22}"]), + ("\u{300}\u{2c}", &["\u{300}\u{2c}"]), ("\u{300}\u{308}\u{2c}", &["\u{300}\u{308}\u{2c}"]), + ("\u{300}\u{ad}", &["\u{300}\u{ad}"]), ("\u{300}\u{308}\u{ad}", &["\u{300}\u{308}\u{ad}"]), + ("\u{300}\u{300}", &["\u{300}\u{300}"]), 
("\u{300}\u{308}\u{300}", + &["\u{300}\u{308}\u{300}"]), ("\u{d}\u{a}\u{61}\u{a}\u{308}", &["\u{d}\u{a}", "\u{61}\u{a}", + "\u{308}"]), ("\u{61}\u{308}", &["\u{61}\u{308}"]), ("\u{20}\u{200d}\u{646}", + &["\u{20}\u{200d}\u{646}"]), ("\u{646}\u{200d}\u{20}", &["\u{646}\u{200d}\u{20}"]), + ("\u{28}\u{22}\u{47}\u{6f}\u{2e}\u{22}\u{29}\u{20}\u{28}\u{48}\u{65}\u{20}\u{64}\u{69}\u{64}\u{2e}\u{29}", + &["\u{28}\u{22}\u{47}\u{6f}\u{2e}\u{22}\u{29}\u{20}", + "\u{28}\u{48}\u{65}\u{20}\u{64}\u{69}\u{64}\u{2e}\u{29}"]), + ("\u{28}\u{201c}\u{47}\u{6f}\u{3f}\u{201d}\u{29}\u{20}\u{28}\u{48}\u{65}\u{20}\u{64}\u{69}\u{64}\u{2e}\u{29}", + &["\u{28}\u{201c}\u{47}\u{6f}\u{3f}\u{201d}\u{29}\u{20}", + "\u{28}\u{48}\u{65}\u{20}\u{64}\u{69}\u{64}\u{2e}\u{29}"]), + ("\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{2e}\u{20}\u{69}\u{73}", + &["\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{2e}\u{20}\u{69}\u{73}"]), + ("\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{3f}\u{20}\u{48}\u{65}", + &["\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{3f}\u{20}", "\u{48}\u{65}"]), + ("\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{2e}", + &["\u{55}\u{2e}\u{53}\u{2e}\u{41}\u{300}\u{2e}"]), ("\u{33}\u{2e}\u{34}", + &["\u{33}\u{2e}\u{34}"]), ("\u{63}\u{2e}\u{64}", &["\u{63}\u{2e}\u{64}"]), + ("\u{43}\u{2e}\u{64}", &["\u{43}\u{2e}\u{64}"]), ("\u{63}\u{2e}\u{44}", + &["\u{63}\u{2e}\u{44}"]), ("\u{43}\u{2e}\u{44}", &["\u{43}\u{2e}\u{44}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{74}\u{68}\u{65}", + &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{74}\u{68}\u{65}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{54}\u{68}\u{65}", + &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}", "\u{54}\u{68}\u{65}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{2018}\u{28}\u{74}\u{68}\u{65}", + &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{2018}\u{28}\u{74}\u{68}\u{65}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{2018}\u{28}\u{54}\u{68}\u{65}", + &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}", 
"\u{2018}\u{28}\u{54}\u{68}\u{65}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{308}\u{74}\u{68}\u{65}", + &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{308}\u{74}\u{68}\u{65}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{308}\u{54}\u{68}\u{65}", + &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{a0}\u{308}", "\u{54}\u{68}\u{65}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{308}\u{54}\u{68}\u{65}", + &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{2019}\u{308}", "\u{54}\u{68}\u{65}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{29}\u{a}\u{308}\u{54}\u{68}\u{65}", + &["\u{65}\u{74}\u{63}\u{2e}\u{29}\u{a}", "\u{308}\u{54}\u{68}\u{65}"]), + ("\u{74}\u{68}\u{65}\u{20}\u{72}\u{65}\u{73}\u{70}\u{2e}\u{20}\u{6c}\u{65}\u{61}\u{64}\u{65}\u{72}\u{73}\u{20}\u{61}\u{72}\u{65}", + &["\u{74}\u{68}\u{65}\u{20}\u{72}\u{65}\u{73}\u{70}\u{2e}\u{20}\u{6c}\u{65}\u{61}\u{64}\u{65}\u{72}\u{73}\u{20}\u{61}\u{72}\u{65}"]), + ("\u{5b57}\u{2e}\u{5b57}", &["\u{5b57}\u{2e}", "\u{5b57}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{5b83}", &["\u{65}\u{74}\u{63}\u{2e}", "\u{5b83}"]), + ("\u{65}\u{74}\u{63}\u{2e}\u{3002}", &["\u{65}\u{74}\u{63}\u{2e}\u{3002}"]), + ("\u{5b57}\u{3002}\u{5b83}", &["\u{5b57}\u{3002}", "\u{5b83}"]), ("\u{21}\u{20}\u{20}", + &["\u{21}\u{20}\u{20}"]), + ("\u{2060}\u{28}\u{2060}\u{22}\u{2060}\u{47}\u{2060}\u{6f}\u{2060}\u{2e}\u{2060}\u{22}\u{2060}\u{29}\u{2060}\u{20}\u{2060}\u{28}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{64}\u{2060}\u{69}\u{2060}\u{64}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2060}", + &["\u{2060}\u{28}\u{2060}\u{22}\u{2060}\u{47}\u{2060}\u{6f}\u{2060}\u{2e}\u{2060}\u{22}\u{2060}\u{29}\u{2060}\u{20}\u{2060}", + "\u{28}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{64}\u{2060}\u{69}\u{2060}\u{64}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2060}"]), + 
("\u{2060}\u{28}\u{2060}\u{201c}\u{2060}\u{47}\u{2060}\u{6f}\u{2060}\u{3f}\u{2060}\u{201d}\u{2060}\u{29}\u{2060}\u{20}\u{2060}\u{28}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{64}\u{2060}\u{69}\u{2060}\u{64}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2060}", + &["\u{2060}\u{28}\u{2060}\u{201c}\u{2060}\u{47}\u{2060}\u{6f}\u{2060}\u{3f}\u{2060}\u{201d}\u{2060}\u{29}\u{2060}\u{20}\u{2060}", + "\u{28}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{64}\u{2060}\u{69}\u{2060}\u{64}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2060}"]), + ("\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{2e}\u{2060}\u{20}\u{2060}\u{69}\u{2060}\u{73}\u{2060}\u{2060}", + &["\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{2e}\u{2060}\u{20}\u{2060}\u{69}\u{2060}\u{73}\u{2060}\u{2060}"]), + ("\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{3f}\u{2060}\u{20}\u{2060}\u{48}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{3f}\u{2060}\u{20}\u{2060}", + "\u{48}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{2e}\u{2060}\u{2060}", + &["\u{2060}\u{55}\u{2060}\u{2e}\u{2060}\u{53}\u{2060}\u{2e}\u{2060}\u{41}\u{2060}\u{300}\u{2e}\u{2060}\u{2060}"]), + ("\u{2060}\u{33}\u{2060}\u{2e}\u{2060}\u{34}\u{2060}\u{2060}", + &["\u{2060}\u{33}\u{2060}\u{2e}\u{2060}\u{34}\u{2060}\u{2060}"]), + ("\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{64}\u{2060}\u{2060}", + &["\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{64}\u{2060}\u{2060}"]), + ("\u{2060}\u{43}\u{2060}\u{2e}\u{2060}\u{64}\u{2060}\u{2060}", + &["\u{2060}\u{43}\u{2060}\u{2e}\u{2060}\u{64}\u{2060}\u{2060}"]), + ("\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{44}\u{2060}\u{2060}", + &["\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{44}\u{2060}\u{2060}"]), + 
("\u{2060}\u{43}\u{2060}\u{2e}\u{2060}\u{44}\u{2060}\u{2060}", + &["\u{2060}\u{43}\u{2060}\u{2e}\u{2060}\u{44}\u{2060}\u{2060}"]), + ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}", + "\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{2018}\u{2060}\u{28}\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{2018}\u{2060}\u{28}\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{2018}\u{2060}\u{28}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}", + "\u{2018}\u{2060}\u{28}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{308}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{308}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]), + 
("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{308}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{a0}\u{2060}\u{308}", + "\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{308}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{2019}\u{2060}\u{308}", + "\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{a}\u{2060}\u{308}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{29}\u{2060}\u{a}", + "\u{2060}\u{308}\u{2060}\u{54}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{72}\u{2060}\u{65}\u{2060}\u{73}\u{2060}\u{70}\u{2060}\u{2e}\u{2060}\u{20}\u{2060}\u{6c}\u{2060}\u{65}\u{2060}\u{61}\u{2060}\u{64}\u{2060}\u{65}\u{2060}\u{72}\u{2060}\u{73}\u{2060}\u{20}\u{2060}\u{61}\u{2060}\u{72}\u{2060}\u{65}\u{2060}\u{2060}", + &["\u{2060}\u{74}\u{2060}\u{68}\u{2060}\u{65}\u{2060}\u{20}\u{2060}\u{72}\u{2060}\u{65}\u{2060}\u{73}\u{2060}\u{70}\u{2060}\u{2e}\u{2060}\u{20}\u{2060}\u{6c}\u{2060}\u{65}\u{2060}\u{61}\u{2060}\u{64}\u{2060}\u{65}\u{2060}\u{72}\u{2060}\u{73}\u{2060}\u{20}\u{2060}\u{61}\u{2060}\u{72}\u{2060}\u{65}\u{2060}\u{2060}"]), + ("\u{2060}\u{5b57}\u{2060}\u{2e}\u{2060}\u{5b57}\u{2060}\u{2060}", + &["\u{2060}\u{5b57}\u{2060}\u{2e}\u{2060}", "\u{5b57}\u{2060}\u{2060}"]), + ("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{5b83}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}", + "\u{5b83}\u{2060}\u{2060}"]), + 
("\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{3002}\u{2060}\u{2060}", + &["\u{2060}\u{65}\u{2060}\u{74}\u{2060}\u{63}\u{2060}\u{2e}\u{2060}\u{3002}\u{2060}\u{2060}"]), + ("\u{2060}\u{5b57}\u{2060}\u{3002}\u{2060}\u{5b83}\u{2060}\u{2060}", + &["\u{2060}\u{5b57}\u{2060}\u{3002}\u{2060}", "\u{5b83}\u{2060}\u{2060}"]), + ("\u{2060}\u{21}\u{2060}\u{20}\u{2060}\u{20}\u{2060}\u{2060}", + &["\u{2060}\u{21}\u{2060}\u{20}\u{2060}\u{20}\u{2060}\u{2060}"]) + ]; + From 7ac6f29fd8da4c802213b3986204d851bb11a498 Mon Sep 17 00:00:00 2001 From: Tom Cumming Date: Thu, 4 May 2017 20:30:17 +0100 Subject: [PATCH 2/4] Added forward iterator for unicode sentences Passes all tests in the examples provided here: http://www.unicode.org/Public/9.0.0/ucd/auxiliary/SentenceBreakTest.txt --- Cargo.toml | 2 +- src/lib.rs | 15 ++- src/sentence.rs | 302 ++++++++++++++++++++++++++++++++++++++++++++++++ src/test.rs | 21 ++++ 4 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 src/sentence.rs diff --git a/Cargo.toml b/Cargo.toml index a7d093d..36c378f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ license = "MIT/Apache-2.0" keywords = ["text", "unicode", "grapheme", "word", "boundary"] readme = "README.md" description = """ -This crate provides Grapheme Cluster and Word boundaries +This crate provides Grapheme Cluster, Word and Sentence boundaries according to Unicode Standard Annex #29 rules. """ diff --git a/src/lib.rs b/src/lib.rs index 6f903c0..dce216e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Iterators which split strings on Grapheme Cluster or Word boundaries, according +//! Iterators which split strings on Grapheme Cluster, Word or Sentence boundaries, according //! to the [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/) rules. //! //! 
```rust @@ -67,10 +67,12 @@ pub use grapheme::{Graphemes, GraphemeIndices}; pub use grapheme::{GraphemeCursor, GraphemeIncomplete}; pub use tables::UNICODE_VERSION; pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords}; +pub use sentence::{USentenceBounds}; mod grapheme; mod tables; mod word; +mod sentence; #[cfg(test)] mod test; @@ -174,6 +176,12 @@ pub trait UnicodeSegmentation { /// assert_eq!(&swi1[..], b); /// ``` fn split_word_bound_indices<'a>(&'a self) -> UWordBoundIndices<'a>; + + /// Returns an iterator over substrings of `self` separated on + /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). + /// + /// The concatenation of the substrings returned by this function is just the original string. + fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>; } impl UnicodeSegmentation for str { @@ -201,4 +209,9 @@ impl UnicodeSegmentation for str { fn split_word_bound_indices(&self) -> UWordBoundIndices { word::new_word_bound_indices(self) } + + #[inline] + fn split_sentence_bounds(&self) -> USentenceBounds { + sentence::new_sentence_bounds(self) + } } diff --git a/src/sentence.rs b/src/sentence.rs new file mode 100644 index 0000000..b5dbdf6 --- /dev/null +++ b/src/sentence.rs @@ -0,0 +1,302 @@ +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use core::cmp; + +// All of the logic for forward iteration over sentences +mod fwd { + use tables::sentence::SentenceCat; + use core::cmp; + + #[derive(Clone, Copy, PartialEq, Eq)] + enum StatePart { + Sot, + Eot, + Other, + CR, + LF, + Sep, + ATerm, + UpperLower, + ClosePlus, + SpPlus, + STerm + } + + #[derive(Clone, PartialEq, Eq)] + struct SentenceBreaksState(pub [StatePart; 4]); + + const INITIAL_STATE: SentenceBreaksState = SentenceBreaksState([ + StatePart::Sot, + StatePart::Sot, + StatePart::Sot, + StatePart::Sot + ]); + + pub struct SentenceBreaks<'a> { + pub string: &'a str, + pos: usize, + state: SentenceBreaksState + } + + impl SentenceBreaksState { + fn next(&self, cat: SentenceCat) -> SentenceBreaksState { + let &SentenceBreaksState(parts) = self; + let parts = match (parts[3], cat) { + (StatePart::ClosePlus, SentenceCat::SC_Close) => parts, + (StatePart::SpPlus, SentenceCat::SC_Sp) => parts, + _ => [ + parts[1], + parts[2], + parts[3], + match cat { + SentenceCat::SC_CR => StatePart::CR, + SentenceCat::SC_LF => StatePart::LF, + SentenceCat::SC_Sep => StatePart::Sep, + SentenceCat::SC_ATerm => StatePart::ATerm, + SentenceCat::SC_Upper | + SentenceCat::SC_Lower => StatePart::UpperLower, + SentenceCat::SC_Close => StatePart::ClosePlus, + SentenceCat::SC_Sp => StatePart::SpPlus, + SentenceCat::SC_STerm => StatePart::STerm, + _ => StatePart::Other + } + ] + }; + SentenceBreaksState(parts) + } + + fn end(&self) -> SentenceBreaksState { + let &SentenceBreaksState(parts) = self; + SentenceBreaksState([ + parts[1], + parts[2], + parts[3], + StatePart::Eot + ]) + } + + fn match1(&self, part: StatePart) -> bool { + let &SentenceBreaksState(parts) = self; + part == parts[3] + } + + fn match2(&self, part1: StatePart, part2: StatePart) -> bool { + let &SentenceBreaksState(parts) = self; + part1 == parts[2] && part2 == parts[3] + } + } + + fn match_sb8(state: &SentenceBreaksState, ahead: &str) -> bool { + let aterm_part = { + // ATerm Close* Sp* + let 
&SentenceBreaksState(parts) = state; + let mut idx = if parts[3] == StatePart::SpPlus { 2 } else { 3 }; + if parts[idx] == StatePart::ClosePlus { idx -= 1 } + parts[idx] + }; + + if aterm_part == StatePart::ATerm { + use tables::sentence as se; + + for next_char in ahead.chars() { + //( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower + match se::sentence_category(next_char) { + se::SC_Lower => return true, + se::SC_OLetter | + se::SC_Upper | + se::SC_Sep | se::SC_CR | se::SC_LF | + se::SC_STerm | se::SC_ATerm => return false, + _ => continue + } + } + } + + false + } + + fn match_sb8a(state: &SentenceBreaksState) -> bool { + // SATerm Close* Sp* + let &SentenceBreaksState(parts) = state; + let mut idx = if parts[3] == StatePart::SpPlus { 2 } else { 3 }; + if parts[idx] == StatePart::ClosePlus { idx -= 1 } + parts[idx] == StatePart::STerm || parts[idx] == StatePart::ATerm + } + + fn match_sb9(state: &SentenceBreaksState) -> bool { + // SATerm Close* + let &SentenceBreaksState(parts) = state; + let idx = if parts[3] == StatePart::ClosePlus { 2 } else { 3 }; + parts[idx] == StatePart::STerm || parts[idx] == StatePart::ATerm + } + + fn match_sb11(state: &SentenceBreaksState) -> bool { + // SATerm Close* Sp* ParaSep? 
+ let &SentenceBreaksState(parts) = state; + let mut idx = match parts[3] { + StatePart::Sep | + StatePart::CR | + StatePart::LF => 2, + _ => 3 + }; + + if parts[idx] == StatePart::SpPlus { idx -= 1 } + if parts[idx] == StatePart::ClosePlus { idx -= 1} + + parts[idx] == StatePart::STerm || parts[idx] == StatePart::ATerm + } + + impl<'a> Iterator for SentenceBreaks<'a> { + // Returns the index of the character which follows a break + type Item = usize; + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let slen = self.string.len(); + // A sentence could be one character + (cmp::min(slen, 2), Some(slen + 1)) + } + + #[inline] + fn next(&mut self) -> Option { + use tables::sentence as se; + + for next_char in self.string[self.pos..].chars() { + let position_before = self.pos; + let state_before = self.state.clone(); + + let next_cat = se::sentence_category(next_char); + + self.pos += next_char.len_utf8(); + self.state = self.state.next(next_cat); + + match next_cat { + // SB1 + _ if state_before.match1(StatePart::Sot) => + return Some(position_before), + + // SB3 + SentenceCat::SC_LF if state_before.match1(StatePart::CR) => + continue, + + // SB4 + _ if state_before.match1(StatePart::Sep) + || state_before.match1(StatePart::CR) + || state_before.match1(StatePart::LF) + => return Some(position_before), + + // SB5 + SentenceCat::SC_Extend | + SentenceCat::SC_Format => self.state = state_before, + + // SB6 + SentenceCat::SC_Numeric if state_before.match1(StatePart::ATerm) => + continue, + + // SB7 + SentenceCat::SC_Upper if state_before.match2(StatePart::UpperLower, StatePart::ATerm) => + continue, + + // SB8 + _ if match_sb8(&state_before, &self.string[position_before..]) => + continue, + + // SB8a + SentenceCat::SC_SContinue | + SentenceCat::SC_STerm | + SentenceCat::SC_ATerm if match_sb8a(&state_before) => + continue, + + // SB9 + SentenceCat::SC_Close | + SentenceCat::SC_Sp | + SentenceCat::SC_Sep | + SentenceCat::SC_CR | + SentenceCat::SC_LF if 
match_sb9(&state_before) => + continue, + + // SB10 + SentenceCat::SC_Sp | + SentenceCat::SC_Sep | + SentenceCat::SC_CR | + SentenceCat::SC_LF if match_sb8a(&state_before) => + continue, + + // SB11 + _ if match_sb11(&state_before) => + return Some(position_before), + + // SB998 + _ => continue + } + } + + // SB2 + if self.state.match1(StatePart::Sot) { + None + } else if self.state.match1(StatePart::Eot) { + None + } else { + self.state = self.state.end(); + Some(self.pos) + } + } + } + + pub fn new_sentence_breaks<'a>(source: &'a str) -> SentenceBreaks<'a> { + SentenceBreaks { string: source, pos: 0, state: INITIAL_STATE } + } + +} + +/// External iterator for a string's +/// [sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). +pub struct USentenceBounds<'a> { + iter: fwd::SentenceBreaks<'a>, + sentence_start: Option +} + +#[inline] +pub fn new_sentence_bounds<'a>(source: &'a str) -> USentenceBounds<'a> { + USentenceBounds { + iter: fwd::new_sentence_breaks(source), + sentence_start: None + } +} + +impl<'a> Iterator for USentenceBounds<'a> { + type Item = &'a str; + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let (lower, upper) = self.iter.size_hint(); + (cmp::max(0, lower - 1), upper.map(|u| cmp::max(0, u - 1))) + } + + #[inline] + fn next(&mut self) -> Option<&'a str> { + if self.sentence_start == None { + if let Some(start_pos) = self.iter.next() { + self.sentence_start = Some(start_pos) + } else { + return None + } + } + + if let Some(break_pos) = self.iter.next() { + let start_pos = self.sentence_start.unwrap(); + let sentence = &self.iter.string[start_pos..break_pos]; + self.sentence_start = Some(break_pos); + Some(sentence) + } else { + None + } + } +} diff --git a/src/test.rs b/src/test.rs index 54493fe..ad65a7f 100644 --- a/src/test.rs +++ b/src/test.rs @@ -141,6 +141,27 @@ fn test_words() { } } + +#[test] +fn test_sentences() { + use testdata::TEST_SENTENCE; + + for &(s, w) in TEST_SENTENCE.iter() { + 
macro_rules! assert_ { + ($test:expr, $exp:expr, $name:expr) => { + // collect into vector for better diagnostics in failure case + let testing = $test.collect::>(); + let expected = $exp.collect::>(); + assert_eq!(testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w) + } + } + + assert_!(s.split_sentence_bounds(), + w.iter().cloned(), + "Forward sentence boundaries"); + } +} + quickcheck! { fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool { let a = s.graphemes(true).collect::>(); From 50058a56157b4307154a0fc91fbebd18a407dabd Mon Sep 17 00:00:00 2001 From: Tom Cumming Date: Mon, 13 May 2019 19:06:18 +0100 Subject: [PATCH 3/4] Adds unicode_sentences and split_sentence_bound_indices --- src/lib.rs | 27 +++++++++++++++++++++- src/sentence.rs | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index dce216e..b0730e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,7 +67,7 @@ pub use grapheme::{Graphemes, GraphemeIndices}; pub use grapheme::{GraphemeCursor, GraphemeIncomplete}; pub use tables::UNICODE_VERSION; pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords}; -pub use sentence::{USentenceBounds}; +pub use sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences}; mod grapheme; mod tables; @@ -181,7 +181,22 @@ pub trait UnicodeSegmentation { /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). /// /// The concatenation of the substrings returned by this function is just the original string. + fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>; + + /// Returns an iterator over substrings of `self` separated on + /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). + /// + /// Here, "sentences" are just those substrings which, after splitting on + /// UAX#29 sentence boundaries, contain any alphanumeric characters. 
That is, the + /// substring must contain at least one character with the + /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic) + /// property, or with + /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values). fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>; + + /// Returns an iterator over substrings of `self`, split on UAX#29 sentence boundaries, + /// and their offsets. See `split_sentence_bounds()` for more information. + fn split_sentence_bound_indices<'a>(&'a self) -> USentenceBoundIndices<'a>; } impl UnicodeSegmentation for str { @@ -210,8 +225,18 @@ impl UnicodeSegmentation for str { word::new_word_bound_indices(self) } + #[inline] + fn unicode_sentences(&self) -> UnicodeSentences { + sentence::new_unicode_sentences(self) + } + #[inline] fn split_sentence_bounds(&self) -> USentenceBounds { sentence::new_sentence_bounds(self) } + + #[inline] + fn split_sentence_bound_indices(&self) -> USentenceBoundIndices { + sentence::new_sentence_bound_indices(self) + } } diff --git a/src/sentence.rs b/src/sentence.rs index b5dbdf6..0cb8fdd 100644 --- a/src/sentence.rs +++ b/src/sentence.rs @@ -9,6 +9,7 @@ // except according to those terms. use core::cmp; +use core::iter::Filter; // All of the logic for forward iteration over sentences mod fwd { @@ -40,6 +41,7 @@ mod fwd { StatePart::Sot ]); + #[derive(Clone)] pub struct SentenceBreaks<'a> { pub string: &'a str, pos: usize, @@ -256,13 +258,32 @@ mod fwd { } +/// An iterator over the substrings of a string which, after splitting the string on +/// [sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries), +/// contain any characters with the +/// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic) +/// property, or with +/// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values). 
+#[derive(Clone)] +pub struct UnicodeSentences<'a> { + inner: Filter, fn(&&str) -> bool>, +} + /// External iterator for a string's /// [sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). +#[derive(Clone)] pub struct USentenceBounds<'a> { iter: fwd::SentenceBreaks<'a>, sentence_start: Option } +/// External iterator for sentence boundaries and byte offsets. +#[derive(Clone)] +pub struct USentenceBoundIndices<'a> { + start_offset: usize, + iter: USentenceBounds<'a>, +} + #[inline] pub fn new_sentence_bounds<'a>(source: &'a str) -> USentenceBounds<'a> { USentenceBounds { @@ -271,6 +292,32 @@ pub fn new_sentence_bounds<'a>(source: &'a str) -> USentenceBounds<'a> { } } +#[inline] +pub fn new_sentence_bound_indices<'a>(source: &'a str) -> USentenceBoundIndices<'a> { + USentenceBoundIndices { + start_offset: source.as_ptr() as usize, + iter: new_sentence_bounds(source) + } +} + +#[inline] +pub fn new_unicode_sentences<'b>(s: &'b str) -> UnicodeSentences<'b> { + use super::UnicodeSegmentation; + use tables::util::is_alphanumeric; + + fn has_alphanumeric(s: &&str) -> bool { s.chars().any(|c| is_alphanumeric(c)) } + let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer + + UnicodeSentences { inner: s.split_sentence_bounds().filter(has_alphanumeric) } +} + +impl<'a> Iterator for UnicodeSentences<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { self.inner.next() } +} + impl<'a> Iterator for USentenceBounds<'a> { type Item = &'a str; @@ -300,3 +347,17 @@ impl<'a> Iterator for USentenceBounds<'a> { } } } + +impl<'a> Iterator for USentenceBoundIndices<'a> { + type Item = (usize, &'a str); + + #[inline] + fn next(&mut self) -> Option<(usize, &'a str)> { + self.iter.next().map(|s| (s.as_ptr() as usize - self.start_offset, s)) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} From 9c7abf2717e48eb17dc1026019bb157e71421c53 Mon Sep 17 00:00:00 2001 
From: Tom Cumming Date: Mon, 13 May 2019 19:24:05 +0100 Subject: [PATCH 4/4] Documentation and code reorg --- src/sentence.rs | 52 +++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/src/sentence.rs b/src/sentence.rs index 0cb8fdd..c16c927 100644 --- a/src/sentence.rs +++ b/src/sentence.rs @@ -16,6 +16,8 @@ mod fwd { use tables::sentence::SentenceCat; use core::cmp; + // Describe a parsed part of source string as described in this table: + // https://unicode.org/reports/tr29/#Default_Sentence_Boundaries #[derive(Clone, Copy, PartialEq, Eq)] enum StatePart { Sot, @@ -49,6 +51,8 @@ mod fwd { } impl SentenceBreaksState { + // Attempt to advance the internal state by one part + // Whitespace and some punctuation will be collapsed fn next(&self, cat: SentenceCat) -> SentenceBreaksState { let &SentenceBreaksState(parts) = self; let parts = match (parts[3], cat) { @@ -85,27 +89,28 @@ mod fwd { ]) } + // Helper function to check if state head matches a single `StatePart` fn match1(&self, part: StatePart) -> bool { let &SentenceBreaksState(parts) = self; part == parts[3] } + // Helper function to check if first two `StateParts` in state match + // the given two fn match2(&self, part1: StatePart, part2: StatePart) -> bool { let &SentenceBreaksState(parts) = self; part1 == parts[2] && part2 == parts[3] } } + // https://unicode.org/reports/tr29/#SB8 + // TODO cache this, it is currently quadratic fn match_sb8(state: &SentenceBreaksState, ahead: &str) -> bool { - let aterm_part = { - // ATerm Close* Sp* - let &SentenceBreaksState(parts) = state; - let mut idx = if parts[3] == StatePart::SpPlus { 2 } else { 3 }; - if parts[idx] == StatePart::ClosePlus { idx -= 1 } - parts[idx] - }; + let &SentenceBreaksState(parts) = state; + let mut idx = if parts[3] == StatePart::SpPlus { 2 } else { 3 }; + if parts[idx] == StatePart::ClosePlus { idx -= 1 } - if aterm_part == StatePart::ATerm { + if parts[idx] == StatePart::ATerm {
use tables::sentence as se; for next_char in ahead.chars() { @@ -124,6 +129,7 @@ mod fwd { false } + // https://unicode.org/reports/tr29/#SB8a fn match_sb8a(state: &SentenceBreaksState) -> bool { // SATerm Close* Sp* let &SentenceBreaksState(parts) = state; @@ -132,6 +138,7 @@ mod fwd { parts[idx] == StatePart::STerm || parts[idx] == StatePart::ATerm } + // https://unicode.org/reports/tr29/#SB9 fn match_sb9(state: &SentenceBreaksState) -> bool { // SATerm Close* let &SentenceBreaksState(parts) = state; @@ -139,6 +146,7 @@ mod fwd { parts[idx] == StatePart::STerm || parts[idx] == StatePart::ATerm } + // https://unicode.org/reports/tr29/#SB11 fn match_sb11(state: &SentenceBreaksState) -> bool { // SATerm Close* Sp* ParaSep? let &SentenceBreaksState(parts) = state; @@ -180,43 +188,45 @@ mod fwd { self.state = self.state.next(next_cat); match next_cat { - // SB1 + // SB1 https://unicode.org/reports/tr29/#SB1 _ if state_before.match1(StatePart::Sot) => return Some(position_before), - // SB3 + // SB2 is handled when inner iterator (chars) is finished + + // SB3 https://unicode.org/reports/tr29/#SB3 SentenceCat::SC_LF if state_before.match1(StatePart::CR) => continue, - // SB4 + // SB4 https://unicode.org/reports/tr29/#SB4 _ if state_before.match1(StatePart::Sep) || state_before.match1(StatePart::CR) || state_before.match1(StatePart::LF) => return Some(position_before), - // SB5 + // SB5 https://unicode.org/reports/tr29/#SB5 SentenceCat::SC_Extend | SentenceCat::SC_Format => self.state = state_before, - // SB6 + // SB6 https://unicode.org/reports/tr29/#SB6 SentenceCat::SC_Numeric if state_before.match1(StatePart::ATerm) => continue, - // SB7 + // SB7 https://unicode.org/reports/tr29/#SB7 SentenceCat::SC_Upper if state_before.match2(StatePart::UpperLower, StatePart::ATerm) => continue, - // SB8 + // SB8 https://unicode.org/reports/tr29/#SB8 _ if match_sb8(&state_before, &self.string[position_before..]) => continue, - // SB8a + // SB8a https://unicode.org/reports/tr29/#SB8a 
SentenceCat::SC_SContinue | SentenceCat::SC_STerm | SentenceCat::SC_ATerm if match_sb8a(&state_before) => continue, - // SB9 + // SB9 https://unicode.org/reports/tr29/#SB9 SentenceCat::SC_Close | SentenceCat::SC_Sp | SentenceCat::SC_Sep | @@ -224,23 +234,23 @@ mod fwd { SentenceCat::SC_LF if match_sb9(&state_before) => continue, - // SB10 + // SB10 https://unicode.org/reports/tr29/#SB10 SentenceCat::SC_Sp | SentenceCat::SC_Sep | SentenceCat::SC_CR | SentenceCat::SC_LF if match_sb8a(&state_before) => continue, - // SB11 + // SB11 https://unicode.org/reports/tr29/#SB11 _ if match_sb11(&state_before) => return Some(position_before), - // SB998 + // SB998 https://unicode.org/reports/tr29/#SB998 _ => continue } } - // SB2 + // SB2 https://unicode.org/reports/tr29/#SB2 if self.state.match1(StatePart::Sot) { None } else if self.state.match1(StatePart::Eot) {