forked from foliojs/unicode-properties
-
Notifications
You must be signed in to change notification settings - Fork 0
/
unicode-properties.es.js
141 lines (113 loc) · 6.54 KB
/
unicode-properties.es.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import fs from 'fs';
import UnicodeTrie from 'unicode-trie';
// Lookup tables for the property values packed into each trie entry.
// buildUnicodeProperties() indexes these arrays with small integers extracted
// from the trie, so the ORDER of every array is significant: never sort,
// de-duplicate or insert into the middle of these lists.

// General category abbreviations.
var categories = [
  "Cc", "Zs", "Po", "Sc", "Ps", "Pe", "Sm", "Pd", "Nd", "Lu",
  "Sk", "Pc", "Ll", "So", "Lo", "Pi", "Cf", "No", "Pf", "Lt",
  "Lm", "Mn", "Me", "Mc", "Nl", "Zl", "Zp", "Cs", "Co"
];

// Canonical combining class names.
var combiningClasses = [
  "Not_Reordered", "Above", "Above_Right", "Below", "Attached_Above_Right",
  "Attached_Below", "Overlay", "Iota_Subscript", "Double_Below", "Double_Above",
  "Below_Right", "Above_Left",
  "CCC10", "CCC11", "CCC12", "CCC13", "CCC14", "CCC15", "CCC16", "CCC17",
  "CCC18", "CCC19", "CCC20", "CCC21", "CCC22", "CCC23", "CCC24", "CCC25",
  "CCC30", "CCC31", "CCC32", "CCC27", "CCC28", "CCC29",
  "CCC33", "CCC34", "CCC35", "CCC36",
  "Nukta", "Virama",
  "CCC84", "CCC91", "CCC103", "CCC107", "CCC118", "CCC122", "CCC129", "CCC130", "CCC132",
  "Attached_Above", "Below_Left", "Left", "Kana_Voicing", "CCC26", "Right"
];

// Script names.
// NOTE(review): the literal string "null" (index 71) is kept exactly as found
// because positions are significant; presumably a generator artifact - confirm
// against the script that produced this table.
var scripts = [
  "Common", "Latin", "Bopomofo", "Inherited", "Greek", "Coptic", "Cyrillic", "Armenian",
  "Hebrew", "Arabic", "Syriac", "Thaana", "Nko", "Samaritan", "Mandaic", "Devanagari",
  "Bengali", "Gurmukhi", "Gujarati", "Oriya", "Tamil", "Telugu", "Kannada", "Malayalam",
  "Sinhala", "Thai", "Lao", "Tibetan", "Myanmar", "Georgian", "Hangul", "Ethiopic",
  "Cherokee", "Canadian_Aboriginal", "Ogham", "Runic", "Tagalog", "Hanunoo", "Buhid", "Tagbanwa",
  "Khmer", "Mongolian", "Limbu", "Tai_Le", "New_Tai_Lue", "Buginese", "Tai_Tham", "Balinese",
  "Sundanese", "Batak", "Lepcha", "Ol_Chiki", "Braille", "Glagolitic", "Tifinagh", "Han",
  "Hiragana", "Katakana", "Yi", "Lisu", "Vai", "Bamum", "Syloti_Nagri", "Phags_Pa",
  "Saurashtra", "Kayah_Li", "Rejang", "Javanese", "Cham", "Tai_Viet", "Meetei_Mayek", "null",
  "Linear_B", "Lycian", "Carian", "Old_Italic", "Gothic", "Old_Permic", "Ugaritic", "Old_Persian",
  "Deseret", "Shavian", "Osmanya", "Osage", "Elbasan", "Caucasian_Albanian", "Linear_A", "Cypriot",
  "Imperial_Aramaic", "Palmyrene", "Nabataean", "Hatran", "Phoenician", "Lydian",
  "Meroitic_Hieroglyphs", "Meroitic_Cursive",
  "Kharoshthi", "Old_South_Arabian", "Old_North_Arabian", "Manichaean", "Avestan",
  "Inscriptional_Parthian", "Inscriptional_Pahlavi", "Psalter_Pahlavi",
  "Old_Turkic", "Old_Hungarian", "Hanifi_Rohingya", "Old_Sogdian", "Sogdian", "Elymaic",
  "Brahmi", "Kaithi",
  // Fixed: this entry was split by a stray line break inside the string literal.
  "Sora_Sompeng",
  "Chakma", "Mahajani", "Sharada", "Khojki", "Multani", "Khudawadi", "Grantha",
  "Newa", "Tirhuta", "Siddham", "Modi", "Takri", "Ahom", "Dogra", "Warang_Citi",
  "Nandinagari", "Zanabazar_Square", "Soyombo", "Pau_Cin_Hau", "Bhaiksuki", "Marchen",
  "Masaram_Gondi", "Gunjala_Gondi",
  "Makasar", "Cuneiform", "Egyptian_Hieroglyphs", "Anatolian_Hieroglyphs", "Mro", "Bassa_Vah",
  "Pahawh_Hmong", "Medefaidrin",
  "Miao", "Tangut", "Nushu", "Duployan", "SignWriting", "Nyiakeng_Puachue_Hmong", "Wancho",
  "Mende_Kikakui", "Adlam"
];

// East Asian Width abbreviations.
var eaw = ["N", "Na", "A", "W", "H", "F"];

// Bundle handed to buildUnicodeProperties(); keys must keep these exact names.
var data = {categories:categories,combiningClasses:combiningClasses,scripts:scripts,eaw:eaw};
// Base-2 logarithm. Uses the native Math.log2 when the runtime provides it and
// otherwise derives it from the natural logarithm.
const log2 = Math.log2 || ((value) => Math.log(value) / Math.LN2);

/**
 * Computes `log2(n) + 1` truncated to a 32-bit integer. For a positive
 * integer `n` this is the bit length of `n`; for `n === 0` it yields 0.
 *
 * @param {number} n - Largest value that must be representable.
 * @returns {number} Number of bits required.
 */
const bits = (n) => {
  const bitLength = log2(n) + 1;
  // `| 0` truncates toward zero and maps -Infinity (from log2(0)) to 0.
  return bitLength | 0;
};
/**
 * Builds the Unicode property lookup API on top of a packed trie.
 *
 * Every value returned by `trie.get(codePoint)` is treated as a bit field laid
 * out, from most to least significant, as:
 *
 *   category | combining class | script | east-asian width | numeric value
 *
 * The first four fields are indices into the matching arrays of `data`; their
 * widths are derived from the array lengths. The numeric field is always
 * 10 bits wide and uses the encoding decoded by `getNumericValue`.
 *
 * @param {{categories: string[], combiningClasses: string[], scripts: string[], eaw: string[]}} data
 *   Lookup tables the packed indices point into.
 * @param {{get: function(number): number}} trie
 *   Object whose `get(codePoint)` returns the packed value for a code point.
 * @returns {Object} The property getters and category predicates.
 */
var buildUnicodeProperties = (data, trie) => {
  // Field widths, listed from the least significant field upwards.
  const NUMBER_BITS = 10;
  const EAW_BITS = bits(data.eaw.length - 1);
  const SCRIPT_BITS = bits(data.scripts.length - 1);
  const COMBINING_BITS = bits(data.combiningClasses.length - 1);
  const CATEGORY_BITS = bits(data.categories.length - 1);

  // Each field starts where the one below it ends.
  const EAW_SHIFT = NUMBER_BITS;
  const SCRIPT_SHIFT = EAW_SHIFT + EAW_BITS;
  const COMBINING_SHIFT = SCRIPT_SHIFT + SCRIPT_BITS;
  const CATEGORY_SHIFT = COMBINING_SHIFT + COMBINING_BITS;

  const maskFor = (width) => (1 << width) - 1;
  const NUMBER_MASK = maskFor(NUMBER_BITS);
  const EAW_MASK = maskFor(EAW_BITS);
  const SCRIPT_MASK = maskFor(SCRIPT_BITS);
  const COMBINING_MASK = maskFor(COMBINING_BITS);
  const CATEGORY_MASK = maskFor(CATEGORY_BITS);

  // Pulls one packed field out of the trie value for `codePoint`.
  const readField = (codePoint, shift, mask) => (trie.get(codePoint) >> shift) & mask;

  const getCategory = (codePoint) =>
    data.categories[readField(codePoint, CATEGORY_SHIFT, CATEGORY_MASK)];

  const getCombiningClass = (codePoint) =>
    data.combiningClasses[readField(codePoint, COMBINING_SHIFT, COMBINING_MASK)];

  const getScript = (codePoint) =>
    data.scripts[readField(codePoint, SCRIPT_SHIFT, SCRIPT_MASK)];

  const getEastAsianWidth = (codePoint) =>
    data.eaw[readField(codePoint, EAW_SHIFT, EAW_MASK)];

  // Multiplies `mantissa` by `base` exactly `exponent` times. Repeated
  // multiplication (rather than a power function) keeps results bit-for-bit
  // stable.
  const scaleBy = (mantissa, base, exponent) => {
    let result = mantissa;
    for (let remaining = exponent; remaining > 0; remaining--) {
      result *= base;
    }
    return result;
  };

  // Decodes the 10-bit numeric field:
  //   0            -> no numeric value (null)
  //   1..50        -> the integer (field - 1)
  //   51..0x1df    -> a fraction: numerator in the high bits, denominator in the low 4
  //   0x1e0..0x2ff -> mantissa * 10^exp, exp in the low 5 bits (+2)
  //   0x300 and up -> mantissa * 60^exp, exp in the low 2 bits (+1)
  const getNumericValue = (codePoint) => {
    const num = trie.get(codePoint) & NUMBER_MASK;
    if (num === 0) {
      return null;
    }
    if (num <= 50) {
      return num - 1;
    }
    if (num < 0x1e0) {
      return ((num >> 4) - 12) / ((num & 0xf) + 1);
    }
    if (num < 0x300) {
      return scaleBy((num >> 5) - 14, 10, (num & 0x1f) + 2);
    }
    return scaleBy((num >> 2) - 0xbf, 60, (num & 3) + 1);
  };

  // Predicate factories keyed on the general category of a code point.
  const categoryIs = (name) => (codePoint) => getCategory(codePoint) === name;
  const categoryIn = (names) => {
    const allowed = new Set(names);
    return (codePoint) => allowed.has(getCategory(codePoint));
  };

  const isAlphabetic = categoryIn(['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']);
  const isDigit = categoryIs('Nd');
  const isPunctuation = categoryIn(['Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps']);
  const isLowerCase = categoryIs('Ll');
  const isUpperCase = categoryIs('Lu');
  const isTitleCase = categoryIs('Lt');
  const isWhiteSpace = categoryIn(['Zs', 'Zl', 'Zp']);
  const isBaseForm = categoryIn(['Nd', 'No', 'Nl', 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Me', 'Mc']);
  const isMark = categoryIn(['Mn', 'Me', 'Mc']);

  return {
    getCategory,
    getCombiningClass,
    getScript,
    getEastAsianWidth,
    getNumericValue,
    isAlphabetic,
    isDigit,
    isPunctuation,
    isLowerCase,
    isUpperCase,
    isTitleCase,
    isWhiteSpace,
    isBaseForm,
    isMark
  };
};
// Module bootstrap: synchronously read the `data.trie` file located next to
// this module and wrap its bytes in a UnicodeTrie. This runs once, at import
// time.
// NOTE(review): `__dirname` is not defined in native ES modules; presumably
// this build is consumed through a bundler/transform that inlines the
// `fs.readFileSync(__dirname + ...)` call - confirm before changing the shape
// of this expression.
var trie = new UnicodeTrie(fs.readFileSync(__dirname + '/data.trie'));
// Bind the lookup tables and the loaded trie into the public property API.
var unicodeProperties = buildUnicodeProperties(data, trie);
// The default export is the object of getters/predicates returned by
// buildUnicodeProperties().
export default unicodeProperties;
//# sourceMappingURL=unicode-properties.es.js.map