forked from foliojs/unicode-properties
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate.js
107 lines (94 loc) · 3.14 KB
/
generate.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
const codePoints = require('codepoints');
const fs = require('fs');
const UnicodeTrieBuilder = require('unicode-trie/builder');
// Base-2 logarithm. Uses the built-in Math.log2 when present, otherwise a
// natural-log ratio.
const log2 = Math.log2 || function (n) {
  return Math.log(n) / Math.LN2;
};

/**
 * Bit width for a field whose largest stored value is `n`: log2(n) + 1,
 * truncated to a 32-bit integer.
 *
 * The `| 0` truncation also turns the non-finite intermediate results into 0,
 * so bits(0) (log2 is -Infinity) and bits of a negative number (log2 is NaN)
 * both yield 0.
 *
 * @param {number} n - Largest value the field must hold.
 * @returns {number} Number of bits.
 */
const bits = function (n) {
  const exponent = log2(n);
  return (exponent + 1) | 0;
};
// Lookup tables: every distinct property value found in the code point data
// is mapped to a zero-based index in order of first appearance.
const categories = {};
const combiningClasses = {};
const scripts = {};
const eaws = {};

// Number of distinct values seen so far per property; each counter is also
// the next index to hand out.
let categoryCount = 0;
let combiningClassCount = 0;
let scriptCount = 0;
let eawCount = 0;

// `var` keeps this binding function-scoped, so it stays visible after the loop.
var codePoint;
for (codePoint of Array.from(codePoints)) {
  // Null/undefined entries carry no properties and are skipped.
  if (codePoint == null) {
    continue;
  }

  const { category, combiningClassName, script, eastAsianWidth } = codePoint;

  if (categories[category] == null) {
    categories[category] = categoryCount;
    categoryCount += 1;
  }

  if (combiningClasses[combiningClassName] == null) {
    combiningClasses[combiningClassName] = combiningClassCount;
    combiningClassCount += 1;
  }

  if (scripts[script] == null) {
    scripts[script] = scriptCount;
    scriptCount += 1;
  }

  if (eaws[eastAsianWidth] == null) {
    eaws[eastAsianWidth] = eawCount;
    eawCount += 1;
  }
}
// Field widths for the packed per-code-point value. The numeric field has a
// fixed width; the others are sized from the largest index handed out for
// that property (count - 1).
const numberBits = 10;
const eawBits = bits(eawCount - 1);
const scriptBits = bits(scriptCount - 1);
const combiningClassBits = bits(combiningClassCount - 1);
const categoryBits = bits(categoryCount - 1);
// Declared but not part of any shift computed below.
const bidiMirrorBits = 1;

// Shift amounts, built up from the least significant field. Each shift is the
// combined width of every field stored below it, giving the layout
//   [ category | combining class | script | East Asian width | numeric ]
// NOTE(review): these are meant for use with `<<`, which works on 32-bit
// integers; confirm the summed widths stay within 32 bits as the data grows.
const eawShift = numberBits;
const scriptShift = eawShift + eawBits;
const combiningShift = scriptShift + scriptBits;
const categoryShift = combiningShift + combiningClassBits;
/**
 * Encode a code point's numeric value string as a small non-negative integer.
 *
 * The encodings are tried in this order:
 *   - falsy input                     -> 0
 *   - fraction "n/d" (n may be signed) -> ((n + 12) << 4) + (d - 1)
 *   - one digit followed by zeros     -> ((digit + 14) << 5) + (exp - 2),
 *                                        where exp is the number of zeros
 *   - any other integer <= 50         -> value + 1
 *   - anything else                   -> ((mant + 0xbf) << 2) + (exp - 1),
 *                                        where value = mant * 60^exp and mant
 *                                        is no longer divisible by 60
 *
 * @param {?string} numeric - Numeric value as text; truthy values must be
 *   strings because `.match`, indexing and `.length` are used on them.
 * @returns {number} The encoded value.
 */
const numericValue = function (numeric) {
  if (!numeric) {
    return 0;
  }

  const fraction = numeric.match(/^(\-?\d+)\/(\d+)$/);
  if (fraction) {
    // Always parse as decimal; never let parseInt infer the radix.
    const num = parseInt(fraction[1], 10);
    const den = parseInt(fraction[2], 10);
    return ((num + 12) << 4) + (den - 1);
  }

  if (/^\d0+$/.test(numeric)) {
    // Power-of-ten form: leading digit is the mantissa, zero count the exponent.
    // NOTE(review): a two-character input such as "10" has exp === 1, so
    // `exp - 2` is -1 and the result drops below the shifted mantissa
    // ("10" -> 479, the same number the fraction branch yields for "17/16").
    // Confirm against the decoder whether single-zero values should take the
    // small-integer branch instead.
    const mant = parseInt(numeric[0], 10);
    const exp = numeric.length - 1;
    return ((mant + 14) << 5) + (exp - 2);
  }

  const val = parseInt(numeric, 10);
  if (val <= 50) {
    // Small integers are stored offset by one so that 0 can mean "no value".
    return 1 + val;
  }

  // Base-60 form: strip factors of 60 from the value.
  // NOTE(review): when the value is not divisible by 60, exp stays 0 and
  // `exp - 1` is -1; confirm such inputs are expected here.
  let mant = val;
  let exp = 0;
  while (mant % 60 === 0) {
    mant /= 60;
    ++exp;
  }
  return ((mant + 0xbf) << 2) + (exp - 1);
};
// Build the trie: for every code point, pack its property indices and encoded
// numeric value into one integer and store it under the code point's number.
const trie = new UnicodeTrieBuilder();
for (const codePoint of Array.from(codePoints)) {
  // Null/undefined entries have nothing to store.
  if (codePoint == null) {
    continue;
  }

  const category = categories[codePoint.category];
  // Properties missing from their lookup table fall back to index 0.
  const combiningClass = combiningClasses[codePoint.combiningClassName] || 0;
  const script = scripts[codePoint.script] || 0;
  // Declared with `const` so no implicit global is created (an undeclared
  // assignment throws a ReferenceError in strict mode / ES modules).
  const eaw = eaws[codePoint.eastAsianWidth] || 0;

  // Layout, most to least significant:
  // category | combining class | script | East Asian width | numeric value.
  const val =
    (category << categoryShift) |
    (combiningClass << combiningShift) |
    (script << scriptShift) |
    (eaw << eawShift) |
    numericValue(codePoint.numeric);

  trie.set(codePoint.code, val);
}
// Serialize the trie once and reuse the same buffer for both trie outputs.
const trieBuffer = trie.toBuffer();

// Raw binary form of the trie.
fs.writeFileSync('./data.trie', trieBuffer);

// Name tables for decoding the packed indices. A name's position in each array
// is meant to equal the index assigned to it; this relies on Object.keys
// returning string keys in insertion order (true for non-integer-like keys).
fs.writeFileSync('./data.json', JSON.stringify({
  categories: Object.keys(categories),
  combiningClasses: Object.keys(combiningClasses),
  scripts: Object.keys(scripts),
  eaw: Object.keys(eaws)
}));

// The trie is also written as base64 inside JSON. This is a less compact
// encoding, but it can be loaded via require, allowing unicode-properties to
// work in the browser.
fs.writeFileSync('./trie.json', JSON.stringify({ data: trieBuffer.toString('base64') }));