@@ -13,8 +13,10 @@ import (
13
13
"flag"
14
14
"fmt"
15
15
"log"
16
+ "maps"
16
17
"os"
17
18
"regexp"
19
+ "slices"
18
20
"sort"
19
21
"strings"
20
22
"unicode"
@@ -90,13 +92,15 @@ func println(args ...interface{}) {
90
92
// category is keyed by Unicode general-category name.
// NOTE(review): only the merged names plus LC and Cn are listed in this
// literal; two-letter names such as Nd and Lu are presumably added elsewhere
// while the data files are loaded — confirm against the rest of the file.
var category = map [string ]bool {
91
93
// Nd Lu etc.
92
94
// We use one-character names to identify merged categories
93
- "L" : true , // Lu Ll Lt Lm Lo
94
- "P" : true , // Pc Pd Ps Pe Pu Pf Po
95
- "M" : true , // Mn Mc Me
96
- "N" : true , // Nd Nl No
97
- "S" : true , // Sm Sc Sk So
98
- "Z" : true , // Zs Zl Zp
99
- "C" : true , // Cc Cf Cs Co Cn
95
+ "L" : true , // Lu Ll Lt Lm Lo
96
+ "LC" : true , // Lu Ll Lt
97
+ "P" : true , // Pc Pd Ps Pe Pi Pf Po
98
+ "M" : true , // Mn Mc Me
99
+ "N" : true , // Nd Nl No
100
+ "S" : true , // Sm Sc Sk So
101
+ "Z" : true , // Zs Zl Zp
102
+ "C" : true , // Cc Cf Cs Co Cn
103
+ "Cn" : true , // unassigned
100
104
}
101
105
102
106
// This contains only the properties we're interested in.
@@ -149,6 +153,9 @@ func categoryOp(code rune, class uint8) bool {
149
153
}
150
154
151
155
func loadChars () {
156
+ for code := range chars {
157
+ chars [code ].category = "Cn" // unassigned
158
+ }
152
159
ucd .Parse (gen .OpenUCDFile ("UnicodeData.txt" ), func (p * ucd.Parser ) {
153
160
c := Char {codePoint : p .Rune (0 )}
154
161
@@ -201,6 +208,7 @@ func loadCasefold() {
201
208
}
202
209
203
210
var categoryMapping = map [string ]string {
211
+ "LC" : "Letter, cased: Ll | Lt | Lu" ,
204
212
"Lu" : "Letter, uppercase" ,
205
213
"Ll" : "Letter, lowercase" ,
206
214
"Lt" : "Letter, titlecase" ,
@@ -257,6 +265,7 @@ func printCategories() {
257
265
printf ("\t %q: %s,\n " , k , k )
258
266
}
259
267
print ("}\n \n " )
268
+ printCategoryAliases ()
260
269
}
261
270
262
271
decl := make (sort.StringSlice , len (list ))
@@ -315,14 +324,14 @@ func printCategories() {
315
324
}
316
325
decl [ndecl ] = varDecl
317
326
ndecl ++
327
+ match := func (cat string ) bool { return cat == name }
318
328
if len (name ) == 1 { // unified categories
319
- dumpRange (
320
- "_" + name ,
321
- func (code rune ) bool { return categoryOp (code , name [0 ]) })
322
- continue
329
+ match = func (cat string ) bool { return strings .HasPrefix (cat , name ) }
323
330
}
324
- dumpRange ("_" + name ,
325
- func (code rune ) bool { return chars [code ].category == name })
331
+ if name == "LC" { // special unified category
332
+ match = func (cat string ) bool { return cat == "Ll" || cat == "Lt" || cat == "Lu" }
333
+ }
334
+ dumpRange ("_" + name , func (code rune ) bool { return match (chars [code ].category ) })
326
335
}
327
336
decl .Sort ()
328
337
println ("// These variables have type *RangeTable." )
@@ -333,6 +342,35 @@ func printCategories() {
333
342
print (")\n \n " )
334
343
}
335
344
345
// printCategoryAliases emits the declaration of the CategoryAliases map.
// It reads the "gc" rows of PropertyValueAliases.txt, treats field 1 as the
// category name and fields 2 and 3 (when field 3 is non-empty) as aliases of
// that name, and prints the resulting alias -> name pairs sorted by alias.
+ func printCategoryAliases () {
346
// Set of category names returned by allCategories, used only to flag
// names in the aliases file that the generator does not know about.
+ known := make (map [string ]bool )
347
+ for _ , name := range allCategories () {
348
+ known [name ] = true
349
+ }
350
+
351
// table maps each alias to the category name it stands for.
+ table := make (map [string ]string )
352
+ ucd .Parse (gen .OpenUCDFile ("PropertyValueAliases.txt" ), func (p * ucd.Parser ) {
353
// Only rows whose first field is "gc" are considered.
+ if p .String (0 ) != "gc" {
354
+ return
355
+ }
356
+ name := p .String (1 )
357
// An unknown name is only logged; its aliases are still recorded below.
+ if ! known [name ] {
358
+ logger .Print ("unknown category: " , name )
359
+ }
360
+ table [p .String (2 )] = name
361
// Field 3 is a second alias and is skipped when empty.
+ if a := p .String (3 ); a != "" {
362
+ table [a ] = name
363
+ }
364
+ })
365
+
366
+ println ("// CategoryAliases maps category aliases to standard category names." )
367
+ println ("var CategoryAliases = map[string]string{" )
368
// Iterate over sorted keys so the emitted entries appear in a fixed order.
+ for _ , name := range slices .Sorted (maps .Keys (table )) {
369
+ printf ("\t %q: %q,\n " , name , table [name ])
370
+ }
371
+ print ("}\n \n " )
372
+ }
373
+
336
374
// Op is a predicate over a code point; dumpRange takes one as its inCategory argument.
type Op func (code rune ) bool
337
375
338
376
func dumpRange (name string , inCategory Op ) {
0 commit comments