-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #8 from DilsonHiga/east-asian-width-gen
Adding functions to compute the visual width of east asian characters
- Loading branch information
Showing
10 changed files
with
2,980 additions
and
139 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
|
||
# Ignore the example binaries | ||
examples/simple | ||
examples/visualWidth | ||
|
||
# Ignore the generated files | ||
generated-docs | ||
package/InternalGBPGen | ||
package/GraphemeTestGen | ||
package/InternalEmojiGen | ||
package/InternalEmojiGen | ||
package/InternalEAWGen |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
app [main] { | ||
pf: platform "https://github.com/roc-lang/basic-cli/releases/download/0.11.0/SY4WWMhWQ9NvQgvIthcv15AUeA7rAIJHAHgiaSHGhdY.tar.br", | ||
unicode: "../package/main.roc", # local path works inside this repo; elsewhere use a release URL (ends in tar.br), see github.com/roc-lang/unicode/releases | ||
} | ||
|
||
import pf.Stdout | ||
import unicode.CodePoint | ||
|
||
# Sample text whose display width is computed by `visualWidth` below. | ||
word = "世界" | ||
|
||
# Sum of `CodePoint.visualWidth` over every code point parsed from `word`, | ||
# or the parse error if the UTF-8 bytes of `word` cannot be decoded. | ||
visualWidth : Result U32 CodePoint.Utf8ParseErr | ||
visualWidth = | ||
    Str.toUtf8 word | ||
    |> CodePoint.parseUtf8 | ||
    |> Result.map \codePoints -> | ||
        codePoints | ||
        |> List.map CodePoint.visualWidth | ||
        |> List.sum | ||
|
||
# Print the computed width of `word`, or crash if `word` could not be parsed. | ||
main = | ||
    when visualWidth is | ||
        Err _ -> | ||
            crash "ERROR: Unable to parse $(word)!" | ||
        Ok width -> | ||
            Stdout.line "\n\nThe word $(word) will be displayed with the width of $(Num.toStr width) characters on most UIs.\n\n" | ||
|
||
# `word` holds two code points; their summed width is expected to be 4. | ||
expect visualWidth == Ok 4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
## WARNING This file is automatically generated. Do not edit it manually. ## | ||
module [eastAsianWidthProperty, eastAsianWidth] | ||
|
||
## Column width (1 or 2) of a code point, chosen from the tag returned by | ||
## `eastAsianWidthProperty`: F, W and A map to 2; H, N and Na map to 1. | ||
## NOTE(review): UAX #11 recommends treating Ambiguous (A) as narrow when no East Asian | ||
## context is known. Counting A as 2 also widens e.g. Greek (0x0391-0x03A9) and | ||
## Cyrillic (0x0410-0x044F) letters - confirm this is intended. | ||
eastAsianWidth : U32 -> U32 | ||
eastAsianWidth = \codePoint -> | ||
when eastAsianWidthProperty codePoint is | ||
F | W | A -> 2 | ||
H | N | Na -> 1 | ||
|
||
eastAsianWidthProperty = \cp -> if (0x0020 <= cp && cp <= 0x007E) || (0x00A2 <= cp && cp <= 0x00A3) || (0x00A5 <= cp && cp <= 0x00A6) || cp == 0x00AC || cp == 0x00AF || (0x27E6 <= cp && cp <= 0x27ED) || (0x2985 <= cp && cp <= 0x2986) then (Na) else if cp == 0x00A1 || cp == 0x00A4 || (0x00A7 <= cp && cp <= 0x00A8) || cp == 0x00AA || (0x00AD <= cp && cp <= 0x00AE) || (0x00B0 <= cp && cp <= 0x00B4) || (0x00B6 <= cp && cp <= 0x00BA) || (0x00BC <= cp && cp <= 0x00BF) || cp == 0x00C6 || cp == 0x00D0 || (0x00D7 <= cp && cp <= 0x00D8) || (0x00DE <= cp && cp <= 0x00E1) || cp == 0x00E6 || (0x00E8 <= cp && cp <= 0x00EA) || (0x00EC <= cp && cp <= 0x00ED) || cp == 0x00F0 || (0x00F2 <= cp && cp <= 0x00F3) || (0x00F7 <= cp && cp <= 0x00FA) || cp == 0x00FC || cp == 0x00FE || cp == 0x0101 || cp == 0x0111 || cp == 0x0113 || cp == 0x011B || (0x0126 <= cp && cp <= 0x0127) || cp == 0x012B || (0x0131 <= cp && cp <= 0x0133) || cp == 0x0138 || (0x013F <= cp && cp <= 0x0142) || cp == 0x0144 || (0x0148 <= cp && cp <= 0x014B) || cp == 0x014D || (0x0152 <= cp && cp <= 0x0153) || (0x0166 <= cp && cp <= 0x0167) || cp == 0x016B || cp == 0x01CE || cp == 0x01D0 || cp == 0x01D2 || cp == 0x01D4 || cp == 0x01D6 || cp == 0x01D8 || cp == 0x01DA || cp == 0x01DC || cp == 0x0251 || cp == 0x0261 || cp == 0x02C4 || cp == 0x02C7 || (0x02C9 <= cp && cp <= 0x02CB) || cp == 0x02CD || cp == 0x02D0 || (0x02D8 <= cp && cp <= 0x02DB) || cp == 0x02DD || cp == 0x02DF || (0x0300 <= cp && cp <= 0x036F) || (0x0391 <= cp && cp <= 0x03A1) || (0x03A3 <= cp && cp <= 0x03A9) || (0x03B1 <= cp && cp <= 0x03C1) || (0x03C3 <= cp && cp <= 0x03C9) || cp == 0x0401 || (0x0410 <= cp && cp <= 0x044F) || cp == 0x0451 || cp == 0x2010 || (0x2013 <= cp && cp <= 0x2016) || (0x2018 <= cp && cp <= 0x2019) || (0x201C <= cp && cp <= 0x201D) || (0x2020 <= cp && cp <= 0x2022) || (0x2024 <= cp && cp <= 0x2027) || cp == 0x2030 || (0x2032 <= cp && cp <= 0x2033) || cp == 0x2035 || cp == 0x203B || cp == 0x203E || cp == 0x2074 || cp == 0x207F || 
(0x2081 <= cp && cp <= 0x2084) || cp == 0x20AC || cp == 0x2103 || cp == 0x2105 || cp == 0x2109 || cp == 0x2113 || cp == 0x2116 || (0x2121 <= cp && cp <= 0x2122) || cp == 0x2126 || cp == 0x212B || (0x2153 <= cp && cp <= 0x2154) || (0x215B <= cp && cp <= 0x215E) || (0x2160 <= cp && cp <= 0x216B) || (0x2170 <= cp && cp <= 0x2179) || cp == 0x2189 || (0x2190 <= cp && cp <= 0x2199) || (0x21B8 <= cp && cp <= 0x21B9) || cp == 0x21D2 || cp == 0x21D4 || cp == 0x21E7 || cp == 0x2200 || (0x2202 <= cp && cp <= 0x2203) || (0x2207 <= cp && cp <= 0x2208) || cp == 0x220B || cp == 0x220F || cp == 0x2211 || cp == 0x2215 || cp == 0x221A || (0x221D <= cp && cp <= 0x2220) || cp == 0x2223 || cp == 0x2225 || (0x2227 <= cp && cp <= 0x222C) || cp == 0x222E || (0x2234 <= cp && cp <= 0x2237) || (0x223C <= cp && cp <= 0x223D) || cp == 0x2248 || cp == 0x224C || cp == 0x2252 || (0x2260 <= cp && cp <= 0x2261) || (0x2264 <= cp && cp <= 0x2267) || (0x226A <= cp && cp <= 0x226B) || (0x226E <= cp && cp <= 0x226F) || (0x2282 <= cp && cp <= 0x2283) || (0x2286 <= cp && cp <= 0x2287) || cp == 0x2295 || cp == 0x2299 || cp == 0x22A5 || cp == 0x22BF || cp == 0x2312 || (0x2460 <= cp && cp <= 0x24E9) || (0x24EB <= cp && cp <= 0x254B) || (0x2550 <= cp && cp <= 0x2573) || (0x2580 <= cp && cp <= 0x258F) || (0x2592 <= cp && cp <= 0x2595) || (0x25A0 <= cp && cp <= 0x25A1) || (0x25A3 <= cp && cp <= 0x25A9) || (0x25B2 <= cp && cp <= 0x25B3) || (0x25B6 <= cp && cp <= 0x25B7) || (0x25BC <= cp && cp <= 0x25BD) || (0x25C0 <= cp && cp <= 0x25C1) || (0x25C6 <= cp && cp <= 0x25C8) || cp == 0x25CB || (0x25CE <= cp && cp <= 0x25D1) || (0x25E2 <= cp && cp <= 0x25E5) || cp == 0x25EF || (0x2605 <= cp && cp <= 0x2606) || cp == 0x2609 || (0x260E <= cp && cp <= 0x260F) || cp == 0x261C || cp == 0x261E || cp == 0x2640 || cp == 0x2642 || (0x2660 <= cp && cp <= 0x2661) || (0x2663 <= cp && cp <= 0x2665) || (0x2667 <= cp && cp <= 0x266A) || (0x266C <= cp && cp <= 0x266D) || cp == 0x266F || (0x269E <= cp && cp <= 0x269F) || cp == 0x26BF 
|| (0x26C6 <= cp && cp <= 0x26CD) || (0x26CF <= cp && cp <= 0x26D3) || (0x26D5 <= cp && cp <= 0x26E1) || cp == 0x26E3 || (0x26E8 <= cp && cp <= 0x26E9) || (0x26EB <= cp && cp <= 0x26F1) || cp == 0x26F4 || (0x26F6 <= cp && cp <= 0x26F9) || (0x26FB <= cp && cp <= 0x26FC) || (0x26FE <= cp && cp <= 0x26FF) || cp == 0x273D || (0x2776 <= cp && cp <= 0x277F) || (0x2B56 <= cp && cp <= 0x2B59) || (0x3248 <= cp && cp <= 0x324F) || (0xE000 <= cp && cp <= 0xF8FF) || (0xFE00 <= cp && cp <= 0xFE0F) || cp == 0xFFFD || (0x1F100 <= cp && cp <= 0x1F10A) || (0x1F110 <= cp && cp <= 0x1F12D) || (0x1F130 <= cp && cp <= 0x1F169) || (0x1F170 <= cp && cp <= 0x1F18D) || (0x1F18F <= cp && cp <= 0x1F190) || (0x1F19B <= cp && cp <= 0x1F1AC) || (0xE0100 <= cp && cp <= 0xE01EF) || (0xF0000 <= cp && cp <= 0xFFFFD) || (0x100000 <= cp && cp <= 0x10FFFD) then (A) else if (0x1100 <= cp && cp <= 0x115F) || (0x231A <= cp && cp <= 0x231B) || (0x2329 <= cp && cp <= 0x232A) || (0x23E9 <= cp && cp <= 0x23EC) || cp == 0x23F0 || cp == 0x23F3 || (0x25FD <= cp && cp <= 0x25FE) || (0x2614 <= cp && cp <= 0x2615) || (0x2648 <= cp && cp <= 0x2653) || cp == 0x267F || cp == 0x2693 || cp == 0x26A1 || (0x26AA <= cp && cp <= 0x26AB) || (0x26BD <= cp && cp <= 0x26BE) || (0x26C4 <= cp && cp <= 0x26C5) || cp == 0x26CE || cp == 0x26D4 || cp == 0x26EA || (0x26F2 <= cp && cp <= 0x26F3) || cp == 0x26F5 || cp == 0x26FA || cp == 0x26FD || cp == 0x2705 || (0x270A <= cp && cp <= 0x270B) || cp == 0x2728 || cp == 0x274C || cp == 0x274E || (0x2753 <= cp && cp <= 0x2755) || cp == 0x2757 || (0x2795 <= cp && cp <= 0x2797) || cp == 0x27B0 || cp == 0x27BF || (0x2B1B <= cp && cp <= 0x2B1C) || cp == 0x2B50 || cp == 0x2B55 || (0x2E80 <= cp && cp <= 0x2E99) || (0x2E9B <= cp && cp <= 0x2EF3) || (0x2F00 <= cp && cp <= 0x2FD5) || (0x2FF0 <= cp && cp <= 0x2FFF) || (0x3001 <= cp && cp <= 0x303E) || (0x3041 <= cp && cp <= 0x3096) || (0x3099 <= cp && cp <= 0x30FF) || (0x3105 <= cp && cp <= 0x312F) || (0x3131 <= cp && cp <= 0x318E) || (0x3190 <= cp 
&& cp <= 0x31E3) || (0x31EF <= cp && cp <= 0x321E) || (0x3220 <= cp && cp <= 0x3247) || (0x3250 <= cp && cp <= 0x4DBF) || (0x4E00 <= cp && cp <= 0xA48C) || (0xA490 <= cp && cp <= 0xA4C6) || (0xA960 <= cp && cp <= 0xA97C) || (0xAC00 <= cp && cp <= 0xD7A3) || (0xF900 <= cp && cp <= 0xFAFF) || (0xFE10 <= cp && cp <= 0xFE19) || (0xFE30 <= cp && cp <= 0xFE52) || (0xFE54 <= cp && cp <= 0xFE66) || (0xFE68 <= cp && cp <= 0xFE6B) || (0x16FE0 <= cp && cp <= 0x16FE4) || (0x16FF0 <= cp && cp <= 0x16FF1) || (0x17000 <= cp && cp <= 0x187F7) || (0x18800 <= cp && cp <= 0x18CD5) || (0x18D00 <= cp && cp <= 0x18D08) || (0x1AFF0 <= cp && cp <= 0x1AFF3) || (0x1AFF5 <= cp && cp <= 0x1AFFB) || (0x1AFFD <= cp && cp <= 0x1AFFE) || (0x1B000 <= cp && cp <= 0x1B122) || cp == 0x1B132 || (0x1B150 <= cp && cp <= 0x1B152) || cp == 0x1B155 || (0x1B164 <= cp && cp <= 0x1B167) || (0x1B170 <= cp && cp <= 0x1B2FB) || cp == 0x1F004 || cp == 0x1F0CF || cp == 0x1F18E || (0x1F191 <= cp && cp <= 0x1F19A) || (0x1F200 <= cp && cp <= 0x1F202) || (0x1F210 <= cp && cp <= 0x1F23B) || (0x1F240 <= cp && cp <= 0x1F248) || (0x1F250 <= cp && cp <= 0x1F251) || (0x1F260 <= cp && cp <= 0x1F265) || (0x1F300 <= cp && cp <= 0x1F320) || (0x1F32D <= cp && cp <= 0x1F335) || (0x1F337 <= cp && cp <= 0x1F37C) || (0x1F37E <= cp && cp <= 0x1F393) || (0x1F3A0 <= cp && cp <= 0x1F3CA) || (0x1F3CF <= cp && cp <= 0x1F3D3) || (0x1F3E0 <= cp && cp <= 0x1F3F0) || cp == 0x1F3F4 || (0x1F3F8 <= cp && cp <= 0x1F43E) || cp == 0x1F440 || (0x1F442 <= cp && cp <= 0x1F4FC) || (0x1F4FF <= cp && cp <= 0x1F53D) || (0x1F54B <= cp && cp <= 0x1F54E) || (0x1F550 <= cp && cp <= 0x1F567) || cp == 0x1F57A || (0x1F595 <= cp && cp <= 0x1F596) || cp == 0x1F5A4 || (0x1F5FB <= cp && cp <= 0x1F64F) || (0x1F680 <= cp && cp <= 0x1F6C5) || cp == 0x1F6CC || (0x1F6D0 <= cp && cp <= 0x1F6D2) || (0x1F6D5 <= cp && cp <= 0x1F6D7) || (0x1F6DC <= cp && cp <= 0x1F6DF) || (0x1F6EB <= cp && cp <= 0x1F6EC) || (0x1F6F4 <= cp && cp <= 0x1F6FC) || (0x1F7E0 <= cp && cp <= 0x1F7EB) 
|| cp == 0x1F7F0 || (0x1F90C <= cp && cp <= 0x1F93A) || (0x1F93C <= cp && cp <= 0x1F945) || (0x1F947 <= cp && cp <= 0x1F9FF) || (0x1FA70 <= cp && cp <= 0x1FA7C) || (0x1FA80 <= cp && cp <= 0x1FA88) || (0x1FA90 <= cp && cp <= 0x1FABD) || (0x1FABF <= cp && cp <= 0x1FAC5) || (0x1FACE <= cp && cp <= 0x1FADB) || (0x1FAE0 <= cp && cp <= 0x1FAE8) || (0x1FAF0 <= cp && cp <= 0x1FAF8) || (0x20000 <= cp && cp <= 0x2FFFD) || (0x30000 <= cp && cp <= 0x3FFFD) then (W) else if cp == 0x20A9 || (0xFF61 <= cp && cp <= 0xFFBE) || (0xFFC2 <= cp && cp <= 0xFFC7) || (0xFFCA <= cp && cp <= 0xFFCF) || (0xFFD2 <= cp && cp <= 0xFFD7) || (0xFFDA <= cp && cp <= 0xFFDC) || (0xFFE8 <= cp && cp <= 0xFFEE) then (H) else if cp == 0x3000 || (0xFF01 <= cp && cp <= 0xFF60) || (0xFFE0 <= cp && cp <= 0xFFE6) then (F) else N | ||
|
||
# Spot checks of `eastAsianWidthProperty`, one group per tag; inputs are decimal code points. | ||
# A: 198 = 0x00C6, 9449 = 0x24E9, 9742 = 0x260E | ||
expect eastAsianWidthProperty 198 == A | ||
|
||
expect eastAsianWidthProperty 9449 == A | ||
|
||
expect eastAsianWidthProperty 9742 == A | ||
|
||
# H: 8361 = 0x20A9, 65379 = 0xFF63 | ||
expect eastAsianWidthProperty 8361 == H | ||
|
||
expect eastAsianWidthProperty 65379 == H | ||
|
||
# Na: 162 = 0x00A2, 165 = 0x00A5, 172 = 0x00AC | ||
expect eastAsianWidthProperty 162 == Na | ||
|
||
expect eastAsianWidthProperty 165 == Na | ||
|
||
expect eastAsianWidthProperty 172 == Na | ||
|
||
# W: 12295 = 0x3007, 12321 = 0x3021, 12329 = 0x3029, 65092 = 0xFE44, 65097 = 0xFE49, 194564 = 0x2F804 | ||
expect eastAsianWidthProperty 12295 == W | ||
|
||
expect eastAsianWidthProperty 12321 == W | ||
|
||
expect eastAsianWidthProperty 12329 == W | ||
|
||
expect eastAsianWidthProperty 65092 == W | ||
|
||
expect eastAsianWidthProperty 65097 == W | ||
|
||
expect eastAsianWidthProperty 194564 == W | ||
|
||
# F: 65313 = 0xFF21, 65510 = 0xFFE6 | ||
expect eastAsianWidthProperty 65313 == F | ||
|
||
expect eastAsianWidthProperty 65510 == F | ||
|
||
# N (the fall-through branch): 70200 = 0x11238, 70419 = 0x11313, 72330 = 0x11A8A | ||
expect eastAsianWidthProperty 70200 == N | ||
|
||
expect eastAsianWidthProperty 70419 == N | ||
|
||
expect eastAsianWidthProperty 72330 == N |
Oops, something went wrong.