From cdf02e0cae3e27f8dae7acab14d87daa962eda1f Mon Sep 17 00:00:00 2001 From: Apple OSS Distributions <91980991+AppleOSSDistributions@users.noreply.github.com> Date: Tue, 24 Sep 2024 17:44:43 +0000 Subject: [PATCH] adv_cmds-231 Imported from adv_cmds-231.tar.gz --- .upstream_base_commits | 15 + adv_cmds.xcodeproj/project.pbxproj | 620 +++++--- colldef/parse.y | 2 +- genwrap/genwrap.8 | 119 +- genwrap/genwrap.c | 415 ++++- genwrap/genwrap.h | 6 +- genwrap/genwrap.y | 120 +- genwrap/lex.l | 1 + genwrap/tests/analytics_redacted.wrapper | 8 - genwrap/tests/analytics_redacted_a.out | 2 - genwrap/tests/analytics_redacted_b.out | 3 - genwrap/tests/analytics_redacted_c.out | 4 - genwrap/tests/analytics_redacted_d.out | 4 - genwrap/tests/analytics_redacted_e.out | 5 - genwrap/tests/analytics_simple.wrapper | 2 + genwrap/tests/analytics_simple_a.out | 2 +- genwrap/tests/analytics_simple_b.out | 3 +- genwrap/tests/analytics_simple_c.out | 4 +- genwrap/tests/analytics_simple_d.out | 3 +- genwrap/tests/analytics_simple_e.out | 4 +- genwrap/tests/analytics_simple_f.out | 5 +- genwrap/tests/analytics_simple_g.out | 4 + .../arg_selector_complex_logonly_args.wrapper | 11 + genwrap/tests/genwrap_test.sh | 164 +- genwrap/wrapper-head.c | 27 + genwrap/wrapper-tail.c | 571 +++++-- localedef/README | 11 + localedef/charmap.c | 404 +++++ localedef/charmap.p-1 | 23 - localedef/charmap.p-2 | 115 -- localedef/charmap.test | 38 - localedef/collate.c | 1329 +++++++++++++++++ localedef/ctype.c | 598 ++++++++ localedef/def.a55 | 6 - localedef/def.p-1 | 157 -- localedef/def.p-2 | 280 ---- localedef/libc/collate.h | 180 +++ localedef/libc/lmessages.h | 44 + localedef/libc/lmonetary.h | 66 + localedef/libc/lnumeric.h | 50 + localedef/libc/runefile.h | 117 ++ localedef/libc/setlocale.h | 43 + localedef/libc/timelocal.h | 57 + localedef/libc/xlocale_private.h | 266 ++++ localedef/localedef.1 | 320 +++- localedef/localedef.c | 731 +++++++++ localedef/localedef.h | 203 +++ localedef/localedef.pl | 1166 
--------------- localedef/localedef.plist.part | 31 + localedef/messages.c | 123 ++ localedef/monetary.c | 254 ++++ localedef/numeric.c | 117 ++ localedef/parser.y | 776 ++++++++++ localedef/scanner.c | 942 ++++++++++++ localedef/sys/tree.h | 1069 +++++++++++++ localedef/time.c | 302 ++++ localedef/wide.c | 664 ++++++++ tests/adv_cmds.plist.in | 75 + 58 files changed, 10361 insertions(+), 2320 deletions(-) delete mode 100644 genwrap/tests/analytics_redacted.wrapper delete mode 100644 genwrap/tests/analytics_redacted_a.out delete mode 100644 genwrap/tests/analytics_redacted_b.out delete mode 100644 genwrap/tests/analytics_redacted_c.out delete mode 100644 genwrap/tests/analytics_redacted_d.out delete mode 100644 genwrap/tests/analytics_redacted_e.out create mode 100644 genwrap/tests/analytics_simple_g.out create mode 100644 genwrap/tests/arg_selector_complex_logonly_args.wrapper create mode 100644 localedef/README create mode 100644 localedef/charmap.c delete mode 100644 localedef/charmap.p-1 delete mode 100644 localedef/charmap.p-2 delete mode 100644 localedef/charmap.test create mode 100644 localedef/collate.c create mode 100644 localedef/ctype.c delete mode 100644 localedef/def.a55 delete mode 100644 localedef/def.p-1 delete mode 100644 localedef/def.p-2 create mode 100644 localedef/libc/collate.h create mode 100644 localedef/libc/lmessages.h create mode 100644 localedef/libc/lmonetary.h create mode 100644 localedef/libc/lnumeric.h create mode 100644 localedef/libc/runefile.h create mode 100644 localedef/libc/setlocale.h create mode 100644 localedef/libc/timelocal.h create mode 100644 localedef/libc/xlocale_private.h create mode 100644 localedef/localedef.c create mode 100644 localedef/localedef.h delete mode 100644 localedef/localedef.pl create mode 100644 localedef/localedef.plist.part create mode 100644 localedef/messages.c create mode 100644 localedef/monetary.c create mode 100644 localedef/numeric.c create mode 100644 localedef/parser.y create mode 100644 
localedef/scanner.c create mode 100644 localedef/sys/tree.h create mode 100644 localedef/time.c create mode 100644 localedef/wide.c diff --git a/.upstream_base_commits b/.upstream_base_commits index c4a444d..3a340e6 100644 --- a/.upstream_base_commits +++ b/.upstream_base_commits @@ -23,6 +23,21 @@ mklocale/lex.l freebsd usr.bin/mklocale/lex.l 8a16b7a18f5d0b031f09832fd7752fba71 mklocale/mklocale.1 freebsd usr.bin/mklocale/mklocale.1 fbbd9655e5107c68e4e0146ff22b73d7350475bc mklocale/yacc.y freebsd usr.bin/mklocale/yacc.y 8a16b7a18f5d0b031f09832fd7752fba717e2a97 +localedef/README freebsd usr.bin/localedef/README 057ca2d4372f94cc0911c140cd4d8f117fe9b26a +localedef/charmap.c freebsd usr.bin/localedef/charmap.c 3a7ffe206ce7e4741fae26432f6d6e5159207f45 +localedef/collate.c freebsd usr.bin/localedef/collate.c 3141e51d2e38fa7b9e6d81477dfa860d087c514d +localedef/ctype.c freebsd usr.bin/localedef/ctype.c 1d386b48a555f61cb7325543adbbb5c3f3407a66 +localedef/localedef.1 freebsd usr.bin/localedef/localedef.1 fa9896e082a1046ff4fbc75fcba4d18d1f2efc19 +localedef/localedef.c freebsd usr.bin/localedef/localedef.c 3141e51d2e38fa7b9e6d81477dfa860d087c514d +localedef/localedef.h freebsd usr.bin/localedef/localedef.h 3141e51d2e38fa7b9e6d81477dfa860d087c514d +localedef/messages.c freebsd usr.bin/localedef/messages.c 1d386b48a555f61cb7325543adbbb5c3f3407a66 +localedef/monetary.c freebsd usr.bin/localedef/monetary.c 3141e51d2e38fa7b9e6d81477dfa860d087c514d +localedef/numeric.c freebsd usr.bin/localedef/numeric.c 3141e51d2e38fa7b9e6d81477dfa860d087c514d +localedef/parser.y freebsd usr.bin/localedef/parser.y b3e7694832e81d7a904a10f525f8797b753bf0d3 +localedef/scanner.c freebsd usr.bin/localedef/scanner.c 1d386b48a555f61cb7325543adbbb5c3f3407a66 +localedef/time.c freebsd usr.bin/localedef/time.c 3141e51d2e38fa7b9e6d81477dfa860d087c514d +localedef/wide.c freebsd usr.bin/localedef/wide.c 1d386b48a555f61cb7325543adbbb5c3f3407a66 + pkill/pkill.1 freebsd bin/pkill/pkill.1 
3d17b350e5c949d0d5a42aa907034a2a22f7312b pkill/pkill.c freebsd bin/pkill/pkill.c 3610bffd2888b65389a46e8d075ce8e1fc83af4c pkill/tests/pgrep-_f_test.sh freebsd bin/pkill/tests/pgrep-_f_test.sh 822057bfbbbcdb722a7ccd0e40d6b7e82b1d83f7 diff --git a/adv_cmds.xcodeproj/project.pbxproj b/adv_cmds.xcodeproj/project.pbxproj index af68d7d..e844681 100644 --- a/adv_cmds.xcodeproj/project.pbxproj +++ b/adv_cmds.xcodeproj/project.pbxproj @@ -7,17 +7,16 @@ objects = { /* Begin PBXAggregateTarget section */ - FD0D7F98108FE550004F2A1C /* localedef */ = { + 2A485F732B5DAEBE009D80F8 /* Host */ = { isa = PBXAggregateTarget; - buildConfigurationList = FD0D7FA2108FE56E004F2A1C /* Build configuration list for PBXAggregateTarget "localedef" */; + buildConfigurationList = 2A485F752B5DAEBE009D80F8 /* Build configuration list for PBXAggregateTarget "Host" */; buildPhases = ( - FD0D7F97108FE550004F2A1C /* ShellScript */, - FD0D7FA5108FE5AA004F2A1C /* Install man1 */, ); dependencies = ( + 2A485F902B641B2A009D80F8 /* PBXTargetDependency */, ); - name = localedef; - productName = localedef2; + name = Host; + productName = Host; }; FD201DCD14369D0C00906237 /* pgrep */ = { isa = PBXAggregateTarget; @@ -66,7 +65,7 @@ 2A7E4099297A4741003942C8 /* PBXTargetDependency */, FDF2775D0FC6102600D7A3C6 /* PBXTargetDependency */, FDF2775B0FC6102600D7A3C6 /* PBXTargetDependency */, - FD0D7FA9108FE5C3004F2A1C /* PBXTargetDependency */, + 2A15A1D12B45C53D00A3DA9E /* PBXTargetDependency */, FDF277570FC6102600D7A3C6 /* PBXTargetDependency */, FDF277530FC6102600D7A3C6 /* PBXTargetDependency */, FD201DD414369D6300906237 /* PBXTargetDependency */, @@ -76,9 +75,9 @@ FDF2776D0FC6102B00D7A3C6 /* PBXTargetDependency */, FDF2776B0FC6102B00D7A3C6 /* PBXTargetDependency */, FDF277690FC6102B00D7A3C6 /* PBXTargetDependency */, - 2A7E4129297A6BD7003942C8 /* PBXTargetDependency */, 2A7E411C297A647D003942C8 /* PBXTargetDependency */, 2A7E40D0297A5223003942C8 /* PBXTargetDependency */, + 2A9E2AAD2B19941600F5F14D /* 
PBXTargetDependency */, 2A96BE7C29839DA800F1705B /* PBXTargetDependency */, 2A7E40D2297A5227003942C8 /* PBXTargetDependency */, 2A7E4104297A5D67003942C8 /* PBXTargetDependency */, @@ -102,6 +101,17 @@ 2A114C9429674FF3005099EA /* genwrap.y in Sources */ = {isa = PBXBuildFile; fileRef = 2A114C9329674FF3005099EA /* genwrap.y */; }; 2A190210296C04C4008E5A05 /* genwrap.c in Sources */ = {isa = PBXBuildFile; fileRef = 2A19020F296C04C4008E5A05 /* genwrap.c */; }; 2A190214296DDC68008E5A05 /* genwrap.8 in Install Section 8 Manpages */ = {isa = PBXBuildFile; fileRef = 2A190213296DDC5C008E5A05 /* genwrap.8 */; }; + 2A485F7E2B641A27009D80F8 /* scanner.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03042A2EE86700440D64 /* scanner.c */; }; + 2A485F7F2B641A27009D80F8 /* charmap.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA02FB2A2EE84E00440D64 /* charmap.c */; }; + 2A485F802B641A27009D80F8 /* parser.c in Sources */ = {isa = PBXBuildFile; fileRef = 2A5628D32A73835C0083A770 /* parser.c */; }; + 2A485F812B641A27009D80F8 /* collate.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA02FC2A2EE84E00440D64 /* collate.c */; }; + 2A485F822B641A27009D80F8 /* ctype.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA02FD2A2EE84E00440D64 /* ctype.c */; }; + 2A485F832B641A27009D80F8 /* localedef.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA02FE2A2EE86700440D64 /* localedef.c */; }; + 2A485F842B641A27009D80F8 /* messages.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03012A2EE86700440D64 /* messages.c */; }; + 2A485F852B641A27009D80F8 /* monetary.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03082A2EE86700440D64 /* monetary.c */; }; + 2A485F862B641A27009D80F8 /* numeric.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03052A2EE86700440D64 /* numeric.c */; }; + 2A485F872B641A27009D80F8 /* time.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03002A2EE86700440D64 /* time.c */; }; + 2A485F882B641A27009D80F8 /* wide.c in Sources */ = {isa = PBXBuildFile; 
fileRef = 2AFA03062A2EE86700440D64 /* wide.c */; }; 2A51186227E442D30059F4ED /* k_flag_posix_messages.out in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A51185F27E442440059F4ED /* k_flag_posix_messages.out */; }; 2A51186327E442D50059F4ED /* k_flag_posix_monetary.out in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A51185A27E442440059F4ED /* k_flag_posix_monetary.out */; }; 2A51186427E442D80059F4ED /* k_flag_posix_numeric.out in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A51185D27E442440059F4ED /* k_flag_posix_numeric.out */; }; @@ -126,6 +136,7 @@ 2A51188027E443900059F4ED /* pgrep-t_test.sh in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A51184827E442190059F4ED /* pgrep-t_test.sh */; }; 2A51188227E443900059F4ED /* pgrep-v_test.sh in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A51184D27E4421A0059F4ED /* pgrep-v_test.sh */; }; 2A51188327E443900059F4ED /* pgrep-x_test.sh in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A51184327E442190059F4ED /* pgrep-x_test.sh */; }; + 2A5628D42A73835C0083A770 /* parser.c in Sources */ = {isa = PBXBuildFile; fileRef = 2A5628D32A73835C0083A770 /* parser.c */; }; 2A688B002A46200A00F211FD /* env_selector_addarg.c in Sources */ = {isa = PBXBuildFile; fileRef = 2A688AFF2A46200A00F211FD /* env_selector_addarg.c */; }; 2A7E409E297A4ACD003942C8 /* lex.l in Sources */ = {isa = PBXBuildFile; fileRef = 2A114C9129674DB9005099EA /* lex.l */; }; 2A7E409F297A4ACD003942C8 /* genwrap.y in Sources */ = {isa = PBXBuildFile; fileRef = 2A114C9329674FF3005099EA /* genwrap.y */; }; @@ -138,12 +149,6 @@ 2A7E410B297A5EBD003942C8 /* genwrap_test.sh in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E4109297A5EA2003942C8 /* genwrap_test.sh */; }; 2A7E410C297A6224003942C8 /* simple_shim.wrapper in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E4093297A4593003942C8 /* simple_shim.wrapper */; }; 2A7E411A297A6457003942C8 /* analytics_simple.c in Sources */ = {isa = 
PBXBuildFile; fileRef = 2A7E4119297A6457003942C8 /* analytics_simple.c */; }; - 2A7E412B297A6C06003942C8 /* analytics_redacted.c in Sources */ = {isa = PBXBuildFile; fileRef = 2A7E412A297A6C06003942C8 /* analytics_redacted.c */; }; - 2A7E4132297A6E49003942C8 /* analytics_redacted_a.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E412D297A6E1D003942C8 /* analytics_redacted_a.out */; }; - 2A7E4133297A6E49003942C8 /* analytics_redacted_b.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E412C297A6E1D003942C8 /* analytics_redacted_b.out */; }; - 2A7E4134297A6E49003942C8 /* analytics_redacted_c.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E412E297A6E1D003942C8 /* analytics_redacted_c.out */; }; - 2A7E4135297A6E49003942C8 /* analytics_redacted_d.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E412F297A6E1D003942C8 /* analytics_redacted_d.out */; }; - 2A7E4136297A6E49003942C8 /* analytics_redacted_e.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E4130297A6E1D003942C8 /* analytics_redacted_e.out */; }; 2A7E413D297A6E7F003942C8 /* analytics_simple_a.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E413C297A6E70003942C8 /* analytics_simple_a.out */; }; 2A7E413E297A6E7F003942C8 /* analytics_simple_b.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E413B297A6E70003942C8 /* analytics_simple_b.out */; }; 2A7E413F297A6E7F003942C8 /* analytics_simple_c.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A7E413A297A6E70003942C8 /* analytics_simple_c.out */; }; @@ -157,6 +162,7 @@ 2A7F779227B30E9000CACBDB /* ps.c in Sources */ = {isa = PBXBuildFile; fileRef = FDF2772F0FC60FDF00D7A3C6 /* ps.c */; }; 2A7F779327B30E9000CACBDB /* tasks.c in Sources */ = {isa = PBXBuildFile; fileRef = FDF277310FC60FDF00D7A3C6 /* tasks.c */; }; 2A85AF27280639D000F493F9 /* 91596308_test.sh in CopyFiles */ = {isa = PBXBuildFile; fileRef = 2A85AF26280639C900F493F9 /* 91596308_test.sh */; }; + 
2A941CA72BDE2BD0003C7827 /* analytics_simple_g.out in Copy Test Files */ = {isa = PBXBuildFile; fileRef = 2A941CA62BDE2BC8003C7827 /* analytics_simple_g.out */; }; 2A96BE7A29839D5A00F1705B /* arg_selector_complex_logonly.c in Sources */ = {isa = PBXBuildFile; fileRef = 2A96BE7929839D5A00F1705B /* arg_selector_complex_logonly.c */; }; 2A96BED829A064A000F1705B /* 33386332_test.sh in CopyFiles */ = {isa = PBXBuildFile; fileRef = 2A96BED729A0649B00F1705B /* 33386332_test.sh */; }; 2A9C8A2A29C8F65E00416E6B /* pgrep-_p_test.sh in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A9C8A2329C8F58200416E6B /* pgrep-_p_test.sh */; }; @@ -169,8 +175,19 @@ 2A9C8A3429C8F87300416E6B /* pkill-_lf_test.sh in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A9C8A2729C8F60E00416E6B /* pkill-_lf_test.sh */; }; 2A9C8A4129C8F93600416E6B /* spin_helper.c in Sources */ = {isa = PBXBuildFile; fileRef = 2A9C8A4029C8F93600416E6B /* spin_helper.c */; }; 2A9C8A4529C8FB4900416E6B /* pgrep-f_test.sh in Install Test Files */ = {isa = PBXBuildFile; fileRef = 2A9C8A4429C8FB4900416E6B /* pgrep-f_test.sh */; }; + 2A9E2AAB2B198F0900F5F14D /* arg_selector_complex_logonly_args.c in Sources */ = {isa = PBXBuildFile; fileRef = 2A9E2AA72B198C7E00F5F14D /* arg_selector_complex_logonly_args.c */; }; + 2ABFB0FC2A7A1750008292A6 /* localedef.1 in Install man1 */ = {isa = PBXBuildFile; fileRef = 2AFA03022A2EE86700440D64 /* localedef.1 */; }; + 2AFA03132A2EE8D000440D64 /* charmap.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA02FB2A2EE84E00440D64 /* charmap.c */; }; + 2AFA03142A2EE8D400440D64 /* collate.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA02FC2A2EE84E00440D64 /* collate.c */; }; + 2AFA03152A2EE8D700440D64 /* ctype.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA02FD2A2EE84E00440D64 /* ctype.c */; }; + 2AFA03162A2EE8DB00440D64 /* localedef.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA02FE2A2EE86700440D64 /* localedef.c */; }; + 2AFA03172A2EE8DF00440D64 /* messages.c 
in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03012A2EE86700440D64 /* messages.c */; }; + 2AFA03182A2EE8E300440D64 /* monetary.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03082A2EE86700440D64 /* monetary.c */; }; + 2AFA03192A2EE8E700440D64 /* numeric.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03052A2EE86700440D64 /* numeric.c */; }; + 2AFA031A2A2EE8EB00440D64 /* scanner.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03042A2EE86700440D64 /* scanner.c */; }; + 2AFA031B2A2EE8EE00440D64 /* time.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03002A2EE86700440D64 /* time.c */; }; + 2AFA031C2A2EE8F200440D64 /* wide.c in Sources */ = {isa = PBXBuildFile; fileRef = 2AFA03062A2EE86700440D64 /* wide.c */; }; 32B89590292CD2B500B9F9DD /* libxo.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 32A3CA9829251B5D00FABB5C /* libxo.tbd */; }; - FD0D7FA4108FE58C004F2A1C /* localedef.1 in Install man1 */ = {isa = PBXBuildFile; fileRef = FDF277140FC60FDF00D7A3C6 /* localedef.1 */; }; FD201DC214369B4200906237 /* pkill.c in Sources */ = {isa = PBXBuildFile; fileRef = FD201DB014369AD000906237 /* pkill.c */; }; FD201DC314369B4600906237 /* pkill.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = FD201DAF14369AD000906237 /* pkill.1 */; }; FDCD383B143BC63000AB81C6 /* libsysmon.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = FDCD383A143BC63000AB81C6 /* libsysmon.dylib */; }; @@ -223,6 +240,20 @@ /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ + 2A15A1D02B45C53D00A3DA9E /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = FDF276430FC60E9000D7A3C6 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 2AFA030C2A2EE8AB00440D64; + remoteInfo = localedef; + }; + 2A485F8F2B641B2A009D80F8 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = FDF276430FC60E9000D7A3C6 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 2A485F7B2B641A27009D80F8; + 
remoteInfo = localedef_host; + }; 2A688AF62A461F0100F211FD /* PBXContainerItemProxy */ = { isa = PBXContainerItemProxy; containerPortal = FDF276430FC60E9000D7A3C6 /* Project object */; @@ -328,20 +359,6 @@ remoteGlobalIDString = 2A7E410D297A63E0003942C8; remoteInfo = analytics_simple; }; - 2A7E411F297A6BA1003942C8 /* PBXContainerItemProxy */ = { - isa = PBXContainerItemProxy; - containerPortal = FDF276430FC60E9000D7A3C6 /* Project object */; - proxyType = 1; - remoteGlobalIDString = 2A7E409C297A4ACD003942C8; - remoteInfo = genwrap_static; - }; - 2A7E4128297A6BD7003942C8 /* PBXContainerItemProxy */ = { - isa = PBXContainerItemProxy; - containerPortal = FDF276430FC60E9000D7A3C6 /* Project object */; - proxyType = 1; - remoteGlobalIDString = 2A7E411D297A6BA1003942C8; - remoteInfo = analytics_redacted; - }; 2A7F779A27B30ED600CACBDB /* PBXContainerItemProxy */ = { isa = PBXContainerItemProxy; containerPortal = FDF276430FC60E9000D7A3C6 /* Project object */; @@ -377,12 +394,19 @@ remoteGlobalIDString = 2A9C8A3529C8F8EF00416E6B; remoteInfo = spin_helper; }; - FD0D7FA8108FE5C3004F2A1C /* PBXContainerItemProxy */ = { + 2A9E2A9E2B198AE100F5F14D /* PBXContainerItemProxy */ = { isa = PBXContainerItemProxy; containerPortal = FDF276430FC60E9000D7A3C6 /* Project object */; proxyType = 1; - remoteGlobalIDString = FD0D7F98108FE550004F2A1C; - remoteInfo = localedef; + remoteGlobalIDString = 2A7E409C297A4ACD003942C8; + remoteInfo = genwrap_static; + }; + 2A9E2AAC2B19941600F5F14D /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = FDF276430FC60E9000D7A3C6 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 2A9E2A9C2B198AE100F5F14D; + remoteInfo = arg_selector_complex_logonly_args; }; FD201DBE14369B1700906237 /* PBXContainerItemProxy */ = { isa = PBXContainerItemProxy; @@ -715,26 +739,21 @@ 2A7E4140297A6E7F003942C8 /* analytics_simple_d.out in Copy Test Files */, 2A7E4141297A6E7F003942C8 /* analytics_simple_e.out in Copy Test Files */, 
2A7E4142297A6E7F003942C8 /* analytics_simple_f.out in Copy Test Files */, + 2A941CA72BDE2BD0003C7827 /* analytics_simple_g.out in Copy Test Files */, ); name = "Copy Test Files"; runOnlyForDeploymentPostprocessing = 1; }; - 2A7E4124297A6BA1003942C8 /* Copy Test Files */ = { + 2A96BE7429839C6200F1705B /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; - dstPath = /AppleInternal/Tests/adv_cmds/genwrap; + dstPath = /usr/share/man/man1/; dstSubfolderSpec = 0; files = ( - 2A7E4132297A6E49003942C8 /* analytics_redacted_a.out in Copy Test Files */, - 2A7E4133297A6E49003942C8 /* analytics_redacted_b.out in Copy Test Files */, - 2A7E4134297A6E49003942C8 /* analytics_redacted_c.out in Copy Test Files */, - 2A7E4135297A6E49003942C8 /* analytics_redacted_d.out in Copy Test Files */, - 2A7E4136297A6E49003942C8 /* analytics_redacted_e.out in Copy Test Files */, ); - name = "Copy Test Files"; runOnlyForDeploymentPostprocessing = 1; }; - 2A96BE7429839C6200F1705B /* CopyFiles */ = { + 2A9E2AA32B198AE100F5F14D /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; dstPath = /usr/share/man/man1/; @@ -743,13 +762,13 @@ ); runOnlyForDeploymentPostprocessing = 1; }; - FD0D7FA5108FE5AA004F2A1C /* Install man1 */ = { + 2AFA030B2A2EE8AB00440D64 /* Install man1 */ = { isa = PBXCopyFilesBuildPhase; - buildActionMask = 8; + buildActionMask = 2147483647; dstPath = /usr/share/man/man1; dstSubfolderSpec = 0; files = ( - FD0D7FA4108FE58C004F2A1C /* localedef.1 in Install man1 */, + 2ABFB0FC2A7A1750008292A6 /* localedef.1 in Install man1 */, ); name = "Install man1"; runOnlyForDeploymentPostprocessing = 1; @@ -930,6 +949,7 @@ 2A114C9329674FF3005099EA /* genwrap.y */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.yacc; name = genwrap.y; path = genwrap/genwrap.y; sourceTree = ""; }; 2A19020F296C04C4008E5A05 /* genwrap.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = 
genwrap.c; path = genwrap/genwrap.c; sourceTree = ""; }; 2A190213296DDC5C008E5A05 /* genwrap.8 */ = {isa = PBXFileReference; lastKnownFileType = text; name = genwrap.8; path = genwrap/genwrap.8; sourceTree = ""; }; + 2A485F8E2B641A27009D80F8 /* localedef */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = localedef; sourceTree = BUILT_PRODUCTS_DIR; }; 2A51184227E442190059F4ED /* pgrep-o_test.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; name = "pgrep-o_test.sh"; path = "pkill/tests/pgrep-o_test.sh"; sourceTree = SOURCE_ROOT; }; 2A51184327E442190059F4ED /* pgrep-x_test.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; name = "pgrep-x_test.sh"; path = "pkill/tests/pgrep-x_test.sh"; sourceTree = SOURCE_ROOT; }; 2A51184427E442190059F4ED /* pgrep-i_test.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; name = "pgrep-i_test.sh"; path = "pkill/tests/pgrep-i_test.sh"; sourceTree = SOURCE_ROOT; }; @@ -954,9 +974,10 @@ 2A51185E27E442440059F4ED /* locale_test.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; name = locale_test.sh; path = tests/locale_test.sh; sourceTree = ""; }; 2A51185F27E442440059F4ED /* k_flag_posix_messages.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = k_flag_posix_messages.out; path = tests/k_flag_posix_messages.out; sourceTree = ""; }; 2A51186027E442440059F4ED /* k_flag_posix_time.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = k_flag_posix_time.out; path = tests/k_flag_posix_time.out; sourceTree = ""; }; + 2A5628D32A73835C0083A770 /* parser.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = DERIVED_FILE_DIR; }; 2A688AF32A461EF000F211FD /* env_selector_addarg.wrapper */ = {isa = PBXFileReference; lastKnownFileType = text; name = env_selector_addarg.wrapper; path = genwrap/tests/env_selector_addarg.wrapper; sourceTree = 
""; }; 2A688AFE2A461F0100F211FD /* env_selector_addarg */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = env_selector_addarg; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A688AFF2A46200A00F211FD /* env_selector_addarg.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = env_selector_addarg.c; sourceTree = BUILT_PRODUCTS_DIR; }; + 2A688AFF2A46200A00F211FD /* env_selector_addarg.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = env_selector_addarg.c; sourceTree = ""; }; 2A7E408F297A4581003942C8 /* env_selector.wrapper */ = {isa = PBXFileReference; lastKnownFileType = text; name = env_selector.wrapper; path = genwrap/tests/env_selector.wrapper; sourceTree = ""; }; 2A7E4090297A4581003942C8 /* arg_selector_simple_a.wrapper */ = {isa = PBXFileReference; lastKnownFileType = text; name = arg_selector_simple_a.wrapper; path = genwrap/tests/arg_selector_simple_a.wrapper; sourceTree = ""; }; 2A7E4091297A4581003942C8 /* arg_selector_simple_b.wrapper */ = {isa = PBXFileReference; lastKnownFileType = text; name = arg_selector_simple_b.wrapper; path = genwrap/tests/arg_selector_simple_b.wrapper; sourceTree = ""; }; @@ -965,25 +986,18 @@ 2A7E40AC297A4ACD003942C8 /* genwrap_static */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = genwrap_static; sourceTree = BUILT_PRODUCTS_DIR; }; 2A7E40B1297A4D49003942C8 /* arg_selector_complex */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = arg_selector_complex; sourceTree = BUILT_PRODUCTS_DIR; }; 2A7E40C8297A50B7003942C8 /* arg_selector_simple_a */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = arg_selector_simple_a; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A7E40D6297A58EC003942C8 /* arg_selector_simple_a.c */ = {isa = PBXFileReference; fileEncoding 
= 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_simple_a.c; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A7E40D9297A5966003942C8 /* arg_selector_complex.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_complex.c; sourceTree = BUILT_PRODUCTS_DIR; }; + 2A7E40D6297A58EC003942C8 /* arg_selector_simple_a.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_simple_a.c; sourceTree = ""; }; + 2A7E40D9297A5966003942C8 /* arg_selector_complex.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_complex.c; sourceTree = ""; }; 2A7E40E5297A5A4E003942C8 /* arg_selector_simple_b */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = arg_selector_simple_b; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A7E40E6297A5A91003942C8 /* arg_selector_simple_b.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_simple_b.c; sourceTree = BUILT_PRODUCTS_DIR; }; + 2A7E40E6297A5A91003942C8 /* arg_selector_simple_b.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_simple_b.c; sourceTree = ""; }; 2A7E40F2297A5AC2003942C8 /* env_selector */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = env_selector; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A7E40F4297A5CC5003942C8 /* env_selector.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = env_selector.c; sourceTree = BUILT_PRODUCTS_DIR; }; + 2A7E40F4297A5CC5003942C8 /* env_selector.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = env_selector.c; sourceTree = ""; }; 2A7E4100297A5D04003942C8 /* simple_shim */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; 
path = simple_shim; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A7E4101297A5D38003942C8 /* simple_shim.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = simple_shim.c; sourceTree = BUILT_PRODUCTS_DIR; }; + 2A7E4101297A5D38003942C8 /* simple_shim.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = simple_shim.c; sourceTree = ""; }; 2A7E4109297A5EA2003942C8 /* genwrap_test.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; name = genwrap_test.sh; path = genwrap/tests/genwrap_test.sh; sourceTree = ""; }; 2A7E4117297A63E0003942C8 /* analytics_simple */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = analytics_simple; sourceTree = BUILT_PRODUCTS_DIR; }; 2A7E4118297A6415003942C8 /* analytics_simple.wrapper */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_simple.wrapper; path = genwrap/tests/analytics_simple.wrapper; sourceTree = ""; }; - 2A7E4119297A6457003942C8 /* analytics_simple.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = analytics_simple.c; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A7E4127297A6BA1003942C8 /* analytics_redacted */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = analytics_redacted; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A7E412A297A6C06003942C8 /* analytics_redacted.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = analytics_redacted.c; sourceTree = BUILT_PRODUCTS_DIR; }; - 2A7E412C297A6E1D003942C8 /* analytics_redacted_b.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_redacted_b.out; path = genwrap/tests/analytics_redacted_b.out; sourceTree = ""; }; - 2A7E412D297A6E1D003942C8 /* analytics_redacted_a.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_redacted_a.out; path = 
genwrap/tests/analytics_redacted_a.out; sourceTree = ""; }; - 2A7E412E297A6E1D003942C8 /* analytics_redacted_c.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_redacted_c.out; path = genwrap/tests/analytics_redacted_c.out; sourceTree = ""; }; - 2A7E412F297A6E1D003942C8 /* analytics_redacted_d.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_redacted_d.out; path = genwrap/tests/analytics_redacted_d.out; sourceTree = ""; }; - 2A7E4130297A6E1D003942C8 /* analytics_redacted_e.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_redacted_e.out; path = genwrap/tests/analytics_redacted_e.out; sourceTree = ""; }; + 2A7E4119297A6457003942C8 /* analytics_simple.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = analytics_simple.c; sourceTree = ""; }; 2A7E4137297A6E6F003942C8 /* analytics_simple_d.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_simple_d.out; path = genwrap/tests/analytics_simple_d.out; sourceTree = ""; }; 2A7E4138297A6E6F003942C8 /* analytics_simple_e.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_simple_e.out; path = genwrap/tests/analytics_simple_e.out; sourceTree = ""; }; 2A7E4139297A6E6F003942C8 /* analytics_simple_f.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_simple_f.out; path = genwrap/tests/analytics_simple_f.out; sourceTree = ""; }; @@ -992,9 +1006,10 @@ 2A7E413C297A6E70003942C8 /* analytics_simple_a.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_simple_a.out; path = genwrap/tests/analytics_simple_a.out; sourceTree = ""; }; 2A7F779927B30E9000CACBDB /* ps_lowpriv */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = ps_lowpriv; sourceTree = BUILT_PRODUCTS_DIR; }; 2A85AF26280639C900F493F9 /* 91596308_test.sh */ = {isa = PBXFileReference; lastKnownFileType = 
text.script.sh; name = 91596308_test.sh; path = tests/91596308_test.sh; sourceTree = ""; }; + 2A941CA62BDE2BC8003C7827 /* analytics_simple_g.out */ = {isa = PBXFileReference; lastKnownFileType = text; name = analytics_simple_g.out; path = genwrap/tests/analytics_simple_g.out; sourceTree = ""; }; 2A96BE7729839C6200F1705B /* arg_selector_complex_logonly */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = arg_selector_complex_logonly; sourceTree = BUILT_PRODUCTS_DIR; }; 2A96BE7829839C8C00F1705B /* arg_selector_complex_logonly.wrapper */ = {isa = PBXFileReference; lastKnownFileType = text; name = arg_selector_complex_logonly.wrapper; path = genwrap/tests/arg_selector_complex_logonly.wrapper; sourceTree = ""; }; - 2A96BE7929839D5A00F1705B /* arg_selector_complex_logonly.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_complex_logonly.c; sourceTree = BUILT_PRODUCTS_DIR; }; + 2A96BE7929839D5A00F1705B /* arg_selector_complex_logonly.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_complex_logonly.c; sourceTree = ""; }; 2A96BED729A0649B00F1705B /* 33386332_test.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; name = 33386332_test.sh; path = tests/33386332_test.sh; sourceTree = ""; }; 2A9C8A2229C8F58200416E6B /* pgrep-_u_test.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "pgrep-_u_test.sh"; path = "pkill/tests/pgrep-_u_test.sh"; sourceTree = ""; }; 2A9C8A2329C8F58200416E6B /* pgrep-_p_test.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "pgrep-_p_test.sh"; path = "pkill/tests/pgrep-_p_test.sh"; sourceTree = ""; }; @@ -1007,6 +1022,24 @@ 2A9C8A3F29C8F8EF00416E6B /* spin_helper */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = spin_helper; 
sourceTree = BUILT_PRODUCTS_DIR; }; 2A9C8A4029C8F93600416E6B /* spin_helper.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = spin_helper.c; path = pkill/tests/spin_helper.c; sourceTree = ""; }; 2A9C8A4429C8FB4900416E6B /* pgrep-f_test.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "pgrep-f_test.sh"; path = "pkill/tests/pgrep-f_test.sh"; sourceTree = ""; }; + 2A9E2A9B2B198AD200F5F14D /* arg_selector_complex_logonly_args.wrapper */ = {isa = PBXFileReference; lastKnownFileType = text; name = arg_selector_complex_logonly_args.wrapper; path = genwrap/tests/arg_selector_complex_logonly_args.wrapper; sourceTree = ""; }; + 2A9E2AA62B198AE100F5F14D /* arg_selector_complex_logonly_args */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = arg_selector_complex_logonly_args; sourceTree = BUILT_PRODUCTS_DIR; }; + 2A9E2AA72B198C7E00F5F14D /* arg_selector_complex_logonly_args.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = arg_selector_complex_logonly_args.c; sourceTree = ""; }; + 2AFA02FB2A2EE84E00440D64 /* charmap.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = charmap.c; sourceTree = ""; }; + 2AFA02FC2A2EE84E00440D64 /* collate.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = collate.c; sourceTree = ""; }; + 2AFA02FD2A2EE84E00440D64 /* ctype.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = ctype.c; sourceTree = ""; }; + 2AFA02FE2A2EE86700440D64 /* localedef.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = localedef.c; sourceTree = ""; }; + 2AFA02FF2A2EE86700440D64 /* parser.y */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.yacc; path = parser.y; sourceTree = ""; }; + 2AFA03002A2EE86700440D64 /* time.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = 
time.c; sourceTree = ""; }; + 2AFA03012A2EE86700440D64 /* messages.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = messages.c; sourceTree = ""; }; + 2AFA03022A2EE86700440D64 /* localedef.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = localedef.1; sourceTree = ""; }; + 2AFA03032A2EE86700440D64 /* localedef.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = localedef.h; sourceTree = ""; }; + 2AFA03042A2EE86700440D64 /* scanner.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = scanner.c; sourceTree = ""; }; + 2AFA03052A2EE86700440D64 /* numeric.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = numeric.c; sourceTree = ""; }; + 2AFA03062A2EE86700440D64 /* wide.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = wide.c; sourceTree = ""; }; + 2AFA03072A2EE86700440D64 /* README */ = {isa = PBXFileReference; lastKnownFileType = text; path = README; sourceTree = ""; }; + 2AFA03082A2EE86700440D64 /* monetary.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = monetary.c; sourceTree = ""; }; + 2AFA030D2A2EE8AB00440D64 /* localedef */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = localedef; sourceTree = BUILT_PRODUCTS_DIR; }; 32A3CA9829251B5D00FABB5C /* libxo.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libxo.tbd; path = usr/lib/libxo.tbd; sourceTree = SDKROOT; }; FD201DAF14369AD000906237 /* pkill.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; name = pkill.1; path = pkill/pkill.1; sourceTree = ""; }; FD201DB014369AD000906237 /* pkill.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = pkill.c; path = pkill/pkill.c; sourceTree = ""; }; @@ -1054,7 +1087,6 @@ FDF277060FC60FDF00D7A3C6 /* last.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.c.c; path = last.c; sourceTree = ""; }; FDF277090FC60FDF00D7A3C6 /* locale.1 */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.man; path = locale.1; sourceTree = ""; }; FDF2770A0FC60FDF00D7A3C6 /* locale.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = locale.cc; sourceTree = ""; }; - FDF277140FC60FDF00D7A3C6 /* localedef.1 */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.man; path = localedef.1; sourceTree = ""; }; FDF277170FC60FDF00D7A3C6 /* lsvfs.1 */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.man; path = lsvfs.1; sourceTree = ""; }; FDF277180FC60FDF00D7A3C6 /* lsvfs.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = lsvfs.c; sourceTree = ""; }; FDF2771F0FC60FDF00D7A3C6 /* extern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = extern.h; sourceTree = ""; }; @@ -1089,7 +1121,6 @@ FDF277480FC60FDF00D7A3C6 /* whois.1 */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.man; path = whois.1; sourceTree = ""; }; FDF277490FC60FDF00D7A3C6 /* whois.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = whois.c; sourceTree = ""; }; FDF277E70FC6144400D7A3C6 /* libtermcap.dylib */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libtermcap.dylib; path = /usr/lib/libtermcap.dylib; sourceTree = ""; }; - FDF279760FC629B100D7A3C6 /* localedef.pl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.perl; path = localedef.pl; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -1100,6 +1131,13 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + 2A485F892B641A27009D80F8 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + 
runOnlyForDeploymentPostprocessing = 0; + }; 2A688AFA2A461F0100F211FD /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -1156,28 +1194,35 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - 2A7E4123297A6BA1003942C8 /* Frameworks */ = { + 2A7F779427B30E9000CACBDB /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( ); runOnlyForDeploymentPostprocessing = 0; }; - 2A7F779427B30E9000CACBDB /* Frameworks */ = { + 2A96BE7329839C6200F1705B /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( ); runOnlyForDeploymentPostprocessing = 0; }; - 2A96BE7329839C6200F1705B /* Frameworks */ = { + 2A9C8A3B29C8F8EF00416E6B /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( ); runOnlyForDeploymentPostprocessing = 0; }; - 2A9C8A3B29C8F8EF00416E6B /* Frameworks */ = { + 2A9E2AA22B198AE100F5F14D /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 2AFA030A2A2EE8AB00440D64 /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( @@ -1365,19 +1410,16 @@ 2A7E4089297A4510003942C8 /* tests */ = { isa = PBXGroup; children = ( - 2A7E412D297A6E1D003942C8 /* analytics_redacted_a.out */, - 2A7E412C297A6E1D003942C8 /* analytics_redacted_b.out */, - 2A7E412E297A6E1D003942C8 /* analytics_redacted_c.out */, - 2A7E412F297A6E1D003942C8 /* analytics_redacted_d.out */, - 2A7E4130297A6E1D003942C8 /* analytics_redacted_e.out */, 2A7E413C297A6E70003942C8 /* analytics_simple_a.out */, 2A7E413B297A6E70003942C8 /* analytics_simple_b.out */, 2A7E413A297A6E70003942C8 /* analytics_simple_c.out */, 2A7E4137297A6E6F003942C8 /* analytics_simple_d.out */, 2A7E4138297A6E6F003942C8 /* analytics_simple_e.out */, 2A7E4139297A6E6F003942C8 /* analytics_simple_f.out */, + 2A941CA62BDE2BC8003C7827 /* analytics_simple_g.out */, 
2A7E4118297A6415003942C8 /* analytics_simple.wrapper */, 2A7E4092297A4581003942C8 /* arg_selector_complex.wrapper */, + 2A9E2A9B2B198AD200F5F14D /* arg_selector_complex_logonly_args.wrapper */, 2A96BE7829839C8C00F1705B /* arg_selector_complex_logonly.wrapper */, 2A7E4090297A4581003942C8 /* arg_selector_simple_a.wrapper */, 2A7E4091297A4581003942C8 /* arg_selector_simple_b.wrapper */, @@ -1392,18 +1434,19 @@ 2A7E40D5297A58D0003942C8 /* Derived Files */ = { isa = PBXGroup; children = ( - 2A7E412A297A6C06003942C8 /* analytics_redacted.c */, 2A7E4119297A6457003942C8 /* analytics_simple.c */, 2A7E40D9297A5966003942C8 /* arg_selector_complex.c */, + 2A9E2AA72B198C7E00F5F14D /* arg_selector_complex_logonly_args.c */, 2A96BE7929839D5A00F1705B /* arg_selector_complex_logonly.c */, 2A7E40D6297A58EC003942C8 /* arg_selector_simple_a.c */, 2A7E40E6297A5A91003942C8 /* arg_selector_simple_b.c */, 2A7E40F4297A5CC5003942C8 /* env_selector.c */, 2A688AFF2A46200A00F211FD /* env_selector_addarg.c */, + 2A5628D32A73835C0083A770 /* parser.c */, 2A7E4101297A5D38003942C8 /* simple_shim.c */, ); name = "Derived Files"; - sourceTree = ""; + sourceTree = DERIVED_FILE_DIR; }; 32A3CA9729251B5D00FABB5C /* Frameworks */ = { isa = PBXGroup; @@ -1461,10 +1504,12 @@ 2A7E40F2297A5AC2003942C8 /* env_selector */, 2A7E4100297A5D04003942C8 /* simple_shim */, 2A7E4117297A63E0003942C8 /* analytics_simple */, - 2A7E4127297A6BA1003942C8 /* analytics_redacted */, 2A96BE7729839C6200F1705B /* arg_selector_complex_logonly */, 2A9C8A3F29C8F8EF00416E6B /* spin_helper */, 2A688AFE2A461F0100F211FD /* env_selector_addarg */, + 2A9E2AA62B198AE100F5F14D /* arg_selector_complex_logonly_args */, + 2AFA030D2A2EE8AB00440D64 /* localedef */, + 2A485F8E2B641A27009D80F8 /* localedef */, ); name = Products; sourceTree = ""; @@ -1583,8 +1628,20 @@ FDF2770C0FC60FDF00D7A3C6 /* localedef */ = { isa = PBXGroup; children = ( - FDF279760FC629B100D7A3C6 /* localedef.pl */, - FDF277140FC60FDF00D7A3C6 /* localedef.1 */, + 
2AFA02FB2A2EE84E00440D64 /* charmap.c */, + 2AFA02FC2A2EE84E00440D64 /* collate.c */, + 2AFA02FD2A2EE84E00440D64 /* ctype.c */, + 2AFA03022A2EE86700440D64 /* localedef.1 */, + 2AFA02FE2A2EE86700440D64 /* localedef.c */, + 2AFA03032A2EE86700440D64 /* localedef.h */, + 2AFA03012A2EE86700440D64 /* messages.c */, + 2AFA03082A2EE86700440D64 /* monetary.c */, + 2AFA03052A2EE86700440D64 /* numeric.c */, + 2AFA02FF2A2EE86700440D64 /* parser.y */, + 2AFA03072A2EE86700440D64 /* README */, + 2AFA03042A2EE86700440D64 /* scanner.c */, + 2AFA03002A2EE86700440D64 /* time.c */, + 2AFA03062A2EE86700440D64 /* wide.c */, ); path = localedef; sourceTree = ""; @@ -1695,6 +1752,23 @@ productReference = 2A114C7F29674842005099EA /* genwrap */; productType = "com.apple.product-type.tool"; }; + 2A485F7B2B641A27009D80F8 /* localedef_host */ = { + isa = PBXNativeTarget; + buildConfigurationList = 2A485F8C2B641A27009D80F8 /* Build configuration list for PBXNativeTarget "localedef_host" */; + buildPhases = ( + 2A485F7C2B641A27009D80F8 /* Generate YACC Output */, + 2A485F7D2B641A27009D80F8 /* Sources */, + 2A485F892B641A27009D80F8 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = localedef_host; + productName = localedef; + productReference = 2A485F8E2B641A27009D80F8 /* localedef */; + productType = "com.apple.product-type.tool"; + }; 2A688AF42A461F0100F211FD /* env_selector_addarg */ = { isa = PBXNativeTarget; buildConfigurationList = 2A688AFC2A461F0100F211FD /* Build configuration list for PBXNativeTarget "env_selector_addarg" */; @@ -1844,25 +1918,6 @@ productReference = 2A7E4117297A63E0003942C8 /* analytics_simple */; productType = "com.apple.product-type.tool"; }; - 2A7E411D297A6BA1003942C8 /* analytics_redacted */ = { - isa = PBXNativeTarget; - buildConfigurationList = 2A7E4125297A6BA1003942C8 /* Build configuration list for PBXNativeTarget "analytics_redacted" */; - buildPhases = ( - 2A7E4120297A6BA1003942C8 /* ShellScript */, - 2A7E4121297A6BA1003942C8 /* 
Sources */, - 2A7E4123297A6BA1003942C8 /* Frameworks */, - 2A7E4124297A6BA1003942C8 /* Copy Test Files */, - ); - buildRules = ( - ); - dependencies = ( - 2A7E411E297A6BA1003942C8 /* PBXTargetDependency */, - ); - name = analytics_redacted; - productName = arg_selector_complex; - productReference = 2A7E4127297A6BA1003942C8 /* analytics_redacted */; - productType = "com.apple.product-type.tool"; - }; 2A7F778C27B30E9000CACBDB /* ps_lowpriv */ = { isa = PBXNativeTarget; buildConfigurationList = 2A7F779727B30E9000CACBDB /* Build configuration list for PBXNativeTarget "ps_lowpriv" */; @@ -1914,6 +1969,43 @@ productReference = 2A9C8A3F29C8F8EF00416E6B /* spin_helper */; productType = "com.apple.product-type.tool"; }; + 2A9E2A9C2B198AE100F5F14D /* arg_selector_complex_logonly_args */ = { + isa = PBXNativeTarget; + buildConfigurationList = 2A9E2AA42B198AE100F5F14D /* Build configuration list for PBXNativeTarget "arg_selector_complex_logonly_args" */; + buildPhases = ( + 2A9E2A9F2B198AE100F5F14D /* ShellScript */, + 2A9E2AA02B198AE100F5F14D /* Sources */, + 2A9E2AA22B198AE100F5F14D /* Frameworks */, + 2A9E2AA32B198AE100F5F14D /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + 2A9E2A9D2B198AE100F5F14D /* PBXTargetDependency */, + ); + name = arg_selector_complex_logonly_args; + productName = arg_selector_complex; + productReference = 2A9E2AA62B198AE100F5F14D /* arg_selector_complex_logonly_args */; + productType = "com.apple.product-type.tool"; + }; + 2AFA030C2A2EE8AB00440D64 /* localedef */ = { + isa = PBXNativeTarget; + buildConfigurationList = 2AFA03112A2EE8AB00440D64 /* Build configuration list for PBXNativeTarget "localedef" */; + buildPhases = ( + 2AFA03202A2EEEAA00440D64 /* Generate YACC Output */, + 2AFA03092A2EE8AB00440D64 /* Sources */, + 2AFA030A2A2EE8AB00440D64 /* Frameworks */, + 2AFA030B2A2EE8AB00440D64 /* Install man1 */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = localedef; + productName = localedef; + productReference = 
2AFA030D2A2EE8AB00440D64 /* localedef */; + productType = "com.apple.product-type.tool"; + }; FD201DB414369B0300906237 /* pkill */ = { isa = PBXNativeTarget; buildConfigurationList = FD201DBC14369B0400906237 /* Build configuration list for PBXNativeTarget "pkill" */; @@ -2167,9 +2259,15 @@ 2A114C7E29674842005099EA = { CreatedOnToolsVersion = 14.0; }; + 2A485F732B5DAEBE009D80F8 = { + CreatedOnToolsVersion = 15.3; + }; 2A7E40B0297A4D49003942C8 = { CreatedOnToolsVersion = 14.3; }; + 2AFA030C2A2EE8AB00440D64 = { + CreatedOnToolsVersion = 15.0; + }; }; }; buildConfigurationList = FDF276460FC60E9000D7A3C6 /* Build configuration list for PBXProject "adv_cmds" */; @@ -2188,6 +2286,7 @@ targets = ( FDF276500FC60EDA00D7A3C6 /* Desktop */, FDF2764E0FC60ECD00D7A3C6 /* Embedded */, + 2A485F732B5DAEBE009D80F8 /* Host */, FDF276740FC60F2100D7A3C6 /* cap_mkdb */, FDF2767A0FC60F2A00D7A3C6 /* colldef */, FDF276800FC60F3100D7A3C6 /* finger */, @@ -2195,7 +2294,8 @@ 2A114C7E29674842005099EA /* genwrap */, FDF276920FC60F4B00D7A3C6 /* last */, FDF276980FC60F5000D7A3C6 /* locale */, - FD0D7F98108FE550004F2A1C /* localedef */, + 2AFA030C2A2EE8AB00440D64 /* localedef */, + 2A485F7B2B641A27009D80F8 /* localedef_host */, FDF276A40FC60F5E00D7A3C6 /* lsvfs */, FDF276B00FC60F7600D7A3C6 /* mklocale */, FD201DCD14369D0C00906237 /* pgrep */, @@ -2207,9 +2307,9 @@ FDF276BC0FC60F8400D7A3C6 /* tty */, FDF276C20FC60F8A00D7A3C6 /* whois */, 2A7E409C297A4ACD003942C8 /* genwrap_static */, - 2A7E411D297A6BA1003942C8 /* analytics_redacted */, 2A7E410D297A63E0003942C8 /* analytics_simple */, 2A7E40B0297A4D49003942C8 /* arg_selector_complex */, + 2A9E2A9C2B198AE100F5F14D /* arg_selector_complex_logonly_args */, 2A96BE6D29839C6200F1705B /* arg_selector_complex_logonly */, 2A7E40BE297A50B7003942C8 /* arg_selector_simple_a */, 2A7E40DB297A5A4E003942C8 /* arg_selector_simple_b */, @@ -2222,6 +2322,27 @@ /* End PBXProject section */ /* Begin PBXShellScriptBuildPhase section */ + 2A485F7C2B641A27009D80F8 /* 
Generate YACC Output */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + ); + inputPaths = ( + "$(SRCROOT)/localedef/parser.y", + ); + name = "Generate YACC Output"; + outputFileListPaths = ( + ); + outputPaths = ( + "$(DERIVED_FILE_DIR)/parser.c", + "$(DERIVED_FILE_DIR)/parser.h", + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "bison -d -o \"$SCRIPT_OUTPUT_FILE_0\" \"$SCRIPT_INPUT_FILE_0\"\n"; + }; 2A51188427E6881E0059F4ED /* Install Test plist */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 8; @@ -2275,7 +2396,7 @@ outputFileListPaths = ( ); outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/env_selector_addarg.c", + "$(DERIVED_FILE_DIR)/env_selector_addarg.c", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; @@ -2294,7 +2415,7 @@ outputFileListPaths = ( ); outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/arg_selector_complex.c", + "$(DERIVED_FILE_DIR)/arg_selector_complex.c", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; @@ -2313,7 +2434,7 @@ outputFileListPaths = ( ); outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/arg_selector_simple_a.c", + "$(DERIVED_FILE_DIR)/arg_selector_simple_a.c", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; @@ -2332,7 +2453,7 @@ outputFileListPaths = ( ); outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/arg_selector_simple_b.c", + "$(DERIVED_FILE_DIR)/arg_selector_simple_b.c", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; @@ -2351,7 +2472,7 @@ outputFileListPaths = ( ); outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/env_selector.c", + "$(DERIVED_FILE_DIR)/env_selector.c", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; @@ -2370,7 +2491,7 @@ outputFileListPaths = ( ); outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/simple_shim.c", + "$(DERIVED_FILE_DIR)/simple_shim.c", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; @@ -2389,26 +2510,7 @@ outputFileListPaths = ( ); 
outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/analytics_simple.c", - ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = ". \"$PROJECT_DIR\"/xcodescripts/generate-wrapper.sh\n"; - }; - 2A7E4120297A6BA1003942C8 /* ShellScript */ = { - isa = PBXShellScriptBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - inputFileListPaths = ( - ); - inputPaths = ( - "$(SRCROOT)/genwrap/tests/analytics_redacted.wrapper", - ); - outputFileListPaths = ( - ); - outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/analytics_redacted.c", + "$(DERIVED_FILE_DIR)/analytics_simple.c", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; @@ -2471,42 +2573,69 @@ outputFileListPaths = ( ); outputPaths = ( - "$(BUILT_PRODUCTS_DIR)/arg_selector_complex_logonly.c", + "$(DERIVED_FILE_DIR)/arg_selector_complex_logonly.c", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; shellScript = ". \"$PROJECT_DIR\"/xcodescripts/generate-wrapper.sh\n"; }; - D329D454279B7F050096F68B /* Install OpenSource plist */ = { + 2A9E2A9F2B198AE100F5F14D /* ShellScript */ = { isa = PBXShellScriptBuildPhase; - buildActionMask = 8; + buildActionMask = 2147483647; files = ( ); inputFileListPaths = ( ); inputPaths = ( + "$(SRCROOT)/genwrap/tests/arg_selector_complex_logonly_args.wrapper", ); - name = "Install OpenSource plist"; outputFileListPaths = ( ); outputPaths = ( + "$(DERIVED_FILE_DIR)/arg_selector_complex_logonly_args.c", ); - runOnlyForDeploymentPostprocessing = 1; + runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = ". \"$PROJECT_DIR\"/xcodescripts/install-opensource.sh\n"; + shellScript = ". 
\"$PROJECT_DIR\"/xcodescripts/generate-wrapper.sh\n \n"; + }; + 2AFA03202A2EEEAA00440D64 /* Generate YACC Output */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + ); + inputPaths = ( + "$(SRCROOT)/localedef/parser.y", + ); + name = "Generate YACC Output"; + outputFileListPaths = ( + ); + outputPaths = ( + "$(DERIVED_FILE_DIR)/parser.c", + "$(DERIVED_FILE_DIR)/parser.h", + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "bison -d -o \"$SCRIPT_OUTPUT_FILE_0\" \"$SCRIPT_INPUT_FILE_0\"\n"; }; - FD0D7F97108FE550004F2A1C /* ShellScript */ = { + D329D454279B7F050096F68B /* Install OpenSource plist */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 8; files = ( ); + inputFileListPaths = ( + ); inputPaths = ( ); + name = "Install OpenSource plist"; + outputFileListPaths = ( + ); outputPaths = ( ); runOnlyForDeploymentPostprocessing = 1; shellPath = /bin/sh; - shellScript = "set -ex\n/usr/bin/install -m ${INSTALL_MODE_FLAG} ${SRCROOT}/localedef/localedef.pl ${INSTALL_DIR}/localedef\n${CHMOD} +x ${INSTALL_DIR}/localedef"; + shellScript = ". 
\"$PROJECT_DIR\"/xcodescripts/install-opensource.sh\n"; }; FD201DD014369D1000906237 /* ShellScript */ = { isa = PBXShellScriptBuildPhase; @@ -2535,6 +2664,24 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + 2A485F7D2B641A27009D80F8 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 2A485F7E2B641A27009D80F8 /* scanner.c in Sources */, + 2A485F7F2B641A27009D80F8 /* charmap.c in Sources */, + 2A485F802B641A27009D80F8 /* parser.c in Sources */, + 2A485F812B641A27009D80F8 /* collate.c in Sources */, + 2A485F822B641A27009D80F8 /* ctype.c in Sources */, + 2A485F832B641A27009D80F8 /* localedef.c in Sources */, + 2A485F842B641A27009D80F8 /* messages.c in Sources */, + 2A485F852B641A27009D80F8 /* monetary.c in Sources */, + 2A485F862B641A27009D80F8 /* numeric.c in Sources */, + 2A485F872B641A27009D80F8 /* time.c in Sources */, + 2A485F882B641A27009D80F8 /* wide.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; 2A688AF82A461F0100F211FD /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -2601,14 +2748,6 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - 2A7E4121297A6BA1003942C8 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 2A7E412B297A6C06003942C8 /* analytics_redacted.c in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; 2A7F778D27B30E9000CACBDB /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -2638,6 +2777,32 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + 2A9E2AA02B198AE100F5F14D /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 2A9E2AAB2B198F0900F5F14D /* arg_selector_complex_logonly_args.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 2AFA03092A2EE8AB00440D64 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 2AFA031A2A2EE8EB00440D64 /* scanner.c in Sources */, + 
2AFA03132A2EE8D000440D64 /* charmap.c in Sources */, + 2A5628D42A73835C0083A770 /* parser.c in Sources */, + 2AFA03142A2EE8D400440D64 /* collate.c in Sources */, + 2AFA03152A2EE8D700440D64 /* ctype.c in Sources */, + 2AFA03162A2EE8DB00440D64 /* localedef.c in Sources */, + 2AFA03172A2EE8DF00440D64 /* messages.c in Sources */, + 2AFA03182A2EE8E300440D64 /* monetary.c in Sources */, + 2AFA03192A2EE8E700440D64 /* numeric.c in Sources */, + 2AFA031B2A2EE8EE00440D64 /* time.c in Sources */, + 2AFA031C2A2EE8F200440D64 /* wide.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; FD201DB114369B0300906237 /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -2771,6 +2936,16 @@ /* End PBXSourcesBuildPhase section */ /* Begin PBXTargetDependency section */ + 2A15A1D12B45C53D00A3DA9E /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 2AFA030C2A2EE8AB00440D64 /* localedef */; + targetProxy = 2A15A1D02B45C53D00A3DA9E /* PBXContainerItemProxy */; + }; + 2A485F902B641B2A009D80F8 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 2A485F7B2B641A27009D80F8 /* localedef_host */; + targetProxy = 2A485F8F2B641B2A009D80F8 /* PBXContainerItemProxy */; + }; 2A688AF52A461F0100F211FD /* PBXTargetDependency */ = { isa = PBXTargetDependency; target = 2A7E409C297A4ACD003942C8 /* genwrap_static */; @@ -2846,16 +3021,6 @@ target = 2A7E410D297A63E0003942C8 /* analytics_simple */; targetProxy = 2A7E411B297A647D003942C8 /* PBXContainerItemProxy */; }; - 2A7E411E297A6BA1003942C8 /* PBXTargetDependency */ = { - isa = PBXTargetDependency; - target = 2A7E409C297A4ACD003942C8 /* genwrap_static */; - targetProxy = 2A7E411F297A6BA1003942C8 /* PBXContainerItemProxy */; - }; - 2A7E4129297A6BD7003942C8 /* PBXTargetDependency */ = { - isa = PBXTargetDependency; - target = 2A7E411D297A6BA1003942C8 /* analytics_redacted */; - targetProxy = 2A7E4128297A6BD7003942C8 /* PBXContainerItemProxy */; - }; 2A7F779B27B30ED600CACBDB /* 
PBXTargetDependency */ = { isa = PBXTargetDependency; target = 2A7F778C27B30E9000CACBDB /* ps_lowpriv */; @@ -2881,10 +3046,15 @@ target = 2A9C8A3529C8F8EF00416E6B /* spin_helper */; targetProxy = 2A9C8A4629C9034900416E6B /* PBXContainerItemProxy */; }; - FD0D7FA9108FE5C3004F2A1C /* PBXTargetDependency */ = { + 2A9E2A9D2B198AE100F5F14D /* PBXTargetDependency */ = { isa = PBXTargetDependency; - target = FD0D7F98108FE550004F2A1C /* localedef */; - targetProxy = FD0D7FA8108FE5C3004F2A1C /* PBXContainerItemProxy */; + target = 2A7E409C297A4ACD003942C8 /* genwrap_static */; + targetProxy = 2A9E2A9E2B198AE100F5F14D /* PBXContainerItemProxy */; + }; + 2A9E2AAD2B19941600F5F14D /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 2A9E2A9C2B198AE100F5F14D /* arg_selector_complex_logonly_args */; + targetProxy = 2A9E2AAC2B19941600F5F14D /* PBXContainerItemProxy */; }; FD201DBF14369B1700906237 /* PBXTargetDependency */ = { isa = PBXTargetDependency; @@ -3012,13 +3182,54 @@ MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; - OTHER_CFLAGS = ""; PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = macosx; USER_HEADER_SEARCH_PATHS = "$(SRCROOT)/genwrap"; }; name = Release; }; + 2A485F742B5DAEBE009D80F8 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + 2A485F8D2B641A27009D80F8 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CODE_SIGN_STYLE = Automatic; + COPY_PHASE_STRIP = NO; + ENABLE_NS_ASSERTIONS = 
NO; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + INSTALL_PATH = "$(TOOLCHAIN_INSTALL_DIR)/usr/bin"; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 14.0; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + OTHER_CFLAGS = ( + "-I$(SRCROOT)/localedef/libc", + "-I${SRCROOT}/localedef", + ); + PRODUCT_NAME = localedef; + SDKROOT = macosx; + }; + name = Release; + }; 2A688AFD2A461F0100F211FD /* Release */ = { isa = XCBuildConfiguration; buildSettings = { @@ -3247,7 +3458,21 @@ }; name = Release; }; - 2A7E4126297A6BA1003942C8 /* Release */ = { + 2A7F779827B30E9000CACBDB /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_ENTITLEMENTS = "ps/entitlements-lowpriv.plist"; + GCC_PREPROCESSOR_DEFINITIONS = "__FBSDID=__RCSID"; + "INSTALL_MODE_FLAG[sdk=macosx*]" = "u+sw,go-w,a+rX"; + INSTALL_PATH = /bin; + OTHER_CFLAGS = ""; + PRODUCT_NAME = "$(TARGET_NAME)"; + SKIP_INSTALL = YES; + WARNING_CFLAGS = "-Wno-#warnings"; + }; + name = Release; + }; + 2A96BE7629839C6200F1705B /* Release */ = { isa = XCBuildConfiguration; buildSettings = { CLANG_ANALYZER_NONNULL = YES; @@ -3270,27 +3495,12 @@ MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; - OTHER_CFLAGS = "-DWRAPPER_ANALYTICS_TESTING"; PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = macosx; }; name = Release; }; - 2A7F779827B30E9000CACBDB /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_ENTITLEMENTS = "ps/entitlements-lowpriv.plist"; - GCC_PREPROCESSOR_DEFINITIONS = "__FBSDID=__RCSID"; - "INSTALL_MODE_FLAG[sdk=macosx*]" = "u+sw,go-w,a+rX"; - INSTALL_PATH = /bin; - OTHER_CFLAGS = ""; - PRODUCT_NAME = "$(TARGET_NAME)"; - SKIP_INSTALL = YES; - WARNING_CFLAGS = "-Wno-#warnings"; - }; - name = Release; - }; - 2A96BE7629839C6200F1705B /* Release */ = { + 2A9C8A3E29C8F8EF00416E6B /* Release */ = { isa = 
XCBuildConfiguration; buildSettings = { CLANG_ANALYZER_NONNULL = YES; @@ -3309,7 +3519,7 @@ GCC_C_LANGUAGE_STANDARD = gnu11; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; - INSTALL_PATH = /AppleInternal/Tests/adv_cmds/genwrap; + INSTALL_PATH = /AppleInternal/Tests/adv_cmds/pgrep; MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; @@ -3318,7 +3528,7 @@ }; name = Release; }; - 2A9C8A3E29C8F8EF00416E6B /* Release */ = { + 2A9E2AA52B198AE100F5F14D /* Release */ = { isa = XCBuildConfiguration; buildSettings = { CLANG_ANALYZER_NONNULL = YES; @@ -3337,7 +3547,7 @@ GCC_C_LANGUAGE_STANDARD = gnu11; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; - INSTALL_PATH = /AppleInternal/Tests/adv_cmds/pgrep; + INSTALL_PATH = /AppleInternal/Tests/adv_cmds/genwrap; MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; @@ -3346,12 +3556,37 @@ }; name = Release; }; - FD0D7F99108FE550004F2A1C /* Release */ = { + 2AFA03122A2EE8AB00440D64 /* Release */ = { isa = XCBuildConfiguration; buildSettings = { + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CODE_SIGN_STYLE = Automatic; + COPY_PHASE_STRIP = NO; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; INSTALL_PATH = /usr/bin; - PRODUCT_NAME = localedef; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 14.0; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + OTHER_CFLAGS = ( 
+ "-I$(SRCROOT)/localedef/libc", + "-I${SRCROOT}/localedef", + ); + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; }; name = Release; }; @@ -3415,6 +3650,7 @@ GCC_WARN_UNUSED_LABEL = YES; GCC_WARN_UNUSED_VALUE = YES; GCC_WARN_UNUSED_VARIABLE = YES; + OTHER_CFLAGS = ""; SDKROOT = macosx.internal; USE_HEADERMAP = NO; VERSIONING_SYSTEM = "apple-generic"; @@ -3595,6 +3831,22 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + 2A485F752B5DAEBE009D80F8 /* Build configuration list for PBXAggregateTarget "Host" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 2A485F742B5DAEBE009D80F8 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 2A485F8C2B641A27009D80F8 /* Build configuration list for PBXNativeTarget "localedef_host" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 2A485F8D2B641A27009D80F8 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; 2A688AFC2A461F0100F211FD /* Build configuration list for PBXNativeTarget "env_selector_addarg" */ = { isa = XCConfigurationList; buildConfigurations = ( @@ -3659,14 +3911,6 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - 2A7E4125297A6BA1003942C8 /* Build configuration list for PBXNativeTarget "analytics_redacted" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 2A7E4126297A6BA1003942C8 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; 2A7F779727B30E9000CACBDB /* Build configuration list for PBXNativeTarget "ps_lowpriv" */ = { isa = XCConfigurationList; buildConfigurations = ( @@ -3691,10 +3935,18 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - FD0D7FA2108FE56E004F2A1C /* Build configuration list for PBXAggregateTarget "localedef" */ = { + 2A9E2AA42B198AE100F5F14D /* Build configuration list for PBXNativeTarget "arg_selector_complex_logonly_args" */ 
= { + isa = XCConfigurationList; + buildConfigurations = ( + 2A9E2AA52B198AE100F5F14D /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 2AFA03112A2EE8AB00440D64 /* Build configuration list for PBXNativeTarget "localedef" */ = { isa = XCConfigurationList; buildConfigurations = ( - FD0D7F99108FE550004F2A1C /* Release */, + 2AFA03122A2EE8AB00440D64 /* Release */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; diff --git a/colldef/parse.y b/colldef/parse.y index 46a4d5a..f822fbf 100644 --- a/colldef/parse.y +++ b/colldef/parse.y @@ -1348,7 +1348,7 @@ charname2(wchar_t wc) static char * show(int c) { - static char buf[5]; + static char buf[16]; if (c >=32 && c <= 126) sprintf(buf, "'%c' ", c); diff --git a/genwrap/genwrap.8 b/genwrap/genwrap.8 index f93da50..4aeced9 100644 --- a/genwrap/genwrap.8 +++ b/genwrap/genwrap.8 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2022 Apple Inc. All rights reserved. +.\" Copyright (c) 2023 Apple Inc. All rights reserved. .\" .\" @APPLE_LICENSE_HEADER_START@ .\" @@ -21,7 +21,7 @@ .\" .\" @APPLE_LICENSE_HEADER_END@ .\" -.Dd June 23, 2023 +.Dd April 28, 2024 .Dt GENWRAP 8 .Os .Sh NAME @@ -29,6 +29,8 @@ .Nd generate parameterized program wrappers .Sh SYNOPSIS .Nm +.Op Fl d +.Op Fl n Ar name .Op Fl o Ar output .Ar spec .Sh DESCRIPTION @@ -57,6 +59,28 @@ should be a file written in the format specified later in The following options are available for .Nm : .Bl -tag -offset indent +.It Fl d +Enables debug output to stderr. +In particular, this emits notice of each action taken as a result of the +specified wrapper definition as it is parsed, without any filtering. +This is mainly intended to aid in debugging +.Nm +itself. +.It Fl n Ar name +Use +.Ar name +for the generated wrapper's name. +This will be used as the name within +.Pa /var/select +to check for a defined system choice.
+The +.Ar name +will be automatically derived from a filename by omitting a trailing +.Dq .wrapper , +so it is not necessary to specify for cases where the wrapper +.Pa foo.wrapper +should use +.Pa /var/select/foo . .It Fl o Ar output Switch the output file to .Ar output . @@ -72,13 +96,33 @@ Note that only generates a C source file for the shim. .Nm does not directly offer the ability to build a binary from the shim. +.Pp +The +.Nm +generated wrapper will use the following logic to choose the application to +execute, in this order of precedence: +.Pp +If an +.Va env +var is configured and the variable is set in the environment, then it will be used. +.Pp +If argument-based switching is used, then the default application is checked +first for compatibility with the arguments presented. +The default application is discussed below in the +.Sx SPECIFICATION FORMAT +section. +The applications are checked in the defined order after the default application +is checked. +.Pp +If the above criteria did not result in anything to execute, then the default +application will be executed. .Ss SPECIFICATION FORMAT .Nm uses a minimal custom language for generating shims. This language is best described as two primary functional blocks, the shim setting block and the application specification block. The shim setting block -.Sy must +.Em must appear before application specifications. Comments begin with a .Dq # , @@ -110,16 +154,16 @@ application specification block. Applications are defined after the shim setting block. At least one application must be defined. Every application must have a -.Sy path +.Cm path set. .Pp All directives besides the -.Sy application +.Cm application directives apply to the last specified -.Sy application +.Cm application prior to the directive. By convention, all directives besides the -.Sy application +.Cm application directives are indented by some consistent amount of whitespace to improve readability of the specification.
.Pp @@ -134,7 +178,7 @@ The given name typically corresponds to the name of the binary that this application block will represent, but its exact value is not significant. If the -.Sy env +.Cm env directive is specified above, then the specified environment variable may be set to .Ar name @@ -142,9 +186,13 @@ to indicate that this application block's path should be used. .It default Optionally specifies that this application block is the default application. If no -.Sy default +.Cm default directive is specified, then the resulting shim will use the first specified application as the default. +If a +.Pa /var/select +symlink is specified for the given wrapper, then the named application will be +considered the default instead. .It argmode logonly Indicates that any flag directives specified should only be considered for logging purposes. @@ -153,9 +201,9 @@ If arguments are evaluated to determine which application to choose, one with set will be considered acceptable by default. .It path Dq Ar path This directive is -.Sy required +.Cm required for each specified -.Sy application . +.Cm application . The given .Ar path should be the path to the binary to be used if this application is selected. @@ -163,7 +211,20 @@ should be the path to the binary to be used if this application is selected. If a relative path is given, then it will be prefixed with the active developer directory as provided by .Xr xcode-select 1 -at runtime. +at runtime, with one exception. +If the relative path begins with a literal +.Dq $XCODE/ +component, then +.Nm +will instead invoke +.Xr xcrun 1 +with the trailing part of the relative path as the tool name. +For example, +.Dq $XCODE/yacc +will do the equivalent behavior of: +.Bd -literal -offset indent +xcrun yacc ... +.Ed .Pp If an absolute path is given, then it will be used as-is. .It addarg Do Ar argv[1] Dc Oo Do argv[n] Dc ... Oc @@ -171,7 +232,8 @@ Adds the given quoted words to the arguments we pass to this application, if selected. 
These arguments are inserted at argv[1], before any arguments that the wrapper was invoked with. -.It flag Do Ar flag Dc Oo optional_arg | arg Oc +.It flag Do Ar flag Dc Oo optional_arg | arg Oc Oo logonly Oc Op pattern \ +Dq pattern Adds the given .Ar flag as a recognized flag by this application. @@ -180,11 +242,27 @@ If is a long flag, it must not have a short flag alias when used in this form. .Pp If -.Sy optional_arg +.Cm optional_arg or -.Sy arg +.Cm arg are specified, then it is noted in the shim that this flag takes an argument. -.It flag Do Ar longflag Dc Do Ar shortflag Dc Oo optional_arg | arg Oc +.Pp +If +.Cm logonly +is specified, then this argument will not be considered an acceptable argument +for the application in argument-based switching. +.Pp +Specifying a +.Cm pattern +enables collection of option arguments if and only if they match the described +pattern. +The +.Ar pattern +will be interpreted and executed as a POSIX extended regular expression, as +described in +.Xr re_format 7 . +.It flag Do Ar longflag Dc Do Ar shortflag Dc Oo optional_arg | arg Oc \ +Oo logonly Oc Op pattern Dq pattern This is equivalent to the above form, except that .Ar longflag explicitly has a @@ -193,7 +271,7 @@ alias. .El .Pp The -.Sy flag +.Cm flag directives are only necessary if argument-based switching is desired, rather than environment-based switching. Informing the shim of argument structure may also be used to limit reported @@ -215,7 +293,7 @@ shims report events with the following details: The name the wrapper was invoked as, i.e., argv[0]. .It Sy chosen The name, provided in the -.Sy application +.Cm application directive, of the specific application that was chosen to be invoked. .It Sy arguments The array of arguments passed to the wrapper. @@ -224,9 +302,9 @@ The array of arguments passed to the wrapper. Note that, currently, all arguments passed to the wrapper are reported in these events. 
The specification grammar accepts a -.Sy noargs +.Cm noargs keyword after the -.Sy analytics +.Cm analytics , which may be used to omit flags' argument values as well as any non-option arguments to prevent an inadvertent information disclosure through, e.g., file names. @@ -301,6 +379,7 @@ Other examples may be found in .Sh SEE ALSO .Xr analyticsd 1 , .Xr xcode-select 1 , +.Xr xcrun 1 , .Xr analytics_tool 8 .Sh BUGS The shims generated by diff --git a/genwrap/genwrap.c b/genwrap/genwrap.c index d7f6968..250cc76 100644 --- a/genwrap/genwrap.c +++ b/genwrap/genwrap.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include #include @@ -37,6 +39,8 @@ #include "genwrap.h" +#define GENWRAP_MAXNAMELEN NAME_MAX + #ifndef nitems #define nitems(x) (sizeof((x)) / sizeof((x)[0])) #endif @@ -51,8 +55,10 @@ struct appflag { const char *appflag_flag; + const char *appflag_pattern; int appflag_arg; - char appflag_alias; + int appflag_alias; + uint32_t appflag_flags; LIST_ENTRY(appflag) appflag_entries; }; @@ -64,20 +70,41 @@ struct app { const char *app_path; struct appflag *app_lastflag; const char **app_add_argv; + size_t app_short_patterns; + size_t app_long_patterns; int app_add_nargv; unsigned int app_shortflags; unsigned int app_longflags; bool app_default; + bool app_anyarg_logonly; bool app_argmode_logonly; bool app_path_relcwd; }; +#define TOKEN(str) { str, sizeof(str) - 1 } + +static const struct app_path_token { + const char *token; + size_t tokensz; +} app_path_tokens[] = { + TOKEN("XCODE"), +}; + static LIST_HEAD(, app) apps = LIST_HEAD_INITIALIZER(apps); static size_t app_count; +static size_t app_maxnamelen; static const char *analytics_id; static bool analytics_no_args; static const char *envvar; +static int debug; +#define dprintf(...) 
do { \ + if (debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while(0) + +static char *wrapper_name; + /* * Allow consumers to build without xcselect if they're using all absolute or * cwd-relative paths; xcselect requires an internal SDK. @@ -91,10 +118,40 @@ static void usage(void) { - fprintf(stderr, "Usage: %s [-o output] spec", getprogname()); + fprintf(stderr, "usage: %s [-d] [-n name] [-o output] spec\n", getprogname()); exit(1); } +/* + * The implied name of the wrapper is the filename with a trailing .wrapper + * chopped off. This is used for checking /var/select/. + */ +static char * +implied_name(const char *filename) +{ + const char *basefile, *suffix; + char *name = NULL; + + basefile = basename(filename); + suffix = strrchr(basefile, '.'); + if (suffix != NULL) { + /* + * If it has a suffix, see if it's ".wrapper". If it's not, + * we'll just use the full name. + */ + if (strcmp(suffix, ".wrapper") == 0) { + name = strndup(basefile, suffix - basefile); + if (name == NULL) + err(1, "strndup"); + } + } + + if (name == NULL) + name = strdup(basefile); + + return (name); +} + int main(int argc, char *argv[]) { @@ -102,8 +159,17 @@ main(int argc, char *argv[]) int ch; outfile = NULL; - while ((ch = getopt(argc, argv, "o:")) != -1) { + while ((ch = getopt(argc, argv, "dn:o:")) != -1) { switch (ch) { + case 'd': + debug++; + break; + case 'n': + free(wrapper_name); + wrapper_name = strdup(optarg); + if (wrapper_name == NULL) + err(1, "strdup"); + break; case 'o': if (strcmp(optarg, "-") == 0 || strcmp(optarg, "/dev/stdout") == 0) @@ -132,6 +198,9 @@ main(int argc, char *argv[]) yyin = stdin; yyfile = "(stdin)"; } else { + if (wrapper_name == NULL) + wrapper_name = implied_name(argv[0]); + yyin = fopen(argv[0], "r"); yyfile = argv[0]; } @@ -153,8 +222,21 @@ struct app * app_add(struct app *app, const char *name) { struct app *new_app; + size_t namelen; assert(name != NULL); + + /* + * name + NUL terminator must be no more than MAXNAMELEN, because we + * want it 
to fit into a symlink. + */ + namelen = strlen(name) + 1; + if (namelen > GENWRAP_MAXNAMELEN) + yyerror("name too long"); + + if (namelen > app_maxnamelen) + app_maxnamelen = namelen; + new_app = calloc(1, sizeof(*new_app)); if (new_app == NULL) err(1, "calloc"); @@ -172,6 +254,8 @@ app_add(struct app *app, const char *name) app_count++; + dprintf("application[name=\"%s\"]\n", new_app->app_name); + return (new_app); } @@ -180,6 +264,8 @@ app_set_default(struct app *app) { static struct app *default_app; + dprintf(" default"); + /* * Tracking the specified default app because it's not really erroneous * to specify the default app out-of-order in the specification, just to @@ -189,8 +275,11 @@ app_set_default(struct app *app) warnx("WARNING: '%s' previously set as default app", default_app->app_name); default_app->app_default = false; + dprintf("[from=\"%s\"]", default_app->app_name); } + dprintf("\n"); + default_app = app; app->app_default = true; @@ -211,6 +300,7 @@ app_set_argmode_logonly(struct app *app) { app->app_argmode_logonly = true; + dprintf(" argmode[logonly=true]\n"); } void @@ -231,6 +321,8 @@ app_add_addarg(struct app *app, const char **argv, int nargv) for (int i = app->app_add_nargv; i < total_args; i++) { int idx = i - app->app_add_nargv; + dprintf(" addarg[arg=\"%s\"]\n", argv[idx]); + /* * The caller won't be needing this anymore, just take it rather * than making our own copy. NULL out the caller's copy so that @@ -243,15 +335,70 @@ app_add_addarg(struct app *app, const char **argv, int nargv) app->app_add_nargv = total_args; } +static bool +app_validate_path(struct app *app, const char *path, bool relcwd) +{ + const struct app_path_token *token; + size_t pos; + + /* + * The only validation we even attempt at the moment is just that a + * leading $ should match a defined token. 
+ */ + if (path[0] != '$') + return (true); + + /* Validate leading tokens last */ + path++; + for (size_t i = 0; i < nitems(app_path_tokens); i++) { + token = &app_path_tokens[i]; + + if (strncmp(path, token->token, token->tokensz) != 0) + continue; + + /* + * Must not end right after the token. + */ + if (path[token->tokensz] == '\0') + return (false); + + /* + * Make sure we don't have trailing garbage; it could also be + * that we're looking at a token that's incidentally a prefix of + * another token, so we don't outright reject it now. + */ + if (path[token->tokensz] != '/') + continue; + + /* Make sure it's not just trailing / then the end... */ + pos = token->tokensz + 1; + while (path[pos] == '/') + pos++; + + if (path[pos] == '\0') + return (false); + + return (true); + } + + return (false); +} + void app_set_path(struct app *app, const char *path, bool relcwd) { + dprintf(" path["); + if (app->app_path != NULL) { warnx("WARNING: overriding path for '%s'", app->app_name); + dprintf("from=\"%s\", ", app->app_path); free(__DECONST(char *, app->app_path)); } + if (!app_validate_path(app, path, relcwd)) + yyerror("bad path specified"); + app->app_path = strdup(path); if (app->app_path == NULL) err(1, "strdup"); @@ -259,6 +406,8 @@ app_set_path(struct app *app, const char *path, bool relcwd) if (!relcwd && path[0] != '/') needs_xcselect = true; + + dprintf("to=\"%s\"]\n", app->app_path); } const char * @@ -269,7 +418,8 @@ app_get_path(const struct app *app) } static void -app_add_one_flag(struct app *app, const char *flag, char alias, int argument) +app_add_one_flag(struct app *app, const char *flag, char alias, int argument, + uint32_t flags, const char *pattern) { struct appflag *af; @@ -278,10 +428,16 @@ app_add_one_flag(struct app *app, const char *flag, char alias, int argument) err(1, "calloc"); af->appflag_arg = argument; + af->appflag_flags = flags; af->appflag_alias = alias; af->appflag_flag = strdup(flag); if (af->appflag_flag == NULL) err(1, 
"strdup"); + if (pattern != NULL) { + af->appflag_pattern = strdup(pattern); + if (af->appflag_pattern == NULL) + err(1, "strdup"); + } /* Preserve arg order for the aesthetics of it... */ if (app->app_lastflag == NULL) @@ -292,28 +448,62 @@ app_add_one_flag(struct app *app, const char *flag, char alias, int argument) app->app_lastflag = af; /* A bit of accounting to simplify later iteration when writing out. */ - if (flag[1] == '\0') + if (flag[1] == '\0') { + if (pattern != NULL) + app->app_short_patterns++; app->app_shortflags++; - else + } else { + if (pattern != NULL) + app->app_long_patterns++; app->app_longflags++; + } + + if ((flags & ARGFLAG_LOGONLY) != 0) + app->app_anyarg_logonly = true; } void -app_add_flag(struct app *app, const char *flag, const char *alias, int argument) +app_add_flag(struct app *app, const char *flag, const char *alias, int argument, + uint32_t flags, const char *pattern) { assert(flag != NULL); if (alias != NULL && alias[1] != '\0') yyerror("short flag alias must only have one character"); + if (debug) { + fprintf(stderr, " arg[flag=\"%s\"", flag); + if (alias != NULL) + fprintf(stderr, ", alias=\"%s\"", alias); + switch (argument) { + case required_argument: + fprintf(stderr, ", arg=required"); + break; + case optional_argument: + fprintf(stderr, ", arg=optional"); + break; + default: + fprintf(stderr, ", arg=none"); + break; + } + + if (pattern != NULL) + fprintf(stderr, ", pattern=\"%s\"", pattern); + fprintf(stderr, ", flags=%x]\n", flags); + } + /* The alias must be added as its own flag to the optstr as well. */ - if (alias != NULL) - app_add_one_flag(app, alias, 0, argument); - app_add_one_flag(app, flag, (alias != NULL ? alias[0] : 0), argument); + if (alias != NULL) { + app_add_one_flag(app, alias, 0, argument, + flags & ~ARGFLAG_NO_ALIAS, pattern); + } + + app_add_one_flag(app, flag, (alias != NULL ? 
alias[0] : 0), argument, + flags, pattern); } static const char * -app_longopt_name(struct app *app) +app_longopt_name(const struct app *app) { /* Single threaded; kludgy but OK. */ static char namebuf[PATH_MAX]; @@ -323,7 +513,18 @@ app_longopt_name(struct app *app) } static const char * -app_addarg_name(struct app *app) +app_pattern_name(const struct app *app, const char *type) +{ + /* Single threaded; kludgy but OK. */ + static char namebuf[PATH_MAX]; + + snprintf(namebuf, sizeof(namebuf), "%s_%s_patterns", app->app_name, + type); + return (namebuf); +} + +static const char * +app_addarg_name(const struct app *app) { /* Single threaded; kludgy but OK. */ static char namebuf[PATH_MAX]; @@ -332,6 +533,16 @@ app_addarg_name(struct app *app) return (namebuf); } +static const char * +app_logonly_name(const struct app *app) +{ + /* Single threaded; kludgy but OK. */ + static char namebuf[PATH_MAX]; + + snprintf(namebuf, sizeof(namebuf), "%s_logonly", app->app_name); + return (namebuf); +} + void wrapper_set_analytics(const char *id, bool noargs) { @@ -349,6 +560,7 @@ wrapper_set_analytics(const char *id, bool noargs) err(1, "strdup"); analytics_no_args = noargs; + dprintf("analytics[id=\"%s\"]\n", analytics_id); } void @@ -359,6 +571,7 @@ wrapper_set_envvar(const char *var) yyerror("invalid env var value"); envvar = var; + dprintf("env[name=\"%s\"]\n", envvar); } static void @@ -383,14 +596,124 @@ wrapper_output_file(FILE *outfile, const char *path) } static void -wrapper_write_long_args(FILE *outfile, struct app *app) +wrapper_write_logonly_args(FILE *outfile, const struct app *app) +{ + const struct appflag *af; + int val; + + fprintf(outfile, "static const bool %s[] = {\n", + app_logonly_name(app)); + LIST_FOREACH(af, &app->app_flags, appflag_entries) { + if ((af->appflag_flags & ARGFLAG_LOGONLY) == 0) + continue; + if (af->appflag_flag[1] == '\0') { + /* Short option */ + val = (unsigned char)af->appflag_flag[0]; + } else { + /* + * Either set by the wrapper spec, 
or set to a CHAR_MAX + * + n constant below. + */ + val = af->appflag_alias; + } + + if (val >= CHAR_MAX) { + fprintf(outfile, "\t[CHAR_MAX + %d] = true,\n", + val - CHAR_MAX); + } else { + fprintf(outfile, "\t['%c'] = true,\n", + val); + } + } + fprintf(outfile, "};\n"); +} + +static void +wrapper_write_pattern(FILE *outfile, const char *pattern) +{ + + /* + * We write the pattern out byte-by-byte so that we can escape any + * slashes and quotes as needed. + */ + for (const char *walker = pattern; *walker != '\0'; walker++) { + if (*walker == '\\' || *walker == '"') + fputc('\\', outfile); + fputc(*walker, outfile); + } +} + +static void +wrapper_write_patterns(FILE *outfile, const struct app *app) +{ + const struct appflag *af; + size_t idx; + + /* + * These will all be indexed by position for the given type of flag in + * app_flags, which must be the order they're written into the + * wrapper's optstr and options array by wrapper_write_args() and + * wrapper_write_long_args() respectively. 
+ */ + if (app->app_short_patterns != 0) { + idx = 0; + fprintf(outfile, "static struct arg_expr %s[%u] = {\n", + app_pattern_name(app, "short"), app->app_shortflags); + LIST_FOREACH(af, &app->app_flags, appflag_entries) { + if (af->appflag_flag[1] != '\0') + continue; + if (af->appflag_pattern != NULL) { + fprintf(outfile, + "\t[%zu] = { .expr_str = \"", idx); + + wrapper_write_pattern(outfile, + af->appflag_pattern); + + fprintf(outfile, "\" },\n"); + } + + idx++; + } + fprintf(outfile, "};\n"); + } + + if (app->app_long_patterns != 0) { + const char *expname; + + idx = 0; + expname = app_pattern_name(app, "long"); + fprintf(outfile, "static struct arg_expr %s[%u] = {\n", + expname, app->app_longflags); + LIST_FOREACH(af, &app->app_flags, appflag_entries) { + if (af->appflag_flag[1] == '\0') + continue; + if (af->appflag_pattern != NULL) { + fprintf(outfile, + "\t[%zu] = { .expr_str = \"", idx); + + wrapper_write_pattern(outfile, + af->appflag_pattern); + + fprintf(outfile, "\" },\n"); + } + + idx++; + } + fprintf(outfile, "};\n"); + fprintf(outfile, + "_Static_assert(nitems(%s) == nitems(%s) - 1, \"Long option mismatch\");\n", + expname, app_longopt_name(app)); + } +} + +static void +wrapper_write_long_args(FILE *outfile, const struct app *app) { static const char *argvalues[] = { [no_argument] = "no_argument", [optional_argument] = "optional_argument", [required_argument] = "required_argument", }; - struct appflag *af; int coff; /* Offset from CHAR_MAX */ @@ -410,10 +733,12 @@ wrapper_write_long_args(FILE *outfile, struct app *app) fprintf(outfile, "%s, ", argvalues[af->appflag_arg]); fprintf(outfile, "NULL, "); - if (af->appflag_alias != '\0') + if (af->appflag_alias != '\0') { fprintf(outfile, "'%c'", af->appflag_alias); - else + } else { + af->appflag_alias = CHAR_MAX + coff; fprintf(outfile, "CHAR_MAX + %d", coff++); + } fprintf(outfile, " },\n"); } fprintf(outfile, " { NULL, 0, 0, 0 },\n"); @@ -421,7 +746,7 @@ wrapper_write_long_args(FILE *outfile, struct 
app *app) } static void -wrapper_write_addargs(FILE *outfile, struct app *app) +wrapper_write_addargs(FILE *outfile, const struct app *app) { fprintf(outfile, "static const char *%s[] = {\n", app_addarg_name(app)); @@ -432,13 +757,13 @@ wrapper_write_addargs(FILE *outfile, struct app *app) } static void -wrapper_write_args(FILE *outfile, struct app *app) +wrapper_write_args(FILE *outfile, const struct app *app) { - struct appflag *af; + const struct appflag *af; if (app->app_shortflags > 0) { /* app_optstr */ - fprintf(outfile, " .app_optstr = \""); + fprintf(outfile, " .app_optstr = \"+"); LIST_FOREACH(af, &app->app_flags, appflag_entries) { if (af->appflag_flag[1] != '\0') continue; @@ -459,6 +784,13 @@ wrapper_write_args(FILE *outfile, struct app *app) fprintf(outfile, " .app_longopts = %s,\n", app_longopt_name(app)); } + + if (app->app_anyarg_logonly) { + fprintf(outfile, " .app_nlogonly = nitems(%s),\n", + app_logonly_name(app)); + fprintf(outfile, " .app_logonly_opts = %s,\n", + app_logonly_name(app)); + } } static void @@ -482,6 +814,10 @@ wrapper_write(FILE *outfile) */ fprintf(outfile, "#define WRAPPER_APPLICATION_COUNT %zu\n", app_count); + fprintf(outfile, "#define WRAPPER_MAXNAMELEN %zu\n", app_maxnamelen); + if (wrapper_name != NULL) + fprintf(outfile, "#define WRAPPER_NAME \"%s\"\n", wrapper_name); + if (envvar != NULL) { fprintf(outfile, "#define WRAPPER_ENV_VAR \"%s\"\n", envvar); @@ -502,14 +838,33 @@ wrapper_write(FILE *outfile) wrapper_output_file(outfile, _PATH_SKELDIR "wrapper-head.c"); fprintf(outfile, "\n/* START OF SPEC @" "generated CONTENTS */\n\n"); - fprintf(outfile, "/* Long Option Definitions */\n"); + fprintf(outfile, "/* Long, Logonly Option and Pattern Definitions */\n"); empty = true; LIST_FOREACH(app, &apps, app_entries) { - if (app->app_longflags == 0) - continue; + bool printed = false; - wrapper_write_long_args(outfile, app); - empty = false; + if (app->app_longflags != 0) { + wrapper_write_long_args(outfile, app); + empty = 
false; + printed = true; + } + + if (app->app_anyarg_logonly) { + wrapper_write_logonly_args(outfile, app); + empty = false; + printed = true; + } + + if (app->app_short_patterns != 0 || + app->app_long_patterns != 0) { + wrapper_write_patterns(outfile, app); + empty = false; + printed = true; + } + + if (printed) { + fprintf(outfile, "\n"); + } } if (empty) @@ -550,6 +905,16 @@ wrapper_write(FILE *outfile) } if (!LIST_EMPTY(&app->app_flags)) wrapper_write_args(outfile, app); + if (app->app_short_patterns != 0) { + fprintf(outfile, + " .app_shortopt_expr = %s,\n", + app_pattern_name(app, "short")); + } + if (app->app_long_patterns != 0) { + fprintf(outfile, + " .app_longopt_expr = %s,\n", + app_pattern_name(app, "long")); + } fprintf(outfile, " },\n"); } diff --git a/genwrap/genwrap.h b/genwrap/genwrap.h index fa26ddb..fe952d7 100644 --- a/genwrap/genwrap.h +++ b/genwrap/genwrap.h @@ -40,6 +40,10 @@ extern int yyline; /* Application logic */ struct app; +#define ARGFLAG_LOGONLY 0x0001 + +/* Don't add these flags to aliases. 
*/ +#define ARGFLAG_NO_ALIAS (ARGFLAG_LOGONLY) struct app *app_add(struct app *current_app, const char *name); void app_set_default(struct app *app); @@ -48,7 +52,7 @@ void app_add_addarg(struct app *app, const char **argv, int nargv); void app_set_path(struct app *app, const char *path, bool relcwd); const char *app_get_path(const struct app *app); void app_add_flag(struct app *app, const char *flag, const char *alias, - int argument); + int argument, uint32_t flags, const char *pattern); void wrapper_set_analytics(const char *id, bool noargs); void wrapper_set_envvar(const char *var); diff --git a/genwrap/genwrap.y b/genwrap/genwrap.y index a109b7e..a13f4cf 100644 --- a/genwrap/genwrap.y +++ b/genwrap/genwrap.y @@ -1,5 +1,7 @@ %union { char *str; + int num; + unsigned int flag; } %token ANALYTICS @@ -15,9 +17,13 @@ %token ENV %token ARGMODE %token LOGONLY +%token PATTERN %token ID +%type flag_argspec; +%type flag_flags; + %{ /* * Copyright (c) 2022 Apple Inc. All rights reserved. @@ -47,6 +53,7 @@ #include #include +#include #include #include "genwrap.h" @@ -55,13 +62,20 @@ const char *yyfile; int yyline; static struct app *current_app; -static void addflag(const char *first, const char *second, int argument); struct stringlist { STAILQ_ENTRY(stringlist) entries; char *str; }; +static struct flagspec { + const char *flag; + const char *alias; + const char *pattern; + int argument; + uint32_t argflags; +} flagspec; + STAILQ_HEAD(stringhead, stringlist); static int string_count; @@ -71,6 +85,9 @@ static STAILQ_HEAD(, stringlist) current_stringlist = static void stringlist_init(const char *str); static void stringlist_append(const char *str); static void stringlist_done(char ***, int *); + +static void addflag(struct flagspec *sp); +static char *checkpattern(const char *pat); %} %% @@ -146,44 +163,74 @@ application_spec: yyerror("cwd paths must be relative"); app_set_path(current_app, $2, true); } | - FLAG ID { - addflag($2, NULL, no_argument); + full_argspec { + 
addflag(&flagspec); + } + +full_argspec: + basic_flagspec | basic_flagspec flagspec_extension + +flagspec_extension: + flagspec_extension flagspec_extension | + flag_argspec { + flagspec.argument = $1; } | - FLAG ID ARG { - addflag($2, NULL, required_argument); + flag_flags { + flagspec.argflags |= $1; } | - FLAG ID OPTARG { - addflag($2, NULL, optional_argument); + PATTERN ID { + flagspec.pattern = checkpattern($2); + /* Should not have allocated memory. */ + assert(flagspec.pattern == $2); + } + +basic_flagspec: + FLAG ID { + memset(&flagspec, 0, sizeof(flagspec)); + flagspec.flag = $2; } | FLAG ID ID { - addflag($2, $3, no_argument); + memset(&flagspec, 0, sizeof(flagspec)); + flagspec.flag = $2; + flagspec.alias = $3; + } + +flag_argspec: + ARG { + $$ = required_argument; } | - FLAG ID ID ARG { - addflag($2, $3, required_argument); + OPTARG { + $$ = optional_argument; + } + +flag_flags: + flag_flags '|' flag_flags { + $$ = $1 | $3; } | - FLAG ID ID OPTARG { - addflag($2, $3, optional_argument); + LOGONLY { + $$ = ARGFLAG_LOGONLY; } %% static void -addflag(const char *first, const char *second, int argument) +addflag(struct flagspec *fs) { - if (first[0] == '\0') + if (fs->flag[0] == '\0') yyerror("provided flag must not be empty"); - if (second != NULL && second[0] == '\0') + if (fs->alias != NULL && fs->alias[0] == '\0') yyerror("provided alias must not be empty"); - if (second == NULL) { - app_add_flag(current_app, first, NULL, argument); - return; + /* Allow whatever order for flag, alias. */ + if (fs->flag[1] != '\0' || fs->alias == NULL) { + app_add_flag(current_app, fs->flag, fs->alias, fs->argument, + fs->argflags, fs->pattern); + } else { + app_add_flag(current_app, fs->alias, fs->flag, fs->argument, + fs->argflags, fs->pattern); } - /* Allow whatever order for flag, alias. 
*/ - if (first[1] != '\0') - app_add_flag(current_app, first, second, argument); - else - app_add_flag(current_app, second, first, argument); + /* Pattern memory now belongs to someone else. */ + fs->pattern = NULL; } void @@ -252,3 +299,30 @@ stringlist_done(char ***outlist, int *nelem) *outlist = out; *nelem = n; } + +static char * +checkpattern(const char *pat) +{ + regex_t reg; + int error; + + if (*pat == '\0') + yyerror("pattern must not be empty"); + + /* + * Try to compile it as an ERE, so that we have some idea up front if + * it is basically sane or not. + */ + if ((error = regcomp(&reg, pat, REG_EXTENDED | REG_NOSUB)) != 0) { + char errbuf[128]; + size_t errsz; + + errsz = regerror(error, NULL, errbuf, sizeof(errbuf)); + fprintf(stderr, "pattern error: %s%s\n", errbuf, + errsz > sizeof(errbuf) ? " [...]" : ""); + yyerror("failed to compile pattern"); + } + + regfree(&reg); + return (pat); +} diff --git a/genwrap/lex.l b/genwrap/lex.l index faab343..6b9f4d5 100644 --- a/genwrap/lex.l +++ b/genwrap/lex.l @@ -49,6 +49,7 @@ struct kword { { "env", ENV }, { "argmode", ARGMODE }, { "logonly", LOGONLY }, + { "pattern", PATTERN }, }; int kword_lookup(const char *); diff --git a/genwrap/tests/analytics_redacted.wrapper b/genwrap/tests/analytics_redacted.wrapper deleted file mode 100644 index 6afc2c1..0000000 --- a/genwrap/tests/analytics_redacted.wrapper +++ /dev/null @@ -1,8 +0,0 @@ -analytics "com.apple.genwrap_test" noargs - -application "foo" - cwdpath "bin/foo" - flag "a" - flag "b" arg - flag "count" "c" optional_arg - flag "d" diff --git a/genwrap/tests/analytics_redacted_a.out b/genwrap/tests/analytics_redacted_a.out deleted file mode 100644 index a766f88..0000000 --- a/genwrap/tests/analytics_redacted_a.out +++ /dev/null @@ -1,2 +0,0 @@ -arguments - -ab*REDACTED* diff --git a/genwrap/tests/analytics_redacted_b.out b/genwrap/tests/analytics_redacted_b.out deleted file mode 100644 index b9fd181..0000000 --- a/genwrap/tests/analytics_redacted_b.out +++ /dev/null
@@ -1,3 +0,0 @@ -arguments - -a - --count=*REDACTED* diff --git a/genwrap/tests/analytics_redacted_c.out b/genwrap/tests/analytics_redacted_c.out deleted file mode 100644 index f2d6dfd..0000000 --- a/genwrap/tests/analytics_redacted_c.out +++ /dev/null @@ -1,4 +0,0 @@ -arguments - -a - --count=*REDACTED* - *REDACTED* diff --git a/genwrap/tests/analytics_redacted_d.out b/genwrap/tests/analytics_redacted_d.out deleted file mode 100644 index 8b0f5fc..0000000 --- a/genwrap/tests/analytics_redacted_d.out +++ /dev/null @@ -1,4 +0,0 @@ -arguments - -a - --count - *REDACTED* diff --git a/genwrap/tests/analytics_redacted_e.out b/genwrap/tests/analytics_redacted_e.out deleted file mode 100644 index 477c3d0..0000000 --- a/genwrap/tests/analytics_redacted_e.out +++ /dev/null @@ -1,5 +0,0 @@ -arguments - -ax - -y - -z - *REDACTED* diff --git a/genwrap/tests/analytics_simple.wrapper b/genwrap/tests/analytics_simple.wrapper index 9f2409e..6f3b722 100644 --- a/genwrap/tests/analytics_simple.wrapper +++ b/genwrap/tests/analytics_simple.wrapper @@ -5,4 +5,6 @@ application "foo" flag "a" flag "b" arg flag "count" "c" optional_arg + flag "C" optional_arg pattern "[0-9]{2,3}" flag "d" + flag "t" "type" arg pattern "cat|dog|fish" diff --git a/genwrap/tests/analytics_simple_a.out b/genwrap/tests/analytics_simple_a.out index df655b1..2440240 100644 --- a/genwrap/tests/analytics_simple_a.out +++ b/genwrap/tests/analytics_simple_a.out @@ -1,2 +1,2 @@ arguments - -a + -a 1 diff --git a/genwrap/tests/analytics_simple_b.out b/genwrap/tests/analytics_simple_b.out index ca7e7ce..cb91e72 100644 --- a/genwrap/tests/analytics_simple_b.out +++ b/genwrap/tests/analytics_simple_b.out @@ -1,2 +1,3 @@ arguments - -ad + -a 1 + -d 1 diff --git a/genwrap/tests/analytics_simple_c.out b/genwrap/tests/analytics_simple_c.out index b72908e..cb91e72 100644 --- a/genwrap/tests/analytics_simple_c.out +++ b/genwrap/tests/analytics_simple_c.out @@ -1,3 +1,3 @@ arguments - -a - -d + -a 1 + -d 1 diff --git 
a/genwrap/tests/analytics_simple_d.out b/genwrap/tests/analytics_simple_d.out index 84870cf..c7f6015 100644 --- a/genwrap/tests/analytics_simple_d.out +++ b/genwrap/tests/analytics_simple_d.out @@ -1,2 +1,3 @@ arguments - -ab3 + -a 1 + -b 1 diff --git a/genwrap/tests/analytics_simple_e.out b/genwrap/tests/analytics_simple_e.out index 5c92b3b..1cfe0f2 100644 --- a/genwrap/tests/analytics_simple_e.out +++ b/genwrap/tests/analytics_simple_e.out @@ -1,3 +1,3 @@ arguments - -a - --count=3 + --count 1 + -a 1 diff --git a/genwrap/tests/analytics_simple_f.out b/genwrap/tests/analytics_simple_f.out index b08309b..1cfe0f2 100644 --- a/genwrap/tests/analytics_simple_f.out +++ b/genwrap/tests/analytics_simple_f.out @@ -1,4 +1,3 @@ arguments - -a - --count=3 - arg + --count 1 + -a 1 diff --git a/genwrap/tests/analytics_simple_g.out b/genwrap/tests/analytics_simple_g.out new file mode 100644 index 0000000..ed10537 --- /dev/null +++ b/genwrap/tests/analytics_simple_g.out @@ -0,0 +1,4 @@ +arguments + --count 2 + -a 3 + -d 1 diff --git a/genwrap/tests/arg_selector_complex_logonly_args.wrapper b/genwrap/tests/arg_selector_complex_logonly_args.wrapper new file mode 100644 index 0000000..cbe8d2f --- /dev/null +++ b/genwrap/tests/arg_selector_complex_logonly_args.wrapper @@ -0,0 +1,11 @@ +application "foo" + cwdpath "bin/foo" + argmode logonly + flag "a" + flag "b" +application "newfoo" + default + cwdpath "bin/newfoo" + flag "exit" "x" logonly + flag "y" logonly + flag "z" diff --git a/genwrap/tests/genwrap_test.sh b/genwrap/tests/genwrap_test.sh index 79ac26c..b50f35a 100644 --- a/genwrap/tests/genwrap_test.sh +++ b/genwrap/tests/genwrap_test.sh @@ -28,7 +28,6 @@ atf_test_case analytics analytics_body() { asimple=$(atf_get_srcdir)/analytics_simple - aredacted=$(atf_get_srcdir)/analytics_redacted atf_check -o file:$(atf_get_srcdir)/analytics_simple_a.out \ ${asimple} -a @@ -42,25 +41,37 @@ analytics_body() ${asimple} -a --count=3 atf_check -o 
file:$(atf_get_srcdir)/analytics_simple_f.out \ ${asimple} -a --count=3 arg - - # Now try with *REDACTED* - atf_check -o file:$(atf_get_srcdir)/analytics_simple_a.out \ - ${aredacted} -a - atf_check -o file:$(atf_get_srcdir)/analytics_simple_b.out \ - ${aredacted} -ad - atf_check -o file:$(atf_get_srcdir)/analytics_simple_c.out \ - ${aredacted} -a -d - atf_check -o file:$(atf_get_srcdir)/analytics_redacted_a.out \ - ${aredacted} -ab3 - atf_check -o file:$(atf_get_srcdir)/analytics_redacted_b.out \ - ${aredacted} -a --count=3 - atf_check -o file:$(atf_get_srcdir)/analytics_redacted_c.out \ - ${aredacted} -a --count=3 arg - atf_check -o file:$(atf_get_srcdir)/analytics_redacted_d.out \ - ${aredacted} -a --count 3 - - atf_check -o file:$(atf_get_srcdir)/analytics_redacted_e.out \ - ${aredacted} -ax -y 3 -z + atf_check -o file:$(atf_get_srcdir)/analytics_simple_g.out \ + ${asimple} -a --count=3 -d -a --count=6 -a + + # Most of the basics checked, just run a couple of argument tests. + atf_check -o match:"--type__00 dog" ${asimple} --type dog + atf_check -o match:"--type dog" ${asimple} --type dog + atf_check -o match:"-t__00 dog" ${asimple} -t dog + atf_check -o match:"-t dog" ${asimple} -t dog + + # Also test our repetition capabilities + atf_check -o match:"-C 1" ${asimple} -C + atf_check -o match:"-C 23" ${asimple} -C23 + atf_check -o match:"-C 234" ${asimple} -C234 + atf_check -o match:"-C 1" ${asimple} -C2345 + + # Must be a complete match, no off-by-one. + atf_check -o match:"--type__00 1" ${asimple} --type sdog + atf_check -o match:"--type__00 1" ${asimple} --type dogs + atf_check -o match:"--type__00 1" ${asimple} --type sdogs + + # Multiple appearances, last one doesn't match should just set the + # overall option to the # found. 
+ atf_check -o match:"--type 2" ${asimple} --type dogs --type sdogs + + # Finally, make sure we preserve multiple uses + atf_check -o save:analytics_simple_multiarg.out \ + ${asimple} --type dog --type fish + + atf_check -o match:"--type__00 dog" cat analytics_simple_multiarg.out + atf_check -o match:"--type__01 fish" cat analytics_simple_multiarg.out + atf_check -o match:"--type fish" cat analytics_simple_multiarg.out } atf_test_case arg_selector_simple @@ -104,6 +115,27 @@ arg_selector_simple_body() atf_check -o match:"new" $(atf_get_srcdir)/arg_selector_simple_b -- -x } +atf_test_case arg_selector_simple_varsel +arg_selector_simple_varsel_body() +{ + mkdir -p bin + + printf "#!/bin/sh\necho old" > bin/foo + printf "#!/bin/sh\necho new" > bin/newfoo + + chmod 755 bin/foo bin/newfoo + + # -a normally matches newfoo, since it's first... + atf_check -o match:"new" $(atf_get_srcdir)/arg_selector_simple_a -a + + # ... but /var/select/arg_selector_simple_a should be able to promote + # foo to the default. + + atf_check sudo ln -sf foo /var/select/arg_selector_simple_a + atf_check -o match:"old" $(atf_get_srcdir)/arg_selector_simple_a -a + atf_check sudo rm -f /var/select/arg_selector_simple_a +} + atf_test_case arg_selector_complex arg_selector_complex_body() { @@ -133,6 +165,44 @@ arg_selector_complex_body() atf_check -o match:"new" $(atf_get_srcdir)/arg_selector_complex_logonly --count=3 } +atf_test_case arg_selector_complex_logonly_args +arg_selector_complex_logonly_args_body() +{ + mkdir -p bin + + # newfoo supports the -z flag, but -x and -y are marked logonly. + # foo is in logonly argmode, so it should be the fallback for pretty + # much any option not enumerated in the newfoo set. + printf "#!/bin/sh\necho old" > bin/foo + printf "#!/bin/sh\necho new" > bin/newfoo + + chmod 755 bin/foo bin/newfoo + + # No args and -z should go to newfoo. 
+ atf_check -o match:"new" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args + atf_check -o match:"new" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args -z + + # -n should trigger a fallback to foo, along with -x and -y. + atf_check -o match:"old" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args -z -n + atf_check -o match:"old" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args -n + atf_check -o match:"old" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args -y + # Long and short forms, to be sure. + atf_check -o match:"old" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args --exit + atf_check -o match:"old" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args -x + # Valid -z on either side doesn't save it. + atf_check -o match:"old" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args -z -x + atf_check -o match:"old" \ + $(atf_get_srcdir)/arg_selector_complex_logonly_args -x -z +} + atf_test_case env_selector env_selector_body() { @@ -150,6 +220,24 @@ env_selector_body() # Should use the default application if the env var is set to a bogus # value. atf_check -o match:"old" env FOO_COMMAND="invalid" $(atf_get_srcdir)/env_selector + + # foo is the default, but make sure we can select "newfoo" with + # /var/select/env_selector + atf_check sudo ln -sf newfoo /var/select/env_selector + atf_check -o match:"new" $(atf_get_srcdir)/env_selector + + # Unknown options should just fall back; we'll use one that is overly + # long, one that fits but isn't known. + atf_check sudo ln -sf undefinedfoo /var/select/env_selector + atf_check -o match:"old" $(atf_get_srcdir)/env_selector + atf_check sudo ln -sf app /var/select/env_selector + atf_check -o match:"old" $(atf_get_srcdir)/env_selector + + # This is more appropriate for a cleanup() routine, but we don't + # currently run atf cleanup routines on failure. As a result, we may + # see some collateral damage in later tests if the above env_selector + # invocations failed for some reason. 
+ atf_check sudo rm /var/select/env_selector } atf_test_case env_selector_addarg @@ -182,6 +270,35 @@ env_selector_addarg_body() env FOO_COMMAND="worstfoo" $(atf_get_srcdir)/env_selector_addarg -0 } +atf_test_case env_selector_varsel +env_selector_varsel_body() +{ + mkdir -p bin + + printf "#!/bin/sh\necho old" > bin/foo + printf "#!/bin/sh\necho new" > bin/newfoo + + chmod 755 bin/foo bin/newfoo + + # foo is the default, but make sure we can select "newfoo" with + # /var/select/env_selector + atf_check sudo ln -sf newfoo /var/select/env_selector + atf_check -o match:"new" $(atf_get_srcdir)/env_selector + + # Unknown options should just fall back; we'll use one that is overly + # long, one that fits but isn't known. + atf_check sudo ln -sf undefinedfoo /var/select/env_selector + atf_check -o match:"old" $(atf_get_srcdir)/env_selector + atf_check sudo ln -sf app /var/select/env_selector + atf_check -o match:"old" $(atf_get_srcdir)/env_selector + + # This is more appropriate for a cleanup() routine, but we don't + # currently run atf cleanup routines on failure. As a result, we may + # see some collateral damage in later tests if the above env_selector + # invocations failed for some reason. 
+ atf_check sudo rm /var/select/env_selector +} + atf_test_case simple_shim simple_shim_body() @@ -204,7 +321,7 @@ ui_infile_stdin_body() atf_check $GENWRAP -o out.c $spec atf_check test -s out.c - atf_check -o file:out.c -x "cat $spec | $GENWRAP -o /dev/stdout -" + atf_check -o file:out.c -x "cat $spec | $GENWRAP -n simple_shim -o /dev/stdout -" } atf_test_case ui_outfile_stdout @@ -217,7 +334,7 @@ ui_outfile_stdout_body() atf_check $GENWRAP -o out.c $spec atf_check test -s out.c - atf_check -o file:out.c -x "cat $spec | $GENWRAP -o - /dev/stdin" + atf_check -o file:out.c -x "cat $spec | $GENWRAP -n simple_shim -o - /dev/stdin" } atf_init_test_cases() @@ -225,9 +342,12 @@ atf_init_test_cases() atf_add_test_case analytics atf_add_test_case arg_selector_simple + atf_add_test_case arg_selector_simple_varsel atf_add_test_case arg_selector_complex + atf_add_test_case arg_selector_complex_logonly_args atf_add_test_case env_selector atf_add_test_case env_selector_addarg + atf_add_test_case env_selector_varsel atf_add_test_case simple_shim atf_add_test_case ui_infile_stdin atf_add_test_case ui_outfile_stdout diff --git a/genwrap/wrapper-head.c b/genwrap/wrapper-head.c index dcc7f52..8aef16b 100644 --- a/genwrap/wrapper-head.c +++ b/genwrap/wrapper-head.c @@ -32,10 +32,13 @@ #include #include #include +#include +#include #include #include #include #include +#include #include @@ -50,10 +53,28 @@ #define nitems(x) (sizeof((x)) / sizeof((x)[0])) #endif +/* The trailing slash is important, will be glued to WRAPPER_NAME later. */ +#define _PATH_VARSEL "/var/select/" + #if !defined(WRAPPER_ANALYTICS_IDENT) && defined(WRAPPER_ANALYTICS_TESTING) #error shim was improperly modified to remove the analytics identifier #endif +/* + * Wrappers can specify regular expressions to capture argument values. We + * capture those here; the expression will only be compiled exactly once, just + * in case an argument appears multiple times. 
We put the storage for that in + * arg_expr directly, which might get kind of costly but our wrappers aren't + * that large to begin with. + */ +struct arg_expr { + regex_t expr_reg; + const char *expr_str; + size_t expr_count; + bool expr_compiled; + bool expr_error; +}; + /* * The wrapper generator will provide an array of struct application that we * will sift through to determine which to choose. We'll default to the first @@ -78,6 +99,12 @@ struct application { const char *app_optstr; const struct option *app_longopts; + size_t app_nlogonly; + const bool *app_logonly_opts; + + struct arg_expr *app_shortopt_expr; + struct arg_expr *app_longopt_expr; + /* * Relative paths are relative to cwd, rather than to the selected * developer tools. diff --git a/genwrap/wrapper-tail.c b/genwrap/wrapper-tail.c index e39888b..8ff6cd5 100644 --- a/genwrap/wrapper-tail.c +++ b/genwrap/wrapper-tail.c @@ -28,157 +28,335 @@ */ _Static_assert(nitems(wrapper_apps) > 0, "No applications specified"); +#define wrapper_assert_unreachable(msg) assert(0 && msg) + +#define WRAPPER_XCODE_PREFIX "$XCODE/" + +#define WRAPPER_ARGPREF_SHORT "arg_" +#define WRAPPER_ARGPREF_LONG "arg__" + #ifdef WRAPPER_ANALYTICS_IDENT static const char wrapper_arg_redacted[] = "*REDACTED*"; +static char * +wrapper_logged_arg_long_name(struct arg_expr *expr, const struct option *lopt) +{ + char *argkey = NULL; + + if (expr == NULL) { + if (asprintf(&argkey, WRAPPER_ARGPREF_LONG "%s", + lopt->name) < 0) + return (NULL); + } else { + if (asprintf(&argkey, WRAPPER_ARGPREF_LONG "%s__%.02zu", + lopt->name, expr->expr_count++) < 0) + return (NULL); + } + + assert(argkey != NULL); + return (argkey); +} + +static char * +wrapper_logged_arg_short_name(struct arg_expr *expr, int ch) +{ + char *argkey = NULL; + + if (expr == NULL) { + if (asprintf(&argkey, WRAPPER_ARGPREF_SHORT "%c", ch) < 0) + return (NULL); + } else { + if (asprintf(&argkey, WRAPPER_ARGPREF_SHORT "%c__%.02zu", ch, + expr->expr_count++) < 0) + return 
(NULL); + } + + assert(argkey != NULL); + return (argkey); +} + +static char * +wrapper_logged_arg_name(struct arg_expr *expr, const struct option *lopt, int ch) +{ + + if (lopt != NULL) + return (wrapper_logged_arg_long_name(expr, lopt)); + return (wrapper_logged_arg_short_name(expr, ch)); +} + +static regex_t * +wrapper_logged_arg_expr(struct arg_expr *expr, struct arg_expr **oexpr) +{ + + if (expr->expr_str == NULL) + return (NULL); + + if (!expr->expr_compiled) { + int error; + + expr->expr_compiled = true; + error = regcomp(&expr->expr_reg, expr->expr_str, REG_EXTENDED); + if (error != 0) + expr->expr_error = true; + } + + if (expr->expr_error) + return (NULL); + *oexpr = expr; + return (&expr->expr_reg); +} + +static regex_t * +wrapper_logged_arg_short_expr(const struct application *app, char flag, + struct arg_expr **oexpr) +{ + const char *walker; + size_t idx; + + if (app->app_shortopt_expr == NULL) + return (NULL); + + walker = app->app_optstr; + if (walker == NULL) + return (NULL); + + if (*walker == '+') + walker++; + + for (idx = 0; *walker != '\0' && *walker != flag; walker++) { + if (*walker == ':') + continue; /* Skip */ + idx++; + } + + /* Unrecognized options are skipped, this shouldn't happen. 
*/ + assert(*walker != '\0'); + + return (wrapper_logged_arg_expr(&app->app_shortopt_expr[idx], oexpr)); +} + +static regex_t * +wrapper_logged_arg_long_expr(const struct application *app, size_t idx, + struct arg_expr **oexpr) +{ + + if (app->app_longopt_expr == NULL) + return (NULL); + return (wrapper_logged_arg_expr(&app->app_longopt_expr[idx], oexpr)); +} + static xpc_object_t -wrapper_logged_args_filter(const struct application *app, xpc_object_t args, - int argc, char *argv[]) +wrapper_logged_args(const struct application *app, int argc, char *argv[], + bool *halted, unsigned int *errors) { - int ch, lpoptind; + xpc_object_t args = xpc_dictionary_create_empty(); + int ch, lidx; + + *halted = false; + if (errors != NULL) + *errors = 0; + if (app->app_optstr == NULL && app->app_longopts == NULL) + return (args); opterr = 0; optind = optreset = 1; - lpoptind = 1; + lidx = -1; while ((ch = getopt_long(argc, argv, app->app_optstr, app->app_longopts, - NULL)) != -1) { - char *redacted_arg, **target_arg; - - if (ch == '?' || optarg == NULL) { - /* - * Print the previously processed arg; we didn't redact - * anything in it. - */ - if (optind > lpoptind) { - xpc_array_set_string(args, XPC_ARRAY_APPEND, - argv[optind - 1]); - lpoptind = optind; - } + &lidx)) != -1) { + uint64_t count; + const struct option *lopt = NULL; + regex_t *preg = NULL; + struct arg_expr *expr = NULL; + char *argkey; - continue; + /* + * We must halt if we hit an argument that isn't documented in + * the wrapper definition. We have no idea if whatever follows + * is an argument or not, we can only make some assumptions + * based on the basic shape of it (and we would rather not). 
+ */ + if (ch == '?') { + *halted = true; + break; } - target_arg = &argv[optind - 1]; - - assert(lpoptind != optind); + if (lidx >= 0) { + lopt = &app->app_longopts[lidx]; + if (optarg != NULL) { + preg = wrapper_logged_arg_long_expr(app, lidx, + &expr); + } + lidx = -1; + } else if (optarg != NULL) { + preg = wrapper_logged_arg_short_expr(app, ch, &expr); + } - /* - * Determine if we can replace argv[optind - 1] wholesale or if - * we need a new string. - */ - if (optarg == *target_arg) { - /* - * optarg is standalone, so we need to add the long - * option in the preceeding index. - */ - xpc_array_set_string(args, XPC_ARRAY_APPEND, - argv[optind - 2]); - xpc_array_set_string(args, XPC_ARRAY_APPEND, - wrapper_arg_redacted); + assert((preg != NULL) == (expr != NULL)); - lpoptind = optind; + if ((argkey = wrapper_logged_arg_name(expr, lopt, ch)) == NULL) { + if (errors != NULL) + (*errors)++; continue; } - if (asprintf(&redacted_arg, "%.*s%s", - (int)(optarg - *target_arg), *target_arg, - wrapper_arg_redacted) >= 0) { - /* Success */ - xpc_array_set_string(args, XPC_ARRAY_APPEND, - redacted_arg); + /* + * If we have a pattern to check, try it. If it fails, we'll + * fall back to just inserting a count. + */ + if (preg != NULL) { + regmatch_t match; + int error; - free(redacted_arg); - } else { - /* Failed */ - char c; + assert(optarg != NULL); /* - * It's crucial that we not accidentally log - * a redacted argument. If we really can't - * replace it with an obvious token to show that - * it's redacted due to memory constraints, - * we'll just zap it for the event and restore it before - * we pass it on to the chosen application. + * We only accept a match if the match happened to be + * the whole string. This way, we don't rely on the + * wrapper definition to anchor every single expression. 
*/ - c = *optarg; - *optarg = '\0'; - - xpc_array_set_string(args, XPC_ARRAY_APPEND, - *target_arg); - - *optarg = c; + error = regexec(preg, optarg, 1, &match, 0); + if (error == 0 && + match.rm_so == 0 && + match.rm_eo == strlen(optarg)) { + xpc_dictionary_set_string(args, argkey, + optarg); + free(argkey); + + /* + * We'll also set the unsuffixed name with the + * last key found, in case we know only the last + * one is used and want to capture that instead. + */ + if ((argkey = wrapper_logged_arg_name(NULL, lopt, + ch)) == NULL) { + if (errors != NULL) + (*errors)++; + continue; + } + xpc_dictionary_set_string(args, argkey, + optarg); + free(argkey); + + continue; + } } - lpoptind = optind; - } + count = xpc_dictionary_get_uint64(args, argkey); + xpc_dictionary_set_uint64(args, argkey, count + 1); - /* - * Round up the last argument, if needed. i.e., if we processed all of - * the options present in it without needing to redact anything. - */ - if (optind > lpoptind && optind < argc) - xpc_array_set_string(args, XPC_ARRAY_APPEND, argv[optind - 1]); + free(argkey); - argc -= optind; - argv += optind; + if (expr == NULL) + continue; - /* - * Anything left over should just be redacted, as they might be paths - * or other potentially sensitive data. - */ - for (int i = 0; i < argc; i++) { - xpc_array_set_string(args, XPC_ARRAY_APPEND, - wrapper_arg_redacted); + /* + * If we were doing pattern matching for this option but the + * last key didn't match, then we just set the value to the # + * times that the option appeared. + */ + if ((argkey = wrapper_logged_arg_name(NULL, lopt, ch)) == NULL) { + if (errors != NULL) + (*errors)++; + continue; + } + + xpc_dictionary_set_uint64(args, argkey, expr->expr_count); + free(argkey); } return (args); } +#endif -/* - * Return an array regardless; empty if there are no arguments to be logged, - * non-empty otherwise. 
- */ -static xpc_object_t -wrapper_logged_args(const struct application *app, int argc, char *argv[]) +#if WRAPPER_APPLICATION_COUNT > 1 +static const struct application * +wrapper_by_name(const char *appname) { - xpc_object_t args = xpc_array_create_empty(); + const struct application *app; - if (WRAPPER_ANALYTICS_NOARGS) - return (wrapper_logged_args_filter(app, args, argc, argv)); + for (size_t i = 0; i < nitems(wrapper_apps); i++) { + app = &wrapper_apps[i]; - for (int i = 1; i < argc; i++) { - xpc_array_set_string(args, XPC_ARRAY_APPEND, argv[i]); + if (strcmp(appname, app->app_name) == 0) + return (app); } - return (args); + /* + * We've historically ignored errors in env selection; should we + * consider warning here instead, rather than just falling through to + * the "default" application? The behavior will be documented either + * way. + */ + return (NULL); } -#endif -#if WRAPPER_APPLICATION_COUNT > 1 static const struct application * wrapper_check_env(void) { #ifdef WRAPPER_ENV_VAR const char *val; - if ((val = getenv(WRAPPER_ENV_VAR)) != NULL) { - const struct application *app; + if ((val = getenv(WRAPPER_ENV_VAR)) != NULL) + return (wrapper_by_name(val)); +#endif - for (size_t i = 0; i < nitems(wrapper_apps); i++) { - app = &wrapper_apps[i]; + return (NULL); +} - if (strcmp(val, app->app_name) == 0) - return (app); - } - } -#endif +static const struct application * +wrapper_check_var(void) +{ + /* + * The wrapper name may not be defined if the wrapper was fed via + * stdin, in which case we won't have defined WRAPPER_NAME. We'll just + * not check /var/select in those wrappers. 
+ */ +#ifdef WRAPPER_NAME + static const char varpath[] = _PATH_VARSEL WRAPPER_NAME; + static bool var_app_read; + static const struct application *var_app; + char target[WRAPPER_MAXNAMELEN]; + const struct application *app; + ssize_t ret; + + if (var_app_read) + return (var_app); + + ret = readlink(varpath, target, sizeof(target)); + var_app_read = true; + if (ret <= 0 || ret == sizeof(target)) + return (NULL); + target[ret] = '\0'; + + /* + * We might get called twice under arg-based selection, so cache the + * result just in case. + */ + var_app = wrapper_by_name(target); + return (var_app); +#else return (NULL); +#endif } static bool -wrapper_check_args_long(const char *optstr, const struct option *longopts, - int argc, char *argv[]) +wrapper_check_args_excluded(const struct application *app, int opt) +{ + + /* + * Checks whether the returned option is a logonly option to be excluded + * from considering this a candidate. + */ + if (opt >= app->app_nlogonly) + return (false); + + return (app->app_logonly_opts[opt]); +} + +static bool +wrapper_check_args_long(const struct application *app, const char *optstr, + const struct option *longopts, int argc, char *argv[]) { int ch; @@ -186,9 +364,10 @@ wrapper_check_args_long(const char *optstr, const struct option *longopts, optind = optreset = 1; while ((ch = getopt_long(argc, argv, optstr, longopts, NULL)) != -1) { /* - * If we encounter an unrecognized flag, we can't use this one. + * If we encounter an unrecognized flag or a logonly flag, we + * can't use this one. */ - if (ch == '?') + if (ch == '?' 
|| wrapper_check_args_excluded(app, ch)) return (false); } @@ -215,24 +394,37 @@ wrapper_check_args_app(const struct application *app, int argc, char *argv[]) if (app->app_opts_logonly) return (true); - return (wrapper_check_args_long(app->app_optstr, app->app_longopts, + return (wrapper_check_args_long(app, app->app_optstr, app->app_longopts, argc, argv)); } static const struct application * wrapper_check_args(int argc, char *argv[]) { - const struct application *app; + const struct application *app, *dflt_app; /* * If we only have the name, there are no arguments to check and we can * simple execute the default application. */ if (argc == 1) - return (&wrapper_apps[0]); + return (NULL); + + /* + * If a default has been provided via /var/select, that overrides what + * was specified as the default in the wrapper config -- thus, we check + * that one first, then check every other application specified. If + * none of them are compatible with the arguments chosen, we'll use the + * var-specified app anyways. + */ + dflt_app = wrapper_check_var(); + if (dflt_app != NULL && wrapper_check_args_app(dflt_app, argc, argv)) + return (dflt_app); for (size_t i = 0; i < nitems(wrapper_apps); i++) { app = &wrapper_apps[i]; + if (app == dflt_app) + continue; if (wrapper_check_args_app(app, argc, argv)) return (app); @@ -247,26 +439,101 @@ static int wrapper_execute_analytics_testing(const struct application *app, int argc, char *argv[]) { + xpc_object_t args; + bool halted; + /* * If we're testing the wrapper analytics, we're just emitting the * final arguments being reported. name/chosen are assumed to be * correct and may be tested separately. */ - xpc_object_t args = wrapper_logged_args(app, argc, argv); + args = wrapper_logged_args(app, argc, argv, &halted, NULL); printf("arguments\n"); - xpc_array_apply(args, ^bool(size_t idx, xpc_object_t val) { - /* All of our elements are strings. 
*/ - assert(xpc_get_type(val) == XPC_TYPE_STRING); - printf("\t%s\n", xpc_string_get_string_ptr(val)); + xpc_dictionary_apply(args, ^bool(const char *key, xpc_object_t val) { + if (strncmp(key, WRAPPER_ARGPREF_LONG, + sizeof(WRAPPER_ARGPREF_LONG) - 1) == 0) { + printf("\t--%s", &key[sizeof(WRAPPER_ARGPREF_LONG) - 1]); + } else if (strncmp(key, WRAPPER_ARGPREF_SHORT, + sizeof(WRAPPER_ARGPREF_SHORT) - 1) == 0) { + printf("\t-%s", &key[sizeof(WRAPPER_ARGPREF_SHORT) - 1]); + } else { + assert(0 && "Invalid arg entry"); + } + + if (xpc_get_type(val) == XPC_TYPE_STRING) + printf(" %s", xpc_string_get_string_ptr(val)); + else if (xpc_get_type(val) == XPC_TYPE_UINT64) + printf(" %ju", (uintmax_t)xpc_uint64_get_value(val)); + else + assert(0 && "Bad value type"); + + printf("\n"); return (true); }); + + if (halted) + printf("\t<>\n"); + xpc_release(args); return (0); } #endif /* WRAPPER_ANALYTICS_TESTING */ +#ifdef WRAPPER_NEEDS_XCSELECT +static void +wrapper_invoke_xcrun(const struct application *app, int argc, char *argv[]) +{ + const char *path; + + path = strchr(app->app_path, '/'); + assert(path != NULL); + + while (path[0] == '/') + path++; + + assert(path[0] != '\0'); + + /* Chop off the program name when we're running xcrun */ + xcselect_invoke_xcrun(path, argc - 1, argv + 1, true); +} +#endif + +/* + * wrapper_handle_relpath will do one of three things: + * - Execute something else + * - Error out + * - Return with path populated to something that we should try to exec. 
+ */ +static void +wrapper_handle_relpath(const struct application *app, char *path, size_t pathsz, + int argc, char *argv[]) +{ +#ifndef WRAPPER_NEEDS_XCSELECT + wrapper_assert_unreachable("broken wrapper shim configuration"); +#else + bool env, cltools, dflt; + + /* Shell out to xcrun for $XCODE/ paths */ + if (strncmp(app->app_path, WRAPPER_XCODE_PREFIX, + sizeof(WRAPPER_XCODE_PREFIX) - 1) == 0) { + wrapper_invoke_xcrun(app, argc, argv); + /* UNREACHABLE */ + } + + if (!xcselect_get_developer_dir_path(path, pathsz, &env, &cltools, + &dflt)) + errx(1, "Could not obtain developer dir path"); + + /* We'll catch this strlcat() error with the next one; just ignore it. */ + (void)strlcat(path, "/", pathsz); + + if (strlcat(path, app->app_path, pathsz) >= pathsz) + errx(1, "File name too long: %s", path); +#endif /* !WRAPPER_NEEDS_XCSELECT */ +} + static void wrapper_execute_addargs(const struct application *app, int *argc, char **argv[]) @@ -313,12 +580,37 @@ wrapper_execute(const struct application *app, int argc, char *argv[]) #ifdef WRAPPER_ANALYTICS_IDENT analytics_send_event_lazy(WRAPPER_ANALYTICS_IDENT, ^(void) { - xpc_object_t payload = xpc_dictionary_create_empty(); - xpc_object_t args = wrapper_logged_args(app, argc, argv); + xpc_object_t args, payload; + unsigned int errors; + bool halted; + + payload = xpc_dictionary_create_empty(); + args = wrapper_logged_args(app, argc, argv, &halted, &errors); xpc_dictionary_set_string(payload, "name", argv[0]); - xpc_dictionary_set_value(payload, "arguments", args); xpc_dictionary_set_string(payload, "chosen", app->app_name); + if (errors != 0) + xpc_dictionary_set_uint64(payload, "argerrors", errors); + if (halted) + xpc_dictionary_set_bool(payload, "arghalt", true); + + xpc_dictionary_apply(args, ^bool(const char *key, xpc_object_t val) { + xpc_type_t type; + + type = xpc_get_type(val); + assert(type == XPC_TYPE_STRING || + type == XPC_TYPE_UINT64); + + if (type == XPC_TYPE_STRING) { + 
xpc_dictionary_set_string(payload, key, + xpc_string_get_string_ptr(val)); + } else { + xpc_dictionary_set_uint64(payload, key, + xpc_uint64_get_value(val)); + } + + return (true); + }); xpc_release(args); return (payload); @@ -338,41 +630,27 @@ wrapper_execute(const struct application *app, int argc, char *argv[]) * cwdpaths are undocumented and only really intended for regression * testing purposes at this time. */ - if (app->app_path[0] == '/' || app->app_path_relcwd) { - if (strlcpy(path, app->app_path, sizeof(path)) >= sizeof(path)) - errx(1, "File name too long: %s", path); - goto run; - } - -#ifdef WRAPPER_NEEDS_XCSELECT - bool env, cltools, dflt; - - if (!xcselect_get_developer_dir_path(path, sizeof(path), &env, &cltools, - &dflt)) { - snprintf(path, sizeof(path), "/%s", app->app_path); - goto run; - } - - /* We'll catch this strlcat() error with the next one; just ignore it. */ - (void)strlcat(path, "/", sizeof(path)); + if (app->app_path[0] != '/' && !app->app_path_relcwd) { + wrapper_handle_relpath(app, path, sizeof(path), argc, argv); - if (strlcat(path, app->app_path, sizeof(path)) >= sizeof(path)) + /* + * If wrapper_handle_relpath returns, it must have populated + * path with the path to execute. + */ + assert(path[0] != '\0'); + } else if (strlcpy(path, app->app_path, sizeof(path)) >= sizeof(path)) { errx(1, "File name too long: %s", path); -#else /* !WRAPPER_NEEDS_XCSELECT */ - /* UNREACHABLE */ -#endif /* WRAPPER_NEEDS_XCSELECT */ + } -run: execv(path, argv); err(1, "execv(%s)", path); /* UNREACHABLE */ #endif /* !WRAPPER_ANALYTICS_TESTING */ } -int -main(int argc, char *argv[]) +static const struct application * +wrapper_app(int argc, char *argv[]) { - /* * Obviously no need to bother checking anything if we just have a * single application. 
Such a setup is not unexpected, as it could be @@ -384,12 +662,23 @@ main(int argc, char *argv[]) chosen_app = wrapper_check_env(); if (chosen_app != NULL) - return (wrapper_execute(chosen_app, argc, argv)); + return (chosen_app); chosen_app = wrapper_check_args(argc, argv); if (chosen_app != NULL) - return (wrapper_execute(chosen_app, argc, argv)); + return (chosen_app); + + chosen_app = wrapper_check_var(); + if (chosen_app != NULL) + return (chosen_app); #endif - return (wrapper_execute(&wrapper_apps[0], argc, argv)); + return (&wrapper_apps[0]); +} + +int +main(int argc, char *argv[]) +{ + + return (wrapper_execute(wrapper_app(argc, argv), argc, argv)); } diff --git a/localedef/README b/localedef/README new file mode 100644 index 0000000..4d97371 --- /dev/null +++ b/localedef/README @@ -0,0 +1,11 @@ +While there are tools called "localedef" in Solaris and Linux, this +tool does not share heritage with any other implementation. It was +written independently by Garrett D'Amore while employed at Nexenta +Systems, and thus carries the Nexenta Copyright. + +It was initially released under the CDDL license, but on 4 July 2014, +Nexenta reissued the source under the BSD 2-clause license. This +code is part of the Illumos project. + +see: +https://github.com/Nexenta/illumos-nexenta/commit/cf17542a37fc83d0ae093777e30d480423858c29 diff --git a/localedef/charmap.c b/localedef/charmap.c new file mode 100644 index 0000000..19e8852 --- /dev/null +++ b/localedef/charmap.c @@ -0,0 +1,404 @@ +/*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * CHARMAP file handling for localedef. + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "localedef.h" +#include "parser.h" + + +typedef struct charmap { + const char *name; + wchar_t wc; + RB_ENTRY(charmap) rb_sym; + RB_ENTRY(charmap) rb_wc; +} charmap_t; + +static int cmap_compare_sym(const void *n1, const void *n2); +static int cmap_compare_wc(const void *n1, const void *n2); + +static RB_HEAD(cmap_sym, charmap) cmap_sym; +static RB_HEAD(cmap_wc, charmap) cmap_wc; + +RB_GENERATE_STATIC(cmap_sym, charmap, rb_sym, cmap_compare_sym); +RB_GENERATE_STATIC(cmap_wc, charmap, rb_wc, cmap_compare_wc); + +/* + * Array of POSIX specific portable characters. 
+ */ + +static const struct { + const char *name; + int ch; +} portable_chars[] = { + { "NUL", '\0' }, + { "SOH", '\x01' }, + { "STX", '\x02' }, + { "ETX", '\x03' }, + { "EOT", '\x04' }, + { "ENQ", '\x05' }, + { "ACK", '\x06' }, + { "BEL", '\a' }, + { "alert", '\a' }, + { "BS", '\b' }, + { "backspace", '\b' }, + { "HT", '\t' }, + { "tab", '\t' }, + { "LF", '\n' }, + { "newline", '\n' }, + { "VT", '\v' }, + { "vertical-tab", '\v' }, + { "FF", '\f' }, + { "form-feed", '\f' }, + { "CR", '\r' }, + { "carriage-return", '\r' }, + { "SO", '\x0e' }, + { "SI", '\x0f' }, + { "DLE", '\x10' }, + { "DC1", '\x11' }, + { "DC2", '\x12' }, + { "DC3", '\x13' }, + { "DC4", '\x14' }, + { "NAK", '\x15' }, + { "SYN", '\x16' }, + { "ETB", '\x17' }, + { "CAN", '\x18' }, + { "EM", '\x19' }, + { "SUB", '\x1a' }, + { "ESC", '\x1b' }, + { "FS", '\x1c' }, + { "IS4", '\x1c' }, + { "GS", '\x1d' }, + { "IS3", '\x1d' }, + { "RS", '\x1e' }, + { "IS2", '\x1e' }, + { "US", '\x1f' }, + { "IS1", '\x1f' }, + { "DEL", '\x7f' }, + { "space", ' ' }, + { "exclamation-mark", '!' }, + { "quotation-mark", '"' }, + { "number-sign", '#' }, + { "dollar-sign", '$' }, + { "percent-sign", '%' }, + { "ampersand", '&' }, + { "apostrophe", '\'' }, + { "left-parenthesis", '(' }, + { "right-parenthesis", ')' }, + { "asterisk", '*' }, + { "plus-sign", '+' }, + { "comma", ','}, + { "hyphen-minus", '-' }, + { "hyphen", '-' }, + { "full-stop", '.' }, + { "period", '.' }, + { "slash", '/' }, + { "solidus", '/' }, + { "zero", '0' }, + { "one", '1' }, + { "two", '2' }, + { "three", '3' }, + { "four", '4' }, + { "five", '5' }, + { "six", '6' }, + { "seven", '7' }, + { "eight", '8' }, + { "nine", '9' }, + { "colon", ':' }, + { "semicolon", ';' }, +#ifdef __APPLE__ + { "less-then-sign", '<' }, +#endif + { "less-than-sign", '<' }, + { "equals-sign", '=' }, +#ifdef __APPLE__ + { "greater-then-sign", '>' }, +#endif + { "greater-than-sign", '>' }, + { "question-mark", '?' 
}, + { "commercial-at", '@' }, + { "left-square-bracket", '[' }, + { "backslash", '\\' }, + { "reverse-solidus", '\\' }, + { "right-square-bracket", ']' }, + { "circumflex", '^' }, + { "circumflex-accent", '^' }, + { "low-line", '_' }, + { "underscore", '_' }, +#ifdef __APPLE__ + { "underline", '_' }, +#endif + { "grave-accent", '`' }, + { "left-brace", '{' }, + { "left-curly-bracket", '{' }, + { "vertical-line", '|' }, + { "right-brace", '}' }, + { "right-curly-bracket", '}' }, + { "tilde", '~' }, + { "A", 'A' }, + { "B", 'B' }, + { "C", 'C' }, + { "D", 'D' }, + { "E", 'E' }, + { "F", 'F' }, + { "G", 'G' }, + { "H", 'H' }, + { "I", 'I' }, + { "J", 'J' }, + { "K", 'K' }, + { "L", 'L' }, + { "M", 'M' }, + { "N", 'N' }, + { "O", 'O' }, + { "P", 'P' }, + { "Q", 'Q' }, + { "R", 'R' }, + { "S", 'S' }, + { "T", 'T' }, + { "U", 'U' }, + { "V", 'V' }, + { "W", 'W' }, + { "X", 'X' }, + { "Y", 'Y' }, + { "Z", 'Z' }, + { "a", 'a' }, + { "b", 'b' }, + { "c", 'c' }, + { "d", 'd' }, + { "e", 'e' }, + { "f", 'f' }, + { "g", 'g' }, + { "h", 'h' }, + { "i", 'i' }, + { "j", 'j' }, + { "k", 'k' }, + { "l", 'l' }, + { "m", 'm' }, + { "n", 'n' }, + { "o", 'o' }, + { "p", 'p' }, + { "q", 'q' }, + { "r", 'r' }, + { "s", 's' }, + { "t", 't' }, + { "u", 'u' }, + { "v", 'v' }, + { "w", 'w' }, + { "x", 'x' }, + { "y", 'y' }, + { "z", 'z' }, + { NULL, 0 } +}; + +static int +cmap_compare_sym(const void *n1, const void *n2) +{ + const charmap_t *c1 = n1; + const charmap_t *c2 = n2; + int rv; + + rv = strcmp(c1->name, c2->name); + return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); +} + +static int +cmap_compare_wc(const void *n1, const void *n2) +{ + const charmap_t *c1 = n1; + const charmap_t *c2 = n2; + + return ((c1->wc < c2->wc) ? -1 : (c1->wc > c2->wc) ? 
1 : 0); +} + +void +init_charmap(void) +{ + RB_INIT(&cmap_sym); + + RB_INIT(&cmap_wc); +} + +static void +add_charmap_impl(const char *sym, wchar_t wc, int nodups) +{ + charmap_t srch; + charmap_t *n = NULL; + + srch.wc = wc; + srch.name = sym; + + /* + * also possibly insert the wide mapping, although note that there + * can only be one of these per wide character code. + */ + if ((wc != (wchar_t)-1) && ((RB_FIND(cmap_wc, &cmap_wc, &srch)) == NULL)) { + if ((n = calloc(1, sizeof (*n))) == NULL) { + errf("out of memory"); + return; + } + n->wc = wc; + RB_INSERT(cmap_wc, &cmap_wc, n); + } + + if (sym) { + if (RB_FIND(cmap_sym, &cmap_sym, &srch) != NULL) { + if (nodups) { + errf("duplicate character definition"); + } + return; + } + if ((n == NULL) && ((n = calloc(1, sizeof (*n))) == NULL)) { + errf("out of memory"); + return; + } + n->wc = wc; + n->name = sym; + + RB_INSERT(cmap_sym, &cmap_sym, n); + } +} + +void +add_charmap(const char *sym, int c) +{ + add_charmap_impl(sym, c, 1); +} + +void +add_charmap_undefined(char *sym) +{ + charmap_t srch; + charmap_t *cm = NULL; + + srch.name = sym; + cm = RB_FIND(cmap_sym, &cmap_sym, &srch); + + if ((undefok == 0) && ((cm == NULL) || (cm->wc == (wchar_t)-1))) { + warn("undefined symbol <%s>", sym); + add_charmap_impl(sym, -1, 0); + } else { + free(sym); + } +} + +void +add_charmap_range(char *s, char *e, int wc) +{ + int ls, le; + int si; + int sn, en; + int i; + + static const char *digits = "0123456789"; + + ls = strlen(s); + le = strlen(e); + + if (((si = strcspn(s, digits)) == 0) || (si == ls) || + (strncmp(s, e, si) != 0) || + ((int)strspn(s + si, digits) != (ls - si)) || + ((int)strspn(e + si, digits) != (le - si)) || + ((sn = atoi(s + si)) > ((en = atoi(e + si))))) { + errf("malformed charmap range"); + return; + } + + s[si] = 0; + + for (i = sn; i <= en; i++) { + char *nn; + (void) asprintf(&nn, "%s%0*u", s, ls - si, i); + if (nn == NULL) { + errf("out of memory"); + return; + } + + add_charmap_impl(nn, wc, 1); + 
wc++; + } + free(s); + free(e); +} + +void +add_charmap_char(const char *name, int val) +{ + add_charmap_impl(name, val, 0); +} + +/* + * POSIX insists that certain entries be present, even when not in the + * original charmap file. + */ +void +add_charmap_posix(void) +{ + int i; + + for (i = 0; portable_chars[i].name; i++) { + add_charmap_char(portable_chars[i].name, portable_chars[i].ch); + } +} + +int +lookup_charmap(const char *sym, wchar_t *wc) +{ + charmap_t srch; + charmap_t *n; + + srch.name = sym; + n = RB_FIND(cmap_sym, &cmap_sym, &srch); + if (n && n->wc != (wchar_t)-1) { + if (wc) + *wc = n->wc; + return (0); + } + return (-1); +} + +int +check_charmap(wchar_t wc) +{ + charmap_t srch; + + srch.wc = wc; + return (RB_FIND(cmap_wc, &cmap_wc, &srch) ? 0 : -1); +} diff --git a/localedef/charmap.p-1 b/localedef/charmap.p-1 deleted file mode 100644 index 3901dd9..0000000 --- a/localedef/charmap.p-1 +++ /dev/null @@ -1,23 +0,0 @@ -CHARMAP - \x20 - \x24 - \101 - \141 - \346 - \365 - \300 - \366 - \142 - \102 - \103 - \143 - \347 - \x64 - \x65 - \110 - \150 - \xb7 - \x73 - \x7a - \x65 -END CHARMAP diff --git a/localedef/charmap.p-2 b/localedef/charmap.p-2 deleted file mode 100644 index 75a3fdf..0000000 --- a/localedef/charmap.p-2 +++ /dev/null @@ -1,115 +0,0 @@ -CHARMAP - \000 - \007 - \010 - \011 - \012 - \013 - \014 - \015 - \040 - \041 - \042 - \043 - \044 - \045 - \046 - \047 - \050 - \051 - \052 - \053 - \054 - \055 - \055 - \056 - \056 - \057 - \057 - \060 - \061 - \062 - \063 - \064 - \065 - \066 - \067 - \070 - \071 - \072 - \073 - \074 - \075 - \076 - \077 - \100 - \101 - \102 - \103 - \104 - \105 - \106 - \107 - \110 - \111 - \112 - \113 - \114 - \115 - \116 - \117 -

\160 - \161 - \162 - \163 - \164 - \165 - \166 - \167 - \170 - \171 - \172 - \173 - \173 - \174 - \175 - \175 - \176 - \177 -END CHARMAP diff --git a/localedef/charmap.test b/localedef/charmap.test deleted file mode 100644 index fd40463..0000000 --- a/localedef/charmap.test +++ /dev/null @@ -1,38 +0,0 @@ -CHARMAP - 2 - 2 - - \047 - \140 - - \x41 - \102 - C - \104 - "E" - \d70 - - \\ - "" - - \x60\x41 - \x27\x41 - \x60\x61 - \x27\x61 - -... \x12\x34 -END CHARMAP - -WIDTH -... 1 -... 2 - 27 - - 2 - 2 - 2 - 2 - -WIDTH_DEFAULT 1 - -END WIDTH diff --git a/localedef/collate.c b/localedef/collate.c new file mode 100644 index 0000000..2a08077 --- /dev/null +++ b/localedef/collate.c @@ -0,0 +1,1329 @@ +/*- + * Copyright 2018 Nexenta Systems, Inc. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * LC_COLLATE database generation routines for localedef. + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include "localedef.h" +#include "parser.h" +#include "collate.h" + +_Static_assert(COLL_WEIGHTS_MAX == 10, "This code assumes a value of 10"); + +/* + * Design notes. + * + * It will be extremely helpful to the reader if they have access to + * the localedef and locale file format specifications available. + * Latest versions of these are available from www.opengroup.org. + * + * The design for the collation code is a bit complex. The goal is a + * single collation database as described in collate.h (in + * libc/port/locale). However, there are some other tidbits: + * + * a) The substitution entries are now a directly indexable array. A + * priority elsewhere in the table is taken as an index into the + * substitution table if it has a high bit (COLLATE_SUBST_PRIORITY) + * set. (The bit is cleared and the result is the index into the + * table.) + * + * b) We eliminate duplicate entries into the substitution table. + * This saves a lot of space. + * + * c) The priorities for each level are "compressed", so that each + * sorting level has consecutively numbered priorities starting at 1. + * (0 is reserved for the ignore priority.)
This means sort levels + * which only have a few distinct priorities can represent the + * priority level in fewer bits, which makes the strxfrm output + * smaller. + * + * d) We record the total number of priorities so that strxfrm can + * figure out how many bytes to expand a numeric priority into. + * + * e) For the UNDEFINED pass (the last pass), we record the maximum + * number of bits needed to uniquely prioritize these entries, so that + * the last pass can also use smaller strxfrm output when possible. + * + * f) Priorities with the sign bit set are verboten. This works out + * because no active character set needs that bit to carry significant + * information once the character is in wide form. + * + * To process the entire data to make the database, we actually run + * multiple passes over the data. + * + * The first pass, which is done at parse time, identifies elements, + * substitutions, and such, and records them in priority order. As + * some priorities can refer to other priorities, using forward + * references, we use a table of references indicating whether the + * priority's value has been resolved, or whether it is still a + * reference. + * + * The second pass walks over all the items in priority order, noting + * that they are used directly, and not just an indirect reference. + * This is done by creating a "weight" structure for the item. The + * weights are stashed in an RB tree sorted by relative "priority". + * + * The third pass walks over all the weight structures, in priority + * order, and assigns a new monotonically increasing (per sort level) + * weight value to them. These are the values that will actually be + * written to the file. + * + * The fourth pass just writes the data out. + */ + +/* + * In order to resolve the priorities, we create a table of priorities. + * Entries in the table can be in one of three states. + * + * UNKNOWN is for newly allocated entries, and indicates that nothing + * is known about the priority. 
(For example, when new entries are created + * for collating-symbols, this is the value assigned for them until the + * collating symbol's order has been determined.) + * + * RESOLVED is used for an entry where the priority indicates the final + * numeric weight. + * + * REFER is used for entries that reference other entries. Typically + * this is used for forward references. A collating-symbol can never + * have this value. + * + * The "pass" field is used during final resolution to aid in detection + * of referencing loops. (For example <A> depends on <B>, but <B> has its + * priority dependent on <A>.) + */ +typedef enum { + UNKNOWN, /* priority is totally unknown */ + RESOLVED, /* priority value fully resolved */ + REFER /* priority is a reference (index) */ +} res_t; + +typedef struct weight { + int32_t pri; + int opt; + RB_ENTRY(weight) entry; +} weight_t; + +typedef struct priority { + res_t res; + int32_t pri; + int pass; + int lineno; +} collpri_t; + +#define NUM_WT collinfo.directive_count + +/* + * These are the abstract collating symbols, which are just a symbolic + * way to reference a priority. + */ +struct collsym { + char *name; + int32_t ref; + RB_ENTRY(collsym) entry; +}; + +/* + * These are also abstract collating symbols, but we allow them to have + * different priorities at different levels. + */ +typedef struct collundef { + char *name; + int32_t ref[COLL_WEIGHTS_MAX]; + RB_ENTRY(collundef) entry; +} collundef_t; + +/* + * These are called "chains" in libc. This records the fact that two or + * more characters should be treated as a single collating entity when + * they appear together. For example, in Spanish <c><h> gets collated + * as a character between <c> and <d>. + */ +struct collelem { + char *symbol; + wchar_t *expand; + int32_t ref[COLL_WEIGHTS_MAX]; + RB_ENTRY(collelem) rb_bysymbol; + RB_ENTRY(collelem) rb_byexpand; +}; + +/* + * Individual characters have a sequence of weights as well.
+ */ +typedef struct collchar { + wchar_t wc; + int32_t ref[COLL_WEIGHTS_MAX]; + RB_ENTRY(collchar) entry; +} collchar_t; + +/* + * Substitution entries. The key is itself a priority. Note that + * when we create one of these, we *automatically* wind up with a + * fully resolved priority for the key, because creation of + * substitutions creates a resolved priority at the same time. + */ +typedef struct subst{ + int32_t key; + int32_t ref[COLLATE_STR_LEN]; + RB_ENTRY(subst) entry; + RB_ENTRY(subst) entry_ref; +} subst_t; + +static RB_HEAD(collsyms, collsym) collsyms; +static RB_HEAD(collundefs, collundef) collundefs; +static RB_HEAD(elem_by_symbol, collelem) elem_by_symbol; +static RB_HEAD(elem_by_expand, collelem) elem_by_expand; +static RB_HEAD(collchars, collchar) collchars; +static RB_HEAD(substs, subst) substs[COLL_WEIGHTS_MAX]; +static RB_HEAD(substs_ref, subst) substs_ref[COLL_WEIGHTS_MAX]; +static RB_HEAD(weights, weight) weights[COLL_WEIGHTS_MAX]; +static int32_t nweight[COLL_WEIGHTS_MAX]; + +/* + * This is state tracking for the ellipsis token. Note that we start + * the initial values so that the ellipsis logic will think we got a + * magic starting value of NUL. It starts at minus one because the + * starting point is exclusive -- i.e. the starting point is not + * itself handled by the ellipsis code. + */ +static int currorder = EOF; +static int lastorder = EOF; +static collelem_t *currelem; +static collchar_t *currchar; +static collundef_t *currundef; +static wchar_t ellipsis_start = 0; +static int32_t ellipsis_weights[COLL_WEIGHTS_MAX]; + +/* + * We keep a running tally of weights. + */ +static int nextpri = 1; +static int nextsubst[COLL_WEIGHTS_MAX] = { 0 }; + +/* + * This array collects up the weights for each level. + */ +static int32_t order_weights[COLL_WEIGHTS_MAX]; +static int curr_weight = 0; +static int32_t subst_weights[COLLATE_STR_LEN]; +static int curr_subst = 0; + +/* + * Some initial priority values. 
+ */ +static int32_t pri_undefined[COLL_WEIGHTS_MAX]; +static int32_t pri_ignore; + +static collate_info_t collinfo; +static int32_t subst_count[COLL_WEIGHTS_MAX]; +static int32_t chain_count; +static int32_t large_count; + +static collpri_t *prilist = NULL; +static int numpri = 0; +static int maxpri = 0; + +static void start_order(int); + +static int32_t +new_pri(void) +{ + int i; + + if (numpri >= maxpri) { + maxpri = maxpri ? maxpri * 2 : 1024; + prilist = realloc(prilist, sizeof (collpri_t) * maxpri); + if (prilist == NULL) { + fprintf(stderr,"out of memory\n"); + return (-1); + } + for (i = numpri; i < maxpri; i++) { + prilist[i].res = UNKNOWN; + prilist[i].pri = 0; + prilist[i].pass = 0; + } + } + return (numpri++); +} + +static collpri_t * +get_pri(int32_t ref) +{ + if ((ref < 0) || (ref > numpri)) { + INTERR; + return (NULL); + } + return (&prilist[ref]); +} + +static void +set_pri(int32_t ref, int32_t v, res_t res) +{ + collpri_t *pri; + + pri = get_pri(ref); + + if ((res == REFER) && ((v < 0) || (v >= numpri))) { + INTERR; + } + + /* Resolve self references */ + if ((res == REFER) && (ref == v)) { + v = nextpri; + res = RESOLVED; + } + + if (pri->res != UNKNOWN) { + warn("repeated item in order list (first on %d)", + pri->lineno); + return; + } + pri->lineno = lineno; + pri->pri = v; + pri->res = res; +} + +static int32_t +resolve_pri(int32_t ref) +{ + collpri_t *pri; + static int32_t pass = 0; + + pri = get_pri(ref); + pass++; + while (pri->res == REFER) { + if (pri->pass == pass) { + /* report a line with the circular symbol */ + lineno = pri->lineno; + fprintf(stderr,"circular reference in order list\n"); + return (-1); + } + if ((pri->pri < 0) || (pri->pri >= numpri)) { + INTERR; + return (-1); + } + pri->pass = pass; + pri = &prilist[pri->pri]; + } + + if (pri->res == UNKNOWN) { + return (-1); + } + if (pri->res != RESOLVED) + INTERR; + + return (pri->pri); +} + +static int +weight_compare(const void *n1, const void *n2) +{ + int32_t k1 = ((const 
weight_t *)n1)->pri; + int32_t k2 = ((const weight_t *)n2)->pri; + + return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); +} + +RB_GENERATE_STATIC(weights, weight, entry, weight_compare); + +static int +collsym_compare(const void *n1, const void *n2) +{ + const collsym_t *c1 = n1; + const collsym_t *c2 = n2; + int rv; + + rv = strcmp(c1->name, c2->name); + return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); +} + +RB_GENERATE_STATIC(collsyms, collsym, entry, collsym_compare); + +static int +collundef_compare(const void *n1, const void *n2) +{ + const collundef_t *c1 = n1; + const collundef_t *c2 = n2; + int rv; + + rv = strcmp(c1->name, c2->name); + return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); +} + +RB_GENERATE_STATIC(collundefs, collundef, entry, collundef_compare); + +static int +element_compare_symbol(const void *n1, const void *n2) +{ + const collelem_t *c1 = n1; + const collelem_t *c2 = n2; + int rv; + + rv = strcmp(c1->symbol, c2->symbol); + return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); +} + +RB_GENERATE_STATIC(elem_by_symbol, collelem, rb_bysymbol, element_compare_symbol); + +static int +element_compare_expand(const void *n1, const void *n2) +{ + const collelem_t *c1 = n1; + const collelem_t *c2 = n2; + int rv; + + rv = wcscmp(c1->expand, c2->expand); + return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); +} + +RB_GENERATE_STATIC(elem_by_expand, collelem, rb_byexpand, element_compare_expand); + +static int +collchar_compare(const void *n1, const void *n2) +{ + wchar_t k1 = ((const collchar_t *)n1)->wc; + wchar_t k2 = ((const collchar_t *)n2)->wc; + + return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); +} + +RB_GENERATE_STATIC(collchars, collchar, entry, collchar_compare); + +static int +subst_compare(const void *n1, const void *n2) +{ + int32_t k1 = ((const subst_t *)n1)->key; + int32_t k2 = ((const subst_t *)n2)->key; + + return (k1 < k2 ? -1 : k1 > k2 ? 
1 : 0); +} + +RB_GENERATE_STATIC(substs, subst, entry, subst_compare); + +static int +subst_compare_ref(const void *n1, const void *n2) +{ + const wchar_t *c1 = ((const subst_t *)n1)->ref; + const wchar_t *c2 = ((const subst_t *)n2)->ref; + int rv; + + rv = wcscmp(c1, c2); + return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); +} + +RB_GENERATE_STATIC(substs_ref, subst, entry_ref, subst_compare_ref); + +void +init_collate(void) +{ + int i; + + RB_INIT(&collsyms); + + RB_INIT(&collundefs); + + RB_INIT(&elem_by_symbol); + + RB_INIT(&elem_by_expand); + + RB_INIT(&collchars); + + for (i = 0; i < COLL_WEIGHTS_MAX; i++) { + RB_INIT(&substs[i]); + RB_INIT(&substs_ref[i]); + RB_INIT(&weights[i]); + nweight[i] = 1; + } + + (void) memset(&collinfo, 0, sizeof (collinfo)); + + /* allocate some initial priorities */ + pri_ignore = new_pri(); + + set_pri(pri_ignore, 0, RESOLVED); + + for (i = 0; i < COLL_WEIGHTS_MAX; i++) { + pri_undefined[i] = new_pri(); + + /* we will override this later */ + set_pri(pri_undefined[i], COLLATE_MAX_PRIORITY, UNKNOWN); + } +} + +void +define_collsym(char *name) +{ + collsym_t *sym; + + if ((sym = calloc(1, sizeof(*sym))) == NULL) { + fprintf(stderr,"out of memory\n"); + return; + } + sym->name = name; + sym->ref = new_pri(); + + if (RB_FIND(collsyms, &collsyms, sym) != NULL) { + /* + * This should never happen because we are only called + * for undefined symbols. 
+ */ + free(sym); + INTERR; + return; + } + RB_INSERT(collsyms, &collsyms, sym); +} + +collsym_t * +lookup_collsym(char *name) +{ + collsym_t srch; + + srch.name = name; + return (RB_FIND(collsyms, &collsyms, &srch)); +} + +collelem_t * +lookup_collelem(char *symbol) +{ + collelem_t srch; + + srch.symbol = symbol; + return (RB_FIND(elem_by_symbol, &elem_by_symbol, &srch)); +} + +static collundef_t * +get_collundef(char *name) +{ + collundef_t srch; + collundef_t *ud; + int i; + + srch.name = name; + if ((ud = RB_FIND(collundefs, &collundefs, &srch)) == NULL) { + if (((ud = calloc(1, sizeof(*ud))) == NULL) || + ((ud->name = strdup(name)) == NULL)) { + fprintf(stderr,"out of memory\n"); + free(ud); + return (NULL); + } + for (i = 0; i < NUM_WT; i++) { + ud->ref[i] = new_pri(); + } + RB_INSERT(collundefs, &collundefs, ud); + } + add_charmap_undefined(name); + return (ud); +} + +static collchar_t * +get_collchar(wchar_t wc, int create) +{ + collchar_t srch; + collchar_t *cc; + int i; + + srch.wc = wc; + cc = RB_FIND(collchars, &collchars, &srch); + if ((cc == NULL) && create) { + if ((cc = calloc(1, sizeof(*cc))) == NULL) { + fprintf(stderr, "out of memory\n"); + return (NULL); + } + for (i = 0; i < NUM_WT; i++) { + cc->ref[i] = new_pri(); + } + cc->wc = wc; + RB_INSERT(collchars, &collchars, cc); + } + return (cc); +} + +void +end_order_collsym(collsym_t *sym) +{ + start_order(T_COLLSYM); + /* update the weight */ + + set_pri(sym->ref, nextpri, RESOLVED); + nextpri++; +} + +void +end_order(void) +{ + int i; + int32_t pri; + int32_t ref; + collpri_t *p; + + /* advance the priority/weight */ + pri = nextpri; + + switch (currorder) { + case T_CHAR: + for (i = 0; i < NUM_WT; i++) { + if (((ref = order_weights[i]) < 0) || + ((p = get_pri(ref)) == NULL) || + (p->pri == -1)) { + /* unspecified weight is a self reference */ + set_pri(currchar->ref[i], pri, RESOLVED); + } else { + set_pri(currchar->ref[i], ref, REFER); + } + order_weights[i] = -1; + } + + /* leave a cookie 
trail in case next symbol is ellipsis */ + ellipsis_start = currchar->wc + 1; + currchar = NULL; + break; + + case T_ELLIPSIS: + /* save off the weights where we can find them */ + for (i = 0; i < NUM_WT; i++) { + ellipsis_weights[i] = order_weights[i]; + order_weights[i] = -1; + } + break; + + case T_COLLELEM: + if (currelem == NULL) { + INTERR; + } else { + for (i = 0; i < NUM_WT; i++) { + + if (((ref = order_weights[i]) < 0) || + ((p = get_pri(ref)) == NULL) || + (p->pri == -1)) { + set_pri(currelem->ref[i], pri, + RESOLVED); + } else { + set_pri(currelem->ref[i], ref, REFER); + } + order_weights[i] = -1; + } + } + break; + + case T_UNDEFINED: + for (i = 0; i < NUM_WT; i++) { + if (((ref = order_weights[i]) < 0) || + ((p = get_pri(ref)) == NULL) || + (p->pri == -1)) { + set_pri(pri_undefined[i], -1, RESOLVED); + } else { + set_pri(pri_undefined[i], ref, REFER); + } + order_weights[i] = -1; + } + break; + + case T_SYMBOL: + for (i = 0; i < NUM_WT; i++) { + if (((ref = order_weights[i]) < 0) || + ((p = get_pri(ref)) == NULL) || + (p->pri == -1)) { + set_pri(currundef->ref[i], pri, RESOLVED); + } else { + set_pri(currundef->ref[i], ref, REFER); + } + order_weights[i] = -1; + } + break; + + default: + INTERR; + } + + nextpri++; +} + +static void +start_order(int type) +{ + int i; + + lastorder = currorder; + currorder = type; + + /* this is used to protect ELLIPSIS processing */ + if ((lastorder == T_ELLIPSIS) && (type != T_CHAR)) { + fprintf(stderr, "character value expected\n"); + } + + for (i = 0; i < COLL_WEIGHTS_MAX; i++) { + order_weights[i] = -1; + } + curr_weight = 0; +} + +void +start_order_undefined(void) +{ + start_order(T_UNDEFINED); +} + +void +start_order_symbol(char *name) +{ + currundef = get_collundef(name); + start_order(T_SYMBOL); +} + +void +start_order_char(wchar_t wc) +{ + collchar_t *cc; + int32_t ref; + + start_order(T_CHAR); + + /* + * If we last saw an ellipsis, then we need to close the range. + * Handle that here.
Note that we have to be careful because the + * items *inside* the range are treated exclusively to the items + * outside of the range. The ends of the range can have quite + * different weights than the range members. + */ + if (lastorder == T_ELLIPSIS) { + int i; + + if (wc < ellipsis_start) { + fprintf(stderr, "malformed range!\n"); + return; + } + while (ellipsis_start < wc) { + /* + * pick all of the saved weights for the + * ellipsis. note that -1 encodes for the + * ellipsis itself, which means to take the + * current relative priority. + */ + if ((cc = get_collchar(ellipsis_start, 1)) == NULL) { + INTERR; + return; + } + for (i = 0; i < NUM_WT; i++) { + collpri_t *p; + if (((ref = ellipsis_weights[i]) == -1) || + ((p = get_pri(ref)) == NULL) || + (p->pri == -1)) { + set_pri(cc->ref[i], nextpri, RESOLVED); + } else { + set_pri(cc->ref[i], ref, REFER); + } + ellipsis_weights[i] = 0; + } + ellipsis_start++; + nextpri++; + } + } + + currchar = get_collchar(wc, 1); +} + +void +start_order_collelem(collelem_t *e) +{ + start_order(T_COLLELEM); + currelem = e; +} + +void +start_order_ellipsis(void) +{ + int i; + + start_order(T_ELLIPSIS); + + if (lastorder != T_CHAR) { + fprintf(stderr, "illegal starting point for range\n"); + return; + } + + for (i = 0; i < NUM_WT; i++) { + ellipsis_weights[i] = order_weights[i]; + } +} + +void +define_collelem(char *name, wchar_t *wcs) +{ + collelem_t *e; + int i; + + if (wcslen(wcs) >= COLLATE_STR_LEN) { + fprintf(stderr,"expanded collation element too long\n"); + return; + } + + if ((e = calloc(1, sizeof(*e))) == NULL) { + fprintf(stderr, "out of memory\n"); + return; + } + e->expand = wcs; + e->symbol = name; + + /* + * This is executed before the order statement, so we don't + * know how many priorities we *really* need. We allocate one + * for each possible weight. Not a big deal, as collating-elements + * prove to be quite rare.
+ */ + for (i = 0; i < COLL_WEIGHTS_MAX; i++) { + e->ref[i] = new_pri(); + } + + /* A character sequence can only reduce to one element. */ + if ((RB_FIND(elem_by_symbol, &elem_by_symbol, e) != NULL) || + (RB_FIND(elem_by_expand, &elem_by_expand, e) != NULL)) { + fprintf(stderr, "duplicate collating element definition\n"); + free(e); + return; + } + RB_INSERT(elem_by_symbol, &elem_by_symbol, e); + RB_INSERT(elem_by_expand, &elem_by_expand, e); +} + +void +add_order_bit(int kw) +{ + uint8_t bit = DIRECTIVE_UNDEF; + + switch (kw) { + case T_FORWARD: + bit = DIRECTIVE_FORWARD; + break; + case T_BACKWARD: + bit = DIRECTIVE_BACKWARD; + break; + case T_POSITION: + bit = DIRECTIVE_POSITION; + break; + default: + INTERR; + break; + } + collinfo.directive[collinfo.directive_count] |= bit; +} + +void +add_order_directive(void) +{ + if (collinfo.directive_count >= COLL_WEIGHTS_MAX) { + fprintf(stderr, "too many directives (max %d)\n", COLL_WEIGHTS_MAX); + return; + } + collinfo.directive_count++; +} + +static void +add_order_pri(int32_t ref) +{ + if (curr_weight >= NUM_WT) { + fprintf(stderr, "too many weights (max %d)\n", NUM_WT); + return; + } + order_weights[curr_weight] = ref; + curr_weight++; +} + +void +add_order_collsym(collsym_t *s) +{ + add_order_pri(s->ref); +} + +void +add_order_char(wchar_t wc) +{ + collchar_t *cc; + + if ((cc = get_collchar(wc, 1)) == NULL) { + INTERR; + return; + } + + add_order_pri(cc->ref[curr_weight]); +} + +void +add_order_collelem(collelem_t *e) +{ + add_order_pri(e->ref[curr_weight]); +} + +void +add_order_ignore(void) +{ + add_order_pri(pri_ignore); +} + +void +add_order_symbol(char *sym) +{ + collundef_t *c; + if ((c = get_collundef(sym)) == NULL) { + INTERR; + return; + } + add_order_pri(c->ref[curr_weight]); +} + +void +add_order_ellipsis(void) +{ + /* special NULL value indicates self reference */ + add_order_pri(0); +} + +void +add_order_subst(void) +{ + subst_t srch; + subst_t *s; + int i; + + (void) memset(&srch, 0, sizeof (srch)); 
+ for (i = 0; i < curr_subst; i++) { + srch.ref[i] = subst_weights[i]; + subst_weights[i] = 0; + } + s = RB_FIND(substs_ref, &substs_ref[curr_weight], &srch); + + if (s == NULL) { + if ((s = calloc(1, sizeof(*s))) == NULL) { + fprintf(stderr,"out of memory\n"); + return; + } + s->key = new_pri(); + + /* + * We use a self reference for our key, but we set a + * high bit to indicate that this is a substitution + * reference. This will expedite table lookups later, + * and prevent table lookups for situations that don't + * require it. (In short, it's a big win, because we + * can skip a lot of binary searching.) + */ + set_pri(s->key, + (nextsubst[curr_weight] | COLLATE_SUBST_PRIORITY), + RESOLVED); + nextsubst[curr_weight] += 1; + + for (i = 0; i < curr_subst; i++) { + s->ref[i] = srch.ref[i]; + } + + RB_INSERT(substs_ref, &substs_ref[curr_weight], s); + + if (RB_FIND(substs, &substs[curr_weight], s) != NULL) { + INTERR; + return; + } + RB_INSERT(substs, &substs[curr_weight], s); + } + curr_subst = 0; + + + /* + * We are using the current (unique) priority as a search key + * in the substitution table.
+ */ + add_order_pri(s->key); +} + +static void +add_subst_pri(int32_t ref) +{ + if (curr_subst >= COLLATE_STR_LEN) { + fprintf(stderr,"substitution string is too long\n"); + return; + } + subst_weights[curr_subst] = ref; + curr_subst++; +} + +void +add_subst_char(wchar_t wc) +{ + collchar_t *cc; + + + if (((cc = get_collchar(wc, 1)) == NULL) || + (cc->wc != wc)) { + INTERR; + return; + } + /* we take the weight for the character at that position */ + add_subst_pri(cc->ref[curr_weight]); +} + +void +add_subst_collelem(collelem_t *e) +{ + add_subst_pri(e->ref[curr_weight]); +} + +void +add_subst_collsym(collsym_t *s) +{ + add_subst_pri(s->ref); +} + +void +add_subst_symbol(char *ptr) +{ + collundef_t *cu; + + if ((cu = get_collundef(ptr)) != NULL) { + add_subst_pri(cu->ref[curr_weight]); + } +} + +void +add_weight(int32_t ref, int pass) +{ + weight_t srch; + weight_t *w; + + srch.pri = resolve_pri(ref); + + /* No translation of ignores */ + if (srch.pri == 0) + return; + + /* Substitution priorities are not weights */ + if (srch.pri & COLLATE_SUBST_PRIORITY) + return; + + if (RB_FIND(weights, &weights[pass], &srch) != NULL) + return; + + if ((w = calloc(1, sizeof(*w))) == NULL) { + fprintf(stderr, "out of memory\n"); + return; + } + w->pri = srch.pri; + RB_INSERT(weights, &weights[pass], w); +} + +void +add_weights(int32_t *refs) +{ + int i; + for (i = 0; i < NUM_WT; i++) { + add_weight(refs[i], i); + } +} + +int32_t +get_weight(int32_t ref, int pass) +{ + weight_t srch; + weight_t *w; + int32_t pri; + + pri = resolve_pri(ref); + if (pri & COLLATE_SUBST_PRIORITY) { + return (pri); + } + if (pri <= 0) { + return (pri); + } + srch.pri = pri; + if ((w = RB_FIND(weights, &weights[pass], &srch)) == NULL) { + INTERR; + return (-1); + } + return (w->opt); +} + +wchar_t * +wsncpy(wchar_t *s1, const wchar_t *s2, size_t n) +{ + wchar_t *os1 = s1; + + n++; + while (--n > 0 && (*s1++ = htote(*s2++)) != 0) + continue; + if (n > 0) + while (--n > 0) + *s1++ = 0; + return (os1); 
+} + +#define RB_COUNT(x, name, head, cnt) do { \ + (cnt) = 0; \ + RB_FOREACH(x, name, (head)) { \ + (cnt)++; \ + } \ +} while (0) + +#define RB_NUMNODES(type, name, head, cnt) do { \ + type *t; \ + cnt = 0; \ + RB_FOREACH(t, name, head) { \ + cnt++; \ + } \ +} while (0) + +void +dump_collate(void) +{ + FILE *f; + int i, j, n; + size_t sz; + int32_t pri; + collelem_t *ce; + collchar_t *cc; + subst_t *sb; + char fmt_version[COLLATE_FMT_VERSION_LEN]; + char def_version[XLOCALE_DEF_VERSION_LEN]; + collate_char_t chars[UCHAR_MAX + 1]; + collate_large_t *large; + collate_subst_t *subst[COLL_WEIGHTS_MAX]; + collate_chain_t *chain; + + /* + * We have to run through a preliminary pass to identify all the + * weights that we use for each sorting level. + */ + for (i = 0; i < NUM_WT; i++) { + add_weight(pri_ignore, i); + } + for (i = 0; i < NUM_WT; i++) { + RB_FOREACH(sb, substs, &substs[i]) { + for (j = 0; sb->ref[j]; j++) { + add_weight(sb->ref[j], i); + } + } + } + RB_FOREACH(ce, elem_by_expand, &elem_by_expand) { + add_weights(ce->ref); + } + RB_FOREACH(cc, collchars, &collchars) { + add_weights(cc->ref); + } + + /* + * Now we walk the entire set of weights, removing the gaps + * in the weights. This gives us optimum usage. The walk + * occurs in priority. + */ + for (i = 0; i < NUM_WT; i++) { + weight_t *w; + RB_FOREACH(w, weights, &weights[i]) { + w->opt = nweight[i]; + nweight[i] += 1; + } + } + + (void) memset(&chars, 0, sizeof (chars)); + (void) memset(fmt_version, 0, COLLATE_FMT_VERSION_LEN); + (void) strlcpy(fmt_version, COLLATE_FMT_VERSION, sizeof (fmt_version)); + (void) memset(def_version, 0, XLOCALE_DEF_VERSION_LEN); + if (version) + (void) strlcpy(def_version, version, sizeof (def_version)); + + /* + * We need to make sure we arrange for the UNDEFINED field + * to show up. Also, set the total weight counts. 
+ */ + for (i = 0; i < NUM_WT; i++) { + if (resolve_pri(pri_undefined[i]) == -1) { + set_pri(pri_undefined[i], -1, RESOLVED); + /* they collate at the end of everything else */ + collinfo.undef_pri[i] = htote(COLLATE_MAX_PRIORITY); + } + collinfo.pri_count[i] = htote(nweight[i]); + } + + collinfo.pri_count[NUM_WT] = htote(max_wide()); + collinfo.undef_pri[NUM_WT] = htote(COLLATE_MAX_PRIORITY); + collinfo.directive[NUM_WT] = DIRECTIVE_UNDEFINED; + + /* + * Ordinary character priorities + */ + for (i = 0; i <= UCHAR_MAX; i++) { + if ((cc = get_collchar(i, 0)) != NULL) { + for (j = 0; j < NUM_WT; j++) { + chars[i].pri[j] = + htote(get_weight(cc->ref[j], j)); + } + } else { + for (j = 0; j < NUM_WT; j++) { + chars[i].pri[j] = + htote(get_weight(pri_undefined[j], j)); + } + /* + * Per POSIX, for undefined characters, we + * also have to add a last item, which is the + * character code. + */ + chars[i].pri[NUM_WT] = htote(i); + } + } + + /* + * Substitution tables + */ + for (i = 0; i < NUM_WT; i++) { + collate_subst_t *st = NULL; + subst_t *temp; + RB_COUNT(temp, substs, &substs[i], n); + subst_count[i] = n; + if ((st = calloc(n, sizeof(collate_subst_t))) == NULL) { + fprintf(stderr, "out of memory\n"); + return; + } + n = 0; + RB_FOREACH(sb, substs, &substs[i]) { + if ((st[n].key = resolve_pri(sb->key)) < 0) { + /* by definition these resolve! */ + INTERR; + } + if (st[n].key != (n | COLLATE_SUBST_PRIORITY)) { + INTERR; + } + st[n].key = htote(st[n].key); + for (j = 0; sb->ref[j]; j++) { + st[n].pri[j] = htote(get_weight(sb->ref[j], + i)); + } + n++; + } + if (n != subst_count[i]) + INTERR; + subst[i] = st; + } + + + /* + * Chains, i.e. 
collating elements + */ + RB_NUMNODES(collelem_t, elem_by_expand, &elem_by_expand, chain_count); + chain = calloc(chain_count, sizeof(collate_chain_t)); + if (chain == NULL) { + fprintf(stderr, "out of memory\n"); + return; + } + n = 0; + RB_FOREACH(ce, elem_by_expand, &elem_by_expand) { + (void) wsncpy(chain[n].str, ce->expand, COLLATE_STR_LEN); + for (i = 0; i < NUM_WT; i++) { + chain[n].pri[i] = htote(get_weight(ce->ref[i], i)); + } + n++; + } + if (n != chain_count) + INTERR; + + /* + * Large (> UCHAR_MAX) character priorities + */ + RB_NUMNODES(collchar_t, collchars, &collchars, n); + large = calloc(n, sizeof(collate_large_t)); + if (large == NULL) { + fprintf(stderr, "out of memory\n"); + return; + } + + i = 0; + RB_FOREACH(cc, collchars, &collchars) { + int undef = 0; + /* we already gathered those */ + if (cc->wc <= UCHAR_MAX) + continue; + for (j = 0; j < NUM_WT; j++) { + if ((pri = get_weight(cc->ref[j], j)) < 0) { + undef = 1; + } + if (undef && (pri >= 0)) { + /* if undefined, then all priorities are */ + INTERR; + } else { + large[i].pri.pri[j] = htote(pri); + } + } + if (!undef) { + large[i].val = htote(cc->wc); + large_count = i++; + } + } + + if ((f = open_category()) == NULL) { + return; + } + + /* Time to write the entire data set out */ + + for (i = 0; i < NUM_WT; i++) + collinfo.subst_count[i] = htote(subst_count[i]); + collinfo.chain_count = htote(chain_count); + collinfo.large_count = htote(large_count); + + if ((wr_category(fmt_version, COLLATE_FMT_VERSION_LEN, f) < 0) || + (wr_category(def_version, XLOCALE_DEF_VERSION_LEN, f) < 0) || + (wr_category(&collinfo, sizeof (collinfo), f) < 0) || + (wr_category(&chars, sizeof (chars), f) < 0)) { + return; + } + + for (i = 0; i < NUM_WT; i++) { + sz = sizeof (collate_subst_t) * subst_count[i]; + if (wr_category(subst[i], sz, f) < 0) { + return; + } + } + sz = sizeof (collate_chain_t) * chain_count; + if (wr_category(chain, sz, f) < 0) { + return; + } + sz = sizeof (collate_large_t) * large_count; + 
if (wr_category(large, sz, f) < 0) { + return; + } + + close_category(f); +} diff --git a/localedef/ctype.c b/localedef/ctype.c new file mode 100644 index 0000000..63a57bb --- /dev/null +++ b/localedef/ctype.c @@ -0,0 +1,598 @@ +/*- + * Copyright 2018 Nexenta Systems, Inc. + * Copyright 2012 Garrett D'Amore All rights reserved. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * LC_CTYPE database generation routines for localedef. 
+ */ +#include +#include + +#ifdef __APPLE__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include "localedef.h" +#include "parser.h" + +/* Always include the defines for the target: */ +#define _DONT_USE_CTYPE_INLINE_ /* Avoid dependencies on runetype.h */ +#include "_ctype.h" +#include "runefile.h" + + +/* Needed for bootstrapping, _CTYPE_N */ +#ifndef _CTYPE_N +#define _CTYPE_N 0x00400000L +#endif + +#define _ISUPPER _CTYPE_U +#define _ISLOWER _CTYPE_L +#define _ISDIGIT _CTYPE_D +#define _ISXDIGIT _CTYPE_X +#define _ISSPACE _CTYPE_S +#define _ISBLANK _CTYPE_B +#define _ISALPHA _CTYPE_A +#define _ISPUNCT _CTYPE_P +#define _ISGRAPH _CTYPE_G +#define _ISPRINT _CTYPE_R +#define _ISCNTRL _CTYPE_C +#define _E1 _CTYPE_Q +#define _E2 _CTYPE_I +#define _E3 0 +#define _E4 _CTYPE_N +#define _E5 _CTYPE_T + +static wchar_t last_ctype; +static int ctype_compare(const void *n1, const void *n2); + +typedef struct ctype_node { + wchar_t wc; + int32_t ctype; + int32_t toupper; + int32_t tolower; + RB_ENTRY(ctype_node) entry; +} ctype_node_t; + +#ifdef __APPLE__ +static ctype_node_t *get_ctype(wchar_t wc); + +static bool any_tolower; +static bool any_toupper; + +int ctype_dumped; +#endif + +static RB_HEAD(ctypes, ctype_node) ctypes; +RB_GENERATE_STATIC(ctypes, ctype_node, entry, ctype_compare); + +#ifdef __APPLE__ +static const struct posix_ctype_spec { + int ctype; + wchar_t start; + wchar_t end; /* 0 for "just start char" */ +} posix_ctype_spec[] = { + { + .ctype = (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT), + .start = 'A', + .end = 'Z', + }, + { + .ctype = (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT), + .start = 'a', + .end = 'z', + }, + { + .ctype = (_ISDIGIT | _ISXDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT | _E4), + .start = '0', + .end = '9', + }, + { + .ctype = (_ISXDIGIT | _ISPRINT), + .start = 'A', + .end = 'F', + }, + { + .ctype = (_ISXDIGIT | _ISPRINT), + .start = 'a', + .end = 'f', + }, + { + .ctype = (_ISBLANK | _ISPRINT | 
_ISSPACE), + .start = ' ', + }, + { + .ctype = (_ISBLANK | _ISSPACE), + .start = '\t', + }, + { + .ctype = _ISSPACE, + .start = '\f', + }, + { + .ctype = _ISSPACE, + .start = '\n', + }, + { + .ctype = _ISSPACE, + .start = '\r', + }, + { + .ctype = _ISSPACE, + .start = '\v', + }, +}; +#endif + +static int +ctype_compare(const void *n1, const void *n2) +{ + const ctype_node_t *c1 = n1; + const ctype_node_t *c2 = n2; + + return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0); +} + +void +init_ctype(void) +{ +#ifdef __APPLE__ + const struct posix_ctype_spec *cspec; +#endif + + RB_INIT(&ctypes); +#ifdef __APPLE__ + for (size_t i = 0; i < nitems(posix_ctype_spec); i++) { + ctype_node_t *ctn; + wchar_t start, end; + + cspec = &posix_ctype_spec[i]; + start = cspec->start; + end = cspec->end; + if (end == 0) + end = start; + + for (wchar_t wc = start; wc <= end; wc++) { + ctn = get_ctype(wc); + + ctn->ctype |= cspec->ctype; + } + } +#endif +} + +static void +add_ctype_impl(ctype_node_t *ctn) +{ + switch (last_kw) { + case T_ISUPPER: + ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT); + break; + case T_ISLOWER: + ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT); + break; + case T_ISALPHA: + ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT); + break; + case T_ISDIGIT: + ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT | _E4); + break; + case T_ISSPACE: + /* + * This can be troublesome as form-feed, newline, + * carriage-return, tab, and vertical-tab are defined both + * as space and cntrl, and POSIX doesn't allow cntrl/print + * combination. We will take care of this in dump_ctype(). 
+ */ + ctn->ctype |= (_ISSPACE | _ISPRINT); + break; + case T_ISCNTRL: + ctn->ctype |= _ISCNTRL; + break; + case T_ISGRAPH: + ctn->ctype |= (_ISGRAPH | _ISPRINT); + break; + case T_ISPRINT: + ctn->ctype |= _ISPRINT; + break; + case T_ISPUNCT: + ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT); + break; + case T_ISXDIGIT: + ctn->ctype |= (_ISXDIGIT | _ISPRINT); + break; + case T_ISBLANK: + ctn->ctype |= (_ISBLANK | _ISSPACE); + break; + case T_ISPHONOGRAM: + ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH); + break; + case T_ISIDEOGRAM: + ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH); + break; + case T_ISENGLISH: + ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH); + break; + case T_ISNUMBER: + ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH); + break; + case T_ISSPECIAL: + ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH); + break; + case T_ISALNUM: + /* + * We can't do anything with this. The character + * should already be specified as a digit or alpha. + */ + break; + default: + errf("not a valid character class"); + } +} + +static ctype_node_t * +get_ctype(wchar_t wc) +{ + ctype_node_t srch; + ctype_node_t *ctn; + + srch.wc = wc; + if ((ctn = RB_FIND(ctypes, &ctypes, &srch)) == NULL) { + if ((ctn = calloc(1, sizeof (*ctn))) == NULL) { + errf("out of memory"); + return (NULL); + } + ctn->wc = wc; + + RB_INSERT(ctypes, &ctypes, ctn); + } + return (ctn); +} + +void +add_ctype(int val) +{ + ctype_node_t *ctn; + + if ((ctn = get_ctype(val)) == NULL) { + INTERR; + return; + } + add_ctype_impl(ctn); + last_ctype = ctn->wc; +} + +void +add_ctype_range(wchar_t end) +{ + ctype_node_t *ctn; + wchar_t cur; + + if (end < last_ctype) { + errf("malformed character range (%u ... %u))", + last_ctype, end); + return; + } + for (cur = last_ctype + 1; cur <= end; cur++) { + if ((ctn = get_ctype(cur)) == NULL) { + INTERR; + return; + } + add_ctype_impl(ctn); + } + last_ctype = end; + +} + +/* + * A word about widths: if the width mask is specified, then libc + * unconditionally honors it. 
Otherwise, it assumes printable + * characters have width 1, and non-printable characters have width + * -1 (except for NULL which is special with width 0). Hence, we have + * no need to inject defaults here -- the "default" unset value of 0 + * indicates that libc should use its own logic in wcwidth as described. + */ +void +add_width(int wc, int width) +{ + ctype_node_t *ctn; + + if ((ctn = get_ctype(wc)) == NULL) { + INTERR; + return; + } + ctn->ctype &= ~(_CTYPE_SWM); + switch (width) { + case 0: + ctn->ctype |= _CTYPE_SW0; + break; + case 1: + ctn->ctype |= _CTYPE_SW1; + break; + case 2: + ctn->ctype |= _CTYPE_SW2; + break; + case 3: + ctn->ctype |= _CTYPE_SW3; + break; + } +} + +void +add_width_range(int start, int end, int width) +{ + for (; start <= end; start++) { + add_width(start, width); + } +} + +void +add_caseconv(int val, int wc) +{ + ctype_node_t *ctn; + + ctn = get_ctype(val); + if (ctn == NULL) { + INTERR; + return; + } + + switch (last_kw) { + case T_TOUPPER: +#ifdef __APPLE__ + any_toupper = true; +#endif + ctn->toupper = wc; + break; + case T_TOLOWER: +#ifdef __APPLE__ + any_tolower = true; +#endif + ctn->tolower = wc; + break; + default: + INTERR; + break; + } +} + +void +dump_ctype(void) +{ + FILE *f; + _FileRuneLocale rl; + ctype_node_t *ctn, *last_ct, *last_lo, *last_up; + _FileRuneEntry *ct = NULL; + _FileRuneEntry *lo = NULL; + _FileRuneEntry *up = NULL; + wchar_t wc; + uint32_t runetype_ext_nranges; + uint32_t maplower_ext_nranges; + uint32_t mapupper_ext_nranges; + + (void) memset(&rl, 0, sizeof (rl)); + runetype_ext_nranges = 0; + last_ct = NULL; + maplower_ext_nranges = 0; + last_lo = NULL; + mapupper_ext_nranges = 0; + last_up = NULL; + + ctype_dumped = 1; + + if ((f = open_category()) == NULL) + return; + +#ifdef __APPLE__ + (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_B, 8); +#else + (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8); +#endif + (void) strlcpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding)); + + /* + * Initialize 
the identity map. + */ + for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) { + rl.maplower[wc] = htote(wc); + rl.mapupper[wc] = htote(wc); + } + + RB_FOREACH(ctn, ctypes, &ctypes) { + int conflict = 0; + + wc = ctn->wc; + + /* + * POSIX requires certain portable characters have + * certain types. Add them if they are missing. + */ + if ((wc >= 1) && (wc <= 127)) { +#ifdef __APPLE__ + /* + * POSIX specifies that we include some default tolower + * and toupper mappings if the locale definition does + * not emit any definition for their respective type. + */ + if (!any_tolower && wc >= 'A' && wc <= 'Z') + ctn->tolower = wc + 0x20; + if (!any_toupper && wc >= 'a' && wc <= 'z') + ctn->toupper = wc - 0x20; +#endif + if ((wc >= 'A') && (wc <= 'Z')) + ctn->ctype |= _ISUPPER; + if ((wc >= 'a') && (wc <= 'z')) + ctn->ctype |= _ISLOWER; + if ((wc >= '0') && (wc <= '9')) + ctn->ctype |= _ISDIGIT; + if (wc == ' ') + ctn->ctype |= _ISPRINT; + if (strchr(" \f\n\r\t\v", (char)wc) != NULL) + ctn->ctype |= _ISSPACE; + if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL) + ctn->ctype |= _ISXDIGIT; + if (strchr(" \t", (char)wc)) + ctn->ctype |= _ISBLANK; + + /* + * Technically these settings are only + * required for the C locale. However, it + * turns out that because of the historical + * version of isprint(), we need them for all + * locales as well. Note that these are not + * necessarily valid punctuation characters in + * the current language, but ispunct() needs + * to return TRUE for them. + */ + if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~", + (char)wc)) + ctn->ctype |= _ISPUNCT; + } + + /* + * POSIX also requires that certain types imply + * others. Add any inferred types here. 
+ */ + if (ctn->ctype & (_ISUPPER |_ISLOWER)) + ctn->ctype |= _ISALPHA; + if (ctn->ctype & _ISDIGIT) + ctn->ctype |= _ISXDIGIT; + if (ctn->ctype & _ISBLANK) + ctn->ctype |= _ISSPACE; + if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT)) + ctn->ctype |= _ISGRAPH; + if (ctn->ctype & _ISGRAPH) + ctn->ctype |= _ISPRINT; + + /* + * POSIX requires that certain combinations are invalid. + * Try fixing the cases we know about (see add_ctype_impl()). + */ + if ((ctn->ctype & (_ISSPACE|_ISCNTRL)) == (_ISSPACE|_ISCNTRL)) + ctn->ctype &= ~_ISPRINT; + + /* + * Finally, don't flag remaining cases as a fatal error, + * and just warn about them. + */ + if ((ctn->ctype & _ISALPHA) && + (ctn->ctype & (_ISPUNCT|_ISDIGIT))) + conflict++; + if ((ctn->ctype & _ISPUNCT) && + (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT))) + conflict++; + if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH)) + conflict++; + if ((ctn->ctype & _ISCNTRL) && (ctn->ctype & _ISPRINT)) + conflict++; + if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH))) + conflict++; + + if (conflict) { + warn("conflicting classes for character 0x%x (%x)", + wc, ctn->ctype); + } + /* + * Handle the lower 256 characters using the simple + * optimization. Note that if we have not defined the + * upper/lower case, then we identity map it. 
+ */ + if ((unsigned)wc < _CACHED_RUNES) { + rl.runetype[wc] = htote(ctn->ctype); + if (ctn->tolower) + rl.maplower[wc] = htote(ctn->tolower); + if (ctn->toupper) + rl.mapupper[wc] = htote(ctn->toupper); + continue; + } + + if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype) && + (last_ct->wc + 1 == wc)) { + ct[runetype_ext_nranges - 1].max = htote(wc); + } else { + runetype_ext_nranges++; + ct = realloc(ct, sizeof (*ct) * runetype_ext_nranges); + ct[runetype_ext_nranges - 1].min = htote(wc); + ct[runetype_ext_nranges - 1].max = htote(wc); + ct[runetype_ext_nranges - 1].map = + htote(ctn->ctype); + } + last_ct = ctn; + if (ctn->tolower == 0) { + last_lo = NULL; + } else if ((last_lo != NULL) && + (last_lo->tolower + 1 == ctn->tolower)) { + lo[maplower_ext_nranges - 1].max = htote(wc); + last_lo = ctn; + } else { + maplower_ext_nranges++; + lo = realloc(lo, sizeof (*lo) * maplower_ext_nranges); + lo[maplower_ext_nranges - 1].min = htote(wc); + lo[maplower_ext_nranges - 1].max = htote(wc); + lo[maplower_ext_nranges - 1].map = + htote(ctn->tolower); + last_lo = ctn; + } + + if (ctn->toupper == 0) { + last_up = NULL; + } else if ((last_up != NULL) && + (last_up->toupper + 1 == ctn->toupper)) { + up[mapupper_ext_nranges-1].max = htote(wc); + last_up = ctn; + } else { + mapupper_ext_nranges++; + up = realloc(up, sizeof (*up) * mapupper_ext_nranges); + up[mapupper_ext_nranges - 1].min = htote(wc); + up[mapupper_ext_nranges - 1].max = htote(wc); + up[mapupper_ext_nranges - 1].map = + htote(ctn->toupper); + last_up = ctn; + } + } + + rl.runetype_ext_nranges = htote(runetype_ext_nranges); + rl.maplower_ext_nranges = htote(maplower_ext_nranges); + rl.mapupper_ext_nranges = htote(mapupper_ext_nranges); + if ((wr_category(&rl, sizeof (rl), f) < 0) || + (wr_category(ct, sizeof (*ct) * runetype_ext_nranges, f) < 0) || + (wr_category(lo, sizeof (*lo) * maplower_ext_nranges, f) < 0) || + (wr_category(up, sizeof (*up) * mapupper_ext_nranges, f) < 0)) { + return; + } + + 
close_category(f); +} diff --git a/localedef/def.a55 b/localedef/def.a55 deleted file mode 100644 index 386e1c1..0000000 --- a/localedef/def.a55 +++ /dev/null @@ -1,6 +0,0 @@ -LC_COLLATE -order_start forward -order_start forward;forward;forward - -order_end -END LC_COLLATE diff --git a/localedef/def.p-1 b/localedef/def.p-1 deleted file mode 100644 index adef476..0000000 --- a/localedef/def.p-1 +++ /dev/null @@ -1,157 +0,0 @@ -# -LC_CTYPE -lower ;;;;;...; -upper A;B;C;C;...;Z -space \x20;\x09;\x0a;\x0b;\x0c;\x0d -blank \040;\011 -toupper (,);(b,B);(c,C);(c,C);(d,D);(z,Z) -digit 3;2 -END LC_CTYPE -# -LC_COLLATE -# -# The following example of collation is based on the proposed -# Canadian standard Z243.4.1-1990, "Canadian Alphanumeric -# Ordering Standard For Character sets of CSA Z234.4 Standard". -# (Other parts of this example locale definition file do not -# purport to relate to Canada, or to any other real culture.) -# The proposed standard defines a 4-weight collation, such that -# in the first pass, characters are compared without regard to -# case or accents; in second pass, backwards compare without -# regard to case; in the third pass, forward compare without -# regard to diacriticals. In the 3 first passes, non-alphabetic -# characters are ignored; in the fourth pass, only special -# characters are considered, such that "The string that has a -# special character in the lowest position comes first. If two -# strings have a special character in the same position, the -# collation value of the special character determines ordering. -# -# Only a subset of the character set is used here; mostly to -# illustrate the set-up. -# -# -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol -# Further collating-symbols follow. 
-# -# Properly, the standard does not include any multi-character -# collating elements; the one below is added for completeness. -# -collating_element from "" -collating_element from "" -collating_element from "" -collating_element from "" -collating_element from "" -# -order_start forward;backward;forward;forward,position -# -# Collating symbols are specified first in the sequence to allocate -# basic collation values to them, lower than that of any character. - - - - - - - - - - - - - -# Further collating symbols are given a basic collating value here. -# -# Here follow special characters. - IGNORE;IGNORE;IGNORE; -# Other special characters follow here. -# -# Here follow the regular characters. - ;;;IGNORE - ;;;IGNORE - ;;;IGNORE - ;;;IGNORE - ;;;IGNORE - ;;;IGNORE - "";"";\ - "";IGNORE - "";"";\ - "";IGNORE - ;;;IGNORE -... ...;;;IGNORE - ...;;;IGNORE - ;;;IGNORE - ;;;IGNORE - ;;;IGNORE - ;;;IGNORE - ;;;IGNORE - ;;;IGNORE -# -# As an example, the strings "Bach" and "bach" could be encoded (for -# compare purposes) as: -# "Bach" ;;;;;;\ -# ;;;;; -# "bach" ;;;;;;\ -# ;;;;; -# -# The two strings are equal in pass 1 and 2, but differ in pass 3. -# -# Further characters follow. -# -UNDEFINED IGNORE;IGNORE;IGNORE;IGNORE -# -order_end -# -END LC_COLLATE -# -LC_MONETARY -int_curr_symbol "USD " -currency_symbol "$" -mon_decimal_point "." -mon_grouping 3;0 -positive_sign "" -negative_sign "-" -p_cs_precedes 1 -n_sign_posn 0 -END LC_MONETARY -# -LC_NUMERIC -copy "US_en.ASCII" -decimal_point . 
-thousands_sep \, -grouping 3;3 -END LC_NUMERIC -# -LC_TIME -abday "Sun";"Mon";"Tue";"Wed";"Thu";"Fri";"Sat" -# -day "Sunday";"Monday";"Tuesday";"Wednesday";\ - "Thursday";"Friday";"Saturday" -# -abmon "Jan";"Feb";"Mar";"Apr";"May";"Jun";\ - "Jul";"Aug";"Sep";"Oct";"Nov";"Dec" -# -mon "January";"February";"March";"April";\ - "May";"June";"July";"August";"September";\ - "October";"November";"December" -# -d_t_fmt "%a %b %d %T %Z %Y\n" -am_pm "Am";"Pm" -END LC_TIME -# -LC_MESSAGES -yesexpr "^([yY][[:alpha:]]*)|(OK)" -# -noexpr "^[nN][[:alpha:]]*" -END LC_MESSAGES diff --git a/localedef/def.p-2 b/localedef/def.p-2 deleted file mode 100644 index 9b6ee15..0000000 --- a/localedef/def.p-2 +++ /dev/null @@ -1,280 +0,0 @@ -LC_CTYPE -# The following is the POSIX locale LC_CTYPE. -# "alpha" is by default "upper" and "lower" -# "alnum" is by definition "alpha" and "digit" -# "print" is by default "alnum", "punct" and the character -# "graph" is by default "alnum" and "punct" -# -upper ;;;;;;;;;;;;;\ - ;;

;;;;;;;;;; -# -lower ;;;;;;;;;;;;;\ - ;;

;;;;;;;;;; -# -digit ;;;;;;;\ - ;; -# -space ;;;;\ - ; -# -cntrl ;;;;;\ - ;;\ - ;;;;;;;;\ - ;;;;;;;;\ - ;;;;;;;;\ - ; -# -punct ;;;\ - ;;;;\ - ;;;\ - ;;;;;\ - ;;;;\ - ;;;\ - ;;;\ - ;;;;\ - ;; -# -xdigit ;;;;;;;;\ - ;;;;;;;;;;;;; -# -blank ; -# -toupper (,);(,);(,);(,);(,);\ - (,);(,);(,);(,);(,);\ - (,);(,);(,);(,);(,);\ - (

,

);(,);(,);(,);(,);\ - (,);(,);(,);(,);(,);(,) -# -tolower (,);(,);(,);(,);(,);\ - (,);(,);(,);(,);(,);\ - (,);(,);(,);(,);(,);\ - (

,

);(,);(,);(,);(,);\ - (,);(,);(,);(,);(,);(,) -END LC_CTYPE -LC_COLLATE -# This is the POSIX locale definition for the LC_COLLATE category. -# The order is the same as in the ASCII codeset. -order_start forward - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - - - - - - - - - - - - - - -order_end -# -END LC_COLLATE -LC_MONETARY -# This is the POSIX locale definition for -# the LC_MONETARY category. -# -int_curr_symbol "" -currency_symbol "" -mon_decimal_point "" -mon_thousands_sep "" -mon_grouping -1 -positive_sign "" -negative_sign "" -int_frac_digits -1 -p_cs_precedes -1 -p_sep_by_space -1 -n_cs_precedes -1 -n_sep_by_space -1 -p_sign_posn -1 -n_sign_posn -1 -# -END LC_MONETARY -LC_NUMERIC -# This is the POSIX locale definition for -# the LC_NUMERIC category. -# -decimal_point "" -thousands_sep "" -grouping -1 -# -END LC_NUMERIC -LC_TIME -# This is the POSIX locale definition for -# the LC_TIME category. -# -# Abbreviated weekday names (%a) -abday "";"";"";"";\ - "";"";"" -# -# Full weekday names (%A) -day "";"";\ - "";"";\ - "";"";\ - "" -# -# Abbreviated month names (%b) -abmon "";"";"";\ - "

";"";"";\ - "";"";"

";\ - "";"";"" -# -# Full month names (%B) -mon "";"";\ - "";"

";\ - "";"";\ - "";"";\ - "

";"";\ - "";"" -# -# Equivalent of AM/PM (%p) "AM";"PM" -am_pm "";"

" -# -# Appropriate date and time representation (%c) -# "%a %b %e %H:%M:%S %Y" -d_t_fmt "\ - \ - \ - " -# -# Appropriate date representation (%x) "%m/%d/%y" -d_fmt "\ - " -# -# Appropriate time representation (%X) "%H:%M:%S" -t_fmt "\ - " -# -# Appropriate 12-hour time representation (%r) "%I:%M:%S %p" -t_fmt_ampm "\ -

" -# -END LC_TIME -LC_MESSAGES -# This is the POSIX locale definition for -# the LC_MESSAGES category. -# -yesexpr "" -# -noexpr "" -# -yesstr "yes" -nostr "no" -END LC_MESSAGES - diff --git a/localedef/libc/collate.h b/localedef/libc/collate.h new file mode 100644 index 0000000..1fc5681 --- /dev/null +++ b/localedef/libc/collate.h @@ -0,0 +1,180 @@ +/*- + * Copyright (c) 1995 Alex Tatmanjants + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/lib/libc/locale/collate.h,v 1.15 2005/02/27 20:31:13 ru Exp $ + */ + +#ifndef _COLLATE_H_ +#define _COLLATE_H_ + +#include +#ifndef __LIBC__ +#include +#endif /* !__LIBC__ */ +#include +#include "xlocale_private.h" + +#define COLLATE_STR_LEN 24 +#define TABLE_SIZE 100 +#define COLLATE_VERSION "1.0\n" +#define COLLATE_VERSION1_1 "1.1\n" +#define COLLATE_VERSION1_1A "1.1A\n" +#define COLLATE_VERSION1_2 "1.2\n" + +#define COLLATE_FMT_VERSION_LEN 12 +#define COLLATE_FMT_VERSION "DARWIN 1.0\n" + +/* XXX */ +#ifdef __APPLE__ +#if COLL_WEIGHTS_MAX < 10 +#undef COLL_WEIGHTS_MAX +#define COLL_WEIGHTS_MAX 10 +#endif +#endif + +/* see discussion in string/FreeBSD/strxfrm for this value */ +#ifdef __APPLE__ +#define COLLATE_MAX_PRIORITY ((1 << 24) - 1) +#else +#define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */ +#endif +#define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */ + +#define DIRECTIVE_UNDEF 0x00 +#define DIRECTIVE_FORWARD 0x01 +#define DIRECTIVE_BACKWARD 0x02 +#define DIRECTIVE_POSITION 0x04 +#define DIRECTIVE_UNDEFINED 0x08 /* special last weight for UNDEFINED */ + +#define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD) + +#define COLLATE_SUBST_DUP 0x0001 +#define COLLATE_LEGACY 0x0002 + +#define IGNORE_EQUIV_CLASS 1 + +/* __collate_st_info */ +typedef struct collate_info { + __uint8_t directive_count; + __uint8_t directive[COLL_WEIGHTS_MAX]; +#ifdef __APPLE__ + __uint8_t chain_max_len; /* In padding */ +#endif + __int32_t pri_count[COLL_WEIGHTS_MAX]; + __int32_t flags; + __int32_t chain_count; + __int32_t large_count; + __int32_t subst_count[COLL_WEIGHTS_MAX]; + __int32_t undef_pri[COLL_WEIGHTS_MAX]; +} collate_info_t; + +/* + * Pin COLL_WEIGHTS_MAX to 2 for the legacy format; it's the last value used + * before supporting higher values for newer unicode data. 
+ */ +#define LEGACY_COLL_WEIGHTS_MAX 2 +#define LEGACY_COLLATE_STR_LEN 10 + +typedef struct collate_legacy_info { + __uint8_t directive[LEGACY_COLL_WEIGHTS_MAX]; + __uint8_t flags; +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN + __uint8_t directive_count:4; + __uint8_t chain_max_len:4; +#else + __uint8_t chain_max_len:4; + __uint8_t directive_count:4; +#endif + __int32_t undef_pri[LEGACY_COLL_WEIGHTS_MAX]; + __int32_t subst_count[LEGACY_COLL_WEIGHTS_MAX]; + __int32_t chain_count; + __int32_t large_pri_count; +} collate_legacy_info_t; + +/* __collate_st_char_pri */ +typedef struct collate_char { + __int32_t pri[COLL_WEIGHTS_MAX]; +} collate_char_t; +/* __collate_st_chain_pri */ +typedef struct collate_chain { + __darwin_wchar_t str[COLLATE_STR_LEN]; + __int32_t pri[COLL_WEIGHTS_MAX]; +} collate_chain_t; +/* __collate_st_large_char_pri */ +typedef struct collate_large { + __int32_t val; + collate_char_t pri; +} collate_large_t; +/* __collate_st_subst */ +typedef struct collate_subst { + __int32_t key; + __darwin_wchar_t pri[COLLATE_STR_LEN]; +} collate_subst_t; + +struct xlocale_collate { + struct xlocale_component header; + unsigned char __collate_load_error; + char *map; + size_t maplen; + + collate_info_t *info; + collate_subst_t *subst_table[COLL_WEIGHTS_MAX]; + collate_chain_t *chain_pri_table; + collate_large_t *large_pri_table; + collate_char_t *char_pri_table; +}; + +#ifndef __LIBC__ +extern int __collate_load_error; +#define __collate_char_pri_table (*__collate_char_pri_table_ptr) +extern collate_char_t __collate_char_pri_table[UCHAR_MAX + 1]; +extern collate_chain_t *__collate_chain_pri_table; +extern __int32_t *__collate_chain_equiv_table; +extern collate_info_t __collate_info; +#endif /* !__LIBC__ */ + +__BEGIN_DECLS +#ifdef __LIBC__ +__darwin_wchar_t *__collate_mbstowcs(const char *, locale_t); +__darwin_wchar_t *__collate_wcsdup(const __darwin_wchar_t *); +__darwin_wchar_t *__collate_substitute(const __darwin_wchar_t *, int, locale_t); +int 
__collate_load_tables(const char *, locale_t); +void __collate_lookup_l(const __darwin_wchar_t *, int *, int *, int *, locale_t); +void __collate_lookup_which(const __darwin_wchar_t *, int *, int *, int, locale_t); +void __collate_xfrm(const __darwin_wchar_t *, __darwin_wchar_t **, locale_t); +int __collate_range_cmp(__darwin_wchar_t, __darwin_wchar_t, locale_t); +size_t __collate_collating_symbol(__darwin_wchar_t *, size_t, const char *, size_t, __darwin_mbstate_t *, locale_t); +int __collate_equiv_class(const char *, size_t, __darwin_mbstate_t *, locale_t); +size_t __collate_equiv_match(int, __darwin_wchar_t *, size_t, __darwin_wchar_t, const char *, size_t, __darwin_mbstate_t *, size_t *, locale_t); +#else /* !__LIBC__ */ +void __collate_lookup(const unsigned char *, int *, int *, int *); +#endif /* __LIBC__ */ +#ifdef COLLATE_DEBUG +void __collate_print_tables(void); +#endif +__END_DECLS + +#endif /* !_COLLATE_H_ */ diff --git a/localedef/libc/lmessages.h b/localedef/libc/lmessages.h new file mode 100644 index 0000000..47f08a9 --- /dev/null +++ b/localedef/libc/lmessages.h @@ -0,0 +1,44 @@ +/*- + * Copyright (c) 2000, 2001 Alexey Zelkin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/lib/libc/locale/lmessages.h,v 1.3 2001/12/20 18:28:52 phantom Exp $ + */ + +#ifndef _LMESSAGES_H_ +#define _LMESSAGES_H_ + +#include + +struct lc_messages_T { + const char *yesexpr; + const char *noexpr; + const char *yesstr; + const char *nostr; +}; + +struct lc_messages_T *__get_current_messages_locale(locale_t); +int __messages_load_locale(const char *, locale_t); + +#endif /* !_LMESSAGES_H_ */ diff --git a/localedef/libc/lmonetary.h b/localedef/libc/lmonetary.h new file mode 100644 index 0000000..4707311 --- /dev/null +++ b/localedef/libc/lmonetary.h @@ -0,0 +1,66 @@ +/*- + * Copyright (c) 2000, 2001 Alexey Zelkin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/lib/libc/locale/lmonetary.h,v 1.4 2002/10/09 09:19:28 tjr Exp $ + */ + +#ifndef _LMONETARY_H_ +#define _LMONETARY_H_ +#include "xlocale_private.h" + +struct lc_monetary_T { + const char *int_curr_symbol; + const char *currency_symbol; + const char *mon_decimal_point; + const char *mon_thousands_sep; + const char *mon_grouping; + const char *positive_sign; + const char *negative_sign; + const char *int_frac_digits; + const char *frac_digits; + const char *p_cs_precedes; + const char *p_sep_by_space; + const char *n_cs_precedes; + const char *n_sep_by_space; + const char *p_sign_posn; + const char *n_sign_posn; + const char *int_p_cs_precedes; + const char *int_n_cs_precedes; + const char *int_p_sep_by_space; + const char *int_n_sep_by_space; + const char *int_p_sign_posn; + const char *int_n_sign_posn; +}; + +struct xlocale_monetary { + struct xlocale_component header; + char *buffer; + struct lc_monetary_T locale; +}; + +struct lc_monetary_T *__get_current_monetary_locale(locale_t); +int __monetary_load_locale(const char *, locale_t); + +#endif /* !_LMONETARY_H_ */ diff --git a/localedef/libc/lnumeric.h b/localedef/libc/lnumeric.h new file mode 100644 index 0000000..29d449c --- /dev/null +++ b/localedef/libc/lnumeric.h @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2000, 2001 Alexey Zelkin + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/lib/libc/locale/lnumeric.h,v 1.3 2001/12/20 18:28:52 phantom Exp $ + */ + +#ifndef _LNUMERIC_H_ +#define _LNUMERIC_H_ + +#include + +#include "xlocale_private.h" + +struct lc_numeric_T { + const char *decimal_point; + const char *thousands_sep; + const char *grouping; +}; +struct xlocale_numeric { + struct xlocale_component header; + char *buffer; + struct lc_numeric_T locale; +}; + +struct lc_numeric_T *__get_current_numeric_locale(locale_t); +int __numeric_load_locale(const char *, locale_t); + +#endif /* !_LNUMERIC_H_ */ diff --git a/localedef/libc/runefile.h b/localedef/libc/runefile.h new file mode 100644 index 0000000..497cd5d --- /dev/null +++ b/localedef/libc/runefile.h @@ -0,0 +1,117 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2005 Ruslan Ermilov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _RUNEFILE_H_ +#define _RUNEFILE_H_ + +#include + +#ifndef _CACHED_RUNES +#define _CACHED_RUNES (1 << 8) +#endif + +typedef struct { + int32_t min; + int32_t max; + int32_t map; +} _FileRuneEntry; + +typedef struct { + char magic[8]; + char encoding[32]; + + uint32_t runetype[_CACHED_RUNES]; + int32_t maplower[_CACHED_RUNES]; + int32_t mapupper[_CACHED_RUNES]; + + int32_t runetype_ext_nranges; + int32_t maplower_ext_nranges; + int32_t mapupper_ext_nranges; + + int32_t variable_len; +#ifdef __APPLE__ + int32_t ncharclasses; +#endif +} _FileRuneLocale; + +#ifdef __APPLE__ +/* + * These versions accurately portray the old format, which tried to mimic the + * _RuneEntry/_RuneLocale structures in the on-disk format and thus, had some + * 32-bit pointers interspersed in interesting ways. + * + * The future versions, above, will be the existing FreeBSD way of laying it + * out, which just gets copied manually into a _RuneLocale rather than using + * some more clever techniques. 
+ */ +typedef struct { + int32_t min; + int32_t max; + int32_t map; + int32_t __types_fake; +} _FileRuneEntry_A; + +typedef struct { + char magic[8]; + char encoding[32]; + + int32_t __sgetrune_fake; + int32_t __sputrune_fake; + int32_t __invalid_rune; + + uint32_t runetype[_CACHED_RUNES]; + int32_t maplower[_CACHED_RUNES]; + int32_t mapupper[_CACHED_RUNES]; + + int32_t runetype_ext_nranges; + int32_t __runetype_ext_ranges_fake; + int32_t maplower_ext_nranges; + int32_t __maplower_ext_ranges_fake; + int32_t mapupper_ext_nranges; + int32_t __mapupper_ext_ranges_fake; + + int32_t __variable_fake; + int32_t variable_len; + + int32_t ncharclasses; + int32_t __charclasses_fake; +} _FileRuneLocale_A; + +typedef struct { + char name[14]; /* CHARCLASS_NAME_MAX = 14 */ + __uint32_t mask; /* charclass mask */ +} _FileRuneCharClass; + +#define _FILE_RUNE_MAGIC_A "RuneMagA" /* Indicates version A of RuneLocale */ +#define _FILE_RUNE_MAGIC_B "RuneMagB" /* Indicates version B of RuneLocale */ +#endif +#define _FILE_RUNE_MAGIC_1 "RuneMag1" + +#endif /* !_RUNEFILE_H_ */ diff --git a/localedef/libc/setlocale.h b/localedef/libc/setlocale.h new file mode 100644 index 0000000..fd002f2 --- /dev/null +++ b/localedef/libc/setlocale.h @@ -0,0 +1,43 @@ +/*- + * Copyright (C) 1997 by Andrey A. Chernov, Moscow, Russia. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/lib/libc/locale/setlocale.h,v 1.6 2003/07/06 02:03:37 ache Exp $ + */ + +#ifndef _SETLOCALE_H_ +#define _SETLOCALE_H_ + +#include + +#define ENCODING_LEN 31 +#define CATEGORY_LEN 11 + +extern char *_PathLocale; + +int __detect_path_locale(void); +int __wrap_setrunelocale(const char *, locale_t); +int __open_path_locale(const char *); + +#endif /* !_SETLOCALE_H_ */ diff --git a/localedef/libc/timelocal.h b/localedef/libc/timelocal.h new file mode 100644 index 0000000..02806e8 --- /dev/null +++ b/localedef/libc/timelocal.h @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 1997-2002 FreeBSD Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/lib/libc/stdtime/timelocal.h,v 1.11 2002/01/24 15:07:44 phantom Exp $ + */ + +#ifndef _TIMELOCAL_H_ +#define _TIMELOCAL_H_ + +#include + +/* + * Private header file for the strftime and strptime localization + * stuff. + */ +struct lc_time_T { + const char *mon[12]; + const char *month[12]; + const char *wday[7]; + const char *weekday[7]; + const char *X_fmt; + const char *x_fmt; + const char *c_fmt; + const char *am; + const char *pm; + const char *date_fmt; + const char *alt_month[12]; + const char *md_order; + const char *ampm_fmt; +}; + +struct lc_time_T *__get_current_time_locale(locale_t); +int __time_load_locale(const char *, locale_t); + +#endif /* !_TIMELOCAL_H_ */ diff --git a/localedef/libc/xlocale_private.h b/localedef/libc/xlocale_private.h new file mode 100644 index 0000000..3ed113a --- /dev/null +++ b/localedef/libc/xlocale_private.h @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2005, 2008 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _XLOCALE_PRIVATE_H_ +#define _XLOCALE_PRIVATE_H_ + +#include +#define __DARWIN_XLOCALE_PRIVATE +#include +#undef __DARWIN_XLOCALE_PRIVATE +#include +#include +#include +#include +#include +#ifdef __LIBC__ +#include +#endif +#include +#include +#include "setlocale.h" +#ifdef __LIBC__ +#include "timelocal.h" +#endif +#include + +#undef MB_CUR_MAX_L +#define MB_CUR_MAX_L(x) (XLOCALE_CTYPE(x)->__mb_cur_max) +#undef MB_CUR_MAX +#define MB_CUR_MAX MB_CUR_MAX_L(__current_locale()) + +typedef void (*__free_extra_t)(void *); + +#define XMAGIC 0x786c6f63616c6530LL /* 'xlocale0' */ + +/** + * The XLC_ values are indexes into the components array. They are defined in + * the same order as the LC_ values in locale.h, but without the LC_ALL zero + * value. Translating from LC_X to XLC_X is done by subtracting one. + * + * Any reordering of this enum should ensure that these invariants are not + * violated. 
+ */ +enum { + XLC_COLLATE = 0, + XLC_CTYPE, + XLC_MONETARY, + XLC_NUMERIC, + XLC_TIME, + XLC_MESSAGES, + XLC_LAST +}; + +_Static_assert(XLC_LAST - XLC_COLLATE == 6, "XLC values should be contiguous"); +_Static_assert(XLC_COLLATE == LC_COLLATE - 1, + "XLC_COLLATE doesn't match the LC_COLLATE value."); +_Static_assert(XLC_CTYPE == LC_CTYPE - 1, + "XLC_CTYPE doesn't match the LC_CTYPE value."); +_Static_assert(XLC_MONETARY == LC_MONETARY - 1, + "XLC_MONETARY doesn't match the LC_MONETARY value."); +_Static_assert(XLC_NUMERIC == LC_NUMERIC - 1, + "XLC_NUMERIC doesn't match the LC_NUMERIC value."); +_Static_assert(XLC_TIME == LC_TIME - 1, + "XLC_TIME doesn't match the LC_TIME value."); +_Static_assert(XLC_MESSAGES == LC_MESSAGES - 1, + "XLC_MESSAGES doesn't match the LC_MESSAGES value."); + +struct xlocale_refcounted { + /** Number of references to this component. */ + _Atomic long retain_count; + /** Function used to destroy this component, if one is required. */ + __free_extra_t destructor; +}; + +#define XLOCALE_DEF_VERSION_LEN 12 + +/** + * Header for a locale component. All locale components must begin with this + * header. + */ +struct xlocale_component { + struct xlocale_refcounted header; + /** Name of the locale used for this component. */ + char locale[ENCODING_LEN+1]; + /** Version of the definition for this component. */ + char version[XLOCALE_DEF_VERSION_LEN]; +}; + +struct xlocale_ldpart { + struct xlocale_component header; + char *buffer; +}; +/* + * the next four structures must have the first three fields of the same + * as the xlocale_ldpart structure above. 
+ */ +struct xlocale_messages; +struct xlocale_monetary; +struct xlocale_numeric; +struct xlocale_time; + +#define XLC_PART_MASKS ((1 << XLC_MESSAGES) | (1 << XLC_MONETARY) | \ + (1 << XLC_NUMERIC) | (1 << XLC_TIME)) + +/* the extended locale structure */ + /* values for __numeric_fp_cvt */ +#define LC_NUMERIC_FP_UNINITIALIZED 0 +#define LC_NUMERIC_FP_SAME_LOCALE 1 +#define LC_NUMERIC_FP_USE_LOCALE 2 + +struct _xlocale { +/* The item(s) before __magic are not copied when duplicating locale_t's */ + struct xlocale_refcounted header; + /* only used for locale_t's in __lc_numeric_loc */ + /* 10 independent mbstate_t buffers! */ + __darwin_mbstate_t __mbs_mblen; + __darwin_mbstate_t __mbs_mbrlen; + __darwin_mbstate_t __mbs_mbrtowc; + __darwin_mbstate_t __mbs_mbsnrtowcs; + __darwin_mbstate_t __mbs_mbsrtowcs; + __darwin_mbstate_t __mbs_mbtowc; + __darwin_mbstate_t __mbs_wcrtomb; + __darwin_mbstate_t __mbs_wcsnrtombs; + __darwin_mbstate_t __mbs_wcsrtombs; + __darwin_mbstate_t __mbs_wctomb; + os_unfair_lock __lock; +/* magic (Here up to the end is copied when duplicating locale_t's) */ + int64_t __magic; +/* flags */ + unsigned char _messages_using_locale; + unsigned char _monetary_using_locale; + unsigned char _numeric_using_locale; + unsigned char _time_using_locale; + unsigned char __mlocale_changed; + unsigned char __nlocale_changed; + unsigned char __numeric_fp_cvt; + struct xlocale_component *components[XLC_LAST]; + struct _xlocale *__lc_numeric_loc; +/* localeconv */ + struct lconv __lc_localeconv; +}; + +#define XLOCALE_COLLATE(l) \ + ((struct xlocale_collate *)(l)->components[XLC_COLLATE]) +#define XLOCALE_CTYPE(l) \ + ((struct xlocale_ctype *)(l)->components[XLC_CTYPE]) +#define XLOCALE_MONETARY(l) \ + ((struct xlocale_monetary *)(l)->components[XLC_MONETARY]) +#define XLOCALE_NUMERIC(l) \ + ((struct xlocale_numeric *)(l)->components[XLC_NUMERIC]) +#define XLOCALE_TIME(l) \ + ((struct xlocale_time *)(l)->components[XLC_TIME]) +#define XLOCALE_MESSAGES(l) \ + 
((struct xlocale_messages *)(l)->components[XLC_MESSAGES]) + +#define DEFAULT_CURRENT_LOCALE(x) \ + if ((x) == NULL) { \ + (x) = __current_locale(); \ + } else if ((x) == LC_GLOBAL_LOCALE) { \ + (x) = &__global_locale; \ + } + +#define NORMALIZE_LOCALE(x) if ((x) == LC_C_LOCALE) { \ + (x) = _c_locale; \ + } else if ((x) == LC_GLOBAL_LOCALE) { \ + (x) = &__global_locale; \ + } + +#define XL_LOCK(x) os_unfair_lock_lock(&(x)->__lock); +#define XL_UNLOCK(x) os_unfair_lock_unlock(&(x)->__lock); + +static __inline void* +xlocale_retain(void *val) +{ + struct xlocale_refcounted *obj = val; + + if (obj == NULL) + return (NULL); + + atomic_fetch_add(&obj->retain_count, 1); + + return (val); +} + +static __inline void +xlocale_release(void *val) +{ + struct xlocale_refcounted *obj = val; + long count; + + if (obj == NULL) + return; + + count = atomic_fetch_add(&obj->retain_count, -1) - 1; + if (count < 0 && obj->destructor != NULL) { + (*obj->destructor)(val); + } +} + +__attribute__((visibility("hidden"))) +extern struct xlocale_ctype _DefaultRuneXLocale; + +__attribute__((visibility("hidden"))) +extern struct _xlocale __global_locale; + +__attribute__((visibility("hidden"))) +extern pthread_key_t __locale_key; + +__BEGIN_DECLS + +void destruct_ldpart(void *); +locale_t __numeric_ctype(locale_t); +void __xlocale_init(void); + +#ifdef __LIBC__ +static inline __attribute__((always_inline)) locale_t +__current_locale(void) +{ +#if TARGET_OS_SIMULATOR + /* Crash in _objc_inform for duplicate class name during simulator launch + * TODO: Remove after the simulator's libSystem is initialized properly. + */ + if (__locale_key == (pthread_key_t)-1) { + return &__global_locale; + } +#endif + void *__thread_locale; + if (_pthread_has_direct_tsd()) { + __thread_locale = _pthread_getspecific_direct(__locale_key); + } else { + __thread_locale = pthread_getspecific(__locale_key); + } + return (__thread_locale ? 
(locale_t)__thread_locale : &__global_locale); +} + +static inline __attribute__((always_inline)) locale_t +__locale_ptr(locale_t __loc) +{ + NORMALIZE_LOCALE(__loc); + return __loc; +} +#endif + +__END_DECLS + +#endif /* _XLOCALE_PRIVATE_H_ */ diff --git a/localedef/localedef.1 b/localedef/localedef.1 index c8f3907..be37715 100644 --- a/localedef/localedef.1 +++ b/localedef/localedef.1 @@ -1,122 +1,278 @@ -.\"Modified from man(1) of FreeBSD, the NetBSD mdoc.template, and mdoc.samples. -.Dd September 9, 2004 +.\" Copyright (c) 1992, X/Open Company Limited All Rights Reserved +.\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved +.\" Portions Copyright 2013 DEY Storage Systems, Inc. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for +.\" permission to reproduce portions of its copyrighted documentation. +.\" Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, +.\" have given us permission to reprint portions of their documentation. In +.\" the following statement, the phrase "this text" refers to portions of the +.\" system documentation. Portions of this text are reprinted and reproduced +.\" in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, +.\" 2004 Edition, Standard for Information Technology -- Portable Operating +.\" System Interface (POSIX), The Open Group Base Specifications Issue 6, +.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics +.\" Engineers, Inc and The Open Group. In the event of any discrepancy between +.\" these versions and the original IEEE and The Open Group Standard, the +.\" original IEEE and The Open Group Standard is the referee document. The +.\" original Standard can be obtained online at +.\" http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. 
+.\" The contents of this file are subject to the terms of the Common +.\" Development and Distribution License (the "License"). You may not use +.\" this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or +.\" http://www.opensolaris.org/os/licensing. See the License for the specific +.\" language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and +.\" include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, +.\" add the following below this CDDL HEADER, with the fields enclosed by +.\" brackets "[]" replaced with your own identifying information: +.\" Portions Copyright [yyyy] [name of copyright owner] +.\" +.Dd June 29, 2023 .Dt LOCALEDEF 1 -.Os Darwin +.Os .Sh NAME .Nm localedef .Nd define locale environment .Sh SYNOPSIS .Nm -.Op Fl c +.Op Fl bcDlUv .Op Fl f Ar charmap .Op Fl i Ar sourcefile -.Ar name -." localedef [-c][-f charmap][-i sourcefile] name +.Op Fl u Ar codeset +.Op Fl w Ar widthfile +.Ar localename .Sh DESCRIPTION -The +The .Nm -utility reads source definitions for one or more locale categories +utility converts source definitions for locale categories +into a format usable by the functions and utilities whose operational behavior +is determined by the setting of the locale environment variables; see +.Xr environ 7 . +.Pp +The utility reads source definitions for one or more locale categories belonging to the same locale from the file named in the .Fl i option (if specified) or from standard input. .Pp -The -.Ar name -operand identifies the target locale. The -.Nm -utility supports -the creation of public, or generally accessible locales, as well -as private, or restricted-access locales. 
-.Pp -Each category source definition is identified by the corresponding -environment variable name and terminated by an END category-name +Each category source definition is identified by the corresponding environment +variable name and terminated by an +.Sy END +.Em category-name statement. -.Pp -.Bl -tag -width "LC_MONETARY" -compact -offset indent -.It LC_CTYPE +The following categories are supported: +.Bl -tag -width ".Ev LC_MONETARY" +.It Ev LC_CTYPE Defines character classification and case conversion. -.It LC_COLLATE +.It Ev LC_COLLATE Defines collation rules. -.It LC_MONETARY +.It Ev LC_MONETARY Defines the format and symbols used in formatting of monetary information. -.It LC_NUMERIC -Defines the decimal delimiter, grouping, and grouping symbol for non-monetary numeric editing. -.It LC_TIME +.It Ev LC_NUMERIC +Defines the decimal delimiter, grouping and grouping symbol for non-monetary +numeric editing. +.It Ev LC_TIME Defines the format and content of date and time information. -.It LC_MESSAGES +.It Ev LC_MESSAGES Defines the format and values of affirmative and negative responses. .El -.Sh OPTIONS -The following options are supported: .Pp -.Bl -tag -width -indent +The following options are supported: +.Bl -tag -width indent +.It Fl b +Use big-endian byte order for output. .It Fl c -Create permanent output even if warning messages have been issued. +Creates permanent output even if warning messages have been issued. +.It Fl D +BSD-style +output. +Rather than the default of creating the +.Ar localename +directory and creating files like +.Pa LC_CTYPE , +.Pa LC_COLLATE , +etc.\& in that directory, +the output file names have the format +.Dq <localename>.<category> +and are dumped to the current directory. .It Fl f Ar charmap -Specify the pathname of a file containing a mapping of character symbols and collating element symbols to actual character encodings. 
+Specifies the pathname of a file containing a mapping of character symbols and +collating element symbols to actual character encodings. +This option must be specified if symbolic names (other than collating symbols +defined in a +.Sy collating-symbol +keyword) are used. +If the +.Fl f +option is not present, the default character mapping will be used. .It Fl i Ar sourcefile -The pathname of a file containing the source definitions. If this option is not present, source definitions will be read from standard input. +The path name of a file containing the source definitions. +If this option is not present, source definitions will be read from +standard input. +.It Fl l +Use little-endian byte order for output. +.It Fl u Ar codeset +Specifies the name of a codeset used as the target mapping of character symbols +and collating element symbols whose encoding values are defined in terms of the +ISO/IEC 10646-1:2000 standard position constant values. +See +.Sx NOTES . +.It Fl U +Ignore the presence of character symbols that have no matching character +definition. +This facilitates the use of a common locale definition file to be used across multiple +encodings, even when some symbols are not present in a given encoding. +.It Fl v +Emit verbose debugging output on standard output. +.It Fl w Ar widthfile +The path name of the file containing character screen width definitions. +If not supplied, then default screen widths will be assumed, which will +generally not account for East Asian encodings requiring more than a single +character cell to display, nor for combining or accent marks that occupy +no additional screen width. +.It Fl V Ar version +Specifies a version string describing the version of the locale definition. +This string can be retrieved with +.Xr querylocale 3 , +and is intended to allow applications to detect locale definition changes. +Currently it is stored only for the +.Sy LC_COLLATE +category. 
.El -.Sh OPERANDS -The following operand is supported: -.Bl -tag -width -indent -.It Ar name +.Pp +The following operands are required: +.Bl -tag -width ".Ar localename" +.It Ar localename Identifies the locale. If the name contains one or more slash characters, -.Ar name -will be interpreted as a pathname -where the created locale definitions will be stored. -If -.Ar name -does not contain any slash characters, -the locale will be public. -This capability is restricted to users with appropriate privileges. -(As a consequence of specifying one name, -although several categories can be processed in one execution, -only categories belonging to the same locale can be processed.) +.Ar localename +will be interpreted as a path name where the created locale +definitions will be stored. +This capability may be restricted to users with appropriate privileges. +(As a consequence of specifying one +.Ar localename , +although several categories can be processed in one execution, only categories +belonging to the same locale can be processed.) .El +.Sh OUTPUT +.Nm +creates a directory of files that represents the locale's data, +unless instructed otherwise by the +.Fl D ( +BSD +output) option. +The contents of this directory should generally be copied into the +appropriate subdirectory of +.Pa /usr/share/locale +in order for the definitions to be visible to programs linked with libc. .Sh ENVIRONMENT -The following environment variables affect the execution of +See +.Xr environ 7 +for definitions of the following environment variables that affect the +execution of .Nm : -.Bl -tag -width "LC_COLLATE" -.It Ev LANG -Provide a default value for the internationalization variables -that are unset or null. -If LANG is unset or null, -the corresponding value from the implementation-dependent default locale -will be used. -If any of the internationalization variables contains an invalid setting, -the utility will behave as if none of the variables had been defined.
-.It Ev LC_ALL -If set to a non-empty string value, override the values of all the other internationalization variables. -.It Ev LC_COLLATE -(This variable has no effect on -.Nm ; -the POSIX locale will be used for this category.) -.It Ev LC_CTYPE -Determine the locale for the interpretation of sequences of bytes -of text data as characters -(for example, single- as opposed to multi-byte characters -in arguments and input files). -This variable has no effect on the processing of -.Nm -input data; -the POSIX locale is used for this purpose, -regardless of the value of this variable. -.It Ev LC_MESSAGES -Determine the locale that should be used to affect the format and contents of diagnostic messages written to standard error. -.It Ev NLSPATH -Determine the location of message catalogues for the processing of LC_MESSAGES. -.El +.Ev LANG , +.Ev LC_ALL , +.Ev LC_COLLATE , +.Ev LC_CTYPE , +.Ev LC_MESSAGES , +.Ev LC_MONETARY , +.Ev LC_NUMERIC , +.Ev LC_TIME , +and +.Ev NLSPATH . .Sh EXIT STATUS The following exit values are returned: -.Bl -tag -width -indent +.Bl -tag -width XX .It 0 No errors occurred and the locales were successfully created. .It 1 Warnings occurred and the locales were successfully created. .It 2 -The locale specification exceeded implementation limits or the coded character set or sets used were not supported by the implementation, and no locale was created. -.It >2 +The locale specification exceeded implementation limits or the coded character +set or sets used were not supported by the implementation, and no locale was +created. +.It >3 Warnings or errors occurred and no output was created. .El +.Pp +If an error is detected, no permanent output will be created. +.Sh SEE ALSO +.Xr locale 1 , +.Xr iconv_open 3 , +.Xr nl_langinfo 3 , +.Xr querylocale 3 , +.Xr strftime 3 , +.Xr environ 7 +.Sh WARNINGS +If warnings occur, permanent output will be created if the +.Fl c +option was specified.
+The following conditions will cause warning messages to be issued: +.Bl -bullet +.It +If a symbolic name not found in the +.Pa charmap +file is used for the descriptions of the +.Sy LC_CTYPE +or +.Sy LC_COLLATE +categories (for other categories, this will be an error condition). +.It +If optional keywords not supported by the implementation are present in the +source. +.El +.Sh NOTES +When the +.Fl u +option is used, the +.Ar codeset +option-argument is interpreted as a name of a codeset to which the +ISO/IEC 10646-1:2000 standard position constant values are converted. +Both the ISO/IEC 10646-1:2000 standard position constant values and +other formats (decimal, hexadecimal, or octal) are valid as encoding +values within the charmap file. +The +.Ar codeset +can be any codeset that is supported by the +.Fn iconv_open 3 +function. +.Pp +When conflicts occur between the charmap specification of +.Ar codeset , +.Em mb_cur_max , +or +.Em mb_cur_min +and the corresponding value for the codeset represented by the +.Fl u +option-argument +.Ar codeset , +the +.Nm +utility fails with an error. +.Pp +When conflicts occur between the charmap encoding values specified for symbolic +names of characters of the portable character set and the character encoding +values defined by the US-ASCII, the result is unspecified. +.Sh HISTORY +.Nm +first appeared in +.Fx 11 . +.Pp +It was written by +.An Garrett D'Amore +.Aq Mt garrett@nexenta.com +for illumos. +.An John Marino +.Aq Mt draco@marino.st +provided the alterations necessary to compile cleanly on +.Dx . +.An Baptiste Daroussin +.Aq Mt bapt@FreeBSD.org +ported it to +.Fx +and converted it to +.Xr tree 3 . diff --git a/localedef/localedef.c b/localedef/localedef.c new file mode 100644 index 0000000..947ac91 --- /dev/null +++ b/localedef/localedef.c @@ -0,0 +1,731 @@ +/*- + * Copyright 2018 Nexenta Systems, Inc.
+ * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * POSIX localedef. 
+ */ +#include + +#ifdef __APPLE__ +#include +#include +#else +#include +#endif +#include +#include + +#ifdef __APPLE__ +#include + +#include +#include +#include +#include /* _PATH_LOCALE */ +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "collate.h" +#include "localedef.h" +#include "parser.h" + +#ifndef TEXT_DOMAIN +#define TEXT_DOMAIN "SYS_TEST" +#endif + +#ifdef __APPLE__ +#include + +#define htobe32(x) OSSwapHostToBigInt32(x) +#define htole32(x) OSSwapHostToLittleInt32(x) +#endif + +#ifdef __APPLE__ +int bsd = 0; +#else +static int bsd = 0; +#endif +static int byteorder = 0; +int verbose = 0; +int undefok = 0; +int warnok = 0; +static char *locname = NULL; +#ifdef __APPLE__ +static char rootpath[PATH_MAX]; +#endif +static char locpath[PATH_MAX]; +char *version = NULL; + +const char * +category_name(void) +{ + switch (get_category()) { + case T_CHARMAP: + return ("CHARMAP"); + case T_WIDTH: + return ("WIDTH"); + case T_COLLATE: + return ("LC_COLLATE"); + case T_CTYPE: + return ("LC_CTYPE"); + case T_MESSAGES: + return ("LC_MESSAGES"); + case T_MONETARY: + return ("LC_MONETARY"); + case T_NUMERIC: + return ("LC_NUMERIC"); + case T_TIME: + return ("LC_TIME"); + default: + INTERR; + return (NULL); + } +} + +static char * +category_file(void) +{ + if (bsd) + (void) snprintf(locpath, sizeof (locpath), "%s.%s", + locname, category_name()); + else +#ifdef __APPLE__ + { + const char *catname = category_name(); + + /* + * macOS hasn't switched to accepting the immediate LC_MESSAGES + * as a file, so continue creating it as a directory. 
+ */ + if (strcmp(catname, "LC_MESSAGES") == 0) { + (void) snprintf(locpath, sizeof (locpath), + "%s%s/LC_MESSAGES/%s", rootpath, locname, catname); + } else { + (void) snprintf(locpath, sizeof (locpath), "%s%s/%s", + rootpath, locname, catname); + } + } +#else + (void) snprintf(locpath, sizeof (locpath), "%s/%s", + locname, category_name()); +#endif + return (locpath); +} + +FILE * +open_category(void) +{ + FILE *file; + + if (verbose) { + (void) printf("Writing category %s: ", category_name()); + (void) fflush(stdout); + } + + /* make the parent directory */ + if (!bsd) + (void) mkdir(dirname(category_file()), 0755); + + /* + * note that we have to regenerate the file name, as dirname + * clobbered it. + */ + file = fopen(category_file(), "w"); + if (file == NULL) { + errf("%s", strerror(errno)); + return (NULL); + } + return (file); +} + +void +close_category(FILE *f) +{ +#ifdef __APPLE__ + int serrno; +#endif + if (fchmod(fileno(f), 0644) < 0) { +#ifdef __APPLE__ + serrno = errno; +#endif + (void) fclose(f); + (void) unlink(category_file()); +#ifdef __APPLE__ + errf("%s", strerror(serrno)); +#else + errf("%s", strerror(errno)); +#endif + } + if (fclose(f) < 0) { +#ifdef __APPLE__ + serrno = errno; +#endif + (void) unlink(category_file()); +#ifdef __APPLE__ + errf("%s", strerror(serrno)); +#else + errf("%s", strerror(errno)); +#endif + } + if (verbose) { + (void) fprintf(stdout, "done.\n"); + (void) fflush(stdout); + } +} + +#ifdef __APPLE__ +static const char *kw_noquotes[] = { + "grouping", + "mon_grouping", +}; + +static void +inject_category_line(char *line, int linelen) +{ + const char *check_kw; + char *kwend; + + kwend = strchr(line, ' '); + if (kwend == NULL) + goto enqueue; + + *kwend = '\0'; + for (size_t i = 0; i < nitems(kw_noquotes); i++) { + check_kw = kw_noquotes[i]; + + if (strcmp(line, check_kw) == 0) { + /* line is still NUL terminated after linelen */ + char *cp = kwend + 1, *ep = &line[linelen - 1]; + + while (isspace(*cp) && cp < ep) + cp++;
+ + if (*cp == '"') + *cp = ' '; + + cp = ep; + while (isspace(*cp) && cp > kwend) + cp--; + + if (*cp == '"') + *cp = ' '; + } + } + +enqueue: + /* + * The above may have transformed our space into a nul byte for quick + * and dirty comparisons; send it back. + */ + if (kwend != NULL) + *kwend = ' '; + scan_enqueue(line, linelen); +} + +static int +inject_category_exec(char * const * argv, char * const * envp) +{ + FILE *rpipe; + char *line; + posix_spawn_file_actions_t fa; + posix_spawnattr_t sa; + size_t linecap; + ssize_t linelen; + pid_t pid, wpid; + int pfd[2], rv, status; + + rpipe = NULL; + pfd[0] = pfd[1] = -1; + pid = wpid = -1; + + if (pipe(pfd) == -1) + return (errno); + + fa = NULL; + sa = NULL; + if ((rv = posix_spawn_file_actions_init(&fa)) != 0) + goto cleanup; + + if ((rv = posix_spawn_file_actions_adddup2(&fa, pfd[1], + STDOUT_FILENO)) != 0) + goto cleanup; + + if ((rv = posix_spawn_file_actions_addopen(&fa, STDIN_FILENO, + _PATH_DEVNULL, O_RDWR, 0644)) != 0) + goto cleanup; + + if ((rv = posix_spawn_file_actions_adddup2(&fa, STDIN_FILENO, + STDERR_FILENO)) != 0) + goto cleanup; + + if ((rv = posix_spawnattr_init(&sa)) != 0) + goto cleanup; + + if ((rv = posix_spawnattr_setflags(&sa, POSIX_SPAWN_CLOEXEC_DEFAULT)) != + 0) + goto cleanup; + + rv = posix_spawn(&pid, "/usr/bin/locale", &fa, &sa, argv, envp); + +cleanup: + /* First, close the write side */ + close(pfd[1]); + pfd[1] = -1; + /* Next, clean up attrs */ + if (sa != NULL) + posix_spawnattr_destroy(&sa); + if (fa != NULL) + posix_spawn_file_actions_destroy(&fa); + /* Finally, propagate the error. 
*/ + if (rv != 0) + goto out; + + /* Parent */ + if ((rpipe = fdopen(pfd[0], "r")) == NULL) { + rv = errno; + goto out; + } + + pfd[0] = -1; /* Now owned by rpipe */ + + line = NULL; + linecap = 0; + + while ((linelen = getline(&line, &linecap, rpipe)) > 0) { + char *ep = &line[linelen - 1]; + + for (char *sp = line; sp < ep; sp++) { + if (*sp == '=') + *sp = ' '; + } + + inject_category_line(line, linelen); + } + + free(line); +out: + if (pfd[0] >= 0) + close(pfd[0]); + if (rpipe != NULL) + fclose(rpipe); + + if (pid >= 0) { + /* Reap the process on our way out. */ + while ((wpid = waitpid(pid, &status, 0)) == -1 && + errno == EINTR) { + /* Re-enter */ + } + + if (wpid == -1) + rv = errno; + } + + return (rv); +} + +/* Inject the category from `src` locale to us. */ +static int +inject_category(char *src) +{ + char *envvar; + char *locale_argv[4] = { "locale", "-k", NULL, NULL }; + char *locale_envp[2] = { NULL, NULL }; + FILE *localef; + int rv; + + /* XXX Need to check that we only allow the usual keywords in this case. */ + + rv = EINVAL; + envvar = NULL; + + /* + * Just set LC_ALL in the exec'ed environment, since locale(1) will need + * to call setlocale(3) anyways; no sense affecting the current process. + */ + if (asprintf(&envvar, "LC_ALL=%s", src) <= 0) + return (ENOMEM); + locale_envp[0] = envvar; + + locale_argv[2] = category_name(); + + rv = inject_category_exec(locale_argv, locale_envp); + free(envvar); + return (rv); +} +#endif + +/* + * This function is used when copying the category from another + * locale. Note that the copy is actually performed using a hard + * link for efficiency. 
+ */ +void +copy_category(char *src) +{ + char srcpath[PATH_MAX]; + int rv; + + (void) snprintf(srcpath, sizeof (srcpath), "%s/%s", + src, category_name()); + rv = access(srcpath, R_OK); + if ((rv != 0) && (strchr(srcpath, '/') == NULL)) { + /* Maybe we should try the system locale */ +#ifdef __APPLE__ + (void) snprintf(srcpath, sizeof (srcpath), + "%s/%s/%s", _PATH_LOCALE, src, category_name()); +#else + (void) snprintf(srcpath, sizeof (srcpath), + "/usr/lib/locale/%s/%s", src, category_name()); +#endif + rv = access(srcpath, R_OK); + } + + if (rv != 0) { +#ifdef __APPLE__ + bool exists; + + exists = false; + if (strcmp(src, "C") == 0 || strcmp(src, "POSIX") == 0 || + strncmp(src, "C.", 2) == 0) { + exists = true; + } else { + (void) snprintf(srcpath, sizeof (srcpath), + "%s/%s", _PATH_LOCALE, src); + + exists = access(srcpath, R_OK) == 0; + } + + if (exists) { + /* + * The locale exists, it's simply missing a component. + * We'll shell out to locale(1) to get the definition to + * inject. 
+ */ + + if (inject_category(src) == 0) + return; + } + +#endif + fprintf(stderr,"source locale data unavailable: %s\n", src); + return; + } + + if (verbose > 1) { + (void) printf("Copying category %s from %s: ", + category_name(), src); + (void) fflush(stdout); + } + + /* make the parent directory */ + if (!bsd) + (void) mkdir(dirname(category_file()), 0755); + + if (link(srcpath, category_file()) != 0) { + fprintf(stderr,"unable to copy locale data: %s\n", + strerror(errno)); + return; + } + if (verbose > 1) { + (void) printf("done.\n"); + } +} + +int +putl_category(const char *s, FILE *f) +{ +#ifdef __APPLE__ + int serrno; +#endif + if (s && fputs(s, f) == EOF) { +#ifdef __APPLE__ + serrno = errno; +#endif + (void) fclose(f); + (void) unlink(category_file()); +#ifdef __APPLE__ + errf("%s", strerror(serrno)); +#else + errf("%s", strerror(errno)); +#endif + return (EOF); + } + if (fputc('\n', f) == EOF) { +#ifdef __APPLE__ + serrno = errno; +#endif + (void) fclose(f); + (void) unlink(category_file()); +#ifdef __APPLE__ + errf("%s", strerror(serrno)); +#else + errf("%s", strerror(errno)); +#endif + return (EOF); + } + return (0); +} + +int +wr_category(void *buf, size_t sz, FILE *f) +{ + if (!sz) { + return (0); + } + if (fwrite(buf, sz, 1, f) < 1) { +#ifdef __APPLE__ + int serrno = errno; +#endif + (void) fclose(f); + (void) unlink(category_file()); +#ifdef __APPLE__ + errf("%s", strerror(serrno)); +#else + errf("%s", strerror(errno)); +#endif + return (EOF); + } + return (0); +} + +uint32_t +htote(uint32_t arg) +{ + + if (byteorder == 4321) + return (htobe32(arg)); + else if (byteorder == 1234) + return (htole32(arg)); + else + return (arg); +} + +int yyparse(void); + +static void +usage(void) +{ + (void) fprintf(stderr, "Usage: localedef [options] localename\n"); + (void) fprintf(stderr, "[options] are:\n"); + (void) fprintf(stderr, " -D : BSD-style output\n"); + (void) fprintf(stderr, " -b : big-endian output\n"); + (void) fprintf(stderr, " -c : ignore 
warnings\n"); + (void) fprintf(stderr, " -l : little-endian output\n"); + (void) fprintf(stderr, " -v : verbose output\n"); + (void) fprintf(stderr, " -U : ignore undefined symbols\n"); + (void) fprintf(stderr, " -f charmap : use given charmap file\n"); + (void) fprintf(stderr, " -u encoding : assume encoding\n"); + (void) fprintf(stderr, " -w widths : use screen widths file\n"); + (void) fprintf(stderr, " -i locsrc : source file for locale\n"); + (void) fprintf(stderr, " -V version : version string for locale\n"); + exit(4); +} + +int +main(int argc, char **argv) +{ + int c; + char *lfname = NULL; + char *cfname = NULL; + char *wfname = NULL; + DIR *dir; + + init_charmap(); + init_collate(); + init_ctype(); + init_messages(); + init_monetary(); + init_numeric(); + init_time(); + +#if YYDEBUG + yydebug = 0; +#endif + + (void) setlocale(LC_ALL, ""); + + while ((c = getopt(argc, argv, "blw:i:cf:u:vUDV:")) != -1) { + switch (c) { + case 'D': + bsd = 1; + break; + case 'b': + case 'l': + if (byteorder != 0) + usage(); + byteorder = c == 'b' ? 
4321 : 1234; + break; + case 'v': + verbose++; + break; + case 'i': + lfname = optarg; + break; + case 'u': + set_wide_encoding(optarg); + break; + case 'f': + cfname = optarg; + break; + case 'U': + undefok++; + break; + case 'c': + warnok++; + break; + case 'w': + wfname = optarg; + break; + case '?': + usage(); + break; + case 'V': + version = optarg; + break; + } + } + + if ((argc - 1) != (optind)) { + usage(); + } + locname = argv[argc - 1]; + if (verbose) { + (void) printf("Processing locale %s.\n", locname); + } + + if (version && strlen(version) >= XLOCALE_DEF_VERSION_LEN) { + (void) fprintf(stderr, "Version string too long.\n"); + exit(1); + } + + if (cfname) { + if (verbose) + (void) printf("Loading charmap %s.\n", cfname); + reset_scanner(cfname); + (void) yyparse(); + } + + if (wfname) { + if (verbose) + (void) printf("Loading widths %s.\n", wfname); + reset_scanner(wfname); + (void) yyparse(); + } + + if (verbose) { + (void) printf("Loading POSIX portable characters.\n"); + } + add_charmap_posix(); + + if (lfname) { + reset_scanner(lfname); + } else { + reset_scanner(NULL); + } + + /* make the directory for the locale if not already present */ + if (!bsd) { +#ifdef __APPLE__ + char *cp; + + /* + * If there's not a slash in the locale name, it must be + * interpreted as a public locale. We'll leave rootpath alone + * if locname was specified as a path, otherwise it'll be + * _PATH_LOCALE or some such path. + */ + cp = strchr(locname, '/'); + if (cp == NULL) { + size_t sz; + + sz = snprintf(rootpath, sizeof(rootpath), "%s/", + _PATH_LOCALE); + + /* _PATH_LOCALE *must* fit within the limits. 
*/ + assert(sz < sizeof(rootpath)); +#ifdef NDEBUG + (void)sz; +#endif + + if (snprintf(locpath, sizeof(locpath), "%s/%s", + rootpath, locname) >= sizeof(locpath)) + errf("locale name too long: %s", locname); + + while ((dir = opendir(locpath)) == NULL) { + if ((errno != ENOENT) || + (mkdir(locpath, 0755) < 0)) { + errf("%s", strerror(errno)); + } + } + + goto created; + } +#endif + + while ((dir = opendir(locname)) == NULL) { + if ((errno != ENOENT) || + (mkdir(locname, 0755) < 0)) { + errf("%s", strerror(errno)); + } + } +#ifdef __APPLE__ +created: +#endif + (void) closedir(dir); + +#ifndef __APPLE__ + /* + * It's unclear what this mkdir(dirname(category_file)) + * below aims to do; the category doesn't seem like it should be + * set yet, and we created the locale dir above it. + */ + (void) mkdir(dirname(category_file()), 0755); +#endif + } + + (void) yyparse(); +#ifdef __APPLE__ + scan_done(); +#endif + if (verbose) { + (void) printf("All done.\n"); + } + return (warnings ? 1 : 0); +} diff --git a/localedef/localedef.h b/localedef/localedef.h new file mode 100644 index 0000000..332363a --- /dev/null +++ b/localedef/localedef.h @@ -0,0 +1,203 @@ +/*- + * Copyright 2018 Nexenta Systems, Inc. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * POSIX localedef. + */ + +/* Common header files. */ +#include +#include +#include +#include +#include + +#ifdef __APPLE__ +/* sys/param.h */ +#ifndef nitems +#define nitems(x) (sizeof((x)) / sizeof((x)[0])) +#endif +#endif /* __APPLE__ */ + +extern int com_char; +extern int esc_char; +extern int mb_cur_max; +extern int mb_cur_min; +extern int last_kw; +#ifdef __APPLE__ +extern int bsd; +#endif +extern int verbose; +#if YYDEBUG +extern int yydebug; +#endif +extern int lineno; +extern int undefok; /* mostly ignore undefined symbols */ +extern int warnok; +extern int warnings; +#ifdef __APPLE__ +extern int ctype_dumped; +#endif + +extern char *version; + +int yylex(void); +void yyerror(const char *); +_Noreturn void errf(const char *, ...) __printflike(1, 2); +void warn(const char *, ...) 
__printflike(1, 2); + +int putl_category(const char *, FILE *); +int wr_category(void *, size_t, FILE *); +FILE *open_category(void); +void close_category(FILE *); +void copy_category(char *); +const char *category_name(void); + +int get_category(void); +int get_symbol(void); +int get_escaped(int); +int get_wide(void); +void reset_scanner(const char *); +void scan_to_eol(void); +#ifdef __APPLE__ +void scan_done(void); +void scan_enqueue(const char *, size_t); +#endif +void add_wcs(wchar_t); +void add_tok(int); +wchar_t *get_wcs(void); + +uint32_t htote(uint32_t); + +/* charmap.c - CHARMAP handling */ +void init_charmap(void); +void add_charmap(const char *, int); +void add_charmap_undefined(char *); +void add_charmap_posix(void); +void add_charmap_range(char *, char *, int); +void add_charmap_char(const char *name, int val); +int lookup_charmap(const char *, wchar_t *); +int check_charmap_undefined(char *); +int check_charmap(wchar_t); + +/* collate.o - LC_COLLATE handling */ +typedef struct collelem collelem_t; +typedef struct collsym collsym_t; +void init_collate(void); +void define_collsym(char *); +void define_collelem(char *, wchar_t *); +void add_order_directive(void); +void add_order_bit(int); +void dump_collate(void); +collsym_t *lookup_collsym(char *); +collelem_t *lookup_collelem(char *); +void start_order_collelem(collelem_t *); +void start_order_undefined(void); +void start_order_symbol(char *); +void start_order_char(wchar_t); +void start_order_ellipsis(void); +void end_order_collsym(collsym_t *); +void end_order(void); +void add_weight(int32_t, int); +void add_weights(int32_t *); +void add_weight_num(int); +void add_order_collelem(collelem_t *); +void add_order_collsym(collsym_t *); +void add_order_char(wchar_t); +void add_order_ignore(void); +void add_order_ellipsis(void); +void add_order_symbol(char *); +void add_order_subst(void); +void add_subst_char(wchar_t); +void add_subst_collsym(collsym_t *); +void add_subst_collelem(collelem_t *); +void 
add_subst_symbol(char *); +int32_t get_weight(int32_t, int); +wchar_t * wsncpy(wchar_t *, const wchar_t *, size_t); + + +/* ctype.c - LC_CTYPE handling */ +void init_ctype(void); +void add_ctype(int); +void add_ctype_range(wchar_t); +void add_width(int, int); +void add_width_range(int, int, int); +void add_caseconv(int, int); +void dump_ctype(void); + +/* messages.c - LC_MESSAGES handling */ +void init_messages(void); +void add_message(wchar_t *); +void dump_messages(void); + +/* monetary.c - LC_MONETARY handling */ +void init_monetary(void); +void add_monetary_str(wchar_t *); +void add_monetary_num(int); +void reset_monetary_group(void); +void add_monetary_group(int); +void dump_monetary(void); + +/* numeric.c - LC_NUMERIC handling */ +void init_numeric(void); +void add_numeric_str(wchar_t *); +void reset_numeric_group(void); +void add_numeric_group(int); +void dump_numeric(void); + +/* time.c - LC_TIME handling */ +void init_time(void); +void add_time_str(wchar_t *); +void reset_time_list(void); +void add_time_list(wchar_t *); +void check_time_list(void); +void dump_time(void); + +/* wide.c - Wide character handling. */ +int to_wide(wchar_t *, const char *); +int to_mbs(char *, wchar_t); +int to_mb(char *, wchar_t); +char *to_mb_string(const wchar_t *); +void set_wide_encoding(const char *); +#ifdef __APPLE__ +void werr(const char *, ...) __printflike(1, 2); +#else +void werr(const char *, ...); +#endif +const char *get_wide_encoding(void); +int max_wide(void); + +//#define _(x) gettext(x) +#ifdef __APPLE__ +/* Match Illumos' port and make internal faults fatal. 
*/ +#define INTERR errf("internal fault (%s:%d)\n", __FILE__, __LINE__) +#else +#define INTERR fprintf(stderr,"internal fault (%s:%d)\n", __FILE__, __LINE__) +#endif diff --git a/localedef/localedef.pl b/localedef/localedef.pl deleted file mode 100644 index 222eda0..0000000 --- a/localedef/localedef.pl +++ /dev/null @@ -1,1166 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use Getopt::Std; -use Fcntl qw(O_TRUNC O_CREAT O_WRONLY SEEK_SET); -use File::Temp qw(tempfile); -use IO::File; - -my %opt; -getopts("cf:u:i:", \%opt); - -my $comment_char = "#"; -my $escape_char = "\\"; -my $val_match = undef; # set in set_escape -my %sym = (); -my %width = (); -my %ctype_classes = ( - # there are the charactors that get automagically included, there is no - # standard way to avoid them. XXX even if you have a charset without - # some of these charactors defined! - - # They are accessable in a regex via [:classname:], and libc has a - # isX() for most of these. - upper => {map { ($_, 1); } qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)}, - lower => {map { ($_, 1); } qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)}, - alpha => {}, - #alnum => {}, - digit => {map { ($_, 1); } qw(0 1 2 3 4 5 6 7 8 9)}, - space => {}, - cntrl => {}, - punct => {}, - graph => {}, - print => {}, - xdigit => {map { ($_, 1); } qw(0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f)}, - blank => {" " => 1, "\t" => 1}, - - toupper => {map { ($_, "\U$_"); } qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)}, - tolower => {map { ($_, "\L$_"); } qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)}, -); - -my %cele = ( - # collating-elements -- these are a lot like %sym that only works - # in LC_COLLATE, can also be accessed in a regex via [.element.] 
-); - -my %csym = ( - # collating-symbols -- these are used to define a set of charactors - # that compare as equals (in one or more passes), can also be accessed - # in a regex via [=symbol=] -); - -my @corder = (); # collating order -my @corder_weights = (); # collating directions (forward, backward, position) - -my @colldef = (); - -my(%monetary, %numeric, %time, %messages); - -# This is the default charmap, unlike %ctype_classes you _can_ avoid this -# merely by having your own charmap definition file -my $default_charmap = < \\000 - \\007 - \\010 - \\011 - \\012 - \\013 - \\014 - \\015 - \\040 - \\041 - \\042 - \\043 - \\044 - \\045 - \\046 - \\047 - \\050 - \\051 - \\052 - \\053 - \\054 - \\055 - \\055 - \\056 - \\056 - \\057 - \\057 - \\060 - \\061 - \\062 - \\063 - \\064 - \\065 - \\066 - \\067 - \\070 - \\071 - \\072 - \\073 - \\074 - \\074 - \\075 - \\076 - \\076 - \\077 - \\100 - \\101 - \\102 - \\103 - \\104 - \\105 - \\106 - \\107 - \\110 - \\111 - \\112 - \\113 - \\114 - \\115 - \\116 - \\117 -

\\120 - \\121 - \\122 - \\123 - \\124 - \\125 - \\126 - \\127 - \\130 - \\131 - \\132 - \\133 - \\134 - \\134 - \\135 - \\136 - \\136 - \\137 - \\137 - \\137 - \\140 - \\141 - \\142 - \\143 - \\144 - \\145 - \\146 - \\147 - \\150 - \\151 - \\152 - \\153 - \\154 - \\155 - \\156 - \\157 -

\\160 - \\161 - \\162 - \\163 - \\164 - \\165 - \\166 - \\167 - \\170 - \\171 - \\172 - \\173 - \\173 - \\174 - \\175 - \\175 - \\176 - \\177 - - \\x01 - \\x02 - \\x03 - \\x04 - \\x05 - \\x06 - \\x07 - \\x08 - \\x09 - \\x0a - \\x0b - \\x0c - \\x0d - \\x0e - \\x0f - \\x10 - \\x11 - \\x12 - \\x13 - \\x14 - \\x15 - \\x16 - \\x17 - \\x18 - \\x19 - \\x1a - \\x1b - \\x1c - \\x1c - \\x1d - \\x1d - \\x1e - \\x1e - \\x1f - \\x1f -END CHARMAP -EOT - -&set_escape($escape_char); - -use strict qw(vars); - -if (@ARGV != 1) { - &exit(4, "usage: $0 [-c] [-f charmap-file] [-u codesetname] [-i localdef-file] LOCALENAME\n"); -} - -my $locale_dir = $ARGV[0]; -$locale_dir = "/usr/share/locale/$locale_dir" unless ($locale_dir =~ m{/}); - -my $CMAP; -if (defined($opt{'f'})) { - # Using new IO::File $opt{'f'}, "r" runs into problems with long path names - sysopen(CMAP_KLUDGE, $opt{'f'}, O_RDONLY) || &exit(4, "Can't open $opt{f}: $!\n"); - $CMAP = new IO::Handle; - $CMAP->fdopen(fileno(CMAP_KLUDGE), "r") || &exit(4, "Can't fdopen $opt{f}: $!\n"); -} else { - # er, not everyone gets IO::Scalar, so use an unamed tmp file - # $CMAP = new IO::Scalar \$default_charmap; - $CMAP = new_tmpfile IO::File; - print $CMAP $default_charmap; - seek $CMAP, 0, SEEK_SET; -} - -while(<$CMAP>) { - if (m/^\s*CHARMAP\s*$/) { - &parse_charmaps(); - } elsif (m/^\s*WIDTH\s*$/) { - &parse_widths(); - } elsif (m/^\s*($comment_char.*)?$/) { - } else { - chomp; - &exit(4, "syntax error on line $. 
($_)"); - } -} -&parse_widths() if (0 == %width); - -if (defined($opt{'i'})) { - sysopen(STDIN, $opt{'i'}, 0) || &exit(4, "Can't open localdef file $opt{i}: $!"); -} else { - $opt{'i'} = "/dev/stdin"; -} - -my %LC_parsers = ( - NONE => [\&parse_LC_NONE, qr/^\s*((escape|comment)_char\s+$val_match\s*)?$/], - CTYPE => [\&parse_LC_CTYPE, qr/^\s*(\S+)\s+(\S+.*?)\s*$/], - COLLATE => [\&parse_LC_COLLATE, qr/^\s*(<[^>\s]+>|order_end|END|(\S*)\s+(\S+.*?)|collating[_-]element\s*<[^>]+>\s+from\s+$val_match)\s*$/, 1], - TIME => [\&parse_LC_TIME, qr/^\s*(ab_?day|day|abmon|mon|d_t_fmt|d_fmt|t_fmt|am_pm|t_fmt_ampm|era|era_d_fmt|era_t_fmt|era_d_t_fmt|alt_digits|copy|END)\s+(\S+.*?)\s*$/], - NUMERIC => [\&parse_LC_NUMERIC, qr/^\s*(decimal_point|thousands_sep|grouping|END|copy)\s+(\S+.*?)\s*$/], - MONETARY => [\&parse_LC_MONETARY, qr/^\s*(int_curr_symbol|currency_symbol|mon_decimal_point|mon_thousands_sep|mon_grouping|positive_sign|negative_sign|int_frac_digits|frac_digits|p_cs_precedes|p_sep_by_space|n_cs_precedes|n_sep_by_space|p_sign_posn|n_sign_posn|int_p_cs_precedes|int_n_cs_precedes|int_p_sep_by_space|int_n_sep_by_space|int_p_sign_posn|int_n_sign_posn|copy|END)\s+(\S+.*?)\s*$/], - MESSAGES => [\&parse_LC_MESSAGES, qr/^\s*(END|yesexpr|noexpr|yesstr|nostr|copy)\s+(\S+.*?)\s*$/], - "COLLATE order" => [\&parse_collate_order, qr/^\s*(order_end|(<[^>\s]+>|UNDEFINED|\Q...\E)(\s+\S+.*)?)\s*$/], -); -my($current_LC, $parse_func, $validate_line, $call_parse_on_END) - = ("NONE", $LC_parsers{"NONE"}->[0], $LC_parsers{"NONE"}->[1], undef); - -while() { - next if (m/^\s*($comment_char.*)?\s*$/); - if (m/\Q$escape_char\E$/) { - chomp; - chop; - my $tmp = ; - if (!defined($tmp)) { - &exit(4, "Syntax error, last line ($.) of $opt{i} is marked as a continued line\n"); - } - $tmp =~ s/^\s*//; - $_ .= $tmp; - redo; - } - - if ($current_LC eq "NONE" && m/^\s*LC_([A-Z]+)\s*$/) { - &set_parser($1); - next; - } - - unless (m/$validate_line/) { - &exit(4, "Syntax error on line $. 
of $opt{i}\n"); - } - - my($action, $args); - if (m/^\s*(\S*)(\s+(\S+.*?))?\s*$/) { - ($action, $args) = ($1, $3); - } else { - $action = $_; - chomp $action; - } - - if ($action eq "END") { - if ($args ne "LC_$current_LC" || $current_LC eq "NONE") { - &exit(4, "Syntax error on line $. of $opt{i} attempting to end $args when LC_$current_LC is open\n"); - } - if ($call_parse_on_END) { - &{$parse_func}($action, $args); - } - &set_parser("NONE"); - } else { - &{$parse_func}($action, $args); - } -} - -mkdir($locale_dir); -&run_mklocale(); -&write_lc_money(); -&write_lc_time(); -&write_lc_messages(); -&write_lc_numeric(); -&write_lc_collate(); -exit 0; - -sub parse_charmaps { - while(<$CMAP>) { - # XXX need to parse out , , , - # , and before the generic " val" - if (m/^\s*<([\w\-]+)>\s+($val_match+)\s*$/) { - my($sym, $val) = ($1, $2); - $val = &parse_value_double_backwhack($val); - $sym{$sym} = $val; - } elsif (m/^\s*<([\w\-]*\d)>\s*\Q...\E\s*<([\w\-]*\d)>\s+($val_match+)\s*$/) { - # We don't deal with $se < $ss, or overflow of the last byte of $vs - # then again the standard doesn't say anything in particular needs - # to happen for those cases - my($ss, $se, $vs) = ($1, $2, $3); - $vs = &parse_value_double_backwhack($vs); - my $vlast = length($vs) -1; - for(my($s, $v) = ($ss, $vs); $s cmp $se; $s++) { - $sym{$s} = $v; - substr($v, $vlast) = chr(ord(substr($v, $vlast)) +1) - } - } elsif (m/^\s*END\s+CHARMAP\s*$/) { - return; - } elsif (m/^\s*($comment_char.*)?$/) { - } else { - &exit(4, "syntax error on line $."); - } - } -} - -sub parse_widths { - my $default = 1; - my @syms; - - while(<$CMAP>) { - if (m/^\s*<([\w\-]+)>\s+(\d+)\s*$/) { - my($sym, $w) = ($1, $2); - print "$sym width $w\n"; - if (!defined($sym{$sym})) { - warn "localedef: can't set width of unknown symbol $sym on line $.\n"; - } else { - $width{$sym} = $w; - } - } elsif (m/^\s*<([\w\-]+)>\s*\Q...\E\s*<([\w\-]+)>\s+(\d+)\s*$/) { - my($ss, $se, $w) = ($1, $2, $3); - if (!@syms) { - @syms = sort { $a cmp 
$b } keys(%sym); - } - - # Yes, we could do a binary search for find $ss in @syms - foreach my $s (@syms) { - if (($s cmp $ss) >= 0) { - last if (($s cmp $se) > 0); - } - } - } elsif (m/^\s*WIDTH_DEFAULT\s+(\d+)\s*$/) { - $default = $1; - } elsif (m/^\s*END\s+WIDTH\s*$/) { - last; - } elsif (m/^\s*($comment_char.*)?$/) { - } else { - &exit(4, "syntax error on line $."); - } - } - - foreach my $s (keys(%sym)) { - if (!defined($width{$s})) { - $width{$s} = $default; - } - } -} - -# This parses a single value in any of the 7 forms it can appear in, -# returns [0] the parsed value and [1] the remander of the string -sub parse_value_return_extra { - my $val = ""; - local($_) = $_[0]; - - while(1) { - $val .= &unsym($1), next - if (m/\G"((?:[^"\Q$escape_char\E]+|\Q$escape_char\E.)*)"/gc); - $val .= chr(oct($1)), next - if (m/\G\Q$escape_char\E([0-7]+)/gc); - $val .= chr(0+$1), next - if (m/\G\Q$escape_char\Ed([0-9]+)/gc); - $val .= pack("H*", $1), next - if (m/\G\Q$escape_char\Ex([0-9a-fA-F]+)/gc); - $val .= $1, next - if (m/\G([^,;<>\s\Q$escape_char()\E])/gc); - $val .= $1 - if (m/\G(?:\Q$escape_char\E)([,;<>\Q$escape_char()\E])/gc); - $val .= &unsym($1), next - if (m/\G(<[^>]+>)/gc); - - m/\G(.*)$/; - - return ($val, $1); - } -} - -# Parse one value, if there is more then one value alert the media -sub parse_value { - my ($ret, $err) = &parse_value_return_extra($_[0]); - if ($err ne "") { - &exit(4, "Syntax error, unexpected '$err' in value (after '$ret') on line $.\n"); - } - - return $ret; -} - -sub parse_value_double_backwhack { - my($val) = @_; - - my ($ret, $err) = &parse_value_return_extra($val); - return $ret if ($err eq ""); - - $val =~ s{\\\\}{\\}g; - ($ret, $err) = &parse_value_return_extra($val); - if ($err ne "") { - &exit(4, "Syntax error, unexpected '$err' in value (after '$ret') on line $.\n"); - } - - return $ret; -} -# $values is the string to parse, $dot_expand is a function ref that will -# return an array to insert when "X;...;Y" is parsed (undef 
means that -# construct is a syntax error), $nest is true if parens indicate a nested -# value string should be parsed and put in an array ref, $return_extra -# is true if any unparsable trailing junk should be returned as the last -# element (otherwise it is a syntax error). Any text matching the regex -# $specials is returned as an hash. -sub parse_values { - my($values, $sep, $dot_expand, $nest, $return_extra, $specials) = @_; - my(@ret, $live_dots); - - while($values ne "") { - if (defined($specials) && $values =~ s/^($specials)($sep|$)//) { - push(@ret, { $1, undef }); - next; - } - if ($nest && $values =~ s/^\(//) { - my @subret = &parse_values($values, ',', $dot_expand, $nest, 1, $specials); - $values = pop(@subret); - push(@ret, [@subret]); - unless ($values =~ s/^\)($sep)?//) { - &exit(4, "Syntax error, unmatched open paren on line $. of $opt{i}\n"); - } - next; - } - - my($v, $l) = &parse_value_return_extra($values); - $values = $l; - - if ($live_dots) { - splice(@ret, -1, 1, &{$dot_expand}($ret[$#ret], $v)); - $live_dots = 0; - } else { - push(@ret, $v); - } - - if (defined($dot_expand) && $values =~ s/^$sep\Q...\E$sep//) { - $live_dots = 1; - } elsif($values =~ s/^$sep//) { - # Normal case - } elsif($values =~ m/^$/) { - last; - } else { - last if ($return_extra); - &exit(4, "Syntax error parsing arguments on line $. of $opt{i}\n"); - } - } - - if ($live_dots) { - splice(@ret, -1, 1, &{$dot_expand}($ret[$#ret], undef)); - } - if ($return_extra) { - push(@ret, $values); - } - - return @ret; -} - -sub parse_LC_NONE { - my($cmd, $arg) = @_; - - if ($cmd eq "comment_char") { - $comment_char = &parse_value($arg); - } elsif($cmd eq "escape_char") { - &set_escape_char(&parse_value($arg)); - } elsif($cmd eq "") { - } else { - &exit(4, "Syntax error on line $. 
of $opt{i}\n"); - } -} - -sub parse_LC_CTYPE { - my($cmd, $arg) = @_; - - my $ctype_classes = join("|", keys(%ctype_classes)); - if ($cmd eq "copy") { - # XXX -- the locale command line utility doesn't currently - # output any LC_CTYPE info, so there isn't much of a way - # to implent copy yet - &exit(2, "copy not supported on line $. of $opt{i}\n"); - } elsif($cmd eq "charclass") { - my $cc = &parse_value($arg); - if (!defined($ctype_classes{$cc})) { - $ctype_classes{$cc} = []; - } else { - warn "charclass $cc defined more then once\n"; - } - } elsif($cmd =~ m/^to(upper|lower)$/) { - my @arg = &parse_values($arg, ';', undef, 1); - foreach my $p (@arg) { - &exit(4, "Syntax error on line $. of $opt{i} ${cmd}'s arguments must be character pairs like (a,A);(b,B)\n") if ("ARRAY" ne ref $p || 2 != @$p); - } - foreach my $pair (@arg) { - $ctype_classes{$cmd}{$pair->[0]} = $pair->[1]; - } - } elsif($cmd =~ m/^($ctype_classes)$/) { - my @arg = &parse_values($arg, ';', \&dot_expand, 0); - foreach my $c (@arg) { - $ctype_classes{$1}->{$c} = 1; - } - } elsif($cmd =~ "END") { - &add_to_ctype_class('alpha', keys(%{$ctype_classes{'lower'}})); - &add_to_ctype_class('alpha', keys(%{$ctype_classes{'upper'}})); - foreach my $c (qw(alpha lower upper)) { - foreach my $d (qw(cntrl digit punct space)) { - &deny_in_ctype_class($c, $d, keys(%{$ctype_classes{$d}})); - } - } - - &add_to_ctype_class('space', keys(%{$ctype_classes{'blank'}})); - foreach my $d (qw(upper lower alpha digit graph xdigit)) { - &deny_in_ctype_class('space', $d, keys(%{$ctype_classes{$d}})); - } - - foreach my $d (qw(upper lower alpha digit punct graph print xdigit)) { - &deny_in_ctype_class('cntrl', $d, keys(%{$ctype_classes{$d}})); - } - - foreach my $d (qw(upper lower alpha digit cntrl xdigit space)) { - &deny_in_ctype_class('punct', $d, keys(%{$ctype_classes{$d}})); - } - - foreach my $c (qw(graph print)) { - foreach my $a (qw(upper lower alpha digit xdigit punct)) { - &add_to_ctype_class($c, 
keys(%{$ctype_classes{$a}})); - } - foreach my $d (qw(cntrl)) { - &deny_in_ctype_class($c, $d, keys(%{$ctype_classes{$d}})); - } - } - &add_to_ctype_class('print', keys(%{$ctype_classes{'space'}})); - - # Yes, this is a requirment of the standard - &exit(2, "The digit class must have exactly 10 elements\n") if (10 != values(%{$ctype_classes{'digit'}})); - foreach my $d (values %{$ctype_classes{'digit'}}) { - if (!defined $ctype_classes{'xdigits'}->{$d}) { - &exit(4, "$d isn't in class xdigits, but all digits must appaer in xdigits\n"); - } - } - - $ctype_classes{'alnum'} = {} unless defined $ctype_classes{'alnum'}; - foreach my $a (qw(alpha digit)) { - &add_to_ctype_class('alnum', keys(%{$ctype_classes{$a}})); - } - - } else { - &exit(4, "Syntax error on line $. of $opt{i}\n"); - } -} - -sub parse_LC_COLLATE { - my ($cmd, $arg) = @_; - if (defined($arg) && $arg ne "") { - push(@colldef, "$cmd $arg"); - } else { - push(@colldef, "$cmd"); - } -} - -sub parse_collate_order { - my($cmd, $arg) = @_; - - if ($cmd =~ m/order[-_]end/) { - # restore the parent parser - &set_parser("COLLATE"); - my $undef_at; - for(my $i = 0; $i <= $#corder; ++$i) { - next unless "ARRAY" eq ref($corder[$i]); - # If ... appears as the "key" for a order entry it means the - # rest of the line is duplicated once for everything in the - # open ended range (key-pev-line, key-next-line). Any ... 
- # in the weight fields are delt with by &fixup_collate_order_args - if ($corder[$i]->[0] eq "...") { - my(@sym, $from, $to); - - my @charset = sort { $sym{$a} cmp $sym{$b} } keys(%sym); - if ($i != 0) { - $from = $corder[$i -1]->[0]; - } else { - $from = $charset[0]; - } - if ($i != $#corder) { - $to = $corder[$i +1]->[0]; - } else { - $to = $charset[$#charset]; - } - - my @expand; - my($s, $e) = (&parse_value($from), &parse_value($to)); - foreach my $c (@charset) { - if (($sym{$c} cmp $s) > 0) { - last if (($sym{$c} cmp $e) >= 0); - my @entry = @{$corder[$i]}; - $entry[0] = "<$c>"; - push(@expand, \@entry); - } - } - splice(@corder, $i, 1, @expand); - } elsif($corder[$i]->[0] eq "UNDEFINED") { - $undef_at = $i; - next; - } - &fixup_collate_order_args($corder[$i]); - } - - if ($undef_at) { - my @insert; - my %cused = map { ("ARRAY" eq ref $_) ? ($_->[0], undef) : () } @corder; - foreach my $s (keys(%sym)) { - next if (exists $cused{"<$s>"}); - my @entry = @{$corder[$undef_at]}; - $entry[0] = "<$s>"; - &fixup_collate_order_args(\@entry); - push(@insert, \@entry); - } - splice(@corder, $undef_at, 1, @insert); - } - } elsif((!defined $arg) || $arg eq "") { - if (!exists($csym{$cmd})) { - my($decode, $was_sym) = &unsym_with_check($cmd); - if ($was_sym) { - my %dots = ( "..." => undef ); - my @dots = (\%dots) x (0+@corder_weights); - push(@corder, [$cmd, @dots]); - } else { - warn "Undefined collation symbol $cmd used on line $. of $opt{i}\n"; - } - } else { - push(@corder, $cmd); - } - } else { - unless (defined($cele{$cmd} || defined $sym{$cmd})) { - warn "Undefined collation element or charset sym $cmd used on line $. 
of $opt{i}\n"; - } else { - # This expands all the symbols (but not colating elements), which - # makes life easier for dealing with ..., but harder for - # outputing the actual table at the end where we end up - # converting literal sequences back into symbols in some cases - my @args = &parse_values($arg, ';', undef, 0, 0, - qr/IGNORE|\Q...\E/); - - if (@args != @corder_weights) { - if (@args < @corder_weights) { - &exit(4, "Only " . (0 + @args) - . " weights supplied on line $. of $opt{i}, needed " - . (0 + @corder_weights) - . "\n"); - } else { - &exit(4, "Too many weights supplied on line $. of $opt{i}," - . " wanted " . (0 + @corder_weights) . " but had " - . (0 + @args) - . "\n"); - } - } - - push(@corder, [$cmd, @args]); - } - } -} - -sub parse_LC_MONETARY { - my($cmd, $arg) = @_; - - if ($cmd eq "copy") { - &do_copy(&parse_value($arg)); - } elsif($cmd eq "END") { - } elsif($cmd eq "mon_grouping") { - my @v = &parse_values($arg, ';', undef, 0); - $monetary{$cmd} = \@v; - } else { - my $v = &parse_value($arg); - $monetary{$cmd} = $v; - } -} - -sub parse_LC_MESSAGES { - my($cmd, $arg) = @_; - - if ($cmd eq "copy") { - &do_copy(&parse_value($arg)); - } elsif($cmd eq "END") { - } else { - my $v = &parse_value($arg); - $messages{$cmd} = $v; - } -} - -sub parse_LC_NUMERIC { - my($cmd, $arg) = @_; - - if ($cmd eq "copy") { - &do_copy(&parse_value($arg)); - } elsif($cmd eq "END") { - } elsif($cmd eq "grouping") { - my @v = &parse_values($arg, ';', undef, 0); - $numeric{$cmd} = \@v; - } else { - my $v = &parse_value($arg); - $numeric{$cmd} = $v; - } -} - -sub parse_LC_TIME { - my($cmd, $arg) = @_; - - $cmd =~ s/^ab_day$/abday/; - - if ($cmd eq "copy") { - &do_copy(&parse_value($arg)); - } elsif($cmd eq "END") { - } elsif($cmd =~ m/abday|day|mon|abmon|am_pm|alt_digits/) { - my @v = &parse_values($arg, ';', undef, 0); - $time{$cmd} = \@v; - } elsif($cmd eq "era") { - my @v = &parse_values($arg, ':', undef, 0); - $time{$cmd} = \@v; - } else { - my $v = 
&parse_value($arg); - $time{$cmd} = $v; - } -} - - -############################################################################### - -sub run_mklocale { - my $L = (new IO::File "|/usr/bin/mklocale -o $locale_dir/LC_CTYPE") || &exit(5, "$0: Can't start mklocale $!\n"); - if (defined($opt{'u'})) { - $L->print(qq{ENCODING "$opt{u}"\n}); - } else { - if ($ARGV[0] =~ m/(big5|euc|gb18030|gb2312|gbk|mskanji|utf-8)/i) { - my $enc = uc($1); - $L->print(qq{ENCODING "$enc"\n}); - } elsif($ARGV[0] =~ m/utf8/) { - $L->print(qq{ENCODING "UTF-8"\n}); - } else { - $L->print(qq{ENCODING "NONE"\n}); - } - } - foreach my $class (keys(%ctype_classes)) { - unless ($class =~ m/^(tolower|toupper|alpha|control|digit|grah|lower|space|upper|xdigit|blank|print|ideogram|special|phonogram)$/) { - $L->print("# skipping $class\n"); - next; - } - - if (!%{$ctype_classes{$class}}) { - $L->print("# Nothing in \U$class\n"); - next; - } - - if ($class =~ m/^to/) { - my $t = $class; - $t =~ s/^to/map/; - $L->print("\U$t "); - - foreach my $from (keys(%{$ctype_classes{$class}})) { - $L->print("[", &hexchars($from), " ", - &hexchars($ctype_classes{$class}->{$from}), "] "); - } - } else { - $L->print("\U$class "); - - foreach my $rune (keys(%{$ctype_classes{$class}})) { - $L->print(&hexchars($rune), " "); - } - } - $L->print("\n"); - } - - my @width; - foreach my $s (keys(%width)) { - my $w = $width{$s}; - $w = 3 if ($w > 3); - push(@{$width[$w]}, &hexchars($sym{$s})); - } - for(my $w = 0; $w <= $#width; ++$w) { - next if (!defined $width[$w]); - next if (0 == @{$width[$w]}); - $L->print("SWIDTH$w ", join(" ", @{$width[$w]}), "\n"); - } - - if (!$L->close()) { - if (0 == $!) 
{ - &exit(5, "Bad return from mklocale $?"); - } else { - &exit(5, "Couldn't close mklocale pipe: $!"); - } - } -} - -############################################################################### - -sub hexchars { - my($str) = $_[0]; - my($ret); - - $ret = unpack "H*", $str; - &exit(2, "Rune >4 bytes ($ret; for $str)") if (length($ret) > 8); - - return "0x" . $ret; -} - -sub hexseq { - my($str) = $_[0]; - my($ret); - - $ret = unpack "H*", $str; - $ret =~ s/(..)/\\x$1/g; - - return $ret; -} - -# dot_expand in the target charset -sub dot_expand { - my($s, $e) = @_; - my(@ret); - - my @charset = sort { $a cmp $b } values(%sym); - foreach my $c (@charset) { - if (($c cmp $s) >= 0) { - last if (($c cmp $e) > 0); - push(@ret, $c); - } - } - - return @ret; -} - -# Convert symbols into literal values -sub unsym { - my @ret = &unsym_with_check(@_); - return $ret[0]; -} - -# Convert symbols into literal values (return[0]), and a count of how -# many symbols were converted (return[1]). -sub unsym_with_check { - my($str) = $_[0]; - - my $rx = join("|", keys(%sym)); - return ($str, 0) if ($rx eq ""); - my $found = $str =~ s/<($rx)>/$sym{$1}/eg; - - return ($str, $found); -} - -# Convert a string of literals back into symbols. It is an error -# for there to be literal values that can't be mapped back. The -# converter uses a gredy algo. It is likely this could be done -# more efficently with a regex ctrated at runtime. It would also be -# a good idea to only create %rsym if %sym changes, but that isn't -# the simplest thing to do in perl5. -sub resym { - my($str) = $_[0]; - my(%rsym, $k, $v); - my $max_len = 0; - my $ret = ""; - - while(($k, $v) = each(%sym)) { - # Collisions in $v are ok, we merely need a mapping, not the - # identical mapping - $rsym{$v} = $k; - $max_len = length($v) if (length($v) > $max_len); - } - - SYM: while("" ne $str) { - foreach my $l ($max_len .. 
1) { - next if ($l > length($str)); - my $s = substr($str, 0, $l); - if (defined($rsym{$s})) { - $ret .= "<" . $rsym{$s} . ">"; - substr($str, 0, $l) = ""; - next SYM; - } - } - &exit(4, "Can't convert $str ($_[0]) back into symbolic form\n"); - } - - return $ret; -} - -sub set_escape { - $escape_char = $_[0]; - $val_match = qr/"(?:[^"\Q$escape_char\E]+|\Q$escape_char\E")+"|(?:\Q$escape_char\E(?:[0-7]+|d[0-9]+|x[0-9a-fA-F]+))|[^,;<>\s\Q$escape_char\E]|(?:\Q$escape_char\E)[,;<>\Q$escape_char\E]/; -} - -sub set_parser { - my $section = $_[0]; - ($current_LC, $parse_func, $validate_line, $call_parse_on_END) - = ($section, $LC_parsers{$section}->[0], $LC_parsers{$section}->[1], - $LC_parsers{$section}->[2]); - unless (defined $parse_func) { - &exit(4, "Unknown section name LC_$section on line $. of $opt{i}\n"); - } -} - -sub do_copy { - my($from) = @_; - local($ENV{LC_ALL}) = $from; - - my $C = (new IO::File "/usr/bin/locale -k LC_$current_LC |") || &exit(5, "can't fork locale during copy of LC_$current_LC"); - while(<$C>) { - if (s/=\s*$/ ""/ || s/=/ /) { - if (m/$validate_line/ && m/^\s*(\S*)(\s+(\S+.*?))?\s*$/) { - my($action, $args) = ($1, $3); - &{$parse_func}($action, $args); - } else { - &exit(4, "Syntax error on line $. of locale -k output" - . " during copy $current_LC\n"); - } - } else { - &exit(4, "Ill-formed line $. 
from locale -k during copy $current_LC\n"); - } - } - $C->close() || &exit(5, "copying LC_$current_LC from $from failed"); -} - -sub fixup_collate_order_args { - my $co = $_[0]; - - foreach my $s (@{$co}[1..$#{$co}]) { - if ("HASH" eq ref($s) && exists($s->{"..."})) { - $s = $co->[0]; - } - } -} - -sub add_to_ctype_class { - my($class, @runes) = @_; - - my $c = $ctype_classes{$class}; - foreach my $r (@runes) { - $c->{$r} = 2 unless exists $c->{$r}; - } -} - -sub deny_in_ctype_class { - my($class, $deny_reason, @runes) = @_; - - my $c = $ctype_classes{$class}; - foreach my $r (@runes) { - next unless exists $c->{$r}; - $deny_reason =~ s/^(\S+)$/can't belong in class $class and in class $1 at the same time/; - &exit(4, &hexchars($r) . " " . $deny_reason . "\n"); - } -} - -# write_lc_{money,time,messages} all use the existing Libc format, which -# is raw text with each record terminated by a newline, and records -# in a predetermined order. - -sub write_lc_money { - my $F = (new IO::File "$locale_dir/LC_MONETARY", O_TRUNC|O_WRONLY|O_CREAT, 0666) || &exit(4, "$0 can't create $locale_dir/LC_MONETARY: $!"); - foreach my $s (qw(int_curr_symbol currency_symbol mon_decimal_point mon_thousands_sep mon_grouping positive_sign negative_sign int_frac_digits frac_digits p_cs_precedes p_sep_by_space n_cs_precedes n_sep_by_space p_sign_posn n_sign_posn int_p_cs_precedes int_n_cs_precedes int_p_sep_by_space int_n_sep_by_space int_p_sign_posn int_n_sign_posn)) { - if (exists $monetary{$s}) { - my $v = $monetary{$s}; - if ("ARRAY" eq ref $v) { - $F->print(join(";", @$v), "\n"); - } else { - $F->print("$v\n"); - } - } else { - if ($s =~ m/^(int_curr_symbol|currency_symbol|mon_decimal_point|mon_thousands_sep|positive_sign|negative_sign)$/) { - $F->print("\n"); - } else { - $F->print("-1\n"); - } - } - } -} - -sub write_lc_time { - my $F = (new IO::File "$locale_dir/LC_TIME", O_TRUNC|O_WRONLY|O_CREAT, 0666) || &exit(4, "$0 can't create $locale_dir/LC_TIME: $!"); - my %array_cnt = (abmon 
=> 12, mon => 12, abday => 7, day => 7, alt_month => 12, am_pm => 2); - - $time{"md_order"} = "md" unless defined $time{"md_order"}; - - foreach my $s (qw(abmon mon abday day t_fmt d_fmt d_t_fmt am_pm d_t_fmt mon md_order t_fmt_ampm)) { - my $cnt = $array_cnt{$s}; - my $v = $time{$s}; - - if (defined $v) { - if (defined $cnt) { - my @a = @{$v}; - &exit(4, "$0: $s has " . (0 + @a) - . " elements, it needs to have exactly $cnt\n") - unless (@a == $cnt); - $F->print(join("\n", @a), "\n"); - } else { - $F->print("$v\n"); - } - } else { - $cnt = 1 if !defined $cnt; - $F->print("\n" x $cnt); - } - } -} - -sub write_lc_messages { - mkdir("$locale_dir/LC_MESSAGES"); - my $F = (new IO::File "$locale_dir/LC_MESSAGES/LC_MESSAGES", O_TRUNC|O_WRONLY|O_CREAT, 0666) || &exit(4, "$0 can't create $locale_dir/LC_MESSAGES/LC_MESSAGES: $!"); - - foreach my $s (qw(yesexpr noexpr yesstr nostr)) { - my $v = $messages{$s}; - - if (defined $v) { - $F->print("$v\n"); - } else { - $F->print("\n"); - } - } -} - -sub write_lc_numeric { - my $F = (new IO::File "$locale_dir/LC_NUMERIC", O_TRUNC|O_WRONLY|O_CREAT, 0666) || &exit(4, "$0 can't create $locale_dir/LC_NUMERIC: $!"); - - foreach my $s (qw(decimal_point thousands_sep grouping)) { - if (exists $numeric{$s}) { - my $v = $numeric{$s}; - if ("ARRAY" eq ref $v) { - $F->print(join(";", @$v), "\n"); - } else { - $F->print("$v\n"); - } - } else { - $F->print("\n"); - } - } -} - -sub bylenval { - return 0 if ("ARRAY" ne ref $a || "ARRAY" ne ref $b); - - my($aval, $af) = &unsym_with_check($a->[0]); - $aval = $cele{$a->[0]} unless $af; - my($bval, $bf) = &unsym_with_check($b->[0]); - $bval = $cele{$b->[0]} unless $bf; - - my $r = length($aval) - length($bval); - return $r if $r; - return $aval cmp $bval; -} - -sub write_lc_collate { - return unless @colldef; - - # colldef doesn't parse the whole glory of SuSv3 charmaps, and we - # already have, so we cna spit out a simplifyed one; unfortunitly - # it doesn't like "/dev/fd/N" so we need a named tmp 
file - my($CMAP, $cmapname) = tempfile(DIR => "/tmp"); - foreach my $s (keys(%sym)) { - $CMAP->print("<$s>\t", sprintf "\\x%02x\n", ord($sym{$s})); - } - $CMAP->flush(); - unshift(@colldef, qq{charmap $cmapname}); - unshift(@colldef, "LC_COLLATE"); - $colldef[$#colldef] = "END LC_COLLATE"; - - # Can't just use /dev/stdin, colldef appears to use seek, - # and even seems to need a named temp file (re-open?) - my($COL, $colname) = tempfile(DIR => "/tmp"); - $COL->print(join("\n", @colldef), "\n"); - $COL->flush(); - - my $rc = system( - "/usr/bin/colldef -o $locale_dir/LC_COLLATE $colname"); - unlink $colname, $cmapname; - if ($rc) { - &exit(1, "Bad return from colldef $rc"); - } -} - -# Pack an int of unknown size into a series of bytes, each of which -# contains 7 bits of data, and the top bit is clear on the last -# byte of data. Also works on arrays -- does not encode the size of -# the array. This format is great for data that tends to have fewer -# then 21 bits. -sub pack_p_int { - if (@_ > 1) { - my $ret = ""; - foreach my $v (@_) { - $ret .= &pack_p_int($v); - } - - return $ret; - } - - my $v = $_[0]; - my $b; - - &exit(4, "pack_p_int only works on positive values") if ($v < 0); - if ($v < 128) { - $b = chr($v); - } else { - $b = chr(($v & 0x7f) | 0x80); - $b .= pack_p_int($v >> 7); - } - return $b; -} - -sub strip_angles { - my $s = $_[0]; - $s =~ s/^<(.*)>$/$1/; - return $s; -} - -# For localedef -# xc=0 "no warnings, locale defined" -# xc=1 "warnings, locale defined" -# xc=2 "implmentation limits or unsupported charactor sets, no locale defined" -# xc=3 "can't create new locales" -# xc=4+ "wornings or errors, no locale defined" -sub exit { - my($xc, $message) = @_; - - print STDERR $message; - exit $xc; -} diff --git a/localedef/localedef.plist.part b/localedef/localedef.plist.part new file mode 100644 index 0000000..aa6075b --- /dev/null +++ b/localedef/localedef.plist.part @@ -0,0 +1,31 @@ + + OpenSourceProject + localedef + OpenSourceVersion + 2023-09-05 
+ OpenSourceWebsiteURL + https://cgit.freebsd.org/src/tree/usr.bin/localedef?id=3141e51d2e38fa7b9e6d81477dfa860d087c514d + OpenSourceImportDate + 2023-09-05 + OpenSourceModifications + + ctype.c: use Apple-appropriate magic for this format (84626030) + messages.c: don't warn on missing yesstr/nostr (84626030) + monetary.c: provide appropriate default values for missing fields (84626030) + time.c: use Libc-recognized format (84626030) + localedef.c: use appropriate headers/byteswapping macros (84626030) + localedef.h: annotate werr() as printflike to squash some warnings (84626030) + scanner.c: support \constants (octal, decimal, hex) (84626030) + charmap.c: provide Apple-legacy spellings for some symbols in the default map (84626030) + localedef.c: make locales public by default (write to /usr/share/locale) (84626030) + localedef.c, localedef.h, parser.y, scanner.c: shell out to locale(1) for some copy statements (84626030) + monetary.c, parser.y: accept empty-quotes for "no value" (84626030) + parser.y: accept "copy C" as valid (84626030) + localedef.c: avoid mkdir(dirname(category_file())) before the category is set (84626030) + ctype.c, localedef.c, localedef.h, scanner.c: emit LC_CTYPE if none was defined (84626030) + ctype.c: include POSIX-specified class definitions (84626030) + localedef.c: save errno before it can be clobbered (84626030) + + OpenSourceLicense + bsd + diff --git a/localedef/messages.c b/localedef/messages.c new file mode 100644 index 0000000..4652851 --- /dev/null +++ b/localedef/messages.c @@ -0,0 +1,123 @@ +/*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * LC_MESSAGES database generation routines for localedef. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include "localedef.h" +#include "parser.h" +#include "lmessages.h" + +static struct lc_messages_T msgs; + +void +init_messages(void) +{ + (void) memset(&msgs, 0, sizeof (msgs)); +} + +void +add_message(wchar_t *wcs) +{ + char *str; + + if ((str = to_mb_string(wcs)) == NULL) { + INTERR; + return; + } + free(wcs); + + switch (last_kw) { + case T_YESSTR: + msgs.yesstr = str; + break; + case T_NOSTR: + msgs.nostr = str; + break; + case T_YESEXPR: + msgs.yesexpr = str; + break; + case T_NOEXPR: + msgs.noexpr = str; + break; + default: + free(str); + INTERR; + break; + } +} + +void +dump_messages(void) +{ + FILE *f; + char *ptr; + + if (msgs.yesstr == NULL) { +#ifndef __APPLE__ + warn("missing field 'yesstr'"); +#endif + msgs.yesstr = ""; + } + if (msgs.nostr == NULL) { +#ifndef __APPLE__ + warn("missing field 'nostr'"); +#endif + msgs.nostr = ""; + } + + /* + * CLDR likes to add : separated lists for yesstr and nostr. + * Legacy Solaris code does not seem to grok this. Fix it. + */ + if ((ptr = strchr(msgs.yesstr, ':')) != NULL) + *ptr = 0; + if ((ptr = strchr(msgs.nostr, ':')) != NULL) + *ptr = 0; + + if ((f = open_category()) == NULL) { + return; + } + + if ((putl_category(msgs.yesexpr, f) == EOF) || + (putl_category(msgs.noexpr, f) == EOF) || + (putl_category(msgs.yesstr, f) == EOF) || + (putl_category(msgs.nostr, f) == EOF)) { + return; + } + close_category(f); +} diff --git a/localedef/monetary.c b/localedef/monetary.c new file mode 100644 index 0000000..145b223 --- /dev/null +++ b/localedef/monetary.c @@ -0,0 +1,254 @@ +/*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * LC_MONETARY database generation routines for localedef. + */ +#include + +#ifdef __APPLE__ +#include + +#include +#endif +#include +#include +#include +#include +#include +#include +#include "localedef.h" +#include "parser.h" +#include "lmonetary.h" + +static struct lc_monetary_T mon; + +#ifdef __APPLE__ +#define MON_FIELD(name) offsetof(struct lc_monetary_T, name) + +/* + * We only need to provide default values for numeric fields, which should print + * -1 instead of the empty string. 
+ */ +static struct monetary_field_init { + size_t offset; + const char *value; +} monetary_init[] = { + { MON_FIELD(int_frac_digits), "-1" }, + { MON_FIELD(frac_digits), "-1" }, + { MON_FIELD(p_cs_precedes), "-1" }, + { MON_FIELD(p_sep_by_space), "-1" }, + { MON_FIELD(n_cs_precedes), "-1" }, + { MON_FIELD(n_sep_by_space), "-1" }, + { MON_FIELD(p_sign_posn), "-1" }, + { MON_FIELD(n_sign_posn), "-1" }, + { MON_FIELD(int_p_cs_precedes), "-1" }, + { MON_FIELD(int_p_sep_by_space), "-1" }, + { MON_FIELD(int_n_cs_precedes), "-1" }, + { MON_FIELD(int_n_sep_by_space), "-1" }, + { MON_FIELD(int_p_sign_posn), "-1" }, + { MON_FIELD(int_n_sign_posn), "-1" }, +}; +#endif /* __APPLE__ */ + +void +init_monetary(void) +{ + (void) memset(&mon, 0, sizeof (mon)); +} + +void +add_monetary_str(wchar_t *wcs) +{ + char *str; + + if ((str = to_mb_string(wcs)) == NULL) { + INTERR; + return; + } + free(wcs); + switch (last_kw) { + case T_INT_CURR_SYMBOL: + mon.int_curr_symbol = str; + break; + case T_CURRENCY_SYMBOL: + mon.currency_symbol = str; + break; + case T_MON_DECIMAL_POINT: + mon.mon_decimal_point = str; + break; + case T_MON_THOUSANDS_SEP: + mon.mon_thousands_sep = str; + break; + case T_POSITIVE_SIGN: + mon.positive_sign = str; + break; + case T_NEGATIVE_SIGN: + mon.negative_sign = str; + break; + default: + free(str); + INTERR; + break; + } +} + +void +add_monetary_num(int n) +{ + char *str = NULL; + + (void) asprintf(&str, "%d", n); + if (str == NULL) { + fprintf(stderr, "out of memory\n"); + return; + } + + switch (last_kw) { + case T_INT_FRAC_DIGITS: + mon.int_frac_digits = str; + break; + case T_FRAC_DIGITS: + mon.frac_digits = str; + break; + case T_P_CS_PRECEDES: + mon.p_cs_precedes = str; + break; + case T_P_SEP_BY_SPACE: + mon.p_sep_by_space = str; + break; + case T_N_CS_PRECEDES: + mon.n_cs_precedes = str; + break; + case T_N_SEP_BY_SPACE: + mon.n_sep_by_space = str; + break; + case T_P_SIGN_POSN: + mon.p_sign_posn = str; + break; + case T_N_SIGN_POSN: + mon.n_sign_posn 
= str; + break; + case T_INT_P_CS_PRECEDES: + mon.int_p_cs_precedes = str; + break; + case T_INT_N_CS_PRECEDES: + mon.int_n_cs_precedes = str; + break; + case T_INT_P_SEP_BY_SPACE: + mon.int_p_sep_by_space = str; + break; + case T_INT_N_SEP_BY_SPACE: + mon.int_n_sep_by_space = str; + break; + case T_INT_P_SIGN_POSN: + mon.int_p_sign_posn = str; + break; + case T_INT_N_SIGN_POSN: + mon.int_n_sign_posn = str; + break; + case T_MON_GROUPING: + mon.mon_grouping = str; + break; + default: + INTERR; + break; + } +} + +void +reset_monetary_group(void) +{ + free((char *)mon.mon_grouping); + mon.mon_grouping = NULL; +} + +void +add_monetary_group(int n) +{ + char *s = NULL; + + if (mon.mon_grouping == NULL) { + (void) asprintf(&s, "%d", n); + } else { + (void) asprintf(&s, "%s;%d", mon.mon_grouping, n); + } + if (s == NULL) + fprintf(stderr, "out of memory\n"); + + free((char *)mon.mon_grouping); + mon.mon_grouping = s; +} + +void +dump_monetary(void) +{ + FILE *f; + +#ifdef __APPLE__ + for (size_t i = 0; i < nitems(monetary_init); i++) { + struct monetary_field_init *initf = &monetary_init[i]; + const char **field; + + field = (const char **)(((unsigned char *)&mon) + initf->offset); + if (*field == NULL) + *field = initf->value; + } +#endif + + if ((f = open_category()) == NULL) { + return; + } + + if ((putl_category(mon.int_curr_symbol, f) == EOF) || + (putl_category(mon.currency_symbol, f) == EOF) || + (putl_category(mon.mon_decimal_point, f) == EOF) || + (putl_category(mon.mon_thousands_sep, f) == EOF) || + (putl_category(mon.mon_grouping, f) == EOF) || + (putl_category(mon.positive_sign, f) == EOF) || + (putl_category(mon.negative_sign, f) == EOF) || + (putl_category(mon.int_frac_digits, f) == EOF) || + (putl_category(mon.frac_digits, f) == EOF) || + (putl_category(mon.p_cs_precedes, f) == EOF) || + (putl_category(mon.p_sep_by_space, f) == EOF) || + (putl_category(mon.n_cs_precedes, f) == EOF) || + (putl_category(mon.n_sep_by_space, f) == EOF) || + 
(putl_category(mon.p_sign_posn, f) == EOF) || + (putl_category(mon.n_sign_posn, f) == EOF) || + (putl_category(mon.int_p_cs_precedes, f) == EOF) || + (putl_category(mon.int_n_cs_precedes, f) == EOF) || + (putl_category(mon.int_p_sep_by_space, f) == EOF) || + (putl_category(mon.int_n_sep_by_space, f) == EOF) || + (putl_category(mon.int_p_sign_posn, f) == EOF) || + (putl_category(mon.int_n_sign_posn, f) == EOF)) { + return; + } + close_category(f); +} diff --git a/localedef/numeric.c b/localedef/numeric.c new file mode 100644 index 0000000..5533b7c --- /dev/null +++ b/localedef/numeric.c @@ -0,0 +1,117 @@ +/*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * LC_NUMERIC database generation routines for localedef. + */ +#include +#include +#include +#include +#include +#include +#include +#include "localedef.h" +#include "parser.h" +#include "lnumeric.h" + +static struct lc_numeric_T numeric; + +void +init_numeric(void) +{ + (void) memset(&numeric, 0, sizeof (numeric)); +} + +void +add_numeric_str(wchar_t *wcs) +{ + char *str; + + if ((str = to_mb_string(wcs)) == NULL) { + INTERR; + return; + } + free(wcs); + + switch (last_kw) { + case T_DECIMAL_POINT: + numeric.decimal_point = str; + break; + case T_THOUSANDS_SEP: + numeric.thousands_sep = str; + break; + default: + free(str); + INTERR; + break; + } +} + +void +reset_numeric_group(void) +{ + free((char *)numeric.grouping); + numeric.grouping = NULL; +} + +void +add_numeric_group(int n) +{ + char *s; + + if (numeric.grouping == NULL) { + (void) asprintf(&s, "%d", n); + } else { + (void) asprintf(&s, "%s;%d", numeric.grouping, n); + } + if (s == NULL) + fprintf(stderr, "out of memory\n"); + + free((char *)numeric.grouping); + numeric.grouping = s; +} + +void +dump_numeric(void) +{ + FILE *f; + + if ((f = open_category()) == NULL) { + return; + } + + if ((putl_category(numeric.decimal_point, f) == EOF) || + (putl_category(numeric.thousands_sep, f) == EOF) || + (putl_category(numeric.grouping, f) == EOF)) { + return; + } + close_category(f); +} diff --git a/localedef/parser.y b/localedef/parser.y new file mode 100644 index 
0000000..fe9326b --- /dev/null +++ b/localedef/parser.y @@ -0,0 +1,776 @@ +%{ +/*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * POSIX localedef grammar. 
+ */ + +#include +#include +#include +#include "localedef.h" + +%} +%union { + int num; + wchar_t wc; + char *token; + collsym_t *collsym; + collelem_t *collelem; +} + +%token T_CODE_SET +%token T_MB_CUR_MAX +%token T_MB_CUR_MIN +%token T_COM_CHAR +%token T_ESC_CHAR +%token T_LT +%token T_GT +%token T_NL +%token T_SEMI +%token T_COMMA +%token T_ELLIPSIS +%token T_RPAREN +%token T_LPAREN +%token T_QUOTE +%token T_NULL +%token T_WS +%token T_END +%token T_COPY +%token T_CHARMAP +%token T_WIDTH +%token T_CTYPE +%token T_ISUPPER +%token T_ISLOWER +%token T_ISALPHA +%token T_ISDIGIT +%token T_ISPUNCT +%token T_ISXDIGIT +%token T_ISSPACE +%token T_ISPRINT +%token T_ISGRAPH +%token T_ISBLANK +%token T_ISCNTRL +%token T_ISALNUM +%token T_ISSPECIAL +%token T_ISPHONOGRAM +%token T_ISIDEOGRAM +%token T_ISENGLISH +%token T_ISNUMBER +%token T_TOUPPER +%token T_TOLOWER +%token T_COLLATE +%token T_COLLATING_SYMBOL +%token T_COLLATING_ELEMENT +%token T_ORDER_START +%token T_ORDER_END +%token T_FORWARD +%token T_BACKWARD +%token T_POSITION +%token T_FROM +%token T_UNDEFINED +%token T_IGNORE +%token T_MESSAGES +%token T_YESSTR +%token T_NOSTR +%token T_YESEXPR +%token T_NOEXPR +%token T_MONETARY +%token T_INT_CURR_SYMBOL +%token T_CURRENCY_SYMBOL +%token T_MON_DECIMAL_POINT +%token T_MON_THOUSANDS_SEP +%token T_POSITIVE_SIGN +%token T_NEGATIVE_SIGN +%token T_MON_GROUPING +%token T_INT_FRAC_DIGITS +%token T_FRAC_DIGITS +%token T_P_CS_PRECEDES +%token T_P_SEP_BY_SPACE +%token T_N_CS_PRECEDES +%token T_N_SEP_BY_SPACE +%token T_P_SIGN_POSN +%token T_N_SIGN_POSN +%token T_INT_P_CS_PRECEDES +%token T_INT_N_CS_PRECEDES +%token T_INT_P_SEP_BY_SPACE +%token T_INT_N_SEP_BY_SPACE +%token T_INT_P_SIGN_POSN +%token T_INT_N_SIGN_POSN +%token T_NUMERIC +%token T_DECIMAL_POINT +%token T_THOUSANDS_SEP +%token T_GROUPING +%token T_TIME +%token T_ABDAY +%token T_DAY +%token T_ABMON +%token T_MON +%token T_ERA +%token T_ERA_D_FMT +%token T_ERA_T_FMT +%token T_ERA_D_T_FMT +%token T_ALT_DIGITS +%token 
/*
 * "copy <locale>" inside a category.  The locale name may arrive as a bare
 * name, as a quoted string, or (for a one-letter name) as a single
 * character token; every form hands copy_category() a multibyte string.
 */
copycat		: T_COPY T_NAME T_NL
		{
			copy_category($2);
		}
		| T_COPY string T_NL
		{
			wchar_t *w = get_wcs();
			copy_category(to_mb_string(w));
			free(w);
		}
		| T_COPY T_CHAR T_NL
		{
#ifdef __APPLE__
			/*
			 * This entire branch of grammar is actually __APPLE__,
			 * but no preproc before bison.
			 */

			/* The C locale could be mistaken as a char. */
			wchar_t w[2];

			w[0] = $2;
			w[1] = L'\0';

			/*
			 * Convert to a multibyte name exactly as the quoted
			 * string form does; passing the wchar_t array itself
			 * would hand copy_category() the wrong pointer type.
			 */
			copy_category(to_mb_string(w));
#endif
		}
		;
T_SEMI cc_char + | cc_char + ; + +cc_range_end : T_ELLIPSIS T_SEMI T_CHAR + { + add_ctype_range($3); + } + ; + +cc_char : T_CHAR + { + add_ctype($1); + } + | T_SYMBOL + { + add_charmap_undefined($1); + } + ; + +conv_list : conv_list T_SEMI conv_pair + | conv_pair + ; + + +conv_pair : T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN + { + add_caseconv($2, $4); + } + | T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN + { + add_charmap_undefined($2); + } + | T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN + { + add_charmap_undefined($2); + add_charmap_undefined($4); + } + | T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN + { + add_charmap_undefined($4); + } + ; + +collate : T_COLLATE T_NL coll_details T_END T_COLLATE T_NL + { +#ifdef __APPLE__ + dump_collate(); +#endif + } +/* +#ifndef __APPLE__ +*/ + /* + * Minor simplification of the grammar; abstracting the below + * two combinations away into a coll_details lets us just add a + * single rule below cleanly. + */ +/* + T_COLLATE T_NL coll_order T_END T_COLLATE T_NL + { + dump_collate(); + } + | T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL + { + dump_collate(); + } +#endif +*/ + | T_COLLATE T_NL copycat T_END T_COLLATE T_NL + | T_COLLATE T_NL copycat coll_details T_END T_COLLATE T_NL + { +#ifdef __APPLE__ + dump_collate(); +#endif + } + ; + +coll_details : coll_order + | coll_optional coll_order + ; + +coll_optional : coll_optional coll_symbols + | coll_optional coll_elements + | coll_symbols + | coll_elements + ; + + +coll_symbols : T_COLLATING_SYMBOL T_SYMBOL T_NL + { + define_collsym($2); + } + ; + + +coll_elements : T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL + { + define_collelem($2, get_wcs()); + } + ; + +coll_order : T_ORDER_START T_NL order_list T_ORDER_END T_NL + { + /* If no order list supplied default to one forward */ + add_order_bit(T_FORWARD); + add_order_directive(); + } + | T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL + ; + + +order_args : order_args T_SEMI order_arg + { + 
add_order_directive(); + } + | order_arg + { + add_order_directive(); + } + ; + +order_arg : order_arg T_COMMA order_dir + | order_dir + ; + +order_dir : T_FORWARD + { + add_order_bit(T_FORWARD); + } + | T_BACKWARD + { + add_order_bit(T_BACKWARD); + } + | T_POSITION + { + add_order_bit(T_POSITION); + } + ; + +order_list : order_list order_item + | order_item + ; + +order_item : T_COLLSYM T_NL + { + end_order_collsym($1); + } + | order_itemkw T_NL + { + end_order(); + } + | order_itemkw order_weights T_NL + { + end_order(); + } + ; + +order_itemkw : T_CHAR + { + start_order_char($1); + } + | T_ELLIPSIS + { + start_order_ellipsis(); + } + | T_COLLELEM + { + start_order_collelem($1); + } + | T_UNDEFINED + { + start_order_undefined(); + } + | T_SYMBOL + { + start_order_symbol($1); + } + ; + +order_weights : order_weights T_SEMI order_weight + | order_weights T_SEMI + | order_weight + ; + +order_weight : T_COLLELEM + { + add_order_collelem($1); + } + | T_COLLSYM + { + add_order_collsym($1); + } + | T_CHAR + { + add_order_char($1); + } + | T_ELLIPSIS + { + add_order_ellipsis(); + } + | T_IGNORE + { + add_order_ignore(); + } + | T_SYMBOL + { + add_order_symbol($1); + } + | T_QUOTE order_str T_QUOTE + { + add_order_subst(); + } + ; + +order_str : order_str order_stritem + | order_stritem + ; + +order_stritem : T_CHAR + { + add_subst_char($1); + } + | T_COLLSYM + { + add_subst_collsym($1); + } + | T_COLLELEM + { + add_subst_collelem($1); + } + | T_SYMBOL + { + add_subst_symbol($1); + } + ; + +messages : T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL + { + dump_messages(); + } + | T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL + | T_MESSAGES T_NL copycat messages_list T_END T_MESSAGES T_NL + { +#ifdef __APPLE__ + dump_messages(); +#endif + } + ; + +messages_list : messages_list messages_item + | messages_item + ; + +messages_kw : T_YESSTR + | T_NOSTR + | T_YESEXPR + | T_NOEXPR + ; + +messages_item : messages_kw string T_NL + { + add_message(get_wcs()); + } + ; + 
/*
 * LC_TIME category.  Like every other category, each alternative must be
 * closed by "END LC_TIME", i.e. T_END followed by the category's own
 * token (T_TIME).
 */
time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
		{
			dump_time();
		}
		| T_TIME T_NL copycat T_END T_TIME T_NL
		| T_TIME T_NL copycat time_kwlist T_END T_TIME T_NL
		{
#ifdef __APPLE__
			dump_time();
#endif
		}
		;
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This file contains the "scanner", which tokenizes the input files + * for localedef for processing by the higher level grammar processor. + */ +#include + +#ifdef __APPLE__ +#include /* WCHAR_MAX */ +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include "localedef.h" +#include "parser.h" + +int com_char = '#'; +int esc_char = '\\'; +int mb_cur_min = 1; +int mb_cur_max = 1; +int lineno = 1; +int warnings = 0; +int is_stdin = 1; +FILE *input; +static int nextline; +//static FILE *input = stdin; +static const char *filename = ""; +static int instring = 0; +static int escaped = 0; + +/* + * Token space ... grows on demand. + */ +static char *token = NULL; +static int tokidx; +static int toksz = 0; +static int hadtok = 0; + +/* + * Wide string space ... grows on demand. + */ +static wchar_t *widestr = NULL; +static int wideidx = 0; +static int widesz = 0; + +/* + * The last keyword seen. This is useful to trigger the special lexer rules + * for "copy" and also collating symbols and elements. 
+ */ +int last_kw = 0; +static int category = T_END; + +static struct token { + int id; + const char *name; +} keywords[] = { + { T_COM_CHAR, "comment_char" }, + { T_ESC_CHAR, "escape_char" }, + { T_END, "END" }, + { T_COPY, "copy" }, + { T_MESSAGES, "LC_MESSAGES" }, + { T_YESSTR, "yesstr" }, + { T_YESEXPR, "yesexpr" }, + { T_NOSTR, "nostr" }, + { T_NOEXPR, "noexpr" }, + { T_MONETARY, "LC_MONETARY" }, + { T_INT_CURR_SYMBOL, "int_curr_symbol" }, + { T_CURRENCY_SYMBOL, "currency_symbol" }, + { T_MON_DECIMAL_POINT, "mon_decimal_point" }, + { T_MON_THOUSANDS_SEP, "mon_thousands_sep" }, + { T_POSITIVE_SIGN, "positive_sign" }, + { T_NEGATIVE_SIGN, "negative_sign" }, + { T_MON_GROUPING, "mon_grouping" }, + { T_INT_FRAC_DIGITS, "int_frac_digits" }, + { T_FRAC_DIGITS, "frac_digits" }, + { T_P_CS_PRECEDES, "p_cs_precedes" }, + { T_P_SEP_BY_SPACE, "p_sep_by_space" }, + { T_N_CS_PRECEDES, "n_cs_precedes" }, + { T_N_SEP_BY_SPACE, "n_sep_by_space" }, + { T_P_SIGN_POSN, "p_sign_posn" }, + { T_N_SIGN_POSN, "n_sign_posn" }, + { T_INT_P_CS_PRECEDES, "int_p_cs_precedes" }, + { T_INT_N_CS_PRECEDES, "int_n_cs_precedes" }, + { T_INT_P_SEP_BY_SPACE, "int_p_sep_by_space" }, + { T_INT_N_SEP_BY_SPACE, "int_n_sep_by_space" }, + { T_INT_P_SIGN_POSN, "int_p_sign_posn" }, + { T_INT_N_SIGN_POSN, "int_n_sign_posn" }, + { T_COLLATE, "LC_COLLATE" }, + { T_COLLATING_SYMBOL, "collating-symbol" }, + { T_COLLATING_ELEMENT, "collating-element" }, + { T_FROM, "from" }, + { T_ORDER_START, "order_start" }, + { T_ORDER_END, "order_end" }, + { T_FORWARD, "forward" }, + { T_BACKWARD, "backward" }, + { T_POSITION, "position" }, + { T_IGNORE, "IGNORE" }, + { T_UNDEFINED, "UNDEFINED" }, + { T_NUMERIC, "LC_NUMERIC" }, + { T_DECIMAL_POINT, "decimal_point" }, + { T_THOUSANDS_SEP, "thousands_sep" }, + { T_GROUPING, "grouping" }, + { T_TIME, "LC_TIME" }, + { T_ABDAY, "abday" }, + { T_DAY, "day" }, + { T_ABMON, "abmon" }, + { T_MON, "mon" }, + { T_D_T_FMT, "d_t_fmt" }, + { T_D_FMT, "d_fmt" }, + { T_T_FMT, "t_fmt" }, + 
{ T_AM_PM, "am_pm" }, + { T_T_FMT_AMPM, "t_fmt_ampm" }, + { T_ERA, "era" }, + { T_ERA_D_FMT, "era_d_fmt" }, + { T_ERA_T_FMT, "era_t_fmt" }, + { T_ERA_D_T_FMT, "era_d_t_fmt" }, + { T_ALT_DIGITS, "alt_digits" }, + { T_CTYPE, "LC_CTYPE" }, + { T_ISUPPER, "upper" }, + { T_ISLOWER, "lower" }, + { T_ISALPHA, "alpha" }, + { T_ISDIGIT, "digit" }, + { T_ISPUNCT, "punct" }, + { T_ISXDIGIT, "xdigit" }, + { T_ISSPACE, "space" }, + { T_ISPRINT, "print" }, + { T_ISGRAPH, "graph" }, + { T_ISBLANK, "blank" }, + { T_ISCNTRL, "cntrl" }, + /* + * These entries are local additions, and not specified by + * TOG. Note that they are not guaranteed to be accurate for + * all locales, and so applications should not depend on them. + */ + { T_ISSPECIAL, "special" }, + { T_ISENGLISH, "english" }, + { T_ISPHONOGRAM, "phonogram" }, + { T_ISIDEOGRAM, "ideogram" }, + { T_ISNUMBER, "number" }, + /* + * We have to support this in the grammar, but it would be a + * syntax error to define a character as one of these without + * also defining it as an alpha or digit. We ignore it in our + * parsing. + */ + { T_ISALNUM, "alnum" }, + { T_TOUPPER, "toupper" }, + { T_TOLOWER, "tolower" }, + + /* + * These are keywords used in the charmap file. Note that + * Solaris originally used angle brackets to wrap some of them, + * but we removed that to simplify our parser. The first of these + * items are "global items." + */ + { T_CHARMAP, "CHARMAP" }, + { T_WIDTH, "WIDTH" }, + + { -1, NULL }, +}; + +/* + * These special words are only used in a charmap file, enclosed in <>. 
#ifdef __APPLE__
/*
 * Push the first len bytes of line back onto the scanner's input.
 *
 * The bytes are handed to unscanc() last-to-first, so that the pushed
 * back data is read again in its original order.  A len of 0 pushes
 * nothing.
 *
 * NOTE(review): this relies on the C library accepting more than one
 * byte of ungetc() pushback -- confirm for any new target platform.
 */
void
scan_enqueue(const char *line, size_t len)
{

	/*
	 * XXX This doesn't do anything to make the line numbers look even
	 * remotely sane, but we just assume for now that `locale -k` will
	 * produce valid output.
	 */
	/*
	 * Count down with an index instead of a pointer: the pointer form
	 * would have to step below `line` to terminate, and would start out
	 * of bounds when len is 0.
	 */
	for (size_t i = len; i > 0; i--) {
		unscanc((unsigned char)line[i - 1]);
	}
}
#endif
/*
 * Map the character that follows the escape character to the character
 * it denotes: the letters n, r, t, f, v, b and a yield the corresponding
 * C control characters; anything else is returned unchanged.
 */
int
get_escaped(int c)
{
	static const struct {
		int	letter;
		int	value;
	} escapes[] = {
		{ 'n', '\n' },
		{ 'r', '\r' },
		{ 't', '\t' },
		{ 'f', '\f' },
		{ 'v', '\v' },
		{ 'b', '\b' },
		{ 'a', '\a' },
	};
	unsigned int k;

	for (k = 0; k < sizeof (escapes) / sizeof (escapes[0]); k++) {
		if (escapes[k].letter == c)
			return (escapes[k].value);
	}
	/* Not a recognised escape letter: the character stands for itself. */
	return (c);
}
*/ + yyerror("unterminated symbolic name"); + continue; + } + if (c == '>') { /* end of symbol */ + + /* + * This restarts the token from the beginning + * the next time we scan a character. (This + * token is complete.) + */ + + if (token == NULL) { + yyerror("missing symbolic name"); + return (T_NULL); + } + tokidx = 0; + + /* + * A few symbols are handled as keywords outside + * of the normal categories. + */ + if (category == T_END) { + int i; + for (i = 0; symwords[i].name != 0; i++) { + if (strcmp(token, symwords[i].name) == + 0) { + last_kw = symwords[i].id; + return (last_kw); + } + } + } + /* + * Contextual rule: Only literal characters are + * permitted in CHARMAP. Anywhere else the symbolic + * forms are fine. + */ + if ((category != T_CHARMAP) && + (lookup_charmap(token, &yylval.wc)) != -1) { + return (T_CHAR); + } + if ((yylval.collsym = lookup_collsym(token)) != NULL) { + return (T_COLLSYM); + } + if ((yylval.collelem = lookup_collelem(token)) != + NULL) { + return (T_COLLELEM); + } + /* its an undefined symbol */ + yylval.token = strdup(token); + token = NULL; + toksz = 0; + tokidx = 0; + return (T_SYMBOL); + } + add_tok(c); + } + + yyerror("unterminated symbolic name"); + return (EOF); +} + +#ifdef __APPLE__ +void +scan_done(void) +{ + + /* + * We must provide a default LC_CTYPE if one hasn't been provided; most + * of that is taken care of in init_ctype(), so we really just need to + * write it out if we haven't yet. + */ + if (!ctype_dumped && !bsd) { + category = T_CTYPE; + + dump_ctype(); + } +} +#endif + +int +get_category(void) +{ + return (category); +} + +static int +consume_token(void) +{ + int len = tokidx; + int i; + + tokidx = 0; + if (token == NULL) + return (T_NULL); + + /* + * this one is special, because we don't want it to alter the + * last_kw field. 
+ */ + if (strcmp(token, "...") == 0) { + return (T_ELLIPSIS); + } + + /* search for reserved words first */ + for (i = 0; keywords[i].name; i++) { + int j; + if (strcmp(keywords[i].name, token) != 0) { + continue; + } + + last_kw = keywords[i].id; + + /* clear the top level category if we're done with it */ + if (last_kw == T_END) { + category = T_END; + } + + /* set the top level category if we're changing */ + for (j = 0; categories[j]; j++) { + if (categories[j] != last_kw) + continue; + category = last_kw; + } + + return (keywords[i].id); + } + + /* maybe it's a numeric constant? */ + if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) { + char *eptr; + yylval.num = strtol(token, &eptr, 10); + if (*eptr != 0) + yyerror("malformed number"); + return (T_NUMBER); +#ifdef __APPLE__ + /* perhaps it's an encoded constant? */ + } else if (*token == '\\') { + char *eptr, *otoken; + long num; + int base; + + otoken = token; + + /* Octal if unspecified. */ + base = 8; + + /* + * Skip the backslash, as well as any prefix that we might be + * processing here. + */ + token++; + if (*token == 'd') { + base = 10; + token++; + } else if (*token == 'x') { + base = 16; + token++; + } + + if (*token != '\0') { + /* + * If we just had \d, \x, or \, then we shouldn't eat + * this as a T_CHAR. + */ + num = strtol(token, &eptr, base); + if (*eptr == '\0' && num >= 0 && num <= WCHAR_MAX) { + yylval.wc = (wchar_t)num; + return (T_CHAR); + } + } + + /* Return to the original position; probably a symbolic name. */ + token = otoken; +#endif + } + + /* + * A single lone character is treated as a character literal. + * To avoid duplication of effort, we stick in the charmap. 
+ */ + if (len == 1) { + yylval.wc = token[0]; + return (T_CHAR); + } + + /* anything else is treated as a symbolic name */ + yylval.token = strdup(token); + token = NULL; + toksz = 0; + tokidx = 0; + return (T_NAME); +} + +void +scan_to_eol(void) +{ + int c; + while ((c = scanc()) != '\n') { + if (c == EOF) { + /* end of file without newline! */ + errf("missing newline"); + return; + } + } + assert(c == '\n'); +} + +int +yylex(void) +{ + int c; + + while ((c = scanc()) != EOF) { + + /* special handling for quoted string */ + if (instring) { + if (escaped) { + escaped = 0; + + /* if newline, just eat and forget it */ + if (c == '\n') + continue; + + if (strchr("xXd01234567", c)) { + unscanc(c); + unscanc(esc_char); + return (get_wide()); + } + yylval.wc = get_escaped(c); + return (T_CHAR); + } + if (c == esc_char) { + escaped = 1; + continue; + } + switch (c) { + case '<': + return (get_symbol()); + case '>': + /* oops! should generate syntax error */ + return (T_GT); + case '"': + instring = 0; + return (T_QUOTE); + default: + yylval.wc = c; + return (T_CHAR); + } + } + + /* escaped characters first */ + if (escaped) { + escaped = 0; + if (c == '\n') { + /* eat the newline */ + continue; + } + hadtok = 1; + if (tokidx) { + /* an escape mid-token is nonsense */ + return (T_NULL); + } + + /* numeric escapes are treated as wide characters */ + if (strchr("xXd01234567", c)) { + unscanc(c); + unscanc(esc_char); + return (get_wide()); + } + + add_tok(get_escaped(c)); + continue; + } + + /* if it is the escape character itself, note it */ + if (c == esc_char) { + escaped = 1; + continue; + } + + /* remove from the comment char to end of line */ + if (c == com_char) { + while (c != '\n') { + if ((c = scanc()) == EOF) { + /* end of file without newline! */ + return (EOF); + } + } + assert(c == '\n'); + if (!hadtok) { + /* + * If there were no tokens on this line, + * then just pretend it didn't exist at all. 
+ */ + continue; + } + hadtok = 0; + return (T_NL); + } + + if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) { + /* + * These are all token delimiters. If there + * is a token already in progress, we need to + * process it. + */ + unscanc(c); + return (consume_token()); + } + + switch (c) { + case '\n': + if (!hadtok) { + /* + * If the line was completely devoid of tokens, + * then just ignore it. + */ + continue; + } + /* we're starting a new line, reset the token state */ + hadtok = 0; + return (T_NL); + case ',': + hadtok = 1; + return (T_COMMA); + case ';': + hadtok = 1; + return (T_SEMI); + case '(': + hadtok = 1; + return (T_LPAREN); + case ')': + hadtok = 1; + return (T_RPAREN); + case '>': + hadtok = 1; + return (T_GT); + case '<': + /* symbol start! */ + hadtok = 1; + return (get_symbol()); + case ' ': + case '\t': + /* whitespace, just ignore it */ + continue; + case '"': + hadtok = 1; + instring = 1; + return (T_QUOTE); + default: + hadtok = 1; + add_tok(c); + continue; + } + } + return (EOF); +} + +void +yyerror(const char *msg) +{ + (void) fprintf(stderr, "%s: %d: error: %s\n", + filename, lineno, msg); + exit(4); +} + +void +errf(const char *fmt, ...) +{ + char *msg; + + va_list va; + va_start(va, fmt); + (void) vasprintf(&msg, fmt, va); + va_end(va); + + (void) fprintf(stderr, "%s: %d: error: %s\n", + filename, lineno, msg); + free(msg); + exit(4); +} + +void +warn(const char *fmt, ...) 
+{ + char *msg; + + va_list va; + va_start(va, fmt); + (void) vasprintf(&msg, fmt, va); + va_end(va); + + (void) fprintf(stderr, "%s: %d: warning: %s\n", + filename, lineno, msg); + free(msg); + warnings++; + if (!warnok) + exit(4); +} diff --git a/localedef/sys/tree.h b/localedef/sys/tree.h new file mode 100644 index 0000000..af05bc6 --- /dev/null +++ b/localedef/sys/tree.h @@ -0,0 +1,1069 @@ +/* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */ +/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ +/* $FreeBSD$ */ + +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _SYS_TREE_H_ +#define _SYS_TREE_H_ + +#include + +#ifdef __APPLE__ +#include + +typedef uintptr_t __uintptr_t; +#endif + +/* + * This file defines data structures for different types of trees: + * splay trees and rank-balanced trees. + * + * A splay tree is a self-organizing data structure. Every operation + * on the tree causes a splay to happen. The splay moves the requested + * node to the root of the tree and partly rebalances it. + * + * This has the benefit that request locality causes faster lookups as + * the requested nodes move to the top of the tree. On the other hand, + * every lookup causes memory writes. + * + * The Balance Theorem bounds the total access time for m operations + * and n inserts on an initially empty tree as O((m + n)lg n). The + * amortized cost for a sequence of m accesses to a splay tree is O(lg n). + * + * A rank-balanced tree is a binary search tree with an integer + * rank-difference as an attribute of each pointer from parent to child. + * The sum of the rank-differences on any path from a node down to null is + * the same, and defines the rank of that node. The rank of the null node + * is -1. + * + * Different additional conditions define different sorts of balanced trees, + * including "red-black" and "AVL" trees. The set of conditions applied here + * are the "weak-AVL" conditions of Haeupler, Sen and Tarjan presented in + * "Rank Balanced Trees", ACM Transactions on Algorithms Volume 11 Issue 4 June + * 2015 Article No.: 30pp 1–26 https://doi.org/10.1145/2689412 (the HST paper): + * - every rank-difference is 1 or 2. + * - the rank of any leaf is 1. + * + * For historical reasons, rank differences that are even are associated + * with the color red (Rank-Even-Difference), and the child that a red edge + * points to is called a red child. + * + * Every operation on a rank-balanced tree is bounded as O(lg n). + * The maximum height of a rank-balanced tree is 2lg (n+1). 
+ */ + +#define SPLAY_HEAD(name, type) \ +struct name { \ + struct type *sph_root; /* root of the tree */ \ +} + +#define SPLAY_INITIALIZER(root) \ + { NULL } + +#define SPLAY_INIT(root) do { \ + (root)->sph_root = NULL; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ENTRY(type) \ +struct { \ + struct type *spe_left; /* left element */ \ + struct type *spe_right; /* right element */ \ +} + +#define SPLAY_LEFT(elm, field) (elm)->field.spe_left +#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right +#define SPLAY_ROOT(head) (head)->sph_root +#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) + +/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ +#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_LINKLEFT(head, tmp, field) do { \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_LINKRIGHT(head, tmp, field) do { \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \ + SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \ + SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ +} while (/*CONSTCOND*/ 0) + +/* Generates prototypes and inline functions */ + +#define SPLAY_PROTOTYPE(name, 
type, field, cmp) \ +void name##_SPLAY(struct name *, struct type *); \ +void name##_SPLAY_MINMAX(struct name *, int); \ +struct type *name##_SPLAY_INSERT(struct name *, struct type *); \ +struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \ + \ +/* Finds the node with the same key as elm */ \ +static __unused __inline struct type * \ +name##_SPLAY_FIND(struct name *head, struct type *elm) \ +{ \ + if (SPLAY_EMPTY(head)) \ + return(NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) \ + return (head->sph_root); \ + return (NULL); \ +} \ + \ +static __unused __inline struct type * \ +name##_SPLAY_NEXT(struct name *head, struct type *elm) \ +{ \ + name##_SPLAY(head, elm); \ + if (SPLAY_RIGHT(elm, field) != NULL) { \ + elm = SPLAY_RIGHT(elm, field); \ + while (SPLAY_LEFT(elm, field) != NULL) { \ + elm = SPLAY_LEFT(elm, field); \ + } \ + } else \ + elm = NULL; \ + return (elm); \ +} \ + \ +static __unused __inline struct type * \ +name##_SPLAY_MIN_MAX(struct name *head, int val) \ +{ \ + name##_SPLAY_MINMAX(head, val); \ + return (SPLAY_ROOT(head)); \ +} + +/* Main splay operation. 
+ * Moves node close to the key of elm to top + */ +#define SPLAY_GENERATE(name, type, field, cmp) \ +struct type * \ +name##_SPLAY_INSERT(struct name *head, struct type *elm) \ +{ \ + if (SPLAY_EMPTY(head)) { \ + SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ + } else { \ + __typeof(cmp(NULL, NULL)) __comp; \ + name##_SPLAY(head, elm); \ + __comp = (cmp)(elm, (head)->sph_root); \ + if (__comp < 0) { \ + SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\ + SPLAY_RIGHT(elm, field) = (head)->sph_root; \ + SPLAY_LEFT((head)->sph_root, field) = NULL; \ + } else if (__comp > 0) { \ + SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\ + SPLAY_LEFT(elm, field) = (head)->sph_root; \ + SPLAY_RIGHT((head)->sph_root, field) = NULL; \ + } else \ + return ((head)->sph_root); \ + } \ + (head)->sph_root = (elm); \ + return (NULL); \ +} \ + \ +struct type * \ +name##_SPLAY_REMOVE(struct name *head, struct type *elm) \ +{ \ + struct type *__tmp; \ + if (SPLAY_EMPTY(head)) \ + return (NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) { \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\ + } else { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\ + name##_SPLAY(head, elm); \ + SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ + } \ + return (elm); \ + } \ + return (NULL); \ +} \ + \ +void \ +name##_SPLAY(struct name *head, struct type *elm) \ +{ \ + struct type __node, *__left, *__right, *__tmp; \ + __typeof(cmp(NULL, NULL)) __comp; \ +\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ + __left = __right = &__node; \ +\ + while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) < 0){ \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if 
(SPLAY_LEFT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) > 0){ \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ +} \ + \ +/* Splay with either the minimum or the maximum element \ + * Used to find minimum or maximum element in tree. \ + */ \ +void name##_SPLAY_MINMAX(struct name *head, int __comp) \ +{ \ + struct type __node, *__left, *__right, *__tmp; \ +\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ + __left = __right = &__node; \ +\ + while (1) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp < 0){ \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp > 0) { \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ +} + +#define SPLAY_NEGINF -1 +#define SPLAY_INF 1 + +#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) +#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) +#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) +#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) +#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \ + : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) +#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? 
NULL \ + : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) + +#define SPLAY_FOREACH(x, name, head) \ + for ((x) = SPLAY_MIN(name, head); \ + (x) != NULL; \ + (x) = SPLAY_NEXT(name, head, x)) + +/* Macros that define a rank-balanced tree */ +#define RB_HEAD(name, type) \ +struct name { \ + struct type *rbh_root; /* root of the tree */ \ +} + +#define RB_INITIALIZER(root) \ + { NULL } + +#define RB_INIT(root) do { \ + (root)->rbh_root = NULL; \ +} while (/*CONSTCOND*/ 0) + +#define RB_ENTRY(type) \ +struct { \ + struct type *rbe_link[3]; \ +} + +/* + * With the expectation that any object of struct type has an + * address that is a multiple of 4, and that therefore the + * 2 least significant bits of a pointer to struct type are + * always zero, this implementation sets those bits to indicate + * that the left or right child of the tree node is "red". + */ +#define _RB_LINK(elm, dir, field) (elm)->field.rbe_link[dir] +#define _RB_UP(elm, field) _RB_LINK(elm, 0, field) +#define _RB_L ((__uintptr_t)1) +#define _RB_R ((__uintptr_t)2) +#define _RB_LR ((__uintptr_t)3) +#define _RB_BITS(elm) (*(__uintptr_t *)&elm) +#define _RB_BITSUP(elm, field) _RB_BITS(_RB_UP(elm, field)) +#define _RB_PTR(elm) (__typeof(elm)) \ + ((__uintptr_t)elm & ~_RB_LR) + +#define RB_PARENT(elm, field) _RB_PTR(_RB_UP(elm, field)) +#define RB_LEFT(elm, field) _RB_LINK(elm, _RB_L, field) +#define RB_RIGHT(elm, field) _RB_LINK(elm, _RB_R, field) +#define RB_ROOT(head) (head)->rbh_root +#define RB_EMPTY(head) (RB_ROOT(head) == NULL) + +#define RB_SET_PARENT(dst, src, field) do { \ + _RB_BITSUP(dst, field) = (__uintptr_t)src | \ + (_RB_BITSUP(dst, field) & _RB_LR); \ +} while (/*CONSTCOND*/ 0) + +#define RB_SET(elm, parent, field) do { \ + _RB_UP(elm, field) = parent; \ + RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ +} while (/*CONSTCOND*/ 0) + +/* + * Either RB_AUGMENT or RB_AUGMENT_CHECK is invoked in a loop at the root of + * every modified subtree, from the bottom up to the root, to update augmented + * 
node data. RB_AUGMENT_CHECK returns true only when the update changes the + * node data, so that updating can be stopped short of the root when it returns + * false. + */ +#ifndef RB_AUGMENT_CHECK +#ifndef RB_AUGMENT +#define RB_AUGMENT_CHECK(x) 0 +#else +#define RB_AUGMENT_CHECK(x) (RB_AUGMENT(x), 1) +#endif +#endif + +#define RB_UPDATE_AUGMENT(elm, field) do { \ + __typeof(elm) rb_update_tmp = (elm); \ + while (RB_AUGMENT_CHECK(rb_update_tmp) && \ + (rb_update_tmp = RB_PARENT(rb_update_tmp, field)) != NULL) \ + ; \ +} while (0) + +#define RB_SWAP_CHILD(head, par, out, in, field) do { \ + if (par == NULL) \ + RB_ROOT(head) = (in); \ + else if ((out) == RB_LEFT(par, field)) \ + RB_LEFT(par, field) = (in); \ + else \ + RB_RIGHT(par, field) = (in); \ +} while (/*CONSTCOND*/ 0) + +/* + * RB_ROTATE macro partially restructures the tree to improve balance. In the + * case when dir is _RB_L, tmp is a right child of elm. After rotation, elm + * is a left child of tmp, and the subtree that represented the items between + * them, which formerly hung to the left of tmp now hangs to the right of elm. + * The parent-child relationship between elm and its former parent is not + * changed; where this macro once updated those fields, that is now left to the + * caller of RB_ROTATE to clean up, so that a pair of rotations does not twice + * update the same pair of pointer fields with distinct values. 
+ */ +#define RB_ROTATE(elm, tmp, dir, field) do { \ + if ((_RB_LINK(elm, dir ^ _RB_LR, field) = \ + _RB_LINK(tmp, dir, field)) != NULL) \ + RB_SET_PARENT(_RB_LINK(tmp, dir, field), elm, field); \ + _RB_LINK(tmp, dir, field) = (elm); \ + RB_SET_PARENT(elm, tmp, field); \ +} while (/*CONSTCOND*/ 0) + +/* Generates prototypes and inline functions */ +#define RB_PROTOTYPE(name, type, field, cmp) \ + RB_PROTOTYPE_INTERNAL(name, type, field, cmp,) +#define RB_PROTOTYPE_STATIC(name, type, field, cmp) \ + RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static) +#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \ + RB_PROTOTYPE_RANK(name, type, attr) \ + RB_PROTOTYPE_INSERT_COLOR(name, type, attr); \ + RB_PROTOTYPE_REMOVE_COLOR(name, type, attr); \ + RB_PROTOTYPE_INSERT_FINISH(name, type, attr); \ + RB_PROTOTYPE_INSERT(name, type, attr); \ + RB_PROTOTYPE_REMOVE(name, type, attr); \ + RB_PROTOTYPE_FIND(name, type, attr); \ + RB_PROTOTYPE_NFIND(name, type, attr); \ + RB_PROTOTYPE_NEXT(name, type, attr); \ + RB_PROTOTYPE_INSERT_NEXT(name, type, attr); \ + RB_PROTOTYPE_PREV(name, type, attr); \ + RB_PROTOTYPE_INSERT_PREV(name, type, attr); \ + RB_PROTOTYPE_MINMAX(name, type, attr); \ + RB_PROTOTYPE_REINSERT(name, type, attr); +#ifdef _RB_DIAGNOSTIC +#define RB_PROTOTYPE_RANK(name, type, attr) \ + attr int name##_RB_RANK(struct type *); +#else +#define RB_PROTOTYPE_RANK(name, type, attr) +#endif +#define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) \ + attr struct type *name##_RB_INSERT_COLOR(struct name *, \ + struct type *, struct type *) +#define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr) \ + attr struct type *name##_RB_REMOVE_COLOR(struct name *, \ + struct type *, struct type *) +#define RB_PROTOTYPE_REMOVE(name, type, attr) \ + attr struct type *name##_RB_REMOVE(struct name *, struct type *) +#define RB_PROTOTYPE_INSERT_FINISH(name, type, attr) \ + attr struct type *name##_RB_INSERT_FINISH(struct name *, \ + struct type *, struct type **, struct type *) 
+#define RB_PROTOTYPE_INSERT(name, type, attr) \ + attr struct type *name##_RB_INSERT(struct name *, struct type *) +#define RB_PROTOTYPE_FIND(name, type, attr) \ + attr struct type *name##_RB_FIND(struct name *, struct type *) +#define RB_PROTOTYPE_NFIND(name, type, attr) \ + attr struct type *name##_RB_NFIND(struct name *, struct type *) +#define RB_PROTOTYPE_NEXT(name, type, attr) \ + attr struct type *name##_RB_NEXT(struct type *) +#define RB_PROTOTYPE_INSERT_NEXT(name, type, attr) \ + attr struct type *name##_RB_INSERT_NEXT(struct name *, \ + struct type *, struct type *) +#define RB_PROTOTYPE_PREV(name, type, attr) \ + attr struct type *name##_RB_PREV(struct type *) +#define RB_PROTOTYPE_INSERT_PREV(name, type, attr) \ + attr struct type *name##_RB_INSERT_PREV(struct name *, \ + struct type *, struct type *) +#define RB_PROTOTYPE_MINMAX(name, type, attr) \ + attr struct type *name##_RB_MINMAX(struct name *, int) +#define RB_PROTOTYPE_REINSERT(name, type, attr) \ + attr struct type *name##_RB_REINSERT(struct name *, struct type *) + +/* Main rb operation. 
+ * Moves node close to the key of elm to top + */ +#define RB_GENERATE(name, type, field, cmp) \ + RB_GENERATE_INTERNAL(name, type, field, cmp,) +#define RB_GENERATE_STATIC(name, type, field, cmp) \ + RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static) +#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \ + RB_GENERATE_RANK(name, type, field, attr) \ + RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ + RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ + RB_GENERATE_INSERT_FINISH(name, type, field, attr) \ + RB_GENERATE_INSERT(name, type, field, cmp, attr) \ + RB_GENERATE_REMOVE(name, type, field, attr) \ + RB_GENERATE_FIND(name, type, field, cmp, attr) \ + RB_GENERATE_NFIND(name, type, field, cmp, attr) \ + RB_GENERATE_NEXT(name, type, field, attr) \ + RB_GENERATE_INSERT_NEXT(name, type, field, cmp, attr) \ + RB_GENERATE_PREV(name, type, field, attr) \ + RB_GENERATE_INSERT_PREV(name, type, field, cmp, attr) \ + RB_GENERATE_MINMAX(name, type, field, attr) \ + RB_GENERATE_REINSERT(name, type, field, cmp, attr) + +#ifdef _RB_DIAGNOSTIC +#ifndef RB_AUGMENT +#define _RB_AUGMENT_VERIFY(x) RB_AUGMENT_CHECK(x) +#else +#define _RB_AUGMENT_VERIFY(x) 0 +#endif +#define RB_GENERATE_RANK(name, type, field, attr) \ +/* \ + * Return the rank of the subtree rooted at elm, or -1 if the subtree \ + * is not rank-balanced, or has inconsistent augmentation data. + */ \ +attr int \ +name##_RB_RANK(struct type *elm) \ +{ \ + struct type *left, *right, *up; \ + int left_rank, right_rank; \ + \ + if (elm == NULL) \ + return (0); \ + up = _RB_UP(elm, field); \ + left = RB_LEFT(elm, field); \ + left_rank = ((_RB_BITS(up) & _RB_L) ? 2 : 1) + \ + name##_RB_RANK(left); \ + right = RB_RIGHT(elm, field); \ + right_rank = ((_RB_BITS(up) & _RB_R) ? 
2 : 1) + \ + name##_RB_RANK(right); \ + if (left_rank != right_rank || \ + (left_rank == 2 && left == NULL && right == NULL) || \ + _RB_AUGMENT_VERIFY(elm)) \ + return (-1); \ + return (left_rank); \ +} +#else +#define RB_GENERATE_RANK(name, type, field, attr) +#endif + +#define RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ +attr struct type * \ +name##_RB_INSERT_COLOR(struct name *head, \ + struct type *parent, struct type *elm) \ +{ \ + /* \ + * Initially, elm is a leaf. Either its parent was previously \ + * a leaf, with two black null children, or an interior node \ + * with a black non-null child and a red null child. The \ + * balance criterion "the rank of any leaf is 1" precludes the \ + * possibility of two red null children for the initial parent. \ + * So the first loop iteration cannot lead to accessing an \ + * uninitialized 'child', and a later iteration can only happen \ + * when a value has been assigned to 'child' in the previous \ + * one. \ + */ \ + struct type *child, *child_up, *gpar; \ + __uintptr_t elmdir, sibdir; \ + \ + do { \ + /* the rank of the tree rooted at elm grew */ \ + gpar = _RB_UP(parent, field); \ + elmdir = RB_RIGHT(parent, field) == elm ? _RB_R : _RB_L; \ + if (_RB_BITS(gpar) & elmdir) { \ + /* shorten the parent-elm edge to rebalance */ \ + _RB_BITSUP(parent, field) ^= elmdir; \ + return (NULL); \ + } \ + sibdir = elmdir ^ _RB_LR; \ + /* the other edge must change length */ \ + _RB_BITSUP(parent, field) ^= sibdir; \ + if ((_RB_BITS(gpar) & _RB_LR) == 0) { \ + /* both edges now short, retry from parent */ \ + child = elm; \ + elm = parent; \ + continue; \ + } \ + _RB_UP(parent, field) = gpar = _RB_PTR(gpar); \ + if (_RB_BITSUP(elm, field) & elmdir) { \ + /* \ + * Exactly one of the edges descending from elm \ + * is long. The long one is in the same \ + * direction as the edge from parent to elm, \ + * so change that by rotation. The edge from \ + * parent to z was shortened above. 
Shorten \ + * the long edge down from elm, and adjust \ + * other edge lengths based on the downward \ + * edges from 'child'. \ + * \ + * par par \ + * / \ / \ \ + * elm z / z \ + * / \ child \ + * / child / \ \ + * / / \ elm \ \ + * w / \ / \ y \ + * x y w \ \ + * x \ + */ \ + RB_ROTATE(elm, child, elmdir, field); \ + child_up = _RB_UP(child, field); \ + if (_RB_BITS(child_up) & sibdir) \ + _RB_BITSUP(parent, field) ^= elmdir; \ + if (_RB_BITS(child_up) & elmdir) \ + _RB_BITSUP(elm, field) ^= _RB_LR; \ + else \ + _RB_BITSUP(elm, field) ^= elmdir; \ + /* if child is a leaf, don't augment elm, \ + * since it is restored to be a leaf again. */ \ + if ((_RB_BITS(child_up) & _RB_LR) == 0) \ + elm = child; \ + } else \ + child = elm; \ + \ + /* \ + * The long edge descending from 'child' points back \ + * in the direction of 'parent'. Rotate to make \ + * 'parent' a child of 'child', then make both edges \ + * of 'child' short to rebalance. \ + * \ + * par child \ + * / \ / \ \ + * / z x par \ + * child / \ \ + * / \ / z \ + * x \ y \ + * y \ + */ \ + RB_ROTATE(parent, child, sibdir, field); \ + _RB_UP(child, field) = gpar; \ + RB_SWAP_CHILD(head, gpar, parent, child, field); \ + /* \ + * Elements rotated down have new, smaller subtrees, \ + * so update augmentation for them. \ + */ \ + if (elm != child) \ + (void)RB_AUGMENT_CHECK(elm); \ + (void)RB_AUGMENT_CHECK(parent); \ + return (child); \ + } while ((parent = gpar) != NULL); \ + return (NULL); \ +} + +#ifndef RB_STRICT_HST +/* + * In REMOVE_COLOR, the HST paper, in figure 3, in the single-rotate case, has + * 'parent' with one higher rank, and then reduces its rank if 'parent' has + * become a leaf. This implementation always has the parent in its new position + * with lower rank, to avoid the leaf check. Define RB_STRICT_HST to 1 to get + * the behavior that HST describes. 
+ */ +#define RB_STRICT_HST 0 +#endif + +#define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ +attr struct type * \ +name##_RB_REMOVE_COLOR(struct name *head, \ + struct type *parent, struct type *elm) \ +{ \ + struct type *gpar, *sib, *up; \ + __uintptr_t elmdir, sibdir; \ + \ + if (RB_RIGHT(parent, field) == elm && \ + RB_LEFT(parent, field) == elm) { \ + /* Deleting a leaf that is an only-child creates a \ + * rank-2 leaf. Demote that leaf. */ \ + _RB_UP(parent, field) = _RB_PTR(_RB_UP(parent, field)); \ + elm = parent; \ + if ((parent = _RB_UP(elm, field)) == NULL) \ + return (NULL); \ + } \ + do { \ + /* the rank of the tree rooted at elm shrank */ \ + gpar = _RB_UP(parent, field); \ + elmdir = RB_RIGHT(parent, field) == elm ? _RB_R : _RB_L; \ + _RB_BITS(gpar) ^= elmdir; \ + if (_RB_BITS(gpar) & elmdir) { \ + /* lengthen the parent-elm edge to rebalance */ \ + _RB_UP(parent, field) = gpar; \ + return (NULL); \ + } \ + if (_RB_BITS(gpar) & _RB_LR) { \ + /* shorten other edge, retry from parent */ \ + _RB_BITS(gpar) ^= _RB_LR; \ + _RB_UP(parent, field) = gpar; \ + gpar = _RB_PTR(gpar); \ + continue; \ + } \ + sibdir = elmdir ^ _RB_LR; \ + sib = _RB_LINK(parent, sibdir, field); \ + up = _RB_UP(sib, field); \ + _RB_BITS(up) ^= _RB_LR; \ + if ((_RB_BITS(up) & _RB_LR) == 0) { \ + /* shorten edges descending from sib, retry */ \ + _RB_UP(sib, field) = up; \ + continue; \ + } \ + if ((_RB_BITS(up) & sibdir) == 0) { \ + /* \ + * The edge descending from 'sib' away from \ + * 'parent' is long. The short edge descending \ + * from 'sib' toward 'parent' points to 'elm*' \ + * Rotate to make 'sib' a child of 'elm*' \ + * then adjust the lengths of the edges \ + * descending from 'sib' and 'elm*'. \ + * \ + * par par \ + * / \ / \ \ + * / sib elm \ \ + * / / \ elm* \ + * elm elm* \ / \ \ + * / \ \ / \ \ + * / \ z / \ \ + * x y x sib \ + * / \ \ + * / z \ + * y \ + */ \ + elm = _RB_LINK(sib, elmdir, field); \ + /* elm is a 1-child. First rotate at elm. 
*/ \ + RB_ROTATE(sib, elm, sibdir, field); \ + up = _RB_UP(elm, field); \ + _RB_BITSUP(parent, field) ^= \ + (_RB_BITS(up) & elmdir) ? _RB_LR : elmdir; \ + _RB_BITSUP(sib, field) ^= \ + (_RB_BITS(up) & sibdir) ? _RB_LR : sibdir; \ + _RB_BITSUP(elm, field) |= _RB_LR; \ + } else { \ + if ((_RB_BITS(up) & elmdir) == 0 && \ + RB_STRICT_HST && elm != NULL) { \ + /* if parent does not become a leaf, \ + do not demote parent yet. */ \ + _RB_BITSUP(parent, field) ^= sibdir; \ + _RB_BITSUP(sib, field) ^= _RB_LR; \ + } else if ((_RB_BITS(up) & elmdir) == 0) { \ + /* demote parent. */ \ + _RB_BITSUP(parent, field) ^= elmdir; \ + _RB_BITSUP(sib, field) ^= sibdir; \ + } else \ + _RB_BITSUP(sib, field) ^= sibdir; \ + elm = sib; \ + } \ + \ + /* \ + * The edge descending from 'elm' away from 'parent' \ + * is short. Rotate to make 'parent' a child of 'elm', \ + * then lengthen the short edges descending from \ + * 'parent' and 'elm' to rebalance. \ + * \ + * par elm \ + * / \ / \ \ + * e \ / \ \ + * elm / \ \ + * / \ par s \ + * / \ / \ \ + * / \ e \ \ + * x s x \ + */ \ + RB_ROTATE(parent, elm, elmdir, field); \ + RB_SET_PARENT(elm, gpar, field); \ + RB_SWAP_CHILD(head, gpar, parent, elm, field); \ + /* \ + * An element rotated down, but not into the search \ + * path has a new, smaller subtree, so update \ + * augmentation for it. 
\ + */ \ + if (sib != elm) \ + (void)RB_AUGMENT_CHECK(sib); \ + return (parent); \ + } while (elm = parent, (parent = gpar) != NULL); \ + return (NULL); \ +}

/*
 * Run RB_AUGMENT_CHECK on 'elm' and then on each successive parent,
 * stopping at the root or as soon as RB_AUGMENT_CHECK evaluates false.
 * If the walk visits 'match', 'match' is set to NULL so the caller can
 * tell that node has already been checked.
 */
#define _RB_AUGMENT_WALK(elm, match, field) \
do { \
	if (match == elm) \
		match = NULL; \
} while (RB_AUGMENT_CHECK(elm) && \
    (elm = RB_PARENT(elm, field)) != NULL)

/*
 * Generates name##_RB_REMOVE(head, out): unlinks 'out' from the tree and
 * returns 'out'.  When 'out' has at most one child, that child (possibly
 * NULL) takes its place; otherwise the leftmost node of its right subtree
 * ('in') is moved into its position.  name##_RB_REMOVE_COLOR is then called
 * on the affected parent and augmentation is refreshed along the path.
 */
#define RB_GENERATE_REMOVE(name, type, field, attr) \
attr struct type * \
name##_RB_REMOVE(struct name *head, struct type *out) \
{ \
	struct type *child, *in, *opar, *parent; \
	\
	child = RB_LEFT(out, field); \
	in = RB_RIGHT(out, field); \
	opar = _RB_UP(out, field); \
	if (in == NULL || child == NULL) { \
		in = child = (in == NULL ? child : in); \
		parent = opar = _RB_PTR(opar); \
	} else { \
		parent = in; \
		while (RB_LEFT(in, field)) \
			in = RB_LEFT(in, field); \
		RB_SET_PARENT(child, in, field); \
		RB_LEFT(in, field) = child; \
		child = RB_RIGHT(in, field); \
		if (parent != in) { \
			RB_SET_PARENT(parent, in, field); \
			RB_RIGHT(in, field) = parent; \
			parent = RB_PARENT(in, field); \
			RB_LEFT(parent, field) = child; \
		} \
		_RB_UP(in, field) = opar; \
		opar = _RB_PTR(opar); \
	} \
	RB_SWAP_CHILD(head, opar, out, in, field); \
	if (child != NULL) \
		_RB_UP(child, field) = parent; \
	if (parent != NULL) { \
		opar = name##_RB_REMOVE_COLOR(head, parent, child); \
		/* if rotation has made 'parent' the root of the same \
		 * subtree as before, don't re-augment it. */ \
		if (parent == in && RB_LEFT(parent, field) == NULL) { \
			opar = NULL; \
			parent = RB_PARENT(parent, field); \
		} \
		_RB_AUGMENT_WALK(parent, opar, field); \
		if (opar != NULL) { \
			/* \
			 * Elements rotated into the search path have \
			 * changed subtrees, so update augmentation for \
			 * them if AUGMENT_WALK didn't. \
			 */ \
			(void)RB_AUGMENT_CHECK(opar); \
			(void)RB_AUGMENT_CHECK(RB_PARENT(opar, field)); \
		} \
	} \
	return (out); \
}

/*
 * Generates name##_RB_INSERT_FINISH(head, parent, pptr, elm): initialises
 * 'elm' as a child of 'parent' with RB_SET, stores it through the child
 * slot '*pptr', calls name##_RB_INSERT_COLOR when there is a parent, and
 * refreshes augmentation from 'elm' upward.  Always returns NULL.
 */
#define RB_GENERATE_INSERT_FINISH(name, type, field, attr) \
/* Inserts a node into the RB tree */ \
attr struct type * \
name##_RB_INSERT_FINISH(struct name *head, struct type *parent, \
    struct type **pptr, struct type *elm) \
{ \
	struct type *tmp = NULL; \
	\
	RB_SET(elm, parent, field); \
	*pptr = elm; \
	if (parent != NULL) \
		tmp = name##_RB_INSERT_COLOR(head, parent, elm); \
	_RB_AUGMENT_WALK(elm, tmp, field); \
	if (tmp != NULL) \
		/* \
		 * An element rotated into the search path has a \
		 * changed subtree, so update augmentation for it if \
		 * AUGMENT_WALK didn't. \
		 */ \
		(void)RB_AUGMENT_CHECK(tmp); \
	return (NULL); \
}

/*
 * Generates name##_RB_INSERT(head, elm): descends from the root comparing
 * with cmp(elm, node).  If a node compares equal, that existing node is
 * returned and 'elm' is not inserted; otherwise 'elm' is linked at the
 * empty slot reached and the result of name##_RB_INSERT_FINISH (NULL) is
 * returned.
 */
#define RB_GENERATE_INSERT(name, type, field, cmp, attr) \
/* Inserts a node into the RB tree */ \
attr struct type * \
name##_RB_INSERT(struct name *head, struct type *elm) \
{ \
	struct type *tmp; \
	struct type **tmpp = &RB_ROOT(head); \
	struct type *parent = NULL; \
	\
	while ((tmp = *tmpp) != NULL) { \
		parent = tmp; \
		__typeof(cmp(NULL, NULL)) comp = (cmp)(elm, parent); \
		if (comp < 0) \
			tmpp = &RB_LEFT(parent, field); \
		else if (comp > 0) \
			tmpp = &RB_RIGHT(parent, field); \
		else \
			return (parent); \
	} \
	return (name##_RB_INSERT_FINISH(head, parent, tmpp, elm)); \
}

/*
 * Generates name##_RB_FIND(head, elm): returns the node for which
 * cmp(elm, node) is 0, or NULL when the descent falls off the tree.
 */
#define RB_GENERATE_FIND(name, type, field, cmp, attr) \
/* Finds the node with the same key as elm */ \
attr struct type * \
name##_RB_FIND(struct name *head, struct type *elm) \
{ \
	struct type *tmp = RB_ROOT(head); \
	__typeof(cmp(NULL, NULL)) comp; \
	while (tmp) { \
		comp = cmp(elm, tmp); \
		if (comp < 0) \
			tmp = RB_LEFT(tmp, field); \
		else if (comp > 0) \
			tmp = RB_RIGHT(tmp, field); \
		else \
			return (tmp); \
	} \
	return (NULL); \
}

/*
 * Generates name##_RB_NFIND(head, elm): like FIND, but when no node
 * compares equal it returns the last node visited for which
 * cmp(elm, node) < 0 (remembered in 'res'), or NULL if there was none.
 */
#define RB_GENERATE_NFIND(name, type, field, cmp, attr) \
/* Finds the first node greater than or equal to the search key */ \
attr struct type * \
name##_RB_NFIND(struct name *head, struct type *elm) \
{ \
	struct type *tmp = RB_ROOT(head); \
	struct type *res = NULL; \
	__typeof(cmp(NULL, NULL)) comp; \
	while (tmp) { \
		comp = cmp(elm, tmp); \
		if (comp < 0) { \
			res = tmp; \
			tmp = RB_LEFT(tmp, field); \
		} \
		else if (comp > 0) \
			tmp = RB_RIGHT(tmp, field); \
		else \
			return (tmp); \
	} \
	return (res); \
}

/*
 * Generates name##_RB_NEXT(elm): the in-order successor of 'elm'.  With a
 * right child this is the leftmost node of the right subtree; otherwise it
 * is the first ancestor reached from a left child, or NULL if there is none.
 */
#define RB_GENERATE_NEXT(name, type, field, attr) \
/* ARGSUSED */ \
attr struct type * \
name##_RB_NEXT(struct type *elm) \
{ \
	if (RB_RIGHT(elm, field)) { \
		elm = RB_RIGHT(elm, field); \
		while (RB_LEFT(elm, field)) \
			elm = RB_LEFT(elm, field); \
	} else { \
		while (RB_PARENT(elm, field) && \
		    (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \
			elm = RB_PARENT(elm, field); \
		elm = RB_PARENT(elm, field); \
	} \
	return (elm); \
}

/*
 * Ordering assertion used by INSERT_NEXT / INSERT_PREV.  Only when both
 * _KERNEL and DIAGNOSTIC are defined does it assert cmp(lo, hi) < 0;
 * otherwise it expands to an empty statement.
 */
#if defined(_KERNEL) && defined(DIAGNOSTIC)
#define _RB_ORDER_CHECK(cmp, lo, hi) do { \
	KASSERT((cmp)(lo, hi) < 0, ("out of order insertion")); \
} while (0)
#else
#define _RB_ORDER_CHECK(cmp, lo, hi) do {} while (0)
#endif

/*
 * Generates name##_RB_INSERT_NEXT(head, elm, next): links 'next' directly
 * after 'elm' in order, at the leftmost empty slot of elm's right subtree.
 * 'cmp' is only consulted by the diagnostic order checks.  Returns the
 * result of name##_RB_INSERT_FINISH.
 */
#define RB_GENERATE_INSERT_NEXT(name, type, field, cmp, attr) \
/* Inserts a node into the next position in the RB tree */ \
attr struct type * \
name##_RB_INSERT_NEXT(struct name *head, \
    struct type *elm, struct type *next) \
{ \
	struct type *tmp; \
	struct type **tmpp = &RB_RIGHT(elm, field); \
	\
	_RB_ORDER_CHECK(cmp, elm, next); \
	if (name##_RB_NEXT(elm) != NULL) \
		_RB_ORDER_CHECK(cmp, next, name##_RB_NEXT(elm)); \
	while ((tmp = *tmpp) != NULL) { \
		elm = tmp; \
		tmpp = &RB_LEFT(elm, field); \
	} \
	return (name##_RB_INSERT_FINISH(head, elm, tmpp, next)); \
}

/*
 * Generates name##_RB_PREV(elm): the in-order predecessor of 'elm'; the
 * mirror image of name##_RB_NEXT.  Returns NULL when there is none.
 */
#define RB_GENERATE_PREV(name, type, field, attr) \
/* ARGSUSED */ \
attr struct type * \
name##_RB_PREV(struct type *elm) \
{ \
	if (RB_LEFT(elm, field)) { \
		elm = RB_LEFT(elm, field); \
		while (RB_RIGHT(elm, field)) \
			elm = RB_RIGHT(elm, field); \
	} else { \
		while (RB_PARENT(elm, field) && \
		    (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
			elm = RB_PARENT(elm, field); \
		elm = RB_PARENT(elm, field); \
	} \
	return (elm); \
}

/*
 * Generates name##_RB_INSERT_PREV(head, elm, prev): links 'prev' directly
 * before 'elm' in order, at the rightmost empty slot of elm's left subtree.
 * Mirror image of name##_RB_INSERT_NEXT.
 */
#define RB_GENERATE_INSERT_PREV(name, type, field, cmp, attr) \
/* Inserts a node into the prev position in the RB tree */ \
attr struct type * \
name##_RB_INSERT_PREV(struct name *head, \
    struct type *elm, struct type *prev) \
{ \
	struct type *tmp; \
	struct type **tmpp = &RB_LEFT(elm, field); \
	\
	_RB_ORDER_CHECK(cmp, prev, elm); \
	if (name##_RB_PREV(elm) != NULL) \
		_RB_ORDER_CHECK(cmp, name##_RB_PREV(elm), prev); \
	while ((tmp = *tmpp) != NULL) { \
		elm = tmp; \
		tmpp = &RB_RIGHT(elm, field); \
	} \
	return (name##_RB_INSERT_FINISH(head, elm, tmpp, prev)); \
}

/*
 * Generates name##_RB_MINMAX(head, val): follows left links when val < 0
 * and right links otherwise, returning the last node reached (the minimum
 * or maximum), or NULL for an empty tree.
 */
#define RB_GENERATE_MINMAX(name, type, field, attr) \
attr struct type * \
name##_RB_MINMAX(struct name *head, int val) \
{ \
	struct type *tmp = RB_ROOT(head); \
	struct type *parent = NULL; \
	while (tmp) { \
		parent = tmp; \
		if (val < 0) \
			tmp = RB_LEFT(tmp, field); \
		else \
			tmp = RB_RIGHT(tmp, field); \
	} \
	return (parent); \
}

/*
 * Generates name##_RB_REINSERT(head, elm): if 'elm' no longer sorts
 * strictly between its in-order neighbours, it is removed and inserted
 * again and the RB_INSERT result is returned; otherwise returns NULL.
 */
#define RB_GENERATE_REINSERT(name, type, field, cmp, attr) \
attr struct type * \
name##_RB_REINSERT(struct name *head, struct type *elm) \
{ \
	struct type *cmpelm; \
	if (((cmpelm = RB_PREV(name, head, elm)) != NULL && \
	    cmp(cmpelm, elm) >= 0) || \
	    ((cmpelm = RB_NEXT(name, head, elm)) != NULL && \
	    cmp(elm, cmpelm) >= 0)) { \
		/* XXXLAS: Remove/insert is heavy handed. */ \
		RB_REMOVE(name, head, elm); \
		return (RB_INSERT(name, head, elm)); \
	} \
	return (NULL); \
} \

/* Direction arguments for name##_RB_MINMAX: negative = minimum. */
#define RB_NEGINF -1
#define RB_INF 1

/* Front-end macros mapping the generic names onto the generated functions. */
#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
#define RB_INSERT_NEXT(name, x, y, z) name##_RB_INSERT_NEXT(x, y, z)
#define RB_INSERT_PREV(name, x, y, z) name##_RB_INSERT_PREV(x, y, z)
#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y)
#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
#define RB_PREV(name, x, y) name##_RB_PREV(y)
#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
#define RB_REINSERT(name, x, y) name##_RB_REINSERT(x, y)

/* In-order traversal from the minimum; 'x' must not be removed in the body. */
#define RB_FOREACH(x, name, head) \
	for ((x) = RB_MIN(name, head); \
	     (x) != NULL; \
	     (x) = name##_RB_NEXT(x))

/*
 * In-order traversal starting at 'y'.  The successor is stored in 'y'
 * before the body runs, so 'y' is overwritten as the loop advances.
 */
#define RB_FOREACH_FROM(x, name, y) \
	for ((x) = (y); \
	    ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \
	     (x) = (y))

/*
 * In-order traversal from the minimum that fetches the successor into 'y'
 * before the body runs.
 */
#define RB_FOREACH_SAFE(x, name, head, y) \
	for ((x) = RB_MIN(name, head); \
	    ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \
	     (x) = (y))

/* Reverse-order counterparts of the three loops above. */
#define RB_FOREACH_REVERSE(x, name, head) \
	for ((x) = RB_MAX(name, head); \
	     (x) != NULL; \
	     (x) = name##_RB_PREV(x))

#define RB_FOREACH_REVERSE_FROM(x, name, y) \
	for ((x) = (y); \
	    ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \
	     (x) = (y))

#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \
	for ((x) = RB_MAX(name, head); \
	    ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \
	     (x) = (y))

#endif /* _SYS_TREE_H_ */
diff --git a/localedef/time.c b/localedef/time.c new file mode 100644 index 0000000..698dcf5 --- /dev/null +++ b/localedef/time.c @@ -0,0 +1,302 @@ +/*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * LC_TIME database generation routines for localedef. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include "localedef.h" +#include "parser.h" +#include "timelocal.h" + +struct lc_time_T tm; + +void +init_time(void) +{ + (void) memset(&tm, 0, sizeof (tm)); +} + +void +add_time_str(wchar_t *wcs) +{ + char *str; + + if ((str = to_mb_string(wcs)) == NULL) { + INTERR; + return; + } + free(wcs); + + switch (last_kw) { + case T_D_T_FMT: + tm.c_fmt = str; + break; + case T_D_FMT: + tm.x_fmt = str; + break; + case T_T_FMT: + tm.X_fmt = str; + break; + case T_T_FMT_AMPM: + tm.ampm_fmt = str; + break; + case T_DATE_FMT: + /* + * This one is a Solaris extension, Too bad date just + * doesn't use %c, which would be simpler. + */ + tm.date_fmt = str; + break; + case T_ERA_D_FMT: + case T_ERA_T_FMT: + case T_ERA_D_T_FMT: + /* Silently ignore it. */ + free(str); + break; + default: + free(str); + INTERR; + break; + } +} + +static void +add_list(const char *ptr[], char *str, int limit) +{ + int i; + for (i = 0; i < limit; i++) { + if (ptr[i] == NULL) { + ptr[i] = str; + return; + } + } + fprintf(stderr,"too many list elements\n"); +} + +void +add_time_list(wchar_t *wcs) +{ + char *str; + + if ((str = to_mb_string(wcs)) == NULL) { + INTERR; + return; + } + free(wcs); + + switch (last_kw) { + case T_ABMON: + add_list(tm.mon, str, 12); + break; + case T_MON: + add_list(tm.month, str, 12); + break; + case T_ABDAY: + add_list(tm.wday, str, 7); + break; + case T_DAY: + add_list(tm.weekday, str, 7); + break; + case T_AM_PM: + if (tm.am == NULL) { + tm.am = str; + } else if (tm.pm == NULL) { + tm.pm = str; + } else { + fprintf(stderr,"too many list elements\n"); + free(str); + } + break; + case T_ALT_DIGITS: + case T_ERA: + free(str); + break; + default: + free(str); + INTERR; + break; + } +} + +void +check_time_list(void) +{ + switch (last_kw) { + case T_ABMON: + if (tm.mon[11] != NULL) + return; + break; + case T_MON: + if (tm.month[11] != NULL) + return; + break; + case T_ABDAY: + if (tm.wday[6] != NULL) + 
return; + break; + case T_DAY: + if (tm.weekday[6] != NULL) + return; + break; + case T_AM_PM: + if (tm.pm != NULL) + return; + break; + case T_ERA: + case T_ALT_DIGITS: + return; + default: + fprintf(stderr,"unknown list\n"); + break; + } + + fprintf(stderr,"too few items in list (%d)\n", last_kw); +} + +void +reset_time_list(void) +{ + int i; + switch (last_kw) { + case T_ABMON: + for (i = 0; i < 12; i++) { + free((char *)tm.mon[i]); + tm.mon[i] = NULL; + } + break; + case T_MON: + for (i = 0; i < 12; i++) { + free((char *)tm.month[i]); + tm.month[i] = NULL; + } + break; + case T_ABDAY: + for (i = 0; i < 7; i++) { + free((char *)tm.wday[i]); + tm.wday[i] = NULL; + } + break; + case T_DAY: + for (i = 0; i < 7; i++) { + free((char *)tm.weekday[i]); + tm.weekday[i] = NULL; + } + break; + case T_AM_PM: + free((char *)tm.am); + tm.am = NULL; + free((char *)tm.pm); + tm.pm = NULL; + break; + } +} + +void +dump_time(void) +{ + FILE *f; + int i; + + if ((f = open_category()) == NULL) { + return; + } + + for (i = 0; i < 12; i++) { + if (putl_category(tm.mon[i], f) == EOF) { + return; + } + } + for (i = 0; i < 12; i++) { + if (putl_category(tm.month[i], f) == EOF) { + return; + } + } + for (i = 0; i < 7; i++) { + if (putl_category(tm.wday[i], f) == EOF) { + return; + } + } + for (i = 0; i < 7; i++) { + if (putl_category(tm.weekday[i], f) == EOF) { + return; + } + } + +#ifdef __APPLE__ + if ((putl_category(tm.X_fmt, f) == EOF) || + (putl_category(tm.x_fmt, f) == EOF) || + (putl_category(tm.c_fmt, f) == EOF) || + (putl_category(tm.am, f) == EOF) || + (putl_category(tm.pm, f) == EOF) || + (putl_category(tm.date_fmt ? tm.date_fmt : tm.c_fmt, f) == EOF)) { + return; + } + + for (i = 0; i < 12; i++) { + if (putl_category(tm.month[i], f) == EOF) { + return; + } + } + + /* + * Historically, localedef(1) here has not read an md_order from its + * source files, so we can just gloss over that here and do what it + * did: write out a default of "md". 
+ */ + if (putl_category("md", f) == EOF || + putl_category(tm.ampm_fmt, f) == EOF) { + return; + } +#else /* !__APPLE__ */ + /* + * NOTE: If date_fmt is not specified, then we'll default to + * using the %c for date. This is reasonable for most + * locales, although for reasons that I don't understand + * Solaris historically has had a separate format for date. + */ + if ((putl_category(tm.X_fmt, f) == EOF) || + (putl_category(tm.x_fmt, f) == EOF) || + (putl_category(tm.c_fmt, f) == EOF) || + (putl_category(tm.am, f) == EOF) || + (putl_category(tm.pm, f) == EOF) || + (putl_category(tm.date_fmt ? tm.date_fmt : tm.c_fmt, f) == EOF) || + (putl_category(tm.ampm_fmt, f) == EOF)) { + return; + } +#endif /* __APPLE__ */ + close_category(f); +} diff --git a/localedef/wide.c b/localedef/wide.c new file mode 100644 index 0000000..062e120 --- /dev/null +++ b/localedef/wide.c @@ -0,0 +1,664 @@ +/*- + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2012 Garrett D'Amore All rights reserved. + * Copyright 2015 John Marino + * + * This source code is derived from the illumos localedef command, and + * provided under BSD-style license terms by Nexenta Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * The functions in this file convert from the standard multibyte forms + * to the wide character forms used internally by libc. Unfortunately, + * this approach means that we need a method for each and every encoding. + */ +#include +#include +#include +#include +#include +#include +#include "localedef.h" + +static int towide_none(wchar_t *, const char *, unsigned); +static int towide_utf8(wchar_t *, const char *, unsigned); +static int towide_big5(wchar_t *, const char *, unsigned); +static int towide_gbk(wchar_t *, const char *, unsigned); +static int towide_gb2312(wchar_t *, const char *, unsigned); +static int towide_gb18030(wchar_t *, const char *, unsigned); +static int towide_mskanji(wchar_t *, const char *, unsigned); +static int towide_euccn(wchar_t *, const char *, unsigned); +static int towide_eucjp(wchar_t *, const char *, unsigned); +static int towide_euckr(wchar_t *, const char *, unsigned); +static int towide_euctw(wchar_t *, const char *, unsigned); + +static int tomb_none(char *, wchar_t); +static int tomb_utf8(char *, wchar_t); +static int tomb_mbs(char *, wchar_t); + +static int (*_towide)(wchar_t *, const char *, unsigned) = towide_none; +static int (*_tomb)(char *, wchar_t) = tomb_none; +static char _encoding_buffer[20] = {'N','O','N','E'}; +static const char *_encoding = _encoding_buffer; +static int _nbits = 7; + +/* + * Table of supported encodings. 
We only bother to list the multibyte + * encodings here, because single byte locales are handed by "NONE". + */ +static struct { + const char *name; + /* the name that the underlying libc implemenation uses */ + const char *cname; + /* the maximum number of bits required for priorities */ + int nbits; + int (*towide)(wchar_t *, const char *, unsigned); + int (*tomb)(char *, wchar_t); +} mb_encodings[] = { + /* + * UTF8 values max out at 0x1fffff (although in theory there could + * be later extensions, but it won't happen.) This means we only need + * 21 bits to be able to encode the entire range of priorities. + */ + { "UTF-8", "UTF-8", 21, towide_utf8, tomb_utf8 }, + { "UTF8", "UTF-8", 21, towide_utf8, tomb_utf8 }, + { "utf8", "UTF-8", 21, towide_utf8, tomb_utf8 }, + { "utf-8", "UTF-8", 21, towide_utf8, tomb_utf8 }, + + { "EUC-CN", "EUC-CN", 16, towide_euccn, tomb_mbs }, + { "eucCN", "EUC-CN", 16, towide_euccn, tomb_mbs }, + /* + * Because the 3-byte form of EUC-JP use the same leading byte, + * only 17 bits required to provide unique priorities. (The low + * bit of that first byte is set.) By setting this value low, + * we can get by with only 3 bytes in the strxfrm expansion. + */ + { "EUC-JP", "EUC-JP", 17, towide_eucjp, tomb_mbs }, + { "eucJP", "EUC-JP", 17, towide_eucjp, tomb_mbs }, + + { "EUC-KR", "EUC-KR", 16, towide_euckr, tomb_mbs }, + { "eucKR", "EUC-KR", 16, towide_euckr, tomb_mbs }, + /* + * EUC-TW uses 2 bytes most of the time, but 4 bytes if the + * high order byte is 0x8E. However, with 4 byte encodings, + * the third byte will be A0-B0. So we only need to consider + * the lower order 24 bits for collation. 
+ */ + { "EUC-TW", "EUC-TW", 24, towide_euctw, tomb_mbs }, + { "eucTW", "EUC-TW", 24, towide_euctw, tomb_mbs }, + + { "MS_Kanji", "MSKanji", 16, towide_mskanji, tomb_mbs }, + { "MSKanji", "MSKanji", 16, towide_mskanji, tomb_mbs }, + { "PCK", "MSKanji", 16, towide_mskanji, tomb_mbs }, + { "SJIS", "MSKanji", 16, towide_mskanji, tomb_mbs }, + { "Shift_JIS", "MSKanji", 16, towide_mskanji, tomb_mbs }, + + { "BIG5", "BIG5", 16, towide_big5, tomb_mbs }, + { "big5", "BIG5", 16, towide_big5, tomb_mbs }, + { "Big5", "BIG5", 16, towide_big5, tomb_mbs }, + + { "GBK", "GBK", 16, towide_gbk, tomb_mbs }, + + /* + * GB18030 can get away with just 31 bits. This is because the + * high order bit is always set for 4 byte values, and the + * at least one of the other bits in that 4 byte value will + * be non-zero. + */ + { "GB18030", "GB18030", 31, towide_gb18030, tomb_mbs }, + + /* + * This should probably be an aliase for euc-cn, or vice versa. + */ + { "GB2312", "GB2312", 16, towide_gb2312, tomb_mbs }, + + { NULL, NULL, 0, 0, 0 }, +}; + +static char * +show_mb(const char *mb) +{ + static char buf[64]; + + /* ASCII stuff we just print */ + if (isascii(*mb) && isgraph(*mb)) { + buf[0] = *mb; + buf[1] = 0; + return (buf); + } + buf[0] = 0; + while (*mb != 0) { + char scr[8]; + (void) snprintf(scr, sizeof (scr), "\\x%02x", *mb); + (void) strlcat(buf, scr, sizeof (buf)); + mb++; + } + return (buf); +} + +static char *widemsg; + +void +werr(const char *fmt, ...) +{ + char *msg; + + va_list va; + va_start(va, fmt); + (void) vasprintf(&msg, fmt, va); + va_end(va); + + free(widemsg); + widemsg = msg; +} + +/* + * This is used for 8-bit encodings. 
+ */ +int +towide_none(wchar_t *c, const char *mb, unsigned n __unused) +{ + if (mb_cur_max != 1) { + werr("invalid or unsupported multibyte locale"); + return (-1); + } + *c = (uint8_t)*mb; + return (1); +} + +int +tomb_none(char *mb, wchar_t wc) +{ + if (mb_cur_max != 1) { + werr("invalid or unsupported multibyte locale"); + return (-1); + } + *(uint8_t *)mb = (wc & 0xff); + mb[1] = 0; + return (1); +} + +/* + * UTF-8 stores wide characters in UTF-32 form. + */ +int +towide_utf8(wchar_t *wc, const char *mb, unsigned n) +{ + wchar_t c; + int nb; + wchar_t lv; /* lowest legal value */ + int i; + const uint8_t *s = (const uint8_t *)mb; + + c = *s; + + if ((c & 0x80) == 0) { + /* 7-bit ASCII */ + *wc = c; + return (1); + } else if ((c & 0xe0) == 0xc0) { + /* u80-u7ff - two bytes encoded */ + nb = 2; + lv = 0x80; + c &= ~0xe0; + } else if ((c & 0xf0) == 0xe0) { + /* u800-uffff - three bytes encoded */ + nb = 3; + lv = 0x800; + c &= ~0xf0; + } else if ((c & 0xf8) == 0xf0) { + /* u1000-u1fffff - four bytes encoded */ + nb = 4; + lv = 0x1000; + c &= ~0xf8; + } else { + /* 5 and 6 byte encodings are not legal unicode */ + werr("utf8 encoding too large (%s)", show_mb(mb)); + return (-1); + } + if (nb > (int)n) { + werr("incomplete utf8 sequence (%s)", show_mb(mb)); + return (-1); + } + + for (i = 1; i < nb; i++) { + if (((s[i]) & 0xc0) != 0x80) { + werr("illegal utf8 byte (%x)", s[i]); + return (-1); + } + c <<= 6; + c |= (s[i] & 0x3f); + } + + if (c < lv) { + werr("illegal redundant utf8 encoding (%s)", show_mb(mb)); + return (-1); + } + *wc = c; + return (nb); +} + +int +tomb_utf8(char *mb, wchar_t wc) +{ + uint8_t *s = (uint8_t *)mb; + uint8_t msk; + int cnt; + int i; + + if (wc <= 0x7f) { + s[0] = wc & 0x7f; + s[1] = 0; + return (1); + } + if (wc <= 0x7ff) { + cnt = 2; + msk = 0xc0; + } else if (wc <= 0xffff) { + cnt = 3; + msk = 0xe0; + } else if (wc <= 0x1fffff) { + cnt = 4; + msk = 0xf0; + } else { + werr("illegal uf8 char (%x)", wc); + return (-1); + } + for (i = 
cnt - 1; i; i--) { + s[i] = (wc & 0x3f) | 0x80; + wc >>= 6; + } + s[0] = (msk) | wc; + s[cnt] = 0; + return (cnt); +} + +/* + * Several encodings share a simplistic dual byte encoding. In these + * forms, they all indicate that a two byte sequence is to be used if + * the first byte has its high bit set. They all store this simple + * encoding as a 16-bit value, although a great many of the possible + * code points are not used in most character sets. This gives a possible + * set of just over 32,000 valid code points. + * + * 0x00 - 0x7f - 1 byte encoding + * 0x80 - 0x7fff - illegal + * 0x8000 - 0xffff - 2 byte encoding + */ + +static int +towide_dbcs(wchar_t *wc, const char *mb, unsigned n) +{ + wchar_t c; + + c = *(const uint8_t *)mb; + + if ((c & 0x80) == 0) { + /* 7-bit */ + *wc = c; + return (1); + } + if (n < 2) { + werr("incomplete character sequence (%s)", show_mb(mb)); + return (-1); + } + + /* Store both bytes as a single 16-bit wide. */ + c <<= 8; + c |= (uint8_t)(mb[1]); + *wc = c; + return (2); +} + +/* + * Most multibyte locales just convert the wide character to the multibyte + * form by stripping leading null bytes, and writing the 32-bit quantity + * in big-endian order. + */ +int +tomb_mbs(char *mb, wchar_t wc) +{ + uint8_t *s = (uint8_t *)mb; + int n = 0, c; + + if ((wc & 0xff000000U) != 0) { + n = 4; + } else if ((wc & 0x00ff0000U) != 0) { + n = 3; + } else if ((wc & 0x0000ff00U) != 0) { + n = 2; + } else { + n = 1; + } + c = n; + while (n) { + n--; + s[n] = wc & 0xff; + wc >>= 8; + } + /* ensure null termination */ + s[c] = 0; + return (c); +} + + +/* + * big5 is a simple dual byte character set. + */ +int +towide_big5(wchar_t *wc, const char *mb, unsigned n) +{ + return (towide_dbcs(wc, mb, n)); +} + +/* + * GBK encodes wides in the same way that big5 does, the high order + * bit of the first byte indicates a double byte character. 
+ */ +int +towide_gbk(wchar_t *wc, const char *mb, unsigned n) +{ + return (towide_dbcs(wc, mb, n)); +} + +/* + * GB2312 is another DBCS. Its cleaner than others in that the second + * byte does not encode ASCII, but it supports characters. + */ +int +towide_gb2312(wchar_t *wc, const char *mb, unsigned n) +{ + return (towide_dbcs(wc, mb, n)); +} + +/* + * GB18030. This encodes as 8, 16, or 32-bits. + * 7-bit values are in 1 byte, 4 byte sequences are used when + * the second byte encodes 0x30-39 and all other sequences are 2 bytes. + */ +int +towide_gb18030(wchar_t *wc, const char *mb, unsigned n) +{ + wchar_t c; + + c = *(const uint8_t *)mb; + + if ((c & 0x80) == 0) { + /* 7-bit */ + *wc = c; + return (1); + } + if (n < 2) { + werr("incomplete character sequence (%s)", show_mb(mb)); + return (-1); + } + + /* pull in the second byte */ + c <<= 8; + c |= (uint8_t)(mb[1]); + + if (((c & 0xff) >= 0x30) && ((c & 0xff) <= 0x39)) { + if (n < 4) { + werr("incomplete 4-byte character sequence (%s)", + show_mb(mb)); + return (-1); + } + c <<= 8; + c |= (uint8_t)(mb[2]); + c <<= 8; + c |= (uint8_t)(mb[3]); + *wc = c; + return (4); + } + + *wc = c; + return (2); +} + +/* + * MS-Kanji (aka SJIS) is almost a clean DBCS like the others, but it + * also has a range of single byte characters above 0x80. (0xa1-0xdf). + */ +int +towide_mskanji(wchar_t *wc, const char *mb, unsigned n) +{ + wchar_t c; + + c = *(const uint8_t *)mb; + + if ((c < 0x80) || ((c > 0xa0) && (c < 0xe0))) { + /* 7-bit */ + *wc = c; + return (1); + } + + if (n < 2) { + werr("incomplete character sequence (%s)", show_mb(mb)); + return (-1); + } + + /* Store both bytes as a single 16-bit wide. */ + c <<= 8; + c |= (uint8_t)(mb[1]); + *wc = c; + return (2); +} + +/* + * EUC forms. EUC encodings are "variable". FreeBSD carries some additional + * variable data to encode these, but we're going to treat each as independent + * instead. Its the only way we can sensibly move forward. 
+ * + * Note that the way in which the different EUC forms vary is how wide + * CS2 and CS3 are and what the first byte of them is. + */ +static int +towide_euc_impl(wchar_t *wc, const char *mb, unsigned n, + uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) +{ + int i; + int width = 2; + wchar_t c; + + c = *(const uint8_t *)mb; + + /* + * All variations of EUC encode 7-bit ASCII as one byte, and use + * additional bytes for more than that. + */ + if ((c & 0x80) == 0) { + /* 7-bit */ + *wc = c; + return (1); + } + + /* + * All EUC variants reserve 0xa1-0xff to identify CS1, which + * is always two bytes wide. Note that unused CS will be zero, + * and that cannot be true because we know that the high order + * bit must be set. + */ + if (c >= 0xa1) { + width = 2; + } else if (c == cs2) { + width = cs2width; + } else if (c == cs3) { + width = cs3width; + } + + if ((int)n < width) { + werr("incomplete character sequence (%s)", show_mb(mb)); + return (-1); + } + + for (i = 1; i < width; i++) { + /* pull in the next byte */ + c <<= 8; + c |= (uint8_t)(mb[i]); + } + + *wc = c; + return (width); +} + +/* + * EUC-CN encodes as follows: + * + * Code set 0 (ASCII): 0x21-0x7E + * Code set 1 (CNS 11643-1992 Plane 1): 0xA1A1-0xFEFE + * Code set 2: unused + * Code set 3: unused + */ +int +towide_euccn(wchar_t *wc, const char *mb, unsigned n) +{ + return (towide_euc_impl(wc, mb, n, 0x8e, 4, 0, 0)); +} + +/* + * EUC-JP encodes as follows: + * + * Code set 0 (ASCII or JIS X 0201-1976 Roman): 0x21-0x7E + * Code set 1 (JIS X 0208): 0xA1A1-0xFEFE + * Code set 2 (half-width katakana): 0x8EA1-0x8EDF + * Code set 3 (JIS X 0212-1990): 0x8FA1A1-0x8FFEFE + */ +int +towide_eucjp(wchar_t *wc, const char *mb, unsigned n) +{ + return (towide_euc_impl(wc, mb, n, 0x8e, 2, 0x8f, 3)); +} + +/* + * EUC-KR encodes as follows: + * + * Code set 0 (ASCII or KS C 5636-1993): 0x21-0x7E + * Code set 1 (KS C 5601-1992): 0xA1A1-0xFEFE + * Code set 2: unused + * Code set 3: unused + */ +int 
+towide_euckr(wchar_t *wc, const char *mb, unsigned n) +{ + return (towide_euc_impl(wc, mb, n, 0, 0, 0, 0)); +} + +/* + * EUC-TW encodes as follows: + * + * Code set 0 (ASCII): 0x21-0x7E + * Code set 1 (CNS 11643-1992 Plane 1): 0xA1A1-0xFEFE + * Code set 2 (CNS 11643-1992 Planes 1-16): 0x8EA1A1A1-0x8EB0FEFE + * Code set 3: unused + */ +int +towide_euctw(wchar_t *wc, const char *mb, unsigned n) +{ + return (towide_euc_impl(wc, mb, n, 0x8e, 4, 0, 0)); +} + +/* + * Public entry points. + */ + +int +to_wide(wchar_t *wc, const char *mb) +{ + /* this won't fail hard */ + return (_towide(wc, mb, strlen(mb))); +} + +int +to_mb(char *mb, wchar_t wc) +{ + int rv; + + if ((rv = _tomb(mb, wc)) < 0) { + warn("%s", widemsg); + free(widemsg); + widemsg = NULL; + } + return (rv); +} + +char * +to_mb_string(const wchar_t *wcs) +{ + char *mbs; + char *ptr; + int len; + + mbs = malloc((wcslen(wcs) * mb_cur_max) + 1); + if (mbs == NULL) { + warn("out of memory"); + return (NULL); + } + ptr = mbs; + while (*wcs) { + if ((len = to_mb(ptr, *wcs)) < 0) { + INTERR; + free(mbs); + return (NULL); + } + wcs++; + ptr += len; + } + *ptr = 0; + return (mbs); +} + +void +set_wide_encoding(const char *encoding) +{ + int i; + + _towide = towide_none; + _tomb = tomb_none; + _nbits = 8; + + snprintf(_encoding_buffer, sizeof(_encoding_buffer), "NONE:%s", + encoding); + for (i = 0; mb_encodings[i].name; i++) { + if (strcasecmp(encoding, mb_encodings[i].name) == 0) { + _towide = mb_encodings[i].towide; + _tomb = mb_encodings[i].tomb; + _encoding = mb_encodings[i].cname; + _nbits = mb_encodings[i].nbits; + break; + } + } +} + +const char * +get_wide_encoding(void) +{ + return (_encoding); +} + +int +max_wide(void) +{ + return ((int)((1U << _nbits) - 1)); +} diff --git a/tests/adv_cmds.plist.in b/tests/adv_cmds.plist.in index 4edbc86..4f031b1 100644 --- a/tests/adv_cmds.plist.in +++ b/tests/adv_cmds.plist.in @@ -63,6 +63,31 @@ Timeout 300 + + TestName + adv_cmds.genwrap_test.sh.arg_selector_simple_varsel 
+ ShellEnv + + ATF_SH + /usr/local/bin/atf-sh + __RUNNING_INSIDE_ATF_RUN + internal-yes-value + + Command + + /usr/local/bin/atf-sh + /AppleInternal/Tests/adv_cmds/genwrap/genwrap_test.sh + -s + /AppleInternal/Tests/adv_cmds/genwrap + -r + genwrap_test.sh.arg_selector_simple_varsel.results.txt + arg_selector_simple_varsel + + MayRunConcurrently + + Timeout + 300 + TestName adv_cmds.genwrap_test.sh.arg_selector_complex @@ -88,6 +113,31 @@ Timeout 300 + + TestName + adv_cmds.genwrap_test.sh.arg_selector_complex_logonly_args + ShellEnv + + ATF_SH + /usr/local/bin/atf-sh + __RUNNING_INSIDE_ATF_RUN + internal-yes-value + + Command + + /usr/local/bin/atf-sh + /AppleInternal/Tests/adv_cmds/genwrap/genwrap_test.sh + -s + /AppleInternal/Tests/adv_cmds/genwrap + -r + genwrap_test.sh.arg_selector_complex_logonly_args.results.txt + arg_selector_complex_logonly_args + + MayRunConcurrently + + Timeout + 300 + TestName adv_cmds.genwrap_test.sh.env_selector @@ -138,6 +188,31 @@ Timeout 300 + + TestName + adv_cmds.genwrap_test.sh.env_selector_varsel + ShellEnv + + ATF_SH + /usr/local/bin/atf-sh + __RUNNING_INSIDE_ATF_RUN + internal-yes-value + + Command + + /usr/local/bin/atf-sh + /AppleInternal/Tests/adv_cmds/genwrap/genwrap_test.sh + -s + /AppleInternal/Tests/adv_cmds/genwrap + -r + genwrap_test.sh.env_selector_varsel.results.txt + env_selector_varsel + + MayRunConcurrently + + Timeout + 300 + TestName adv_cmds.genwrap_test.sh.simple_shim

\120 - \121 - \122 - \123 - \124 - \125 - \126 - \127 - \130 - \131 - \132 - \133 - \134 - \134 - \135 - \136 - \136 - \137 - \137 - \137 - \140 - \141 - \142 - \143 - \144 - \145 - \146 - \147 - \150 - \151 - \152 - \153 - \154 - \155 - \156 - \157 -