From 72ead5faa1c2426bebe794973c1cbbcf0cb89e5c Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Thu, 1 Oct 2020 13:29:14 -0400 Subject: [PATCH] internal/export/idna: Allow specifying CheckHyphens and CheckJoiners This aligns with the options in the latest version of UTS 46, and in particular allows implementing the WHATWG URL Standard. Fixes golang/go#41732. Change-Id: Iab577eff4303f3eea64512d07d968c891acf126f Reviewed-on: https://go-review.googlesource.com/c/text/+/258837 Reviewed-by: Marcel van Lohuizen Reviewed-by: Nigel Tao Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot Trust: Nigel Tao --- internal/export/idna/idna10.0.0.go | 113 ++++++++++++++++-------- internal/export/idna/idna10.0.0_test.go | 8 ++ internal/export/idna/idna9.0.0.go | 93 +++++++++++++------ internal/export/idna/idna9.0.0_test.go | 8 ++ 4 files changed, 154 insertions(+), 68 deletions(-) diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go index 1244f9ce9..2ceb32768 100644 --- a/internal/export/idna/idna10.0.0.go +++ b/internal/export/idna/idna10.0.0.go @@ -65,15 +65,14 @@ func Transitional(transitional bool) Option { // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts // are longer than allowed by the RFC. +// +// This option corresponds to the VerifyDnsLength flag in UTS #46. func VerifyDNSLength(verify bool) Option { return func(o *options) { o.verifyDNSLength = verify } } // RemoveLeadingDots removes leading label separators. Leading runes that map to // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. -// -// This is the behavior suggested by the UTS #46 and is adopted by some -// browsers. func RemoveLeadingDots(remove bool) Option { return func(o *options) { o.removeLeadingDots = remove } } @@ -81,6 +80,8 @@ func RemoveLeadingDots(remove bool) Option { // ValidateLabels sets whether to check the mandatory label validation criteria // as defined in Section 5.4 of RFC 5891. This includes testing for correct use // of hyphens ('-'), normalization, validity of runes, and the context rules. +// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags +// in UTS #46. func ValidateLabels(enable bool) Option { return func(o *options) { // Don't override existing mappings, but set one that at least checks @@ -89,25 +90,48 @@ func ValidateLabels(enable bool) Option { o.mapping = normalize } o.trie = trie - o.validateLabels = enable - o.fromPuny = validateFromPunycode + o.checkJoiners = enable + o.checkHyphens = enable + if enable { + o.fromPuny = validateFromPunycode + } else { + o.fromPuny = nil + } + } +} + +// CheckHyphens sets whether to check for correct use of hyphens ('-') in +// labels. Most web browsers do not have this option set, since labels such as +// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use. +// +// This option corresponds to the CheckHyphens flag in UTS #46. +func CheckHyphens(enable bool) Option { + return func(o *options) { o.checkHyphens = enable } +} + +// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix +// A of RFC 5892, concerning the use of joiner runes. +// +// This option corresponds to the CheckJoiners flag in UTS #46. +func CheckJoiners(enable bool) Option { + return func(o *options) { + o.trie = trie + o.checkJoiners = enable } } // StrictDomainName limits the set of permissible ASCII characters to those // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the -// hyphen). This is set by default for MapForLookup and ValidateForRegistration. +// hyphen). This is set by default for MapForLookup and ValidateForRegistration, +// but is only useful if ValidateLabels is set. // // This option is useful, for instance, for browsers that allow characters // outside this range, for example a '_' (U+005F LOW LINE). See -// http://www.rfc-editor.org/std/std3.txt for more details This option -// corresponds to the UseSTD3ASCIIRules option in UTS #46. +// http://www.rfc-editor.org/std/std3.txt for more details. +// +// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46. func StrictDomainName(use bool) Option { - return func(o *options) { - o.trie = trie - o.useSTD3Rules = use - o.fromPuny = validateFromPunycode - } + return func(o *options) { o.useSTD3Rules = use } } // NOTE: the following options pull in tables. The tables should not be linked @@ -115,6 +139,8 @@ func StrictDomainName(use bool) Option { // BidiRule enables the Bidi rule as defined in RFC 5893. Any application // that relies on proper validation of labels should include this rule. +// +// This option corresponds to the CheckBidi flag in UTS #46. func BidiRule() Option { return func(o *options) { o.bidirule = bidirule.ValidString } } @@ -150,7 +176,8 @@ func MapForLookup() Option { type options struct { transitional bool useSTD3Rules bool - validateLabels bool + checkHyphens bool + checkJoiners bool verifyDNSLength bool removeLeadingDots bool @@ -223,8 +250,11 @@ func (p *Profile) String() string { if p.useSTD3Rules { s += ":UseSTD3Rules" } - if p.validateLabels { - s += ":ValidateLabels" + if p.checkHyphens { + s += ":CheckHyphens" + } + if p.checkJoiners { + s += ":CheckJoiners" } if p.verifyDNSLength { s += ":VerifyDNSLength" @@ -252,26 +282,29 @@ var ( punycode = &Profile{} lookup = &Profile{options{ - transitional: true, - useSTD3Rules: true, - validateLabels: true, - trie: trie, - fromPuny: validateFromPunycode, - mapping: validateAndMap, - bidirule: bidirule.ValidString, + transitional: true, + useSTD3Rules: true, + checkHyphens: true, + checkJoiners: true, + trie: trie, + fromPuny: validateFromPunycode, + mapping: validateAndMap, + bidirule: bidirule.ValidString, }} display = &Profile{options{ - useSTD3Rules: true, - validateLabels: true, - trie: trie, - fromPuny: validateFromPunycode, - mapping: validateAndMap, - bidirule: bidirule.ValidString, + useSTD3Rules: true, + checkHyphens: true, + checkJoiners: true, + trie: trie, + fromPuny: validateFromPunycode, + mapping: validateAndMap, + bidirule: bidirule.ValidString, }} registration = &Profile{options{ useSTD3Rules: true, - validateLabels: true, verifyDNSLength: true, + checkHyphens: true, + checkJoiners: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateRegistration, @@ -338,7 +371,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { } isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight labels.set(u) - if err == nil && p.validateLabels { + if err == nil && p.fromPuny != nil { err = p.fromPuny(p, u) } if err == nil { @@ -679,16 +712,18 @@ func (p *Profile) validateLabel(s string) (err error) { } return nil } - if !p.validateLabels { - return nil - } - trie := p.trie // p.validateLabels is only set if trie is set. - if len(s) > 4 && s[2] == '-' && s[3] == '-' { - return &labelError{s, "V2"} + if p.checkHyphens { + if len(s) > 4 && s[2] == '-' && s[3] == '-' { + return &labelError{s, "V2"} + } + if s[0] == '-' || s[len(s)-1] == '-' { + return &labelError{s, "V3"} + } } - if s[0] == '-' || s[len(s)-1] == '-' { - return &labelError{s, "V3"} + if !p.checkJoiners { + return nil } + trie := p.trie // p.checkJoiners is only set if trie is set. // TODO: merge the use of this in the trie. v, sz := trie.lookupString(s) x := info(v) diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go index ed01f9343..66ea636db 100644 --- a/internal/export/idna/idna10.0.0_test.go +++ b/internal/export/idna/idna10.0.0_test.go @@ -31,6 +31,8 @@ func TestLabelErrors(t *testing.T) { lengthA := kind{"CheckLengthA", p.ToASCII} p = New(MapForLookup(), StrictDomainName(false)) std3 := kind{"STD3", p.ToASCII} + p = New(MapForLookup(), CheckHyphens(false)) + hyphens := kind{"CheckHyphens", p.ToASCII} testCases := []struct { kind @@ -85,6 +87,12 @@ func TestLabelErrors(t *testing.T) { {display, "*.foo.com", "*.foo.com", "P1"}, {std3, "*.foo.com", "*.foo.com", ""}, + // Hyphens + {display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"}, + {hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""}, + {display, "-label-.com", "-label-.com", "V3"}, + {hyphens, "-label-.com", "-label-.com", ""}, + // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return // lab9.be. diff --git a/internal/export/idna/idna9.0.0.go b/internal/export/idna/idna9.0.0.go index 25f2ac3e8..1ea943136 100644 --- a/internal/export/idna/idna9.0.0.go +++ b/internal/export/idna/idna9.0.0.go @@ -64,15 +64,14 @@ func Transitional(transitional bool) Option { // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts // are longer than allowed by the RFC. +// +// This option corresponds to the VerifyDnsLength flag in UTS #46. func VerifyDNSLength(verify bool) Option { return func(o *options) { o.verifyDNSLength = verify } } // RemoveLeadingDots removes leading label separators. Leading runes that map to // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. -// -// This is the behavior suggested by the UTS #46 and is adopted by some -// browsers. func RemoveLeadingDots(remove bool) Option { return func(o *options) { o.removeLeadingDots = remove } } @@ -80,6 +79,8 @@ func RemoveLeadingDots(remove bool) Option { // ValidateLabels sets whether to check the mandatory label validation criteria // as defined in Section 5.4 of RFC 5891. This includes testing for correct use // of hyphens ('-'), normalization, validity of runes, and the context rules. +// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags +// in UTS #46. func ValidateLabels(enable bool) Option { return func(o *options) { // Don't override existing mappings, but set one that at least checks @@ -88,25 +89,48 @@ func ValidateLabels(enable bool) Option { o.mapping = normalize } o.trie = trie - o.validateLabels = enable - o.fromPuny = validateFromPunycode + o.checkJoiners = enable + o.checkHyphens = enable + if enable { + o.fromPuny = validateFromPunycode + } else { + o.fromPuny = nil + } + } +} + +// CheckHyphens sets whether to check for correct use of hyphens ('-') in +// labels. Most web browsers do not have this option set, since labels such as +// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use. +// +// This option corresponds to the CheckHyphens flag in UTS #46. +func CheckHyphens(enable bool) Option { + return func(o *options) { o.checkHyphens = enable } +} + +// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix +// A of RFC 5892, concerning the use of joiner runes. +// +// This option corresponds to the CheckJoiners flag in UTS #46. +func CheckJoiners(enable bool) Option { + return func(o *options) { + o.trie = trie + o.checkJoiners = enable } } // StrictDomainName limits the set of permissable ASCII characters to those // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the -// hyphen). This is set by default for MapForLookup and ValidateForRegistration. +// hyphen). This is set by default for MapForLookup and ValidateForRegistration, +// but is only useful if ValidateLabels is set. // // This option is useful, for instance, for browsers that allow characters // outside this range, for example a '_' (U+005F LOW LINE). See -// http://www.rfc-editor.org/std/std3.txt for more details This option -// corresponds to the UseSTD3ASCIIRules option in UTS #46. +// http://www.rfc-editor.org/std/std3.txt for more details. +// +// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46. func StrictDomainName(use bool) Option { - return func(o *options) { - o.trie = trie - o.useSTD3Rules = use - o.fromPuny = validateFromPunycode - } + return func(o *options) { o.useSTD3Rules = use } } // NOTE: the following options pull in tables. The tables should not be linked @@ -114,6 +138,8 @@ func StrictDomainName(use bool) Option { // BidiRule enables the Bidi rule as defined in RFC 5893. Any application // that relies on proper validation of labels should include this rule. +// +// This option corresponds to the CheckBidi flag in UTS #46. func BidiRule() Option { return func(o *options) { o.bidirule = bidirule.ValidString } } @@ -150,7 +176,8 @@ func MapForLookup() Option { type options struct { transitional bool useSTD3Rules bool - validateLabels bool + checkHyphens bool + checkJoiners bool verifyDNSLength bool removeLeadingDots bool @@ -223,8 +250,11 @@ func (p *Profile) String() string { if p.useSTD3Rules { s += ":UseSTD3Rules" } - if p.validateLabels { - s += ":ValidateLabels" + if p.checkHyphens { + s += ":CheckHyphens" + } + if p.checkJoiners { + s += ":CheckJoiners" } if p.verifyDNSLength { s += ":VerifyDNSLength" @@ -253,9 +283,10 @@ var ( punycode = &Profile{} lookup = &Profile{options{ transitional: true, - useSTD3Rules: true, - validateLabels: true, removeLeadingDots: true, + useSTD3Rules: true, + checkHyphens: true, + checkJoiners: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateAndMap, @@ -263,8 +294,9 @@ var ( }} display = &Profile{options{ useSTD3Rules: true, - validateLabels: true, removeLeadingDots: true, + checkHyphens: true, + checkJoiners: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateAndMap, @@ -272,8 +304,9 @@ var ( }} registration = &Profile{options{ useSTD3Rules: true, - validateLabels: true, verifyDNSLength: true, + checkHyphens: true, + checkJoiners: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateRegistration, @@ -337,7 +370,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { continue } labels.set(u) - if err == nil && p.validateLabels { + if err == nil && p.fromPuny != nil { err = p.fromPuny(p, u) } if err == nil { @@ -627,16 +660,18 @@ func (p *Profile) validateLabel(s string) error { if p.bidirule != nil && !p.bidirule(s) { return &labelError{s, "B"} } - if !p.validateLabels { - return nil - } - trie := p.trie // p.validateLabels is only set if trie is set. - if len(s) > 4 && s[2] == '-' && s[3] == '-' { - return &labelError{s, "V2"} + if p.checkHyphens { + if len(s) > 4 && s[2] == '-' && s[3] == '-' { + return &labelError{s, "V2"} + } + if s[0] == '-' || s[len(s)-1] == '-' { + return &labelError{s, "V3"} + } } - if s[0] == '-' || s[len(s)-1] == '-' { - return &labelError{s, "V3"} + if !p.checkJoiners { + return nil } + trie := p.trie // p.checkJoiners is only set if trie is set. // TODO: merge the use of this in the trie. v, sz := trie.lookupString(s) x := info(v) diff --git a/internal/export/idna/idna9.0.0_test.go b/internal/export/idna/idna9.0.0_test.go index 7047d744a..03b1267c3 100644 --- a/internal/export/idna/idna9.0.0_test.go +++ b/internal/export/idna/idna9.0.0_test.go @@ -31,6 +31,8 @@ func TestLabelErrors(t *testing.T) { lengthA := kind{"CheckLengthA", p.ToASCII} p = New(MapForLookup(), StrictDomainName(false)) std3 := kind{"STD3", p.ToASCII} + p = New(MapForLookup(), CheckHyphens(false)) + hyphens := kind{"CheckHyphens", p.ToASCII} testCases := []struct { kind @@ -81,6 +83,12 @@ func TestLabelErrors(t *testing.T) { {display, "*.foo.com", "*.foo.com", "P1"}, {std3, "*.foo.com", "*.foo.com", ""}, + // Hyphens + {display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"}, + {hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""}, + {display, "-label-.com", "-label-.com", "V3"}, + {hyphens, "-label-.com", "-label-.com", ""}, + // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return // lab9.be.