Skip to content

Commit

Permalink
internal/export/idna: Allow specifying CheckHyphens and CheckJoiners
Browse files Browse the repository at this point in the history
This aligns with the options in the latest version of UTS 46, and in
particular allows implementing the WHATWG URL Standard.

Fixes golang/go#41732.

Change-Id: Iab577eff4303f3eea64512d07d968c891acf126f
Reviewed-on: https://go-review.googlesource.com/c/text/+/258837
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Nigel Tao <nigeltao@golang.org>
  • Loading branch information
TimothyGu authored and nigeltao committed Nov 25, 2020
1 parent 967b8f6 commit 72ead5f
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 68 deletions.
113 changes: 74 additions & 39 deletions internal/export/idna/idna10.0.0.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,23 @@ func Transitional(transitional bool) Option {

// VerifyDNSLength controls whether a Profile reports an error when any part
// of an IDN exceeds the length permitted by the RFC.
//
// This option corresponds to the VerifyDnsLength flag in UTS #46.
func VerifyDNSLength(verify bool) Option {
	return func(opts *options) {
		opts.verifyDNSLength = verify
	}
}

// RemoveLeadingDots controls whether leading label separators are stripped.
// Leading runes that map to a dot, for example U+3002 IDEOGRAPHIC FULL STOP,
// are stripped as well.
//
// UTS #46 suggests this behavior, and some browsers have adopted it.
func RemoveLeadingDots(remove bool) Option {
	return func(opts *options) {
		opts.removeLeadingDots = remove
	}
}

// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
// in UTS #46.
func ValidateLabels(enable bool) Option {
return func(o *options) {
// Don't override existing mappings, but set one that at least checks
Expand All @@ -89,32 +90,57 @@ func ValidateLabels(enable bool) Option {
o.mapping = normalize
}
o.trie = trie
o.validateLabels = enable
o.fromPuny = validateFromPunycode
o.checkJoiners = enable
o.checkHyphens = enable
if enable {
o.fromPuny = validateFromPunycode
} else {
o.fromPuny = nil
}
}
}

// CheckHyphens controls whether labels are checked for correct use of
// hyphens ('-'). Most web browsers leave this option unset, because labels
// such as "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
//
// This option corresponds to the CheckHyphens flag in UTS #46.
func CheckHyphens(enable bool) Option {
	return func(opts *options) {
		opts.checkHyphens = enable
	}
}

// CheckJoiners controls whether the ContextJ rules from Appendix A of
// RFC 5892, which govern the use of joiner runes, are checked.
//
// This option corresponds to the CheckJoiners flag in UTS #46.
func CheckJoiners(enable bool) Option {
	return func(opts *options) {
		opts.checkJoiners = enable
		// Joiner validation looks runes up in the profile's trie, so make
		// sure one is installed regardless of the flag value.
		opts.trie = trie
	}
}

// StrictDomainName limits the set of permissible ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
// but is only useful if ValidateLabels is set.
//
// This option is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
// http://www.rfc-editor.org/std/std3.txt for more details This option
// corresponds to the UseSTD3ASCIIRules option in UTS #46.
// http://www.rfc-editor.org/std/std3.txt for more details.
//
// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
func StrictDomainName(use bool) Option {
return func(o *options) {
o.trie = trie
o.useSTD3Rules = use
o.fromPuny = validateFromPunycode
}
return func(o *options) { o.useSTD3Rules = use }
}

// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.

// BidiRule turns on the Bidi rule defined in RFC 5893. Applications that
// depend on labels being properly validated should include this rule.
//
// This option corresponds to the CheckBidi flag in UTS #46.
func BidiRule() Option {
	return func(opts *options) {
		opts.bidirule = bidirule.ValidString
	}
}
Expand Down Expand Up @@ -150,7 +176,8 @@ func MapForLookup() Option {
type options struct {
transitional bool
useSTD3Rules bool
validateLabels bool
checkHyphens bool
checkJoiners bool
verifyDNSLength bool
removeLeadingDots bool

Expand Down Expand Up @@ -223,8 +250,11 @@ func (p *Profile) String() string {
if p.useSTD3Rules {
s += ":UseSTD3Rules"
}
if p.validateLabels {
s += ":ValidateLabels"
if p.checkHyphens {
s += ":CheckHyphens"
}
if p.checkJoiners {
s += ":CheckJoiners"
}
if p.verifyDNSLength {
s += ":VerifyDNSLength"
Expand Down Expand Up @@ -252,26 +282,29 @@ var (

punycode = &Profile{}
lookup = &Profile{options{
transitional: true,
useSTD3Rules: true,
validateLabels: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
transitional: true,
useSTD3Rules: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
display = &Profile{options{
useSTD3Rules: true,
validateLabels: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
useSTD3Rules: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
registration = &Profile{options{
useSTD3Rules: true,
validateLabels: true,
verifyDNSLength: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateRegistration,
Expand Down Expand Up @@ -338,7 +371,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
}
isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
labels.set(u)
if err == nil && p.validateLabels {
if err == nil && p.fromPuny != nil {
err = p.fromPuny(p, u)
}
if err == nil {
Expand Down Expand Up @@ -679,16 +712,18 @@ func (p *Profile) validateLabel(s string) (err error) {
}
return nil
}
if !p.validateLabels {
return nil
}
trie := p.trie // p.validateLabels is only set if trie is set.
if len(s) > 4 && s[2] == '-' && s[3] == '-' {
return &labelError{s, "V2"}
if p.checkHyphens {
if len(s) > 4 && s[2] == '-' && s[3] == '-' {
return &labelError{s, "V2"}
}
if s[0] == '-' || s[len(s)-1] == '-' {
return &labelError{s, "V3"}
}
}
if s[0] == '-' || s[len(s)-1] == '-' {
return &labelError{s, "V3"}
if !p.checkJoiners {
return nil
}
trie := p.trie // p.checkJoiners is only set if trie is set.
// TODO: merge the use of this in the trie.
v, sz := trie.lookupString(s)
x := info(v)
Expand Down
8 changes: 8 additions & 0 deletions internal/export/idna/idna10.0.0_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ func TestLabelErrors(t *testing.T) {
lengthA := kind{"CheckLengthA", p.ToASCII}
p = New(MapForLookup(), StrictDomainName(false))
std3 := kind{"STD3", p.ToASCII}
p = New(MapForLookup(), CheckHyphens(false))
hyphens := kind{"CheckHyphens", p.ToASCII}

testCases := []struct {
kind
Expand Down Expand Up @@ -85,6 +87,12 @@ func TestLabelErrors(t *testing.T) {
{display, "*.foo.com", "*.foo.com", "P1"},
{std3, "*.foo.com", "*.foo.com", ""},

// Hyphens
{display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"},
{hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""},
{display, "-label-.com", "-label-.com", "V3"},
{hyphens, "-label-.com", "-label-.com", ""},

// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
// Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
// lab9.be.
Expand Down
Loading

0 comments on commit 72ead5f

Please sign in to comment.