Skip to content

Commit

Permalink
Support for hex, oct, and bin integers (#205)
Browse files Browse the repository at this point in the history
Add support for non-decimal integers. At the time of writing, this is an
unreleased backward-compatible feature of TOML:

```
  Non-negative integer values may also be expressed in hexadecimal, octal, or
  binary. In these formats, leading zeros are allowed (after the prefix). Hex
  values are case insensitive. Underscores are allowed between digits (but
  not between the prefix and the value).

  # hexadecimal with prefix `0x`
  hex1 = 0xDEADBEEF
  hex2 = 0xdeadbeef
  hex3 = 0xdead_beef

  # octal with prefix `0o`
  oct1 = 0o01234567
  oct2 = 0o755 # useful for Unix file permissions

  # binary with prefix `0b`
  bin1 = 0b11010110
```

Fixes #204
  • Loading branch information
pelletier authored Dec 22, 2017
1 parent b8b5e76 commit 861c473
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 10 deletions.
59 changes: 59 additions & 0 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -575,8 +575,67 @@ func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
return l.lexRvalue
}

type validRuneFn func(r rune) bool

func isValidHexRune(r rune) bool {
return r >= 'a' && r <= 'f' ||
r >= 'A' && r <= 'F' ||
r >= '0' && r <= '9' ||
r == '_'
}

func isValidOctalRune(r rune) bool {
return r >= '0' && r <= '7' || r == '_'
}

func isValidBinaryRune(r rune) bool {
return r == '0' || r == '1' || r == '_'
}

func (l *tomlLexer) lexNumber() tomlLexStateFn {
r := l.peek()

if r == '0' {
follow := l.peekString(2)
if len(follow) == 2 {
var isValidRune validRuneFn
switch follow[1] {
case 'x':
isValidRune = isValidHexRune
case 'o':
isValidRune = isValidOctalRune
case 'b':
isValidRune = isValidBinaryRune
default:
if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
}
}

if isValidRune != nil {
l.next()
l.next()
digitSeen := false
for {
next := l.peek()
if !isValidRune(next) {
break
}
digitSeen = true
l.next()
}

if !digitSeen {
return l.errorf("number needs at least one digit")
}

l.emit(tokenInteger)

return l.lexRvalue
}
}
}

if r == '+' || r == '-' {
l.next()
}
Expand Down
60 changes: 51 additions & 9 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,25 @@ func (p *tomlParser) parseAssign() tomlParserStateFn {
}

var numberUnderscoreInvalidRegexp *regexp.Regexp
var hexNumberUnderscoreInvalidRegexp *regexp.Regexp

func cleanupNumberToken(value string) (string, error) {
func numberContainsInvalidUnderscore(value string) error {
if numberUnderscoreInvalidRegexp.MatchString(value) {
return "", errors.New("invalid use of _ in number")
return errors.New("invalid use of _ in number")
}
return nil
}

func hexNumberContainsInvalidUnderscore(value string) error {
if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
return errors.New("invalid use of _ in hex number")
}
return nil
}

func cleanupNumberToken(value string) string {
cleanedVal := strings.Replace(value, "_", "", -1)
return cleanedVal, nil
return cleanedVal
}

func (p *tomlParser) parseRvalue() interface{} {
Expand All @@ -235,20 +247,49 @@ func (p *tomlParser) parseRvalue() interface{} {
case tokenFalse:
return false
case tokenInteger:
cleanedVal, err := cleanupNumberToken(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
cleanedVal := cleanupNumberToken(tok.val)
var err error
var val int64
if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
switch cleanedVal[1] {
case 'x':
err = hexNumberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
case 'o':
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
case 'b':
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
default:
panic("invalid base") // the lexer should catch this first
}
} else {
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal, 10, 64)
}
val, err := strconv.ParseInt(cleanedVal, 10, 64)
if err != nil {
p.raiseError(tok, "%s", err)
}
return val
case tokenFloat:
cleanedVal, err := cleanupNumberToken(tok.val)
err := numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
cleanedVal := cleanupNumberToken(tok.val)
val, err := strconv.ParseFloat(cleanedVal, 64)
if err != nil {
p.raiseError(tok, "%s", err)
Expand Down Expand Up @@ -379,5 +420,6 @@ func parseToml(flow []token) *Tree {
}

func init() {
numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d]|_$|^_)`)
numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
}
55 changes: 54 additions & 1 deletion parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,59 @@ func TestSimpleNumbers(t *testing.T) {
})
}

func TestHexIntegers(t *testing.T) {
tree, err := Load(`a = 0xDEADBEEF`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})

tree, err = Load(`a = 0xdeadbeef`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})

tree, err = Load(`a = 0xdead_beef`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})

_, err = Load(`a = 0x_1`)
if err.Error() != "(1, 5): invalid use of _ in hex number" {
t.Error("Bad error message:", err.Error())
}
}

func TestOctIntegers(t *testing.T) {
tree, err := Load(`a = 0o01234567`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(342391)})

tree, err = Load(`a = 0o755`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(493)})

_, err = Load(`a = 0o_1`)
if err.Error() != "(1, 5): invalid use of _ in number" {
t.Error("Bad error message:", err.Error())
}
}

func TestBinIntegers(t *testing.T) {
tree, err := Load(`a = 0b11010110`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(214)})

_, err = Load(`a = 0b_1`)
if err.Error() != "(1, 5): invalid use of _ in number" {
t.Error("Bad error message:", err.Error())
}
}

func TestBadIntegerBase(t *testing.T) {
_, err := Load(`a = 0k1`)
if err.Error() != "(1, 5): unknown number base: k. possible options are x (hex) o (octal) b (binary)" {
t.Error("Error should have been returned.")
}
}

func TestIntegerNoDigit(t *testing.T) {
_, err := Load(`a = 0b`)
if err.Error() != "(1, 5): number needs at least one digit" {
t.Error("Bad error message:", err.Error())
}
}

func TestNumbersWithUnderscores(t *testing.T) {
tree, err := Load("a = 1_000")
assertTree(t, tree, err, map[string]interface{}{
Expand Down Expand Up @@ -642,7 +695,7 @@ func TestTomlValueStringRepresentation(t *testing.T) {
{int64(12345), "12345"},
{uint64(50), "50"},
{float64(123.45), "123.45"},
{bool(true), "true"},
{true, "true"},
{"hello world", "\"hello world\""},
{"\b\t\n\f\r\"\\", "\"\\b\\t\\n\\f\\r\\\"\\\\\""},
{"\x05", "\"\\u0005\""},
Expand Down

0 comments on commit 861c473

Please sign in to comment.