Lex performance improvement (#176)
* Use []token instead of chan token

name             old time/op    new time/op    delta
ParseToml-8        1.18ms ± 0%    0.91ms ± 0%  -22.98%
UnmarshalToml-8    1.29ms ± 0%    0.95ms ± 0%  -25.96%

name             old alloc/op   new alloc/op   delta
ParseToml-8         429kB ± 0%     444kB ± 0%   +3.49%
UnmarshalToml-8     451kB ± 0%     466kB ± 0%   +3.32%

name             old allocs/op  new allocs/op  delta
ParseToml-8         14.1k ± 0%     13.7k ± 0%   -2.31%
UnmarshalToml-8     15.1k ± 0%     14.7k ± 0%   -2.16%
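
To make the first change concrete, here is a minimal, self-contained sketch with illustrative names (lexChan and lexSlice are not the code in this diff): the old lexer pushed every token through an unbuffered channel to the consumer, so each token paid for a goroutine handoff, while the new lexer appends tokens to a slice and returns the whole slice for the parser to walk by index.

package main

import "fmt"

type token struct{ val string }

// Channel-based emission (the old shape): a goroutine plus one
// synchronization point per token.
func lexChan(words []string) chan token {
	out := make(chan token)
	go func() {
		defer close(out)
		for _, w := range words {
			out <- token{val: w}
		}
	}()
	return out
}

// Slice-based emission (the new shape): tokens accumulate in a slice
// and the consumer indexes into it, with no goroutine or channel.
func lexSlice(words []string) []token {
	tokens := make([]token, 0, 256) // preallocate to limit growth copies
	for _, w := range words {
		tokens = append(tokens, token{val: w})
	}
	return tokens
}

func main() {
	words := []string{"a", "=", "1"}
	for tok := range lexChan(words) {
		fmt.Println(tok.val)
	}
	for _, tok := range lexSlice(words) {
		fmt.Println(tok.val)
	}
}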

* Lex on []byte instead of io.Reader

name             old time/op    new time/op    delta
ParseToml-8        1.18ms ± 0%    0.29ms ± 0%  -75.18%
UnmarshalToml-8    1.27ms ± 0%    0.38ms ± 0%  -70.38%

name             old alloc/op   new alloc/op   delta
ParseToml-8         429kB ± 0%     135kB ± 0%  -68.53%
UnmarshalToml-8     451kB ± 0%     157kB ± 0%  -65.22%

name             old allocs/op  new allocs/op  delta
ParseToml-8         14.1k ± 0%      3.2k ± 0%  -77.20%
UnmarshalToml-8     15.1k ± 0%      4.2k ± 0%  -72.00%
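
Likewise, a minimal sketch of the second change, with illustrative names rather than the real lexer: the input is read fully into memory and converted to a []rune once, so peek and read become bounds-checked index operations instead of ReadRune/UnreadRune calls through a buffered io.Reader, with no per-rune error handling.

package main

import (
	"bytes"
	"fmt"
)

const eof rune = 0 // sentinel for this sketch; the real lexer defines its own eof rune

// indexLexer is a stand-in for the slice-backed lexer state.
type indexLexer struct {
	input    []rune
	inputIdx int
}

// peek returns the next rune without consuming it, or eof when the
// input is exhausted; no UnreadRune is needed.
func (l *indexLexer) peek() rune {
	if l.inputIdx >= len(l.input) {
		return eof
	}
	return l.input[l.inputIdx]
}

// read consumes the next rune by advancing the index.
func (l *indexLexer) read() rune {
	r := l.peek()
	if r != eof {
		l.inputIdx++
	}
	return r
}

func main() {
	l := &indexLexer{input: bytes.Runes([]byte("a = 1"))}
	for r := l.read(); r != eof; r = l.read() {
		fmt.Printf("%q ", r)
	}
	fmt.Println()
}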
pelletier authored Jun 28, 2017
1 parent ef23ce9 commit 69d355d
Showing 4 changed files with 70 additions and 105 deletions.
87 changes: 40 additions & 47 deletions lexer.go
@@ -9,12 +9,9 @@ import (
"bytes"
"errors"
"fmt"
"io"
"regexp"
"strconv"
"strings"

"github.com/pelletier/go-buffruneio"
)

var dateRegexp *regexp.Regexp
@@ -24,43 +21,43 @@ type tomlLexStateFn func() tomlLexStateFn

// Define lexer
type tomlLexer struct {
input *buffruneio.Reader // Textual source
buffer bytes.Buffer // Runes composing the current token
tokens chan token
depth int
line int
col int
endbufferLine int
endbufferCol int
inputIdx int
input []rune // Textual source
currentTokenStart int
currentTokenStop int
tokens []token
depth int
line int
col int
endbufferLine int
endbufferCol int
}

// Basic read operations on input

func (l *tomlLexer) read() rune {
r, _, err := l.input.ReadRune()
if err != nil {
panic(err)
}
r := l.peek()
if r == '\n' {
l.endbufferLine++
l.endbufferCol = 1
} else {
l.endbufferCol++
}
l.inputIdx++
return r
}

func (l *tomlLexer) next() rune {
r := l.read()

if r != eof {
l.buffer.WriteRune(r)
l.currentTokenStop++
}
return r
}

func (l *tomlLexer) ignore() {
l.buffer.Reset()
l.currentTokenStart = l.currentTokenStop
l.line = l.endbufferLine
l.col = l.endbufferCol
}
@@ -77,49 +74,46 @@ func (l *tomlLexer) fastForward(n int) {
}

func (l *tomlLexer) emitWithValue(t tokenType, value string) {
l.tokens <- token{
l.tokens = append(l.tokens, token{
Position: Position{l.line, l.col},
typ: t,
val: value,
}
})
l.ignore()
}

func (l *tomlLexer) emit(t tokenType) {
l.emitWithValue(t, l.buffer.String())
l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
}

func (l *tomlLexer) peek() rune {
r, _, err := l.input.ReadRune()
if err != nil {
panic(err)
if l.inputIdx >= len(l.input) {
return eof
}
l.input.UnreadRune()
return r
return l.input[l.inputIdx]
}

func (l *tomlLexer) follow(next string) bool {
for _, expectedRune := range next {
r, _, err := l.input.ReadRune()
defer l.input.UnreadRune()
if err != nil {
panic(err)
}
if expectedRune != r {
return false
}
func (l *tomlLexer) peekString(size int) string {
maxIdx := len(l.input)
upperIdx := l.inputIdx + size // FIXME: potential overflow
if upperIdx > maxIdx {
upperIdx = maxIdx
}
return true
return string(l.input[l.inputIdx:upperIdx])
}

func (l *tomlLexer) follow(next string) bool {
return next == l.peekString(len(next))
}

// Error management

func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
l.tokens <- token{
l.tokens = append(l.tokens, token{
Position: Position{l.line, l.col},
typ: tokenError,
val: fmt.Sprintf(format, args...),
}
})
return nil
}

@@ -220,7 +214,7 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
break
}

possibleDate := string(l.input.PeekRunes(35))
possibleDate := l.peekString(35)
dateMatch := dateRegexp.FindString(possibleDate)
if dateMatch != "" {
l.fastForward(len(dateMatch))
@@ -537,7 +531,7 @@ func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() {
switch r {
case ']':
if l.buffer.Len() > 0 {
if l.currentTokenStop > l.currentTokenStart {
l.emit(tokenKeyGroupArray)
}
l.next()
@@ -560,7 +554,7 @@ func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() {
switch r {
case ']':
if l.buffer.Len() > 0 {
if l.currentTokenStop > l.currentTokenStart {
l.emit(tokenKeyGroup)
}
l.next()
@@ -635,24 +629,23 @@ func (l *tomlLexer) run() {
for state := l.lexVoid; state != nil; {
state = state()
}
close(l.tokens)
}

func init() {
dateRegexp = regexp.MustCompile(`^\d{1,4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})`)
}

// Entry point
func lexToml(input io.Reader) chan token {
bufferedInput := buffruneio.NewReader(input)
func lexToml(inputBytes []byte) []token {
runes := bytes.Runes(inputBytes)
l := &tomlLexer{
input: bufferedInput,
tokens: make(chan token),
input: runes,
tokens: make([]token, 0, 256),
line: 1,
col: 1,
endbufferLine: 1,
endbufferCol: 1,
}
go l.run()
l.run()
return l.tokens
}
39 changes: 5 additions & 34 deletions lexer_test.go
@@ -1,38 +1,14 @@
package toml

import (
"os"
"strings"
"reflect"
"testing"
)

func testFlow(t *testing.T, input string, expectedFlow []token) {
ch := lexToml(strings.NewReader(input))
for _, expected := range expectedFlow {
token := <-ch
if token != expected {
t.Log("While testing: ", input)
t.Log("compared (got)", token, "to (expected)", expected)
t.Log("\tvalue:", token.val, "<->", expected.val)
t.Log("\tvalue as bytes:", []byte(token.val), "<->", []byte(expected.val))
t.Log("\ttype:", token.typ.String(), "<->", expected.typ.String())
t.Log("\tline:", token.Line, "<->", expected.Line)
t.Log("\tcolumn:", token.Col, "<->", expected.Col)
t.Log("compared", token, "to", expected)
t.FailNow()
}
}

tok, ok := <-ch
if ok {
t.Log("channel is not closed!")
t.Log(len(ch)+1, "tokens remaining:")

t.Log("token ->", tok)
for token := range ch {
t.Log("token ->", token)
}
t.FailNow()
tokens := lexToml([]byte(input))
if !reflect.DeepEqual(tokens, expectedFlow) {
t.Fatal("Different flows. Expected\n", expectedFlow, "\nGot:\n", tokens)
}
}

@@ -767,13 +743,8 @@ pluralizeListTitles = false
url = "https://github.com/spf13/hugo/releases"
weight = -200
`
rd := strings.NewReader(sample)

b.ResetTimer()
for i := 0; i < b.N; i++ {
rd.Seek(0, os.SEEK_SET)
ch := lexToml(rd)
for _ = range ch {
}
lexToml([]byte(sample))
}
}
30 changes: 10 additions & 20 deletions parser.go
@@ -13,9 +13,9 @@ import (
)

type tomlParser struct {
flow chan token
flowIdx int
flow []token
tree *Tree
tokensBuffer []token
currentTable []string
seenTableKeys []string
}
@@ -34,16 +34,10 @@ func (p *tomlParser) run() {
}

func (p *tomlParser) peek() *token {
if len(p.tokensBuffer) != 0 {
return &(p.tokensBuffer[0])
}

tok, ok := <-p.flow
if !ok {
if p.flowIdx >= len(p.flow) {
return nil
}
p.tokensBuffer = append(p.tokensBuffer, tok)
return &tok
return &p.flow[p.flowIdx]
}

func (p *tomlParser) assume(typ tokenType) {
@@ -57,16 +51,12 @@ func (p *tomlParser) assume(typ tokenType) {
}

func (p *tomlParser) getToken() *token {
if len(p.tokensBuffer) != 0 {
tok := p.tokensBuffer[0]
p.tokensBuffer = p.tokensBuffer[1:]
return &tok
}
tok, ok := <-p.flow
if !ok {
tok := p.peek()
if tok == nil {
return nil
}
return &tok
p.flowIdx++
return tok
}

func (p *tomlParser) parseStart() tomlParserStateFn {
@@ -374,13 +364,13 @@ func (p *tomlParser) parseArray() interface{} {
return array
}

func parseToml(flow chan token) *Tree {
func parseToml(flow []token) *Tree {
result := newTree()
result.position = Position{1, 1}
parser := &tomlParser{
flowIdx: 0,
flow: flow,
tree: result,
tokensBuffer: make([]token, 0),
currentTable: make([]string, 0),
seenTableKeys: make([]string, 0),
}
19 changes: 15 additions & 4 deletions toml.go
@@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"runtime"
"strings"
@@ -251,8 +252,8 @@ func (t *Tree) createSubTree(keys []string, pos Position) error {
return nil
}

// LoadReader creates a Tree from any io.Reader.
func LoadReader(reader io.Reader) (tree *Tree, err error) {
// LoadBytes creates a Tree from a []byte.
func LoadBytes(b []byte) (tree *Tree, err error) {
defer func() {
if r := recover(); r != nil {
if _, ok := r.(runtime.Error); ok {
@@ -261,13 +262,23 @@ func LoadReader(reader io.Reader) (tree *Tree, err error) {
err = errors.New(r.(string))
}
}()
tree = parseToml(lexToml(reader))
tree = parseToml(lexToml(b))
return
}

// LoadReader creates a Tree from any io.Reader.
func LoadReader(reader io.Reader) (tree *Tree, err error) {
inputBytes, err := ioutil.ReadAll(reader)
if err != nil {
return
}
tree, err = LoadBytes(inputBytes)
return
}

// Load creates a Tree from a string.
func Load(content string) (tree *Tree, err error) {
return LoadReader(strings.NewReader(content))
return LoadBytes([]byte(content))
}

// LoadFile creates a Tree from a file.
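
For reference, a small usage sketch of the new entry point (assuming the github.com/pelletier/go-toml import path; error handling kept minimal). Load and LoadReader now funnel into the same byte-slice path, so callers that already hold the document in memory can call LoadBytes directly.

package main

import (
	"fmt"

	"github.com/pelletier/go-toml"
)

func main() {
	doc := []byte("[server]\nhost = \"127.0.0.1\"\nport = 8080\n")

	// LoadBytes lexes and parses straight from the byte slice.
	tree, err := toml.LoadBytes(doc)
	if err != nil {
		panic(err)
	}
	fmt.Println(tree.Get("server.port")) // 8080
}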
