Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes DFA/NFA returning incorrect tc in some cases #21

Merged
merged 7 commits into from
Feb 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions frontend/frontend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func TestParseConcatAltMaybes(x *testing.T) {
t.Error(err)
}
t.Log(program)
tMatch(program, "", t)
tNoMatch(program, "", t) // will get empty string error
tMatch(program, "E", t)
tMatch(program, "D", t)
tMatch(program, "A", t)
Expand Down Expand Up @@ -209,7 +209,7 @@ func TestParseConcatAltStar(x *testing.T) {
t.Error(err)
}
t.Log(program)
tMatch(program, "", t)
tNoMatch(program, "", t) // will get empty string error
tMatch(program, "X", t)
tMatch(program, "Y", t)
tMatch(program, "A", t)
Expand Down
221 changes: 178 additions & 43 deletions lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,42 +255,61 @@ func TestPartialLexer(x *testing.T) {
}

func TestRegression(t *testing.T) {
skip := func(*Scanner, *machines.Match) (interface{}, error) {
return nil, nil
}
token := func(id int, name string) Action {
token := func(name string) Action {
return func(s *Scanner, m *machines.Match) (interface{}, error) {
return string(m.Bytes), nil
return fmt.Sprintf("%v:%q", name, string(m.Bytes)), nil
}
}

data := "true" // This input fails.
// data := "true " // this with a trailing space does not.

lexer := NewLexer()
lexer.Add([]byte("true"), token(0, "TRUE"))
lexer.Add([]byte("( |\t|\n|\r)+"), skip)
newLexer := func() *Lexer {
lexer := NewLexer()
lexer.Add([]byte("true"), token("TRUE"))
lexer.Add([]byte("( |\t|\n|\r)+"), token("SPACE"))
return lexer
}

if err := lexer.CompileDFA(); err != nil {
t.Fatal(err)
tests := []struct {
text string
tokens int
}{
{`true`, 1},
{`true `, 2},
}

var scanner *Scanner
runTest := func(lexer *Lexer) {
for _, test := range tests {
scanner, err := lexer.Scanner([]byte(test.text))
if err != nil {
t.Fatal(err)
}

scanner, err := lexer.Scanner([]byte(data))
if err != nil {
t.Fatal(err)
found := 0
tok, err, eos := scanner.Next()
for ; !eos; tok, err, eos = scanner.Next() {
if err != nil {
t.Fatal(err)
}
fmt.Printf("Token: %v\n", tok)
found++
}
if found != test.tokens {
t.Errorf("Expected exactly %v tokens got %v, ===\nErr: %v\nEOS: %v\nTC: %d\n", test.tokens, found, err, eos, scanner.TC)
}
}
}

found := 0
tok, err, eos := scanner.Next()
for ; !eos; tok, err, eos = scanner.Next() {
fmt.Printf("Token: %v\n", tok)
found++
{
lexer := newLexer()
if err := lexer.CompileNFA(); err != nil {
t.Fatal(err)
}
runTest(lexer)
}
if found != 1 {
t.Errorf("Expected exactly 1 tokens got %v, ===\nErr: %v\nEOS: %v\nTC: %d\n", found, err, eos, scanner.TC)

{
lexer := newLexer()
if err := lexer.CompileDFA(); err != nil {
t.Fatal(err)
}
runTest(lexer)
}
}

Expand Down Expand Up @@ -353,30 +372,146 @@ ddns-update-style none;
for _, lit := range literals {
lex.Add([]byte(lit), token(lit))
}
return lex
}

err := lex.Compile()
runTest := func(lexer *Lexer) {
scanner, err := lexer.Scanner([]byte(text))
if err != nil {
panic(err)
return
}
for tok, err, eof := scanner.Next(); !eof; tok, err, eof = scanner.Next() {
if err != nil {
t.Fatal(err)
break
}
token := tok.(*Token)
fmt.Printf("%-7v | %-10v | %v:%v-%v:%v\n",
tokens[token.Type],
strings.TrimSpace(string(token.Lexeme)),
token.StartLine,
token.StartColumn,
token.EndLine,
token.EndColumn)
}
}
{
lexer := newLexer()
if err := lexer.CompileNFA(); err != nil {
t.Fatal(err)
}
runTest(lexer)
}
{
lexer := newLexer()
if err := lexer.CompileDFA(); err != nil {
t.Fatal(err)
}
runTest(lexer)
}
}

return lex
func TestPythonStrings(t *testing.T) {
tokens := []string{
"UNDEF",
"TRUE",
"SINGLE_STRING",
"TRIPLE_STRING",
"TRIPLE_STRING2",
"TY_STRING",
"SPACE",
}
tokenIds := map[string]int{}
for i, tok := range tokens {
tokenIds[tok] = i
}
skip := func(*Scanner, *machines.Match) (interface{}, error) {
return nil, nil
}
token := func(name string) Action {
return func(s *Scanner, m *machines.Match) (interface{}, error) {
return s.Token(tokenIds[name], string(m.Bytes), m), nil
}
}

scanner, err := newLexer().Scanner([]byte(text))
if err != nil {
return
newLexer := func() *Lexer {
lexer := NewLexer()
lexer.Add([]byte("true"), token("TRUE"))
lexer.Add([]byte(`'''([^\\']|(\\.))*'''`), token("TRIPLE_STRING"))
lexer.Add([]byte(`"""([^\\"]|(\\.))*"""`), token("TRIPLE_STRING"))
lexer.Add([]byte(`"([^\\"]|(\\.))*"`), token("SINGLE_STRING"))
lexer.Add([]byte(`'([^\\']|(\\.))*'`), token("SINGLE_STRING"))
lexer.Add([]byte("( |\t|\n|\r)+"), skip)
return lexer
}
for tok, err, eof := scanner.Next(); !eof; tok, err, eof = scanner.Next() {
if err != nil {
t.Error(err)

tests := []struct {
text string
tokens int
}{
{`'''hi'''`, 1},
{`"""hi"""`, 1},
{`"hi"`, 1},
{`'hi'`, 1},
{`''`, 1},
{`""`, 1},
{`""" . .
hello
"""`, 1},
{`'''' ''''`, 4},
{`''''''`, 1},
{`""""""`, 1},
{`"""""" """
hi there""" "wizard" true`, 4},
}

runTest := func(lexer *Lexer) {
for _, test := range tests {
fmt.Printf("test %q\n", test.text)
scanner, err := lexer.Scanner([]byte(test.text))
if err != nil {
t.Fatal(err)
}

found := 0
tok, err, eos := scanner.Next()
for ; !eos; tok, err, eos = scanner.Next() {
if err != nil {
t.Error(err)
fmt.Printf("err: %v\n", err)
scanner.TC++
} else {
token := tok.(*Token)
fmt.Printf("%-15v | %-30q | %d-%d | %v:%v-%v:%v\n",
tokens[token.Type],
strings.TrimSpace(string(token.Lexeme)),
token.TC,
token.TC+len(token.Lexeme),
token.StartLine,
token.StartColumn,
token.EndLine,
token.EndColumn)
found++
}
}
if found != test.tokens {
t.Errorf("expected %v tokens got %v: %q", test.tokens, found, test.text)
}
}
}
{
lexer := newLexer()
if err := lexer.CompileNFA(); err != nil {
t.Fatal(err)
}
token := tok.(*Token)
fmt.Printf("%-7v | %-10v | %v:%v-%v:%v\n",
tokens[token.Type],
strings.TrimSpace(string(token.Lexeme)),
token.StartLine,
token.StartColumn,
token.EndLine,
token.EndColumn)
runTest(lexer)
}
{
lexer := newLexer()
if err := lexer.CompileDFA(); err != nil {
t.Fatal(err)
}
runTest(lexer)
}

}
26 changes: 23 additions & 3 deletions machines/dfa_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,23 @@ func DFALexerEngine(startState, errorState int, trans DFATrans, accepting DFAAcc
Bytes: text[startTC:matchTC],
}
matchID = -1
return tc, match, nil, scan
if matchTC == startTC {
err := &EmptyMatchError{
MatchID: matchID,
TC: tc,
Line: startLC.line,
Column: startLC.col,
}
return startTC, nil, err, scan
}
return matchTC, match, nil, scan
}
}
if match, has := accepting[state]; has && startTC < len(text) {
if match, has := accepting[state]; has {
matchID = match
matchTC = tc
}
if startTC < len(text) && matchTC <= len(text) && matchID > -1 {
startLC := lineCols[startTC]
endLC := lineCols[matchTC-1]
match := &Match{
Expand All @@ -91,7 +102,16 @@ func DFALexerEngine(startState, errorState int, trans DFATrans, accepting DFAAcc
Bytes: text[startTC:matchTC],
}
matchID = -1
return tc, match, nil, scan
if matchTC == startTC {
err := &EmptyMatchError{
MatchID: matchID,
TC: tc,
Line: startLC.line,
Column: startLC.col,
}
return startTC, nil, err, scan
}
return matchTC, match, nil, scan
}
if matchTC != len(text) && startTC >= len(text) {
// the user has moved us farther than the text. Assume that was
Expand Down
26 changes: 25 additions & 1 deletion machines/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,21 @@ import (
"github.com/timtadh/lexmachine/queue"
)

// EmptyMatchError is returned when a pattern would have matched the empty
// string.
type EmptyMatchError struct {
	TC      int // the scanner's text counter (tc) when the empty match was detected
	Line    int // line of the position where the empty match occurred
	Column  int // column of the position where the empty match occurred
	MatchID int // id of the pattern that produced the empty match
}

// Error implements the error interface, reporting the source position
// (line:column), the text counter, and the id of the offending pattern.
func (e *EmptyMatchError) Error() string {
	position := fmt.Sprintf("%d:%d", e.Line, e.Column)
	return fmt.Sprintf(
		"Lexer error: matched the empty string at %s (tc=%d) for match id %d.",
		position, e.TC, e.MatchID)
}

// UnconsumedInput error type
type UnconsumedInput struct {
StartTC int
Expand Down Expand Up @@ -195,7 +210,16 @@ func LexerEngine(program inst.Slice, text []byte) Scanner {
}
prevTC = startTC
matchPC = -1
return tc, match, nil, scan
if matchTC == startTC {
err := &EmptyMatchError{
MatchID: matchPC,
TC: tc,
Line: line,
Column: col,
}
return startTC, nil, err, scan
}
return matchTC, match, nil, scan
}
}
if matchTC != len(text) && startTC >= len(text) {
Expand Down