Skip to content

Commit

Permalink
Fix parser (#557)
Browse files Browse the repository at this point in the history
* fix document separator with directive

* fix number value with local tag

* fix decoding string for null value

* fix single pair flow mapping

* fix test case

* fix plain lines with tab indent

* fix test case

* fix sequence with null

* fix test case

* fix invalid map-value
  • Loading branch information
goccy authored Nov 30, 2024
1 parent c5254d7 commit 45889c9
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 34 deletions.
9 changes: 8 additions & 1 deletion decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,14 @@ func (d *Decoder) nodeToValue(node ast.Node) (any, error) {
}
return nil, errors.ErrSyntax(fmt.Sprintf("cannot convert %q to boolean", fmt.Sprint(v)), n.Value.GetToken())
case token.StringTag:
return d.nodeToValue(n.Value)
v, err := d.nodeToValue(n.Value)
if err != nil {
return nil, err
}
if v == nil {
return "", nil
}
return fmt.Sprint(v), nil
case token.MappingTag:
return d.nodeToValue(n.Value)
default:
Expand Down
110 changes: 95 additions & 15 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,11 +442,18 @@ func (p *parser) parseMap(ctx *context) (*ast.MappingNode, error) {
tk = ctx.currentToken()
}
for tk.Column() == keyTk.Column() {
typ := tk.Type()
if ctx.isFlow && typ == token.SequenceEndType {
// [
// key: value
// ] <=
break
}
if !p.isMapToken(tk) {
return nil, errors.ErrSyntax("non-map value is specified", tk.RawToken())
}
cm := p.parseHeadComment(ctx)
if tk.Type() == token.MappingEndType {
if typ == token.MappingEndType {
// a: {
// b: c
// } <=
Expand Down Expand Up @@ -644,6 +651,15 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token)
keyCol := key.GetToken().Position.Column
keyLine := key.GetToken().Position.Line

if tk.Column() != keyCol && tk.Line() == keyLine && (tk.GroupType() == TokenGroupMapKey || tk.GroupType() == TokenGroupMapKeyValue) {
// a: b:
// ^
//
// a: b: c
// ^
return nil, errors.ErrSyntax("mapping value is not allowed in this context", tk.RawToken())
}

if tk.Column() == keyCol && p.isMapToken(tk) {
// in this case,
// ----
Expand Down Expand Up @@ -673,9 +689,6 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token)
if tk.Column() <= keyCol && tk.GroupType() == TokenGroupAnchorName {
// key: <value does not defined>
// &anchor
//
// key: <value does not defined>
// &anchor
return nil, errors.ErrSyntax("anchor is not allowed in this context", tk.RawToken())
}

Expand Down Expand Up @@ -932,17 +945,7 @@ func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) {
comment := p.parseHeadComment(ctx)
ctx.goNext() // skip sequence entry token

valueTk := ctx.currentToken()
if valueTk == nil {
node, err := newNullNode(ctx, ctx.createNullToken(seqTk))
if err != nil {
return nil, err
}
seqNode.Values = append(seqNode.Values, node)
break
}

value, err := p.parseToken(ctx.withIndex(uint(len(seqNode.Values))), valueTk)
value, err := p.parseSequenceValue(ctx.withIndex(uint(len(seqNode.Values))), seqTk)
if err != nil {
return nil, err
}
Expand All @@ -968,6 +971,83 @@ func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) {
return seqNode, nil
}

// parseSequenceValue parses the value that belongs to the sequence entry
// token seqTk ("-"). The context is expected to be positioned on the token
// that follows the entry marker.
//
// Depending on what follows the marker it returns:
//   - a null node when the entry has no value,
//   - an anchor node wrapping a null token when the entry holds only an anchor,
//   - a syntax error when an anchor name is not indented deeper than the entry,
//   - otherwise whatever parseToken produces for the current token.
func (p *parser) parseSequenceValue(ctx *context, seqTk *Token) (ast.Node, error) {
	tk := ctx.currentToken()
	if tk == nil {
		// Nothing follows the entry marker at all.
		return newNullNode(ctx, ctx.insertNullToken(seqTk))
	}
	if ctx.isComment() {
		// Base the decisions below on the first non-comment token.
		tk = ctx.nextNotCommentToken()
	}

	seqLine := seqTk.Line()
	seqCol := seqTk.Column()

	// anchorOnEntryLine reports whether tk is an anchor name on the same line
	// as the entry marker. It is a closure so that tk is only inspected when a
	// case below actually reaches it.
	anchorOnEntryLine := func() bool {
		return tk.Line() == seqLine && tk.GroupType() == TokenGroupAnchorName
	}

	// emptyAnchor builds an anchor whose value is a null token, parses it, and
	// then advances the context by one token.
	emptyAnchor := func() (ast.Node, error) {
		group := &TokenGroup{
			Type:   TokenGroupAnchor,
			Tokens: []*Token{tk, ctx.createNullToken(tk)},
		}
		anchor, err := p.parseAnchor(ctx.withGroup(group), group)
		if err != nil {
			return nil, err
		}
		ctx.goNext()
		return anchor, nil
	}

	switch {
	case tk.Column() == seqCol && tk.Type() == token.SequenceEntryType:
		// The next token is a sibling entry, so this entry is empty:
		// ----
		// - <value is not defined>
		// -
		return newNullNode(ctx, ctx.insertNullToken(seqTk))

	case anchorOnEntryLine() &&
		ctx.nextToken().Column() == seqCol && ctx.nextToken().Type() == token.SequenceEntryType:
		// An anchor with no value, directly followed by a sibling entry:
		// ----
		// - &anchor
		// -
		return emptyAnchor()

	case tk.Column() <= seqCol && tk.GroupType() == TokenGroupAnchorName:
		// An anchor name that is not indented deeper than the entry marker:
		// - <value is not defined>
		// &anchor
		return nil, errors.ErrSyntax("anchor is not allowed in this sequence context", tk.RawToken())

	case tk.Column() < seqCol:
		// The next token is dedented, so this entry is empty:
		// ----
		//   - <value is not defined>
		// next
		return newNullNode(ctx, ctx.insertNullToken(seqTk))

	case anchorOnEntryLine() && ctx.nextToken().Column() < seqCol:
		// An anchor with no value, followed by a dedented token:
		// ----
		//   - &anchor
		// next
		return emptyAnchor()
	}

	// Regular case: parse starting from the context's current token.
	node, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, err
	}
	return node, nil
}

func (p *parser) parseDirective(ctx *context, g *TokenGroup) (*ast.DirectiveNode, error) {
node, err := newDirectiveNode(ctx, g.First())
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions parser/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,10 @@ func createDocumentTokens(tokens []*Token) ([]*Token, error) {
}

func isScalarType(tk *Token) bool {
switch tk.GroupType() {
case TokenGroupMapKey, TokenGroupMapKeyValue:
return false
}
typ := tk.Type()
return typ == token.AnchorType ||
typ == token.AliasType ||
Expand Down
15 changes: 15 additions & 0 deletions scanner/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,25 @@ func (c *Context) bufferedToken(pos *token.Position) *token.Token {
} else {
tk = token.New(string(source), string(c.obuf), pos)
}
c.setTokenTypeByPrevTag(tk)
c.resetBuffer()
return tk
}

// setTokenTypeByPrevTag marks tk as a string token when the most recently
// recorded token is a tag whose value is not a key of
// token.ReservedTagKeywordMap. In every other situation tk is left untouched.
func (c *Context) setTokenTypeByPrevTag(tk *token.Token) {
	prev := c.lastToken()
	if prev == nil || prev.Type != token.TagType {
		// No previous token, or the previous token is not a tag.
		return
	}
	if _, reserved := token.ReservedTagKeywordMap[token.ReservedTagKeyword(prev.Value)]; reserved {
		// Reserved tags keep whatever type the token already has.
		return
	}
	tk.Type = token.StringType
}

func (c *Context) lastToken() *token.Token {
if len(c.tokens) != 0 {
return c.tokens[len(c.tokens)-1]
Expand Down
33 changes: 26 additions & 7 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,8 +515,17 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
}

// validateDocumentSeparatorMarker returns an invalid-token error when src is
// recognised by foundDocumentSeparatorMarker as a document separator marker,
// and nil otherwise. The error token carries the context's origin buffer and
// the scanner's current position.
func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) error {
	if !s.foundDocumentSeparatorMarker(src) {
		return nil
	}
	invalidTk := token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos())
	return ErrInvalidToken(invalidTk)
}

func (s *Scanner) foundDocumentSeparatorMarker(src []rune) bool {
if len(src) < 3 {
return nil
return false
}
var marker string
if len(src) == 3 {
Expand All @@ -526,12 +535,7 @@ func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) erro
return r == ' ' || r == '\t' || r == '\n' || r == '\r'
})
}
if marker == "---" || marker == "..." {
return ErrInvalidToken(
token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos()),
)
}
return nil
return marker == "---" || marker == "..."
}

func (s *Scanner) scanQuote(ctx *Context, ch rune) (bool, error) {
Expand Down Expand Up @@ -701,6 +705,14 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
ctx.addBuf(c)
ctx.updateDocumentNewLineState()
s.progressLine(ctx)
if ctx.next() {
if s.foundDocumentSeparatorMarker(ctx.src[ctx.idx:]) {
value := ctx.bufferedSrc()
ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos()))
ctx.clear()
s.breakDocument(ctx)
}
}
} else if s.isFirstCharAtLine && c == ' ' {
ctx.addDocumentIndent(s.column)
s.progressColumn(ctx, 1)
Expand Down Expand Up @@ -1319,6 +1331,13 @@ func (s *Scanner) scan(ctx *Context) error {
return err
}
case '\t':
if ctx.existsBuffer() && s.lastDelimColumn == 0 {
// tab indent for plain text (yaml-test-suite's spec-example-7-12-plain-lines).
s.indentNum++
ctx.addOriginBuf(c)
s.progressColumn(ctx, 1)
continue
}
if err := s.scanTab(ctx, c); err != nil {
return err
}
Expand Down
12 changes: 1 addition & 11 deletions yaml_test_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ var failureTestNames = []string{
"anchors-on-empty-scalars", // no json.
"aliases-in-flow-objects", // no json.
"aliases-in-explicit-block-mapping", // no json.
"aliases-in-implicit-block-mapping",
"bare-document-after-document-end-marker",
"block-mapping-with-missing-keys", // no json.
"block-mapping-with-missing-keys", // no json.
"block-mapping-with-missing-values",
"block-mapping-with-multiline-scalars",
"block-scalar-with-more-spaces-than-first-content-line",
Expand Down Expand Up @@ -86,22 +84,14 @@ var failureTestNames = []string{
"spec-example-8-19-compact-block-mappings", // no json.
"spec-example-6-19-secondary-tag-handle",
"spec-example-6-24-verbatim-tags",
"spec-example-6-28-non-specific-tags",
"spec-example-6-4-line-prefixes",
"spec-example-6-6-line-folding",
"spec-example-6-6-line-folding-1-3",
"spec-example-6-8-flow-folding",
"spec-example-7-12-plain-lines",
"spec-example-7-19-single-pair-flow-mappings",
"spec-example-7-20-single-pair-explicit-entry",
"spec-example-7-24-flow-nodes",
"spec-example-8-10-folded-lines-8-13-final-empty-lines",
"spec-example-8-15-block-sequence-entry-types",
"spec-example-8-17-explicit-block-mapping-entries",
"spec-example-8-2-block-indentation-indicator",
"spec-example-9-3-bare-documents",
"spec-example-9-4-explicit-documents",
"spec-example-9-5-directives-documents",
"spec-example-9-6-stream",
"spec-example-9-6-stream-1-3",
"syntax-character-edge-cases/00", // no json.
Expand Down

0 comments on commit 45889c9

Please sign in to comment.