Support Op and Sep Tokens in Expression Values (#3984)

* Support Op and Sep Tokens in Expression Values * Additional Test Cases
aws · Jul 1, 2021 · f09a23b · f09a23b
1 parent 610918a
commit f09a23b
Show file tree

Hide file tree

Showing 11 changed files with 189 additions and 52 deletions.
diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md
@@ -1,4 +1,5 @@
 ### SDK Features
+* `internal/ini`: The ini parser has been updated to support `[`, `]`, `:`, and `=` being present in section key values. ([#3958](https://github.com/aws/aws-sdk-go/issues/3958))
 
 ### SDK Enhancements
 

diff --git a/internal/ini/doc.go b/internal/ini/doc.go
@@ -13,17 +13,30 @@
 //	}
 //
 // Below is the BNF that describes this parser
-//	Grammar:
-//	stmt -> value stmt'
-//	stmt' -> epsilon | op stmt
-//	value -> number | string | boolean | quoted_string
+//  Grammar:
+//  stmt -> section | stmt'
+//  stmt' -> epsilon | expr
+//  expr -> value (stmt)* | equal_expr (stmt)*
+//  equal_expr -> value ( ':' | '=' ) equal_expr'
+//  equal_expr' -> number | string | quoted_string
+//  quoted_string -> " quoted_string'
+//  quoted_string' -> string quoted_string_end
+//  quoted_string_end -> "
 //
-//	section -> [ section'
-//	section' -> value section_close
-//	section_close -> ]
+//  section -> [ section'
+//  section' -> section_value section_close
+//  section_value -> number | string_subset | boolean | quoted_string_subset
+//  quoted_string_subset -> " quoted_string_subset'
+//  quoted_string_subset' -> string_subset quoted_string_end
+//  quoted_string_end -> "
+//  section_close -> ]
 //
-//	SkipState will skip (NL WS)+
+//  value -> number | string_subset | boolean
+//  string -> ? UTF-8 Code-Points except '\n' (U+000A) and '\r\n' (U+000D U+000A) ?
+//  string_subset -> ? Code-points accepted by <string> grammar except ':' (U+003A), '=' (U+003D), '[' (U+005B), and ']' (U+005D) ?
 //
-//	comment -> # comment' | ; comment'
-//	comment' -> epsilon | value
+//  SkipState will skip (NL WS)+
+//
+//  comment -> # comment' | ; comment'
+//  comment' -> epsilon | value
 package ini
diff --git a/internal/ini/ini_parser.go b/internal/ini/ini_parser.go
@@ -5,9 +5,12 @@ import (
 	"io"
 )
 
+// ParseState represents the current state of the parser.
+type ParseState uint
+
 // State enums for the parse table
 const (
-	InvalidState = iota
+	InvalidState ParseState = iota
 	// stmt -> value stmt'
 	StatementState
 	// stmt' -> MarkComplete | op stmt
@@ -36,24 +39,24 @@ const (
 )
 
 // parseTable is a state machine to dictate the grammar above.
-var parseTable = map[ASTKind]map[TokenType]int{
-	ASTKindStart: map[TokenType]int{
+var parseTable = map[ASTKind]map[TokenType]ParseState{
+	ASTKindStart: {
 		TokenLit:     StatementState,
 		TokenSep:     OpenScopeState,
 		TokenWS:      SkipTokenState,
 		TokenNL:      SkipTokenState,
 		TokenComment: CommentState,
 		TokenNone:    TerminalState,
 	},
-	ASTKindCommentStatement: map[TokenType]int{
+	ASTKindCommentStatement: {
 		TokenLit:     StatementState,
 		TokenSep:     OpenScopeState,
 		TokenWS:      SkipTokenState,
 		TokenNL:      SkipTokenState,
 		TokenComment: CommentState,
 		TokenNone:    MarkCompleteState,
 	},
-	ASTKindExpr: map[TokenType]int{
+	ASTKindExpr: {
 		TokenOp:      StatementPrimeState,
 		TokenLit:     ValueState,
 		TokenSep:     OpenScopeState,
@@ -62,46 +65,48 @@ var parseTable = map[ASTKind]map[TokenType]int{
 		TokenComment: CommentState,
 		TokenNone:    MarkCompleteState,
 	},
-	ASTKindEqualExpr: map[TokenType]int{
-		TokenLit:     ValueState,
-		TokenWS:      SkipTokenState,
-		TokenNL:      SkipState,
-		TokenNone:    SkipState,
+	ASTKindEqualExpr: {
+		TokenLit:  ValueState,
+		TokenSep:  ValueState,
+		TokenOp:   ValueState,
+		TokenWS:   SkipTokenState,
+		TokenNL:   SkipState,
+		TokenNone: SkipState,
 	},
-	ASTKindStatement: map[TokenType]int{
+	ASTKindStatement: {
 		TokenLit:     SectionState,
 		TokenSep:     CloseScopeState,
 		TokenWS:      SkipTokenState,
 		TokenNL:      SkipTokenState,
 		TokenComment: CommentState,
 		TokenNone:    MarkCompleteState,
 	},
-	ASTKindExprStatement: map[TokenType]int{
+	ASTKindExprStatement: {
 		TokenLit:     ValueState,
-		TokenSep:     OpenScopeState,
+		TokenSep:     ValueState,
 		TokenOp:      ValueState,
 		TokenWS:      ValueState,
 		TokenNL:      MarkCompleteState,
 		TokenComment: CommentState,
 		TokenNone:    TerminalState,
 		TokenComma:   SkipState,
 	},
-	ASTKindSectionStatement: map[TokenType]int{
+	ASTKindSectionStatement: {
 		TokenLit: SectionState,
 		TokenOp:  SectionState,
 		TokenSep: CloseScopeState,
 		TokenWS:  SectionState,
 		TokenNL:  SkipTokenState,
 	},
-	ASTKindCompletedSectionStatement: map[TokenType]int{
+	ASTKindCompletedSectionStatement: {
 		TokenWS:      SkipTokenState,
 		TokenNL:      SkipTokenState,
 		TokenLit:     StatementState,
 		TokenSep:     OpenScopeState,
 		TokenComment: CommentState,
 		TokenNone:    MarkCompleteState,
 	},
-	ASTKindSkipStatement: map[TokenType]int{
+	ASTKindSkipStatement: {
 		TokenLit:     StatementState,
 		TokenSep:     OpenScopeState,
 		TokenWS:      SkipTokenState,
@@ -205,18 +210,6 @@ loop:
 		case ValueState:
 			// ValueState requires the previous state to either be an equal expression
 			// or an expression statement.
-			//
-			// This grammar occurs when the RHS is a number, word, or quoted string.
-			// equal_expr -> lit op equal_expr'
-			// equal_expr' -> number | string | quoted_string
-			// quoted_string -> " quoted_string'
-			// quoted_string' -> string quoted_string_end
-			// quoted_string_end -> "
-			//
-			// otherwise
-			// expr_stmt -> equal_expr (expr_stmt')*
-			// expr_stmt' -> ws S | op S | MarkComplete
-			// S -> equal_expr' expr_stmt'
 			switch k.Kind {
 			case ASTKindEqualExpr:
 				// assigning a value to some key
@@ -243,7 +236,7 @@ loop:
 				}
 
 				children[len(children)-1] = rhs
-				k.SetChildren(children)
+				root.SetChildren(children)
 
 				stack.Push(k)
 			}

diff --git a/internal/ini/ini_parser_test.go b/internal/ini/ini_parser_test.go
@@ -25,6 +25,9 @@ func TestParser(t *testing.T) {
 	outputID, _, _ := newLitToken([]rune("output"))
 	outputLit, _, _ := newLitToken([]rune("json"))
 
+	sepInValueID, _, _ := newLitToken([]rune("sepInValue"))
+	sepInValueLit := newToken(TokenOp, []rune("=:[foo]]bar["), StringType)
+
 	equalOp, _, _ := newOpToken([]rune("= 1234"))
 	equalColonOp, _, _ := newOpToken([]rune(": 1234"))
 	numLit, _, _ := newLitToken([]rune("1234"))
@@ -53,6 +56,9 @@ func TestParser(t *testing.T) {
 	outputEQExpr := newEqualExpr(newExpression(outputID), equalOp)
 	outputEQExpr.AppendChild(newExpression(outputLit))
 
+	sepInValueExpr := newEqualExpr(newExpression(sepInValueID), equalOp)
+	sepInValueExpr.AppendChild(newExpression(sepInValueLit))
+
 	cases := []struct {
 		name          string
 		r             io.Reader
@@ -67,24 +73,48 @@ func TestParser(t *testing.T) {
 			},
 		},
 		{
-			name:          "0==0",
-			r:             bytes.NewBuffer([]byte(`0==0`)),
-			expectedError: true,
+			name: "0==0",
+			r:    bytes.NewBuffer([]byte(`0==0`)),
+			expectedStack: []AST{
+				func() AST {
+					equalExpr := newEqualExpr(newExpression(newToken(TokenLit, []rune("0"), StringType)), equalOp)
+					equalExpr.AppendChild(newExpression(newToken(TokenOp, []rune("=0"), StringType)))
+					return newExprStatement(equalExpr)
+				}(),
+			},
 		},
 		{
-			name:          "0=:0",
-			r:             bytes.NewBuffer([]byte(`0=:0`)),
-			expectedError: true,
+			name: "0=:0",
+			r:    bytes.NewBuffer([]byte(`0=:0`)),
+			expectedStack: []AST{
+				func() AST {
+					equalExpr := newEqualExpr(newExpression(newToken(TokenLit, []rune("0"), StringType)), equalOp)
+					equalExpr.AppendChild(newExpression(newToken(TokenOp, []rune(":0"), StringType)))
+					return newExprStatement(equalExpr)
+				}(),
+			},
 		},
 		{
-			name:          "0:=0",
-			r:             bytes.NewBuffer([]byte(`0:=0`)),
-			expectedError: true,
+			name: "0:=0",
+			r:    bytes.NewBuffer([]byte(`0:=0`)),
+			expectedStack: []AST{
+				func() AST {
+					equalExpr := newEqualExpr(newExpression(newToken(TokenLit, []rune("0"), StringType)), equalColonOp)
+					equalExpr.AppendChild(newExpression(newToken(TokenOp, []rune("=0"), StringType)))
+					return newExprStatement(equalExpr)
+				}(),
+			},
 		},
 		{
-			name:          "0::0",
-			r:             bytes.NewBuffer([]byte(`0::0`)),
-			expectedError: true,
+			name: "0::0",
+			r:    bytes.NewBuffer([]byte(`0::0`)),
+			expectedStack: []AST{
+				func() AST {
+					equalExpr := newEqualExpr(newExpression(newToken(TokenLit, []rune("0"), StringType)), equalColonOp)
+					equalExpr.AppendChild(newExpression(newToken(TokenOp, []rune(":0"), StringType)))
+					return newExprStatement(equalExpr)
+				}(),
+			},
 		},
 		{
 			name: "section with variable",
@@ -302,6 +332,25 @@ s3 =`)),
 				newExprStatement(noQuotesRegionEQRegion),
 			},
 		},
+		{
+			name: "token seperators [ and  ] in values",
+			r: bytes.NewBuffer([]byte(
+				`[default]
+sepInValue = =:[foo]]bar[
+output = json
+[assumerole]
+sepInValue==:[foo]]bar[
+output = json
+`)),
+			expectedStack: []AST{
+				newCompletedSectionStatement(defaultProfileStmt),
+				newExprStatement(sepInValueExpr),
+				newExprStatement(outputEQExpr),
+				newCompletedSectionStatement(assumeProfileStmt),
+				newExprStatement(sepInValueExpr),
+				newExprStatement(outputEQExpr),
+			},
+		},
 	}
 
 	for i, c := range cases {

diff --git a/internal/ini/testdata/invalid/bad_section_name b/internal/ini/testdata/invalid/bad_section_name
@@ -0,0 +1 @@
+[ :=foo ]
diff --git a/internal/ini/testdata/invalid/bad_syntax_2 b/internal/ini/testdata/invalid/bad_syntax_2
@@ -0,0 +1 @@
+[ foo ]]
diff --git a/internal/ini/testdata/invalid/invalid_keys b/internal/ini/testdata/invalid/invalid_keys
@@ -0,0 +1,2 @@
+[assumerole]
+key[id] = value
diff --git a/internal/ini/testdata/valid/op_sep_in_values b/internal/ini/testdata/valid/op_sep_in_values
@@ -0,0 +1,30 @@
+[case1]
+sepInValue = =:[foo]]bar[
+key:= value1
+
+[case2]
+sepInValue==:[foo]]bar[
+key = value2
+
+[case3]
+sepInValue = []
+key== value3
+
+[case4]
+sepInValue = [value] x=a
+key:=value4
+
+[case5]
+key : value5
+
+[case6]
+s3 =
+    [nested6]
+    key = valuen6
+key :=value6
+
+[case7]
+s3 =
+key :value7
+[sub7]
+key ==values7
diff --git a/internal/ini/testdata/valid/op_sep_in_values_expected b/internal/ini/testdata/valid/op_sep_in_values_expected
@@ -0,0 +1,32 @@
+{
+  "case1": {
+    "sepInValue": "=:[foo]]bar[",
+    "key": "= value1"
+  },
+  "case2": {
+    "sepInValue": "=:[foo]]bar[",
+    "key": "value2"
+  },
+  "case3": {
+    "sepInValue": "[]",
+    "key": "= value3"
+  },
+  "case4": {
+    "sepInValue": "[value] x=a",
+    "key": "=value4"
+  },
+  "case5": {
+    "key": "value5"
+  },
+  "case6": {
+    "s3": "",
+    "key": "=value6"
+  },
+  "case7": {
+    "s3": "",
+    "key": "value7"
+  },
+  "sub7": {
+    "key": "=values7"
+  }
+}
diff --git a/internal/ini/visitor.go b/internal/ini/visitor.go
@@ -50,7 +50,10 @@ func (v *DefaultVisitor) VisitExpr(expr AST) error {
 
 			rhs := children[1]
 
-			if rhs.Root.Type() != TokenLit {
+			// The right-hand value side of the equality expression is allowed to contain '[', ']', ':', '=' in the values.
+			// If the token is not either a literal or one of the token types that identifies those four additional
+			// tokens then error.
+			if !(rhs.Root.Type() == TokenLit || rhs.Root.Type() == TokenOp || rhs.Root.Type() == TokenSep) {
 				return NewParseError("unexpected token type")
 			}
 

diff --git a/internal/ini/walker_test.go b/internal/ini/walker_test.go
@@ -105,6 +105,10 @@ func TestInvalidDataFiles(t *testing.T) {
 			path:               "./testdata/invalid/bad_syntax_1",
 			expectedParseError: true,
 		},
+		{
+			path:               "./testdata/invalid/bad_syntax_2",
+			expectedParseError: true,
+		},
 		{
 			path:               "./testdata/invalid/incomplete_section_profile",
 			expectedParseError: true,
@@ -113,6 +117,14 @@ func TestInvalidDataFiles(t *testing.T) {
 			path:               "./testdata/invalid/syntax_error_comment",
 			expectedParseError: true,
 		},
+		{
+			path:               "./testdata/invalid/invalid_keys",
+			expectedParseError: true,
+		},
+		{
+			path:               "./testdata/invalid/bad_section_name",
+			expectedParseError: true,
+		},
 	}
 
 	for i, c := range cases {