Skip to content

Commit

Permalink
lexers: add initial version of CUE lexer (#858)
Browse files Browse the repository at this point in the history
This commit adds a decent first-cut of a lexer for CUE
(https://cuelang.org).

The main aspects of the language are implemented. There are likely a
number of edge cases not covered, especially when it comes to string
interpolation, but this is a sufficiently decent first cut to warrant
merging into the chroma project rather than first working in a fork.

This was tested locally for visual correctness using

    mkdir -p _examples
    (command cd ./cmd/chroma; go run . --lexer=cue --html --html-inline-styles) < lexers/testdata/cue.actual > _examples/cue.html

and then loading _examples/cue.html in a browser.

Closes #857
  • Loading branch information
myitcv committed Sep 23, 2023
1 parent 9087c63 commit 827bd93
Show file tree
Hide file tree
Showing 3 changed files with 224 additions and 0 deletions.
85 changes: 85 additions & 0 deletions lexers/embedded/cue.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<lexer>
<config>
  <!-- Identity of the lexer: display name, the alias used to select it
       (e.g. "cue"), and the filename glob / MIME type it is matched on. -->
  <name>CUE</name>
  <alias>cue</alias>
  <filename>*.cue</filename>
  <mime_type>text/x-cue</mime_type>

  <!-- Regex engine option: "." in rule patterns also matches newlines. -->
  <dot_all>true</dot_all>

  <!-- Input normalisation: make sure the text being lexed ends with a newline. -->
  <ensure_nl>true</ensure_nl>
</config>
<rules>
<state name="root">
  <!-- Top-level CUE source. Rules are tried in document order; the first
       pattern that matches at the current position wins. -->

  <!-- Horizontal whitespace (any whitespace except a newline). -->
  <rule pattern="[^\S\n]+">
    <token type="Text"/>
  </rule>
  <!-- Backslash immediately followed by a newline. -->
  <rule pattern="\\\n">
    <token type="Text"/>
  </rule>
  <!-- Line comment up to, but not including, the end of line. The body is
       optional ("*" rather than "+") so that a bare "//" is still a comment
       instead of falling through to the operator rule as two "/" tokens. -->
  <rule pattern="//[^\n\r]*">
    <token type="CommentSingle"/>
  </rule>
  <rule pattern="\n">
    <token type="Text"/>
  </rule>
  <!-- Operators.
       NOTE(review): alternatives are tried left to right, and "=" is listed
       before "=~", "<" before "<=" and ">" before ">=", so those longer forms
       are never matched as a unit (the "~" of "=~" ends up as Punctuation).
       Putting the longer alternatives first would fix this, but it changes
       the emitted tokens, so the expected-token fixtures must be regenerated
       in the same change. Left as-is here. -->
  <rule pattern="(\+|&amp;&amp;|==|&lt;|=|-|\|\||!=|&gt;|:|\*|&amp;|=~|&lt;=|\?|\[|\]|,|/|\||!~|&gt;=|!|_\|_|\.\.\.)">
    <token type="Operator"/>
  </rule>
  <!-- Opening delimiter of a double-quoted string: optional run of "#",
       then one or more quote characters. The body is lexed in "string". -->
  <rule pattern="#*&#34;+">
    <token type="LiteralString"/>
    <push state="string"/>
  </rule>
  <!-- Single-quoted literal on one line; backslash-backslash and
       backslash-quote are skipped over, and an unterminated literal ends at
       the newline. -->
  <rule pattern="'(\\\\|\\'|[^'\n])*['\n]">
    <token type="LiteralString"/>
  </rule>
  <!-- Numbers: a 0b/0o/0x/0X prefix followed by hex digits and underscores,
       or a decimal/float with optional exponent, optional K/M/G/T/P
       multiplier and optional trailing "i". -->
  <rule pattern="0[boxX][0-9a-fA-F][_0-9a-fA-F]*|(\.\d+|\d[_\d]*(\.\d*)?)([eE][+-]?\d+)?[KMGTP]?i?">
    <token type="LiteralNumber"/>
  </rule>
  <!-- Remaining single punctuation characters not consumed by the operator
       rule above (for example parentheses, ".", ";" and "~"). -->
  <rule pattern="[~!%^&amp;*()+=|\[\]:;,.&lt;&gt;/?-]">
    <token type="Punctuation"/>
  </rule>
  <rule pattern="[{}]">
    <token type="Punctuation"/>
  </rule>
  <!-- Keywords; the word boundary keeps identifiers such as "format" or
       "int" from matching here. -->
  <rule pattern="(import|for|if|in|let|package)\b">
    <token type="Keyword"/>
  </rule>
  <!-- Built-in type names, with an optional trailing "?". -->
  <rule pattern="(bool|float|int|string|uint|ulong|ushort)\b\??">
    <token type="KeywordType"/>
  </rule>
  <!-- Literal constants, including a lone "_". -->
  <rule pattern="(true|false|null|_)\b">
    <token type="KeywordConstant"/>
  </rule>
  <!-- Any other identifier. -->
  <rule pattern="[_a-zA-Z]\w*">
    <token type="Name"/>
  </rule>
</state>
<state name="string">
  <!-- Body of a double-quoted string; entered from "root" after the opening
       delimiter has been emitted. -->

  <!-- Interpolation opener: backslash, optional run of "#", then "(".
       Must come before the escape rule so it takes precedence. -->
  <rule pattern="\\#*\(">
    <token type="LiteralStringInterpol"/>
    <push state="string-intp"/>
  </rule>
  <!-- Closing delimiter: one or more quotes followed by an optional run of
       "#"; returns to the state that opened the string. -->
  <rule pattern="&#34;+#*">
    <token type="LiteralString"/>
    <pop depth="1"/>
  </rule>
  <!-- Escape sequences: the single-character escapes (now including
       \a, \b, \f, \v and \/ in addition to the quotes, backslash, \n, \r
       and \t), \xNN, octal, \uNNNN and \UNNNNNNNN. -->
  <rule pattern="\\[&#39;&#34;\\nrtabfv/]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}">
    <token type="LiteralStringEscape"/>
  </rule>
  <!-- Plain string content: anything up to the next backslash or quote. -->
  <rule pattern="[^\\&#34;]+">
    <token type="LiteralString"/>
  </rule>
  <!-- A backslash that did not start a recognised escape or interpolation
       is kept as ordinary string text so lexing always makes progress. -->
  <rule pattern="\\">
    <token type="LiteralString"/>
  </rule>
</state>
<state name="string-intp">
  <!-- Inside a string interpolation, entered from the "string" state. -->

  <!-- A closing parenthesis ends the interpolation and pops back to the
       enclosing string. NOTE(review): because this rule is tried before the
       included root rules, the first ")" ends the interpolation even if the
       expression contains its own parentheses; confirm whether nesting
       needs handling. -->
  <rule pattern="\)">
    <token type="LiteralStringInterpol"/>
    <pop depth="1"/>
  </rule>
  <!-- Everything else is lexed with the ordinary top-level rules. -->
  <rule>
    <include state="root"/>
  </rule>
</state>
</rules>
</lexer>
31 changes: 31 additions & 0 deletions lexers/testdata/cue.actual
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"this is a test"
5
false

// This is a comment
a: {
foo: string // foo is a string
[=~"^i"]: int // all other fields starting with i are integers
[=~"^b"]: bool // all other fields starting with b are booleans
[>"c"]: string // all other fields lexically after c are strings

...string // all other fields must be a string. Note: default constraints are not yet implemented.
}

x: #"""

\#(test)

"""

b: a & {
i3: 3
bar: true
other: "a string"
}

A: close({
field1: string
field2: string
})

108 changes: 108 additions & 0 deletions lexers/testdata/cue.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
[
{"type":"LiteralString","value":"\"this is a test\""},
{"type":"Text","value":"\n"},
{"type":"LiteralNumber","value":"5"},
{"type":"Text","value":"\n"},
{"type":"KeywordConstant","value":"false"},
{"type":"Text","value":"\n\n"},
{"type":"CommentSingle","value":"// This is a comment"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"a"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"foo"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// foo is a string"},
{"type":"Text","value":"\n "},
{"type":"Operator","value":"[="},
{"type":"Punctuation","value":"~"},
{"type":"LiteralString","value":"\"^i\""},
{"type":"Operator","value":"]:"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"int"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// all other fields starting with i are integers"},
{"type":"Text","value":"\n "},
{"type":"Operator","value":"[="},
{"type":"Punctuation","value":"~"},
{"type":"LiteralString","value":"\"^b\""},
{"type":"Operator","value":"]:"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"bool"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// all other fields starting with b are booleans"},
{"type":"Text","value":"\n "},
{"type":"Operator","value":"[\u003e"},
{"type":"LiteralString","value":"\"c\""},
{"type":"Operator","value":"]:"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// all other fields lexically after c are strings"},
{"type":"Text","value":"\n\n "},
{"type":"Operator","value":"..."},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// all other fields must be a string. Note: default constraints are not yet implemented."},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":"\n\n"},
{"type":"Name","value":"x"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"#\"\"\"\n\n "},
{"type":"LiteralStringInterpol","value":"\\#("},
{"type":"Name","value":"test"},
{"type":"LiteralStringInterpol","value":")"},
{"type":"LiteralString","value":"\n\n\"\"\""},
{"type":"Text","value":"\n\n"},
{"type":"Name","value":"b"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"Name","value":"a"},
{"type":"Text","value":" "},
{"type":"Operator","value":"\u0026"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"i3"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"LiteralNumber","value":"3"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"bar"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"KeywordConstant","value":"true"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"other"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"\"a string\""},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":"\n\n"},
{"type":"Name","value":"A"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"Name","value":"close"},
{"type":"Punctuation","value":"({"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"field1"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"field2"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"})"},
{"type":"Text","value":"\n\n"}
]

0 comments on commit 827bd93

Please sign in to comment.