@@ -6,150 +6,127 @@ The _lexical structure_ of the Pikelet programming langues is a description of w
6
6
7
7
The textual surface language assigns meaning to a source string,
8
8
which consists of a sequence of _Unicode scalar values_ (as defined in Section 3.9 of [the Unicode Standard](https://www.unicode.org/versions/latest/)),
9
- terminated with a virtual end-of-file symbol, &empty ; :
10
-
11
- > ** Grammar** :
12
- >
13
- > <a href =" #var:unicode-scalar-value " ><var id =" var:unicode-scalar-value " >unicode-scalar-value</var ></a > ::=\
14
- > &emsp ; |&ensp ; <kbd >U+00</kbd > &hellip ; <kbd >U+D7FF</kbd >\
15
- > &emsp ; |&ensp ; <kbd >U+E000</kbd > &hellip ; <kbd >U+10FFF</kbd >
16
- >
17
- > <a href =" #var:source " ><var id =" var:source " >source</var ></a > ::=\
18
- > &emsp ; |&ensp ; <a href =" #var:unicode-scalar-value " ><var >unicode-scalar-value</var ></a ><sup >\* </sup > &empty ;
19
-
20
- For convenience, we define a number of special values within the above <a href =" #var:unicode-scalar-value " ><var >unicode-scalar-value</var ></a > definition:
21
-
22
- > ** Grammar** :
23
- >
24
- > <a href =" #var:horizontal-tab " ><var id =" var:horizontal-tab " >horizontal-tab</var ></a > ::=\
25
- > &emsp ; |&ensp ; <kbd >U+0009</kbd >
26
- >
27
- > <a href =" #var:line-feed " ><var id =" var:line-feed " >line-feed</var ></a > ::=\
28
- > &emsp ; |&ensp ; <kbd >U+000A</kbd >
29
- >
30
- > <a href =" #var:vertical-tab " ><var id =" var:vertical-tab " >vertical-tab</var ></a > ::=\
31
- > &emsp ; |&ensp ; <kbd >U+000B</kbd >
32
- >
33
- > <a href =" #var:form-feed " ><var id =" var:form-feed " >form-feed</var ></a > ::=\
34
- > &emsp ; |&ensp ; <kbd >U+000C</kbd >
35
- >
36
- > <a href =" #var:carriage-return " ><var id =" var:carriage-return " >carriage-return</var ></a > ::=\
37
- > &emsp ; |&ensp ; <kbd >U+000D</kbd >
38
- >
39
- > <a href =" #var:next-line " ><var id =" var:next-line " >next-line</var ></a > ::=\
40
- > &emsp ; |&ensp ; <kbd >U+0085</kbd >
41
- >
42
- > <a href =" #var:left-to-right-mark " ><var id =" var:left-to-right-mark " >left-to-right-mark</var ></a > ::=\
43
- > &emsp ; |&ensp ; <kbd >U+200E</kbd >
44
- >
45
- > <a href =" #var:right-to-left-mark " ><var id =" var:right-to-left-mark " >right-to-left-mark</var ></a > ::=\
46
- > &emsp ; |&ensp ; <kbd >U+200F</kbd >
47
- >
48
- > <a href =" #var:line-separator " ><var id =" var:line-separator " >line-separator</var ></a > ::=\
49
- > &emsp ; |&ensp ; <kbd >U+2028</kbd >
50
- >
51
- > <a href =" #var:paragraph-separator " ><var id =" var:paragraph-separator " >paragraph-separator</var ></a > ::=\
52
- > &emsp ; |&ensp ; <kbd >U+2029</kbd >
9
+ terminated with a virtual end-of-file symbol, `"\0"`:
10
+
11
+ ``` text
12
+ unicode-scalar-value ::=
13
+ | "\u{00}" ... "\u{D7FF}"
14
+ | "\u{E000}" ... "\u{10FFFF}"
15
+
16
+ source ::=
17
+ | unicode-scalar-value* "\0"
18
+ ```
19
+
20
+ For convenience, we define a number of special values within the above ` unicode-scalar-value ` definition:
21
+
22
+ ``` text
23
+ horizontal-tab ::= "\u{0009}"
24
+ line-feed ::= "\u{000A}"
25
+ vertical-tab ::= "\u{000B}"
26
+ form-feed ::= "\u{000C}"
27
+ carriage-return ::= "\u{000D}"
28
+ next-line ::= "\u{0085}"
29
+ left-to-right-mark ::= "\u{200E}"
30
+ right-to-left-mark ::= "\u{200F}"
31
+ line-separator ::= "\u{2028}"
32
+ paragraph-separator ::= "\u{2029}"
33
+ ```
53
34
54
35
## Whitespace and comments
55
36
56
- > ** Grammar ** :
57
- >
58
- > < a href = " #var:line-break " >< var id = " var: line-break " >line-break</ var ></ a > ::= \
59
- > & emsp ; | & ensp ; < a href = " #var:line-feed " >< var >line-feed</ var ></ a > \
60
- > & emsp ; | & ensp ; < a href = " #var: carriage-return" >< var >carriage-return</ var ></ a > \
61
- > & emsp ; | & ensp ; < a href = " #var:carriage-return " >< var >carriage-return</ var ></ a > < a href = " #var:line-feed " >< var >line-feed</ var ></ a > \
62
- > & emsp ; | & ensp ;& empty ;
63
- >
64
- > < a href = " #var:comment-text " >< var id = " var:comment-text " >comment-text</ var ></ a > ::= \
65
- > & emsp ; | & ensp ; ~ (< a href = " #var:line-feed " >< var >line-feed</ var ></ a > | < a href = " #var:carriage-return " >< var >carriage-return</ var ></ a >) < a href = " #var:unicode-scalar-value " >< var >unicode-scalar-value</ var ></ a >< sup > \* </ sup >
66
- >
67
- > < a href = " #var:comment " >< var id = " var: comment" >comment</ var ></ a > ::= \
68
- > & emsp ; | & ensp ; ` -- ` < a href = " comment-text " >< var >comment-text</ var ></ a > < a href = " #var:line-break " >< var >line-break</ var ></ a >
69
- >
70
- > < a href = " #var:doc-comment " >< var id = " var:doc- comment" >doc-comment</ var ></ a > ::= \
71
- > & emsp ; | & ensp ; ` ||| ` < a href = " comment-text " >< var >comment-text</ var ></ a > < a href = " #var:line-break " >< var >line-break</ var ></ a >
72
- >
73
- > < a href = " #var:white-space " >< var id = " var:white-space " >white-space</ var ></ a > ::= \
74
- > & emsp ; | & ensp ; < a href = " #var:horizontal-tab " >< var >horizontal-tab</ var ></ a > \
75
- > & emsp ; | & ensp ; < a href = " #var:comment " >< var >comment</ var ></ a > \
76
- > & emsp ; | & ensp ; < a href = " #var:vertical-tab " >< var >vertical-tab</ var ></ a > \
77
- > & emsp ; | & ensp ; < a href = " #var:form-feed " >< var >form-feed</ var ></ a > \
78
- > & emsp ; | & ensp ; < a href = " #var:line-break " >< var > line-break</ var ></ a > \
79
- > & emsp ; | & ensp ; < a href = " #var:next-line " >< var >next-line</ var ></ a > \
80
- > & emsp ; | & ensp ; < a href = " #var:left -to-right-mark " >< var > left-to-right- mark</ var ></ a > \
81
- > & emsp ; | & ensp ; < a href = " #var:right-to-left-mark " >< var >right-to-left-mark</ var ></ a > \
82
- > & emsp ; | & ensp ; < a href = " #var:line -separator" >< var >line-separator</ var ></ a > \
83
- > & emsp ; | & ensp ; < a href = " #var:paragraph-separator " >< var >paragraph-separator</ var ></ a >
84
-
85
- ## Keywords and identifiers
86
-
87
- > ** Grammar ** :
88
- >
89
- > < a href = " #var:keyword " >< var id = " var:keyword " >keyword</ var ></ a > ::= \
90
- > & emsp ; | & ensp ; ` as ` \
91
- > & emsp ; | & ensp ; ` fun ` \
92
- > & emsp ; | & ensp ; ` Fun ` \
93
- > & emsp ; | & ensp ; ` Record ` \
94
- > & emsp ; | & ensp ; ` record `
95
- >
96
- > < a href = " #var:ident-or-keyword " >< var id = " var:ident-or-keyword " >ident-or-keyword</ var ></ a > ::= \
97
- > & emsp ; | & ensp ; ( ` a ` & hellip ; ` z ` | ` A ` & hellip ; ` Z ` ) ( ` a ` & hellip ; ` z ` | ` A ` & hellip ; ` Z ` | ` 0 ` & hellip ; ` 9 ` | ` - ` )< sup > \* </ sup >
98
- >
99
- > < a href = " #var:ident " >< var id = " var:ident " >ident</ var ></ a > ::= \
100
- > & emsp ; | & ensp ; ~ < a href = " #var:keyword " >< var >keyword</ var ></ a > < a href = " #var:ident-or-keyword " >< var >ident-or-keyword</ var ></ a >
37
+ ``` text
38
+ line-break ::=
39
+ | line-feed
40
+ | carriage-return
41
+ | carriage-return line-feed
42
+ | "\0"
43
+
44
+ comment-text ::=
45
+ | (~(line-feed | carriage-return) unicode-scalar-value)*
46
+
47
+ comment ::=
48
+ | "--" comment-text line-break
49
+
50
+ doc-comment ::=
51
+ | "|||" comment-text line-break
52
+
53
+ white-space ::=
54
+ | horizontal-tab
55
+ | comment
56
+ | vertical-tab
57
+ | form-feed
58
+ | line-break
59
+ | next-line
60
+ | left-to-right-mark
61
+ | right-to-left-mark
62
+ | line-separator
63
+ | paragraph-separator
64
+ ```
65
+
66
+ ## Keywords and names
67
+
68
+ ``` text
69
+ keyword ::=
70
+ | "as"
71
+ | "fun"
72
+ | "Fun"
73
+ | "Record"
74
+ | "record"
75
+
76
+ name-or-keyword ::=
77
+ | ("a" ... "z" | "A" ... "Z") ("a" ... "z" | "A" ... "Z" | "0" ... "9" | "-")*
78
+
79
+ name ::=
80
+ | ~keyword name-or-keyword
81
+ ```
101
82
102
83
### Punctuation
103
84
104
- > ** Grammar** :
105
- >
106
- > <a href =" #var:delimiter " ><var id =" var:delimiter " >delimiter</var ></a > ::=\
107
- > &emsp ; |&ensp ; ` { ` \
108
- > &emsp ; |&ensp ; ` } ` \
109
- > &emsp ; |&ensp ; ` [ ` \
110
- > &emsp ; |&ensp ; ` ] ` \
111
- > &emsp ; |&ensp ; ` ( ` \
112
- > &emsp ; |&ensp ; ` ) `
113
- >
114
- > <a href =" #var:symbol " ><var id =" var:symbol " >symbol</var ></a > ::=\
115
- > &emsp ; |&ensp ; ` . ` \
116
- > &emsp ; |&ensp ; ` : ` \
117
- > &emsp ; |&ensp ; ` , ` \
118
- > &emsp ; |&ensp ; ` = ` \
119
- > &emsp ; |&ensp ; ` => ` \
120
- > &emsp ; |&ensp ; ` -> `
121
- >
122
- > <a href =" #var:punctuation " ><var id =" var:punctuation " >punctuation</var ></a > ::=\
123
- > &emsp ; |&ensp ; <a href =" #var:delimiter " ><var >delimiter</var ></a >\
124
- > &emsp ; |&ensp ; <a href =" #var:symbol " ><var >symbol</var ></a >
125
-
126
- ### Numeric literals
127
-
128
- > ** Grammar** :
129
- >
130
- > <a href =" #var:number-literal " ><var id =" var:number-literal " >number-literal</var ></a > ::=\
131
- > &emsp ; |&ensp ; (` + ` | ` - ` )<sup >?</sup > (` 0 ` &hellip ; ` 9 ` ) (` a ` &hellip ; ` z ` | ` A ` &hellip ; ` Z ` | ` 0 ` &hellip ; ` 9 ` | ` . ` )<sup >* </sup ></sup >
132
-
133
- ### Character and string literals
134
-
135
- > ** Grammar** :
136
- >
137
- > <a href =" #var:character-literal " ><var id =" var:character-literal " >character-literal</var ></a > ::=\
138
- > &emsp ; |&ensp ; ` " ` (` \" ` | ~ ` " ` <a href =" #var:unicode-scalar-value " ><var >unicode-scalar-value</var ></a >)<sup >\* </sup > ` " `
139
- >
140
- > <a href =" #var:string-literal " ><var id =" var:string-literal " >string-literal</var ></a > ::=\
141
- > &emsp ; |&ensp ; ` ' ` (` \' ` | ~ ` ' ` <a href =" #var:unicode-scalar-value " ><var >unicode-scalar-value</var ></a >)<sup >\* </sup > ` ' `
85
+ ``` text
86
+ delimiter ::=
87
+ | "{"
88
+ | "}"
89
+ | "["
90
+ | "]"
91
+ | "("
92
+ | ")"
93
+
94
+ symbol ::=
95
+ | "."
96
+ | ":"
97
+ | ","
98
+ | "="
99
+ | "=>"
100
+ | "->"
101
+
102
+ punctuation ::=
103
+ | delimiter
104
+ | symbol
105
+ ```
106
+
107
+ ### Literals
108
+
109
+ ``` text
110
+ number-literal ::=
111
+ | ("+" | "-")? ("0" ... "9") ("a" ... "z" | "A" ... "Z" | "0" ... "9" | ".")*
112
+
113
+ character-literal ::=
114
+ | "\"" ("\\\"" | ~"\"" unicode-scalar-value)* "\""
115
+
116
+ string-literal ::=
117
+ | "'" ("\\'" | ~"'" unicode-scalar-value)* "'"
118
+ ```
142
119
143
120
### Tokens
144
121
145
- > ** Grammar ** :
146
- >
147
- > < a href = " #var:token " >< var id = " var:token " >token</ var ></ a > ::= \
148
- > & emsp ; | & ensp ; < a href = " #var:white-space " >< var >white-space</ var ></ a > \
149
- > & emsp ; | & ensp ; < a href = " #var:doc-comment " >< var >doc-comment</ var ></ a > \
150
- > & emsp ; | & ensp ; < a href = " #var:keyword " >< var >keyword</ var ></ a > \
151
- > & emsp ; | & ensp ; < a href = " #var:ident " >< var >ident</ var ></ a > \
152
- > & emsp ; | & ensp ; < a href = " #var:punctuation " >< var >punctuation</ var ></ a > \
153
- > & emsp ; | & ensp ; < a href = " #var:number -literal" >< var >number-literal</ var ></ a > \
154
- > & emsp ; | & ensp ; < a href = " #var:character -literal" >< var >character-literal</ var ></ a > \
155
- > & emsp ; | & ensp ; < a href = " #var:string-literal " >< var >string-literal</ var ></ a >
122
+ ``` text
123
+ token ::=
124
+ | white-space
125
+ | doc-comment
126
+ | keyword
127
+ | name
128
+ | punctuation
129
+ | number-literal
130
+ | character-literal
131
+ | string-literal
132
+ ```
0 commit comments