diff --git a/.github/workflows/ci-interpreter.yml b/.github/workflows/ci-interpreter.yml index 8e0f4cb5..76f44418 100644 --- a/.github/workflows/ci-interpreter.yml +++ b/.github/workflows/ci-interpreter.yml @@ -21,7 +21,7 @@ jobs: - name: Setup OCaml uses: ocaml/setup-ocaml@v2 with: - ocaml-compiler: 4.12.x + ocaml-compiler: 4.14.x - name: Setup OCaml tools run: opam install --yes ocamlfind.1.9.5 js_of_ocaml.4.0.0 js_of_ocaml-ppx.4.0.0 - name: Setup Node.js diff --git a/document/core/text/conventions.rst b/document/core/text/conventions.rst index 1efc88b0..493d7921 100644 --- a/document/core/text/conventions.rst +++ b/document/core/text/conventions.rst @@ -120,20 +120,21 @@ It is convenient to define identifier contexts as :ref:`records ` assigned to the defined indices. +For each index space, such a context contains the list of :ref:`names ` assigned to the defined indices, +which were denoted by the corresponding :ref:`identifiers `. Unnamed indices are associated with empty (:math:`\epsilon`) entries in these lists. An identifier context is *well-formed* if no index space contains duplicate identifiers. diff --git a/document/core/text/instructions.rst b/document/core/text/instructions.rst index 45929536..c284326f 100644 --- a/document/core/text/instructions.rst +++ b/document/core/text/instructions.rst @@ -32,11 +32,11 @@ The following grammar handles the corresponding update to the :ref:`identifier c .. math:: \begin{array}{llcllll} \production{label} & \Tlabel_I &::=& - v{:}\Tid &\Rightarrow& \{\ILABELS~v\} \compose I + v{:}\Tid &\Rightarrow& v, \{\ILABELS~v\} \compose I & (\iff v \notin I.\ILABELS) \\ &&|& - v{:}\Tid &\Rightarrow& \{\ILABELS~v\} \compose (I \with \ILABELS[i] = \epsilon) + v{:}\Tid &\Rightarrow& v, \{\ILABELS~v\} \compose (I \with \ILABELS[i] = \epsilon) & (\iff I.\ILABELS[i] = v) \\ &&|& - \epsilon &\Rightarrow& \{\ILABELS~(\epsilon)\} \compose I \\ + \epsilon &\Rightarrow& \epsilon, \{\ILABELS~(\epsilon)\} \compose I \\ \end{array} .. 
note:: @@ -79,16 +79,16 @@ However, the special case of a type use that is syntactically empty or consists x,I'{:}\Ttypeuse_I &\Rightarrow& x & (\iff I' = \{\ILOCALS~(\epsilon)^\ast\}) \\ \end{array} \\ \production{block instruction} & \Tblockinstr_I &::=& - \text{block}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid^? + \text{block}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~{v'}^?{:}\Tid^? \\ &&&\qquad \Rightarrow\quad \BLOCK~\X{bt}~\X{in}^\ast~\END - \qquad\quad~~ (\iff \Tid^? = \epsilon \vee \Tid^? = \Tlabel) \\ &&|& - \text{loop}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid^? + \qquad\quad~~ (\iff {v'}^? = \epsilon \vee {v'}^? = v^?) \\ &&|& + \text{loop}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~{v'}^?{:}\Tid^? \\ &&&\qquad \Rightarrow\quad \LOOP~\X{bt}~\X{in}^\ast~\END - \qquad\qquad (\iff \Tid^? = \epsilon \vee \Tid^? = \Tlabel) \\ &&|& - \text{if}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}_1{:}\Tinstr_{I'})^\ast~~ - \text{else}~~\Tid_1^?~~(\X{in}_2{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid_2^? + \qquad\qquad (\iff {v'}^? = \epsilon \vee {v'}^? = v^?) \\ &&|& + \text{if}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}_1{:}\Tinstr_{I'})^\ast~~ + \text{else}~~v_1^?{:}\Tid_1^?~~(\X{in}_2{:}\Tinstr_{I'})^\ast~~\text{end}~~v_2^?{:}\Tid_2^? \\ &&&\qquad \Rightarrow\quad \IF~\X{bt}~\X{in}_1^\ast~\ELSE~\X{in}_2^\ast~\END - \qquad (\iff \Tid_1^? = \epsilon \vee \Tid_1^? = \Tlabel, \Tid_2^? = \epsilon \vee \Tid_2^? = \Tlabel) \\ + \qquad (\iff v_1^? = \epsilon \vee v_1^? = v^?, v_2^? = \epsilon \vee v_2^? = v^?) \\ \end{array} .. 
note:: diff --git a/document/core/text/lexical.rst b/document/core/text/lexical.rst index 55cbe3a4..077dae17 100644 --- a/document/core/text/lexical.rst +++ b/document/core/text/lexical.rst @@ -135,7 +135,7 @@ The *look-ahead* restrictions on the productions for |Tblockchar| disambiguate t Annotations ~~~~~~~~~~~ -An *annotation* is a bracketed token sequence headed by an *annotation id* of the form :math:`\T{@id}`. +An *annotation* is a bracketed token sequence headed by an *annotation id* of the form :math:`\text{@id}` or :math:`\text{@"..."}`. No :ref:`space ` is allowed between the opening parenthesis and this id. Annotations are intended to be used for third-party extensions; they can appear anywhere in a program but are ignored by the WebAssembly semantics itself, which treats them as :ref:`white space `. @@ -145,8 +145,10 @@ Annotations can contain other parenthesized token sequences (including nested an .. math:: \begin{array}{llclll@{\qquad\qquad}l} - \production{annot} & \Tannot &::=& - \text{(@}~\Tidchar^+ ~(\Tspace ~|~ \Ttoken)^\ast~\text{)} \\ + \production{annotation} & \Tannot &::=& + \text{(@}~\Tannotid ~(\Tspace ~|~ \Ttoken)^\ast~\text{)} \\ + \production{annotation identifier} & \Tannotid &::=& + \Tidchar^+ ~|~ \Tname \\ \end{array} .. note:: diff --git a/document/core/text/modules.rst b/document/core/text/modules.rst index 5aab9b32..69d5f90a 100644 --- a/document/core/text/modules.rst +++ b/document/core/text/modules.rst @@ -679,28 +679,28 @@ The definition of the initial :ref:`identifier context ` :math:`I` .. 
math:: \begin{array}{@{}lcl@{\qquad\qquad}l} - \F{idc}(\text{(}~\text{type}~\Tid^?~\X{ft}{:}\Tfunctype~\text{)}) &=& - \{\ITYPES~(\Tid^?), \ITYPEDEFS~\X{ft}\} \\ - \F{idc}(\text{(}~\text{func}~\Tid^?~\dots~\text{)}) &=& - \{\IFUNCS~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{table}~\Tid^?~\dots~\text{)}) &=& - \{\ITABLES~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{memory}~\Tid^?~\dots~\text{)}) &=& - \{\IMEMS~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{global}~\Tid^?~\dots~\text{)}) &=& - \{\IGLOBALS~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{elem}~\Tid^?~\dots~\text{)}) &=& - \{\IELEM~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{data}~\Tid^?~\dots~\text{)}) &=& - \{\IDATA~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{func}~\Tid^?~\dots~\text{)}~\text{)}) &=& - \{\IFUNCS~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{table}~\Tid^?~\dots~\text{)}~\text{)}) &=& - \{\ITABLES~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{memory}~\Tid^?~\dots~\text{)}~\text{)}) &=& - \{\IMEMS~(\Tid^?)\} \\ - \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{global}~\Tid^?~\dots~\text{)}~\text{)}) &=& - \{\IGLOBALS~(\Tid^?)\} \\ + \F{idc}(\text{(}~\text{type}~v^?{:}\Tid^?~\X{ft}{:}\Tfunctype~\text{)}) &=& + \{\ITYPES~(v^?), \ITYPEDEFS~\X{ft}\} \\ + \F{idc}(\text{(}~\text{func}~v^?{:}\Tid^?~\dots~\text{)}) &=& + \{\IFUNCS~(v^?)\} \\ + \F{idc}(\text{(}~\text{table}~v^?{:}\Tid^?~\dots~\text{)}) &=& + \{\ITABLES~(v^?)\} \\ + \F{idc}(\text{(}~\text{memory}~v^?{:}\Tid^?~\dots~\text{)}) &=& + \{\IMEMS~(v^?)\} \\ + \F{idc}(\text{(}~\text{global}~v^?{:}\Tid^?~\dots~\text{)}) &=& + \{\IGLOBALS~(v^?)\} \\ + \F{idc}(\text{(}~\text{elem}~v^?{:}\Tid^?~\dots~\text{)}) &=& + \{\IELEM~(v^?)\} \\ + \F{idc}(\text{(}~\text{data}~v^?{:}\Tid^?~\dots~\text{)}) &=& + \{\IDATA~(v^?)\} \\ + \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{func}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=& + \{\IFUNCS~(v^?)\} \\ + 
\F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{table}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=& + \{\ITABLES~(v^?)\} \\ + \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{memory}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=& + \{\IMEMS~(v^?)\} \\ + \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{global}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=& + \{\IGLOBALS~(v^?)\} \\ \F{idc}(\text{(}~\dots~\text{)}) &=& \{\} \\ \end{array} diff --git a/document/core/text/values.rst b/document/core/text/values.rst index 2b869d25..3c74112d 100644 --- a/document/core/text/values.rst +++ b/document/core/text/values.rst @@ -211,12 +211,13 @@ Identifiers ~~~~~~~~~~~ :ref:`Indices ` can be given in both numeric and symbolic form. -Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$}`, followed by any sequence of printable |ASCII|_ characters that does not contain a space, quotation mark, comma, semicolon, or bracket. +Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$}`, followed by either a sequence of printable |ASCII|_ characters that does not contain a space, quotation mark, comma, semicolon, or bracket, or by a quoted :ref:`name `. .. math:: \begin{array}{llclll@{\qquad}l} \production{identifier} & \Tid &::=& - \text{\$}~\Tidchar^+ \\ + \text{\$}~c^\ast{:}\Tidchar^+ &\Rightarrow& c^\ast \\ &&|& + \text{\$}~c^\ast{:}\Tname &\Rightarrow& c^\ast & (\iff |c^\ast| > 0) \\ \production{identifier character} & \Tidchar &::=& \text{0} ~~|~~ \dots ~~|~~ \text{9} \\ &&|& \text{A} ~~|~~ \dots ~~|~~ \text{Z} \\ &&|& @@ -246,6 +247,9 @@ Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$} \text{\tilde{~~}} \\ \end{array} +.. note:: + The value of an identifier character is the Unicode code point denoting it. + .. 
_text-id-fresh: Conventions diff --git a/document/core/util/macros.def b/document/core/util/macros.def index 8f78e992..17eb3fab 100644 --- a/document/core/util/macros.def +++ b/document/core/util/macros.def @@ -719,6 +719,7 @@ .. |Tblockchar| mathdef:: \xref{text/lexical}{text-comment}{\T{blockchar}} .. |Tannot| mathdef:: \xref{text/lexical}{text-annot}{\T{annot}} +.. |Tannotid| mathdef:: \xref{text/lexical}{text-annot}{\T{annotid}} .. |Tannottoken| mathdef:: \xref{text/lexical}{text-annot}{\T{annottoken}} diff --git a/interpreter/text/arrange.ml b/interpreter/text/arrange.ml index 4ea26338..9559a889 100644 --- a/interpreter/text/arrange.ml +++ b/interpreter/text/arrange.ml @@ -627,7 +627,10 @@ let custom m mnode (module S : Custom.Section) = let var_opt = function | None -> "" - | Some x -> " " ^ x.it + | Some x when + String.for_all (fun c -> Lib.Char.is_alphanum_ascii c || c = '_') x.it -> + " $" ^ x.it + | Some x -> " $" ^ name (Utf8.decode x.it) let module_with_var_opt x_opt (m, cs) = let fx = ref 0 in diff --git a/interpreter/text/lexer.mll b/interpreter/text/lexer.mll index bda1aa81..b2f4e2a3 100644 --- a/interpreter/text/lexer.mll +++ b/interpreter/text/lexer.mll @@ -48,6 +48,11 @@ let string s = done; Buffer.contents b +let annot_id lexbuf s = + let s' = string s in + if s' = "" then error lexbuf "empty annotation id"; + try Utf8.decode s' with Utf8.Utf8 -> error lexbuf "malformed UTF-8 encoding" + let opt = Lib.Option.get } @@ -102,8 +107,7 @@ let float = let string = '"' character* '"' let idchar = letter | digit | '_' | symbol -let name = idchar+ -let id = '$' name +let id = idchar+ let keyword = ['a'-'z'] (letter | digit | '_' | '.' 
| ':')+ let reserved = (idchar | string)+ | ',' | ';' | '[' | ']' | '{' | '}' @@ -699,13 +703,21 @@ rule token = parse | "offset="(nat as s) { OFFSET_EQ_NAT s } | "align="(nat as s) { ALIGN_EQ_NAT s } - | id as s { VAR s } + | '$'(id as s) { VAR s } + | '$'(string as s) + { let s' = string s in + if s' = "" then error lexbuf "empty identifier"; VAR s' } + | '$' { error lexbuf "empty identifier" } - | "(@"(name as n) + | "(@"(id as n) { let r = region lexbuf in let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in Annot.record (Annot.{name = Utf8.decode n; items} @@ r); token lexbuf } - | "(@" { error lexbuf "malformed annotation id" } + | "(@"(string as s) + { let r = region lexbuf in + let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in + Annot.record (Annot.{name = annot_id lexbuf s; items} @@ r); token lexbuf } + | "(@" { error lexbuf "empty annotation id" } | ";;"utf8_no_nl*eof { EOF } | ";;"utf8_no_nl*newline { Lexing.new_line lexbuf; token lexbuf } @@ -726,11 +738,16 @@ and annot start = parse { let r = region lexbuf in let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in (Annot.Parens items @@ r) :: annot start lexbuf } - | "(@"(name as n) + | "(@"(id as n) { let r = region lexbuf in let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in let ann = Annot.{name = Utf8.decode n; items} @@ r in (Annot.Annot ann @@ r) :: annot start lexbuf } + | "(@"(string as s) + { let r = region lexbuf in + let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in + let ann = Annot.{name = annot_id lexbuf s; items} @@ r in + (Annot.Annot ann @@ r) :: annot start lexbuf } | nat as s { let r = region lexbuf in @@ -741,9 +758,15 @@ and annot start = parse | float as s { let r = region lexbuf in (Annot.Float s @@ r) :: annot start lexbuf } - | id as s + | '$'(id as s) { let r = region lexbuf in (Annot.Var s @@ r) :: annot start lexbuf } + | '$'(string as s) + { let r = region lexbuf in + let s' = string s in + if s' = "" then error lexbuf "empty identifier"; 
+ (Annot.Var s' @@ r) :: annot start lexbuf } + | '$' { error lexbuf "empty identifier" } | string as s { let r = region lexbuf in (Annot.String (string s) @@ r) :: annot start lexbuf } diff --git a/interpreter/text/parser.mly b/interpreter/text/parser.mly index 48731201..ac77c1e3 100644 --- a/interpreter/text/parser.mly +++ b/interpreter/text/parser.mly @@ -88,6 +88,11 @@ let nat32 s loc = let name s loc = try Utf8.decode s with Utf8.Utf8 -> error (at loc) "malformed UTF-8 encoding" +let var s loc = + let r = at loc in + try ignore (Utf8.decode s); Source.(s @@ r) + with Utf8.Utf8 -> error r "malformed UTF-8 encoding" + (* Symbolic variables *) @@ -120,9 +125,24 @@ let force_locals (c : context) = let enter_func (c : context) = {c with labels = VarMap.empty; locals = empty ()} +let print_char = function + | 0x09 -> "\\t" + | 0x0a -> "\\n" + | 0x22 -> "\\\"" + | 0x5c -> "\\\\" + | c when 0x20 <= c && c < 0x7f -> String.make 1 (Char.chr c) + | c -> Printf.sprintf "\\u{%02x}" c + +let print x = + "$" ^ + if String.for_all (fun c -> Lib.Char.is_alphanum_ascii c || c = '_') x.it + then x.it + else "\"" ^ String.concat "" (List.map print_char (Utf8.decode x.it)) ^ "\"" + + let lookup category space x = try VarMap.find x.it space.map - with Not_found -> error x.at ("unknown " ^ category ^ " " ^ x.it) + with Not_found -> error x.at ("unknown " ^ category ^ " " ^ print x) let type_ (c : context) x = lookup "type" c.types.space x let func (c : context) x = lookup "function" c.funcs x @@ -134,7 +154,7 @@ let elem (c : context) x = lookup "elem segment" c.elems x let data (c : context) x = lookup "data segment" c.datas x let label (c : context) x = try VarMap.find x.it c.labels - with Not_found -> error x.at ("unknown label " ^ x.it) + with Not_found -> error x.at ("unknown label " ^ print x) let func_type (c : context) x = try (Lib.List32.nth c.types.list x.it).it @@ -151,7 +171,7 @@ let anon category space n = let bind category space x = let i = anon category space 1l in if 
VarMap.mem x.it space.map then - error x.at ("duplicate " ^ category ^ " " ^ x.it); + error x.at ("duplicate " ^ category ^ " " ^ print x); space.map <- VarMap.add x.it i space.map; i @@ -336,8 +356,8 @@ num : | FLOAT { $1 @@ $sloc } var : - | NAT { let at = $sloc in fun c lookup -> nat32 $1 at @@ at } - | VAR { let at = $sloc in fun c lookup -> lookup c ($1 @@ at) @@ at } + | NAT { fun c lookup -> nat32 $1 $sloc @@ $sloc } + | VAR { fun c lookup -> lookup c (var $1 $sloc) @@ $sloc } var_list : | /* empty */ { fun c lookup -> [] } @@ -348,7 +368,7 @@ bind_var_opt : | bind_var { fun c anon bind -> bind c $1 } /* Sugar */ bind_var : - | VAR { $1 @@ $sloc } + | VAR { var $1 $sloc } labeling_opt : | /* empty */ @@ -1001,7 +1021,7 @@ module_fields1 : {m with exports = $1 c :: m.exports} } module_var : - | VAR { $1 @@ $sloc } /* Sugar */ + | VAR { var $1 $sloc } /* Sugar */ module_ : | LPAR MODULE option(module_var) module_fields RPAR @@ -1026,7 +1046,7 @@ inline_module1 : /* Sugar */ /* Scripts */ script_var : - | VAR { $1 @@ $sloc } /* Sugar */ + | VAR { var $1 $sloc } /* Sugar */ script_module : | module_ { $1 } diff --git a/interpreter/util/lib.ml b/interpreter/util/lib.ml index 90c4e4fe..aaa60395 100644 --- a/interpreter/util/lib.ml +++ b/interpreter/util/lib.ml @@ -22,6 +22,15 @@ struct n <> 0 && n land (n - 1) = 0 end +module Char = +struct + let is_digit_ascii c = '0' <= c && c <= '9' + let is_uppercase_ascii c = 'A' <= c && c <= 'Z' + let is_lowercase_ascii c = 'a' <= c && c <= 'z' + let is_letter_ascii c = is_uppercase_ascii c || is_lowercase_ascii c + let is_alphanum_ascii c = is_digit_ascii c || is_letter_ascii c +end + module String = struct let implode cs = diff --git a/interpreter/util/lib.mli b/interpreter/util/lib.mli index b66b8b0e..d0ba3250 100644 --- a/interpreter/util/lib.mli +++ b/interpreter/util/lib.mli @@ -75,6 +75,15 @@ sig val is_power_of_two : int -> bool end +module Char : +sig + val is_digit_ascii : char -> bool + val is_uppercase_ascii : 
char -> bool + val is_lowercase_ascii : char -> bool + val is_letter_ascii : char -> bool + val is_alphanum_ascii : char -> bool +end + module String : sig val implode : char list -> string diff --git a/proposals/annotations/Overview.md b/proposals/annotations/Overview.md index ba6ad846..dbd18420 100644 --- a/proposals/annotations/Overview.md +++ b/proposals/annotations/Overview.md @@ -17,6 +17,8 @@ Solution * Neither the syntactic shape nor the semantics is prescribed by the Wasm specification, though the Appendix might include a description of optional support for name section annotations and generic custom sections. +* As an aside, the syntax of symbolic identifiers is extended to allow arbitrary strings in the form `$"..."`. + * This proposal only affects the text format, nothing else. @@ -26,15 +28,23 @@ Extend the Text Format as follows: * Anywhere where white space is allowed, allow *annotations* of the following form: ``` - annot ::= "(@"idchar+ annotelem* ")" - annotelem ::= keyword | reserved | uN | sN | fN | string | id | "(" annotelem* ")" | "(@"idchar+ annotelem* ")" + annot ::= "(@"annotid annotelem* ")" + annotid ::= idchar+ | name + annotelem ::= keyword | reserved | uN | sN | fN | string | id | "(" annotelem* ")" ``` In other words, an annotation can contain any sequence of tokens, as long as it is well-bracketed. - No white space is allowed as part of the initial `(@idchar+` delimiter. + No white space is allowed as part of the initial `(@annotid` delimiter. -* The initial `idchar+` is meant to be an identifier categorising the extension, and plays a role similar to the name of a custom section. +* The initial `annotid` is meant to be an identifier categorising the extension, and plays a role similar to the name of a custom section. By convention, annotations corresponding to a custom section should use the same id. 
+* Extend the grammar of identifiers as follows: + ``` + id ::= "$"idchar+ | "$"name + ``` + +* Elaborate identifiers to their denotation as a name, treating the unquoted form as a shorthand for the name `"idchar+"`. In all places where identifiers are compared, compare the denoted names instead. In particular, change the identifier environment `I` to record names instead of identifiers. + Extend the Appendix on the Custom Sections: * Define annotations reflecting the Name section, which take the form of annotations `(@name "name")`. diff --git a/test/core/annotations.wast b/test/core/annotations.wast index 5a828f12..912d10f3 100644 --- a/test/core/annotations.wast +++ b/test/core/annotations.wast @@ -1,8 +1,11 @@ (module (@a) - + (@0) (@aas-3!@$d-@#4) (@@) (@$) (@+) (@0) (@.) (@!$@#$23414@#$) + (@"a") + (@" @ asd\2a 045 \" fdaf \t \u{45}") + (@a x y z) (@a x-y $yz "aa" -2 0.3 0x3) (@a x-y$yz"aa"-2) @@ -66,10 +69,14 @@ (assert_malformed (module quote "( @a)") "unknown operator") -(assert_malformed (module quote "(@)") "malformed annotation id") -(assert_malformed (module quote "(@ )") "malformed annotation id") -(assert_malformed (module quote "(@ x)") "malformed annotation id") -(assert_malformed (module quote "(@(@a)x)") "malformed annotation id") +(assert_malformed (module quote "(@)") "empty annotation id") +(assert_malformed (module quote "(@ )") "empty annotation id") +(assert_malformed (module quote "(@ x)") "empty annotation id") +(assert_malformed (module quote "(@(@a)x)") "empty annotation id") +(assert_malformed (module quote "(@\"\")") "empty annotation id") +(assert_malformed (module quote "(@ \"a\")") "empty annotation id") +(assert_malformed (module quote "(@\"\n\")") "empty annotation id") +(assert_malformed (module quote "(@\"\\ef\")") "malformed UTF-8") (assert_malformed (module quote "(@x ") "unclosed annotation") (assert_malformed (module quote "(@x ()") "unclosed annotation") @@ -85,8 +92,8 @@ (assert_malformed (module quote "(@x \")") 
"unclosed string") (assert_malformed (module quote "((@a)@b)") "unknown operator") -(assert_malformed (module quote "(func $(@a))") "unknown operator") -(assert_malformed (module quote "(func $(@a)f)") "unknown operator") +(assert_malformed (module quote "(func $(@a))") "empty identifier") +(assert_malformed (module quote "(func $(@a)f)") "empty identifier") ((@a) module (@a) $m (@a) (@a) ((@a) import (@a) "spectest" (@a) "global_i32" (@a) diff --git a/test/core/id.wast b/test/core/id.wast new file mode 100644 index 00000000..dcf151f2 --- /dev/null +++ b/test/core/id.wast @@ -0,0 +1,31 @@ +(module + (func $fg) (func (call $fg)) + (func $03) (func (call $03)) + (func $!?@#a$%^&*b-+_.:9'`|/\<=>~) (func (call $!?@#a$%^&*b-+_.:9'`|/\<=>~)) + (func $" random \t \n stuff ") (func (call $" random \t \n stuff ")) + (func $" ") (func (call $" ")) + + (func $fh) (func (call $"fh")) + (func $"fi") (func (call $fi)) + (func $!?@#a$%^&*-+_.:9'`|/\<=>~) (func (call $"!?@#a$%^&*-+_.:9'`|/\\<=>~")) + + (func $"\41B") (func (call $"AB") (call $"A\42") (call $"\41\42") (call $"\u{41}\u{42}")) + (func $"\t") (func (call $"\09") (call $"\u{09}")) + (func $"") (func (call $"\ef\98\9a\ef\92\a9") (call $"\u{f61a}\u{f4a9}")) + + (func + block $l1 (br $"l1") end $"l1" + block $007 (br $"007") end $"007" + block $!?@#a$%^&*-+_.:9'`|/\<=>~ end $"!?@#a$%^&*-+_.:9'`|/\\<=>~" + (i32.const 0) if $"\41B" (br $AB) else $"A\42" end $"\u{41}\u{42}" + (i32.const 0) if $"\t" else $"\09" end $"\u{09}" + (i32.const 0) if $" " else $"\ef\98\9a\ef\92\a9 " end $"\u{f61a}\u{f4a9} " + ) +) + +(assert_malformed (module quote "(func $)") "empty identifier") +(assert_malformed (module quote "(func $\"\")") "empty identifier") +(assert_malformed (module quote "(func $ \"a\")") "empty identifier") +(assert_malformed (module quote "(func $\"a\nb\")") "empty identifier") +(assert_malformed (module quote "(func $\"a\tb\")") "empty identifier") +(assert_malformed (module quote "(func $\"\\ef\")") "malformed 
UTF-8")