WebAssembly · rossberg · May 21, 2024 · Apr 4, 2024 · Apr 4, 2024 · Apr 4, 2024
diff --git a/.github/workflows/ci-interpreter.yml b/.github/workflows/ci-interpreter.yml
@@ -21,7 +21,7 @@ jobs:
       - name: Setup OCaml
         uses: ocaml/setup-ocaml@v2
         with:
-          ocaml-compiler: 4.12.x
+          ocaml-compiler: 4.14.x
       - name: Setup OCaml tools
         run: opam install --yes ocamlfind.1.9.5 js_of_ocaml.4.0.0 js_of_ocaml-ppx.4.0.0
       - name: Setup Node.js

diff --git a/document/core/text/conventions.rst b/document/core/text/conventions.rst
@@ -120,20 +120,21 @@ It is convenient to define identifier contexts as :ref:`records <notation-record
    \begin{array}{llll}
    \production{identifier context} & I &::=&
      \begin{array}[t]{l@{~}ll}
-     \{ & \ITYPES & (\Tid^?)^\ast, \\
-        & \IFUNCS & (\Tid^?)^\ast, \\
-        & \ITABLES & (\Tid^?)^\ast, \\
-        & \IMEMS & (\Tid^?)^\ast, \\
-        & \IGLOBALS & (\Tid^?)^\ast, \\
-        & \IELEM & (\Tid^?)^\ast, \\
-        & \IDATA & (\Tid^?)^\ast, \\
-        & \ILOCALS & (\Tid^?)^\ast, \\
-        & \ILABELS & (\Tid^?)^\ast, \\
+     \{ & \ITYPES & (\name^?)^\ast, \\
+        & \IFUNCS & (\name^?)^\ast, \\
+        & \ITABLES & (\name^?)^\ast, \\
+        & \IMEMS & (\name^?)^\ast, \\
+        & \IGLOBALS & (\name^?)^\ast, \\
+        & \IELEM & (\name^?)^\ast, \\
+        & \IDATA & (\name^?)^\ast, \\
+        & \ILOCALS & (\name^?)^\ast, \\
+        & \ILABELS & (\name^?)^\ast, \\
         & \ITYPEDEFS & \functype^\ast ~\} \\
      \end{array}
    \end{array}
 
-For each index space, such a context contains the list of :ref:`identifiers <text-id>` assigned to the defined indices.
+For each index space, such a context contains the list of :ref:`names <syntax-name>` assigned to the defined indices,
+which were denoted by the corresponding :ref:`identifiers <text-id>`.
 Unnamed indices are associated with empty (:math:`\epsilon`) entries in these lists.
 
 An identifier context is *well-formed* if no index space contains duplicate identifiers.

diff --git a/document/core/text/instructions.rst b/document/core/text/instructions.rst
@@ -32,11 +32,11 @@ The following grammar handles the corresponding update to the :ref:`identifier c
 .. math::
    \begin{array}{llcllll}
    \production{label} & \Tlabel_I &::=&
-     v{:}\Tid &\Rightarrow& \{\ILABELS~v\} \compose I
+     v{:}\Tid &\Rightarrow& v, \{\ILABELS~v\} \compose I
        & (\iff v \notin I.\ILABELS) \\ &&|&
-     v{:}\Tid &\Rightarrow& \{\ILABELS~v\} \compose (I \with \ILABELS[i] = \epsilon)
+     v{:}\Tid &\Rightarrow& v, \{\ILABELS~v\} \compose (I \with \ILABELS[i] = \epsilon)
        & (\iff I.\ILABELS[i] = v) \\ &&|&
-     \epsilon &\Rightarrow& \{\ILABELS~(\epsilon)\} \compose I \\
+     \epsilon &\Rightarrow& \epsilon, \{\ILABELS~(\epsilon)\} \compose I \\
    \end{array}
 
 .. note::
@@ -79,16 +79,16 @@ However, the special case of a type use that is syntactically empty or consists
      x,I'{:}\Ttypeuse_I &\Rightarrow& x & (\iff I' = \{\ILOCALS~(\epsilon)^\ast\}) \\
    \end{array} \\
    \production{block instruction} & \Tblockinstr_I &::=&
-     \text{block}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid^?
+     \text{block}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~{v'}^?{:}\Tid^?
        \\ &&&\qquad \Rightarrow\quad \BLOCK~\X{bt}~\X{in}^\ast~\END
-       \qquad\quad~~ (\iff \Tid^? = \epsilon \vee \Tid^? = \Tlabel) \\ &&|&
-     \text{loop}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid^?
+       \qquad\quad~~ (\iff {v'}^? = \epsilon \vee {v'}^? = v^?) \\ &&|&
+     \text{loop}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}{:}\Tinstr_{I'})^\ast~~\text{end}~~{v'}^?{:}\Tid^?
        \\ &&&\qquad \Rightarrow\quad \LOOP~\X{bt}~\X{in}^\ast~\END
-       \qquad\qquad (\iff \Tid^? = \epsilon \vee \Tid^? = \Tlabel) \\ &&|&
-     \text{if}~~I'{:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}_1{:}\Tinstr_{I'})^\ast~~
-       \text{else}~~\Tid_1^?~~(\X{in}_2{:}\Tinstr_{I'})^\ast~~\text{end}~~\Tid_2^?
+       \qquad\qquad (\iff {v'}^? = \epsilon \vee {v'}^? = v^?) \\ &&|&
+     \text{if}~~(v^?,I'){:}\Tlabel_I~~\X{bt}{:}\Tblocktype_I~~(\X{in}_1{:}\Tinstr_{I'})^\ast~~
+       \text{else}~~v_1^?{:}\Tid_1^?~~(\X{in}_2{:}\Tinstr_{I'})^\ast~~\text{end}~~v_2^?{:}\Tid_2^?
        \\ &&&\qquad \Rightarrow\quad \IF~\X{bt}~\X{in}_1^\ast~\ELSE~\X{in}_2^\ast~\END
-       \qquad (\iff \Tid_1^? = \epsilon \vee \Tid_1^? = \Tlabel, \Tid_2^? = \epsilon \vee \Tid_2^? = \Tlabel) \\
+       \qquad (\iff v_1^? = \epsilon \vee v_1^? = v^?, v_2^? = \epsilon \vee v_2^? = v^?) \\
    \end{array}
 
 .. note::

diff --git a/document/core/text/lexical.rst b/document/core/text/lexical.rst
@@ -135,7 +135,7 @@ The *look-ahead* restrictions on the productions for |Tblockchar| disambiguate t
 Annotations
 ~~~~~~~~~~~
 
-An *annotation* is a bracketed token sequence headed by an *annotation id* of the form :math:`\T{@id}`.
+An *annotation* is a bracketed token sequence headed by an *annotation id* of the form :math:`\text{@id}` or :math:`\text{@"..."}`.
 No :ref:`space <text-space>` is allowed between the opening parenthesis and this id.
 Annotations are intended to be used for third-party extensions;
 they can appear anywhere in a program but are ignored by the WebAssembly semantics itself, which treats them as :ref:`white space <text-space>`.
@@ -145,8 +145,10 @@ Annotations can contain other parenthesized token sequences (including nested an
 
 .. math::
    \begin{array}{llclll@{\qquad\qquad}l}
-   \production{annot} & \Tannot &::=&
-     \text{(@}~\Tidchar^+ ~(\Tspace ~|~ \Ttoken)^\ast~\text{)} \\
+   \production{annotation} & \Tannot &::=&
+     \text{(@}~\Tannotid ~(\Tspace ~|~ \Ttoken)^\ast~\text{)} \\
+   \production{annotation identifier} & \Tannotid &::=&
+     \Tidchar^+ ~|~ \Tname \\
    \end{array}
 
 .. note::

diff --git a/document/core/text/modules.rst b/document/core/text/modules.rst
@@ -679,28 +679,28 @@ The definition of the initial :ref:`identifier context <text-context>` :math:`I`
 
 .. math::
    \begin{array}{@{}lcl@{\qquad\qquad}l}
-   \F{idc}(\text{(}~\text{type}~\Tid^?~\X{ft}{:}\Tfunctype~\text{)}) &=&
-     \{\ITYPES~(\Tid^?), \ITYPEDEFS~\X{ft}\} \\
-   \F{idc}(\text{(}~\text{func}~\Tid^?~\dots~\text{)}) &=&
-     \{\IFUNCS~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{table}~\Tid^?~\dots~\text{)}) &=&
-     \{\ITABLES~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{memory}~\Tid^?~\dots~\text{)}) &=&
-     \{\IMEMS~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{global}~\Tid^?~\dots~\text{)}) &=&
-     \{\IGLOBALS~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{elem}~\Tid^?~\dots~\text{)}) &=&
-     \{\IELEM~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{data}~\Tid^?~\dots~\text{)}) &=&
-     \{\IDATA~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{func}~\Tid^?~\dots~\text{)}~\text{)}) &=&
-     \{\IFUNCS~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{table}~\Tid^?~\dots~\text{)}~\text{)}) &=&
-     \{\ITABLES~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{memory}~\Tid^?~\dots~\text{)}~\text{)}) &=&
-     \{\IMEMS~(\Tid^?)\} \\
-   \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{global}~\Tid^?~\dots~\text{)}~\text{)}) &=&
-     \{\IGLOBALS~(\Tid^?)\} \\
+   \F{idc}(\text{(}~\text{type}~v^?{:}\Tid^?~\X{ft}{:}\Tfunctype~\text{)}) &=&
+     \{\ITYPES~(v^?), \ITYPEDEFS~\X{ft}\} \\
+   \F{idc}(\text{(}~\text{func}~v^?{:}\Tid^?~\dots~\text{)}) &=&
+     \{\IFUNCS~(v^?)\} \\
+   \F{idc}(\text{(}~\text{table}~v^?{:}\Tid^?~\dots~\text{)}) &=&
+     \{\ITABLES~(v^?)\} \\
+   \F{idc}(\text{(}~\text{memory}~v^?{:}\Tid^?~\dots~\text{)}) &=&
+     \{\IMEMS~(v^?)\} \\
+   \F{idc}(\text{(}~\text{global}~v^?{:}\Tid^?~\dots~\text{)}) &=&
+     \{\IGLOBALS~(v^?)\} \\
+   \F{idc}(\text{(}~\text{elem}~v^?{:}\Tid^?~\dots~\text{)}) &=&
+     \{\IELEM~(v^?)\} \\
+   \F{idc}(\text{(}~\text{data}~v^?{:}\Tid^?~\dots~\text{)}) &=&
+     \{\IDATA~(v^?)\} \\
+   \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{func}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=&
+     \{\IFUNCS~(v^?)\} \\
+   \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{table}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=&
+     \{\ITABLES~(v^?)\} \\
+   \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{memory}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=&
+     \{\IMEMS~(v^?)\} \\
+   \F{idc}(\text{(}~\text{import}~\dots~\text{(}~\text{global}~v^?{:}\Tid^?~\dots~\text{)}~\text{)}) &=&
+     \{\IGLOBALS~(v^?)\} \\
    \F{idc}(\text{(}~\dots~\text{)}) &=&
      \{\} \\
    \end{array}

diff --git a/document/core/text/values.rst b/document/core/text/values.rst
@@ -211,12 +211,13 @@ Identifiers
 ~~~~~~~~~~~
 
 :ref:`Indices <syntax-index>` can be given in both numeric and symbolic form.
-Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$}`, followed by any sequence of printable |ASCII|_ characters that does not contain a space, quotation mark, comma, semicolon, or bracket.
+Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$}`, followed by either a sequence of printable |ASCII|_ characters that does not contain a space, quotation mark, comma, semicolon, or bracket, or by a quoted :ref:`name <text-name>`.
 
 .. math::
    \begin{array}{llclll@{\qquad}l}
    \production{identifier} & \Tid &::=&
-     \text{\$}~\Tidchar^+ \\
+     \text{\$}~c^\ast{:}\Tidchar^+ &\Rightarrow& c^\ast \\ &&|&
+     \text{\$}~c^\ast{:}\Tname &\Rightarrow& c^\ast & (\iff |c^\ast| > 0) \\
    \production{identifier character} & \Tidchar &::=&
      \text{0} ~~|~~ \dots ~~|~~ \text{9} \\ &&|&
      \text{A} ~~|~~ \dots ~~|~~ \text{Z} \\ &&|&
@@ -246,6 +247,9 @@ Symbolic *identifiers* that stand in lieu of indices start with :math:`\text{\$}
      \text{\tilde{~~}} \\
    \end{array}
 
+.. note::
+   The value of an identifier character is the Unicode codepoint denoting it.
+
 .. _text-id-fresh:
 
 Conventions

diff --git a/document/core/util/macros.def b/document/core/util/macros.def
@@ -719,6 +719,7 @@
 .. |Tblockchar| mathdef:: \xref{text/lexical}{text-comment}{\T{blockchar}}
 
 .. |Tannot| mathdef:: \xref{text/lexical}{text-annot}{\T{annot}}
+.. |Tannotid| mathdef:: \xref{text/lexical}{text-annot}{\T{annotid}}
 .. |Tannottoken| mathdef:: \xref{text/lexical}{text-annot}{\T{annottoken}}
 
 

diff --git a/interpreter/text/arrange.ml b/interpreter/text/arrange.ml
@@ -627,7 +627,10 @@ let custom m mnode (module S : Custom.Section) =
 
 let var_opt = function
   | None -> ""
-  | Some x -> " " ^ x.it
+  | Some x when
+    String.for_all (fun c -> Lib.Char.is_alphanum_ascii c || c = '_') x.it ->
+    " $" ^ x.it
+  | Some x -> " $" ^ name (Utf8.decode x.it)
 
 let module_with_var_opt x_opt (m, cs) =
   let fx = ref 0 in

diff --git a/interpreter/text/lexer.mll b/interpreter/text/lexer.mll
@@ -48,6 +48,11 @@ let string s =
   done;
   Buffer.contents b
 
+let annot_id lexbuf s =
+  let s' = string s in
+  if s' = "" then error lexbuf "empty annotation id";
+  try Utf8.decode s' with Utf8.Utf8 -> error lexbuf "malformed UTF-8 encoding"
+
 let opt = Lib.Option.get
 }
 
@@ -102,8 +107,7 @@ let float =
 let string = '"' character* '"'
 
 let idchar = letter | digit | '_' | symbol
-let name = idchar+
-let id = '$' name
+let id = idchar+
 
 let keyword = ['a'-'z'] (letter | digit | '_' | '.' | ':')+
 let reserved = (idchar | string)+ | ',' | ';' | '[' | ']' | '{' | '}'
@@ -699,13 +703,21 @@ rule token = parse
   | "offset="(nat as s) { OFFSET_EQ_NAT s }
   | "align="(nat as s) { ALIGN_EQ_NAT s }
 
-  | id as s { VAR s }
+  | '$'(id as s) { VAR s }
+  | '$'(string as s)
+    { let s' = string s in
+      if s' = "" then error lexbuf "empty identifier"; VAR s' }
+  | '$' { error lexbuf "empty identifier" }
 
-  | "(@"(name as n)
+  | "(@"(id as n)
     { let r = region lexbuf in
       let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
       Annot.record (Annot.{name = Utf8.decode n; items} @@ r); token lexbuf }
-  | "(@" { error lexbuf "malformed annotation id" }
+  | "(@"(string as s)
+    { let r = region lexbuf in
+      let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
+      Annot.record (Annot.{name = annot_id lexbuf s; items} @@ r); token lexbuf }
+  | "(@" { error lexbuf "empty annotation id" }
 
   | ";;"utf8_no_nl*eof { EOF }
   | ";;"utf8_no_nl*newline { Lexing.new_line lexbuf; token lexbuf }
@@ -726,11 +738,16 @@ and annot start = parse
     { let r = region lexbuf in
       let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
       (Annot.Parens items @@ r) :: annot start lexbuf }
-  | "(@"(name as n)
+  | "(@"(id as n)
     { let r = region lexbuf in
       let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
       let ann = Annot.{name = Utf8.decode n; items} @@ r in
       (Annot.Annot ann @@ r) :: annot start lexbuf }
+  | "(@"(string as s)
+    { let r = region lexbuf in
+      let items = annot (Lexing.lexeme_start_p lexbuf) lexbuf in
+      let ann = Annot.{name = annot_id lexbuf s; items} @@ r in
+      (Annot.Annot ann @@ r) :: annot start lexbuf }
 
   | nat as s
     { let r = region lexbuf in
@@ -741,9 +758,15 @@ and annot start = parse
   | float as s
     { let r = region lexbuf in
       (Annot.Float s @@ r) :: annot start lexbuf }
-  | id as s
+  | '$'(id as s)
     { let r = region lexbuf in
       (Annot.Var s @@ r) :: annot start lexbuf }
+  | '$'(string as s)
+    { let r = region lexbuf in
+      let s' = string s in
+      if s' = "" then error lexbuf "empty identifier";
+      (Annot.Var s' @@ r) :: annot start lexbuf }
+  | '$' { error lexbuf "empty identifier" }
   | string as s
     { let r = region lexbuf in
       (Annot.String (string s) @@ r) :: annot start lexbuf }

diff --git a/interpreter/text/parser.mly b/interpreter/text/parser.mly
@@ -88,6 +88,11 @@ let nat32 s loc =
 let name s loc =
   try Utf8.decode s with Utf8.Utf8 -> error (at loc) "malformed UTF-8 encoding"
 
+let var s loc =
+  let r = at loc in
+  try ignore (Utf8.decode s); Source.(s @@ r)
+  with Utf8.Utf8 -> error r "malformed UTF-8 encoding"
+
 
 (* Symbolic variables *)
 
@@ -120,9 +125,24 @@ let force_locals (c : context) =
 let enter_func (c : context) =
   {c with labels = VarMap.empty; locals = empty ()}
 
+let print_char = function
+  | 0x09 -> "\\t"
+  | 0x0a -> "\\n"
+  | 0x22 -> "\\\""
+  | 0x5c -> "\\\\"
+  | c when 0x20 <= c && c < 0x7f -> String.make 1 (Char.chr c)
+  | c -> Printf.sprintf "\\u{%02x}" c
+
+let print x =
+  "$" ^
+  if String.for_all (fun c -> Lib.Char.is_alphanum_ascii c || c = '_') x.it
+  then x.it
+  else "\"" ^ String.concat "" (List.map print_char (Utf8.decode x.it)) ^ "\""
+
+
 let lookup category space x =
   try VarMap.find x.it space.map
-  with Not_found -> error x.at ("unknown " ^ category ^ " " ^ x.it)
+  with Not_found -> error x.at ("unknown " ^ category ^ " " ^ print x)
 
 let type_ (c : context) x = lookup "type" c.types.space x
 let func (c : context) x = lookup "function" c.funcs x
@@ -134,7 +154,7 @@ let elem (c : context) x = lookup "elem segment" c.elems x
 let data (c : context) x = lookup "data segment" c.datas x
 let label (c : context) x =
   try VarMap.find x.it c.labels
-  with Not_found -> error x.at ("unknown label " ^ x.it)
+  with Not_found -> error x.at ("unknown label " ^ print x)
 
 let func_type (c : context) x =
   try (Lib.List32.nth c.types.list x.it).it
@@ -151,7 +171,7 @@ let anon category space n =
 let bind category space x =
   let i = anon category space 1l in
   if VarMap.mem x.it space.map then
-    error x.at ("duplicate " ^ category ^ " " ^ x.it);
+    error x.at ("duplicate " ^ category ^ " " ^ print x);
   space.map <- VarMap.add x.it i space.map;
   i
 
@@ -336,8 +356,8 @@ num :
   | FLOAT { $1 @@ $sloc }
 
 var :
-  | NAT { let at = $sloc in fun c lookup -> nat32 $1 at @@ at }
-  | VAR { let at = $sloc in fun c lookup -> lookup c ($1 @@ at) @@ at }
+  | NAT { fun c lookup -> nat32 $1 $sloc @@ $sloc }
+  | VAR { fun c lookup -> lookup c (var $1 $sloc) @@ $sloc }
 
 var_list :
   | /* empty */ { fun c lookup -> [] }
@@ -348,7 +368,7 @@ bind_var_opt :
   | bind_var { fun c anon bind -> bind c $1 }  /* Sugar */
 
 bind_var :
-  | VAR { $1 @@ $sloc }
+  | VAR { var $1 $sloc }
 
 labeling_opt :
   | /* empty */
@@ -1001,7 +1021,7 @@ module_fields1 :
       {m with exports = $1 c :: m.exports} }
 
 module_var :
-  | VAR { $1 @@ $sloc }  /* Sugar */
+  | VAR { var $1 $sloc }  /* Sugar */
 
 module_ :
   | LPAR MODULE option(module_var) module_fields RPAR
@@ -1026,7 +1046,7 @@ inline_module1 :  /* Sugar */
 /* Scripts */
 
 script_var :
-  | VAR { $1 @@ $sloc }  /* Sugar */
+  | VAR { var $1 $sloc }  /* Sugar */
 
 script_module :
   | module_ { $1 }

diff --git a/interpreter/util/lib.ml b/interpreter/util/lib.ml
@@ -22,6 +22,15 @@ struct
     n <> 0 && n land (n - 1) = 0
 end
 
+module Char =
+struct
+  let is_digit_ascii c = '0' <= c && c <= '9'
+  let is_uppercase_ascii c = 'A' <= c && c <= 'Z'
+  let is_lowercase_ascii c = 'a' <= c && c <= 'z'
+  let is_letter_ascii c = is_uppercase_ascii c || is_lowercase_ascii c
+  let is_alphanum_ascii c = is_digit_ascii c || is_letter_ascii c
+end
+
 module String =
 struct
   let implode cs =