From 091762f3f026a5fb3b158369fcd2f76d8acd4fb9 Mon Sep 17 00:00:00 2001
From: Kenneth MacKenzie <kenneth.mackenzie@iohk.io>
Date: Thu, 21 Dec 2023 18:13:38 +0000
Subject: [PATCH] Kwxm/spec/no flat for bls (#5690)

* Change symbol for units of field to avoid confusion

* No flat serialisation for BLS12-381 elements
---
 doc/plutus-core-spec/cardano/builtins1.tex  |  8 +++---
 doc/plutus-core-spec/cardano/builtins4.tex  | 29 ++++++++++++++++-----
 doc/plutus-core-spec/flat-serialisation.tex | 29 +++++++++------------
 doc/plutus-core-spec/header.tex             |  1 +
 doc/plutus-core-spec/notation.tex           |  4 +--
 doc/plutus-core-spec/untyped-grammar.tex    |  2 +-
 6 files changed, 43 insertions(+), 30 deletions(-)
diff --git a/doc/plutus-core-spec/cardano/builtins1.tex b/doc/plutus-core-spec/cardano/builtins1.tex
index c18f966842b..520ecb5c967 100644
--- a/doc/plutus-core-spec/cardano/builtins1.tex
+++ b/doc/plutus-core-spec/cardano/builtins1.tex
@@ -27,10 +27,10 @@ \subsubsection{Built-in types and type operators}
         \hline
         \texttt{integer} &   $\mathbb{Z}$ & \texttt{-?[0-9]+}\\
         \texttt{bytestring}  & $ \B^*$, the set of sequences of bytes or 8-bit characters. & \texttt{\#([0-9A-Fa-f][0-9A-Fa-f])*}\\
-        \texttt{string} & $\U^*$,  the set of sequences of Unicode characters. & See note below.\\
+        \texttt{string} & $\U^*$,  the set of sequences of Unicode characters. & See note below\\
         \texttt{bool} & \{\texttt{true, false}\} & \texttt{True | False}\\
         \texttt{unit} &  \{()\} & \texttt{()}\\
-        \texttt{data} &  See below. & Not yet supported.\\
+        \texttt{data} &  See below & See below\\
         \hline
     \end{tabular}
     \caption{Atomic Types}
@@ -43,8 +43,8 @@ \subsubsection{Built-in types and type operators}
         \hline
         Operator $\op$ & $\left|\op\right|$  & Denotation & Concrete Syntax\\
         \hline
-        \texttt{list} & 1 & $\denote{\listOf{t}} = \denote{t}^*$ & Not yet supported\\
-        \texttt{pair} & 2 & $\denote{\pairOf{t_1}{t_2}} = \denote{t_1} \times \denote{t_2}$ & Not yet supported\\
+        \texttt{list} & 1 & $\denote{\listOf{t}} = \denote{t}^*$ & See below\\
+        \texttt{pair} & 2 & $\denote{\pairOf{t_1}{t_2}} = \denote{t_1} \times \denote{t_2}$ & See below\\
         \hline
         \end{tabular}
    \caption{Type Operators}
diff --git a/doc/plutus-core-spec/cardano/builtins4.tex b/doc/plutus-core-spec/cardano/builtins4.tex
index 5c3f39e9f10..52431771209 100644
--- a/doc/plutus-core-spec/cardano/builtins4.tex
+++ b/doc/plutus-core-spec/cardano/builtins4.tex
@@ -398,11 +398,26 @@ \subsubsection{BLS12-381 built-in functions}
 Similarly, a value of type $\ty{bls12\_381\_G2\_element}$ is denoted by a term
 of the form \texttt{(con bls12\_381\_G2\_element 0x...)}  where \texttt{...}
 consists of 192 hexadecimal digits representing the 96-byte compressed form of
-the relevant point.
+the relevant point.  \textbf{This syntax is provided only for use in textual
+Plutus Core programs}, for example for experimentation and testing.  We do not
+support constants of any of the BLS12-381 types in serialised programs on the
+Cardano blockchain: see Section~\ref{sec:flat-constants}.  However, for
+$\ty{bls12\_381\_G1\_element}$ and $\ty{bls12\_381\_G2\_element}$ one can use
+the appropriate uncompression function on a  bytestring constant at runtime:
+for example, instead of
+$$
+\texttt{(con bls12\_381\_G1\_element 0xa1e9a0...)}
+$$
+write
+$$
+\texttt{[(builtin bls12\_381\_G1\_uncompress) (con bytestring \#a1e9a0...)]}.
+$$
 
-No syntax is provided for values of type $\ty{bls12\_381\_mlresult}$. It is not
-possible to parse such values, and they will appear as \texttt{(con
-bls12\_381\_mlresult <opaque>)} if output by a program.
+\noindent
+No concrete syntax is provided for values of type
+$\ty{bls12\_381\_mlresult}$. It is not possible to parse such values, and they
+will appear as \texttt{(con bls12\_381\_mlresult <opaque>)} if output by a
+program.
 
 
 \note{Pairing operations.}
@@ -441,11 +456,11 @@ \subsubsection{BLS12-381 built-in functions}
 We do not mandate specific choices for $\MlResultDenotation, \mu_r, e$, and $\phi$, but a
 plausible choice would be
 \begin{itemize}
-\item $\MlResultDenotation = \FF^*$.
+\item $\MlResultDenotation = \units{\FF}$.
 \item $e$ is the Miller loop associated with the optimal Ate pairing
 for $E_1$ and $E_2$~\cite{Vercauteren}.
-\item $\mu_r = \{x \in \FF^*: x^r=1\}$, the group of $r$th roots of unity in $\FF^*$.
-(There are $r$ distinct $r$th roots of unity in $\FF^*$ because the embedding
+\item $\mu_r = \{x \in \units{\FF}: x^r=1\}$, the group of $r$th roots of unity in $\FF$.
+(There are $r$ distinct $r$th roots of unity in $\FF$ because the embedding
 degree of $E_1$ and $E_2$ with respect to $r$ is 12 (see~\cite[4.1]{Costello-pairings}).)
 \item $\psi(x) = x^{\frac{q-1}{r}}$.
 \end{itemize}
diff --git a/doc/plutus-core-spec/flat-serialisation.tex b/doc/plutus-core-spec/flat-serialisation.tex
index 90e6d9c9714..2c139a8685e 100644
--- a/doc/plutus-core-spec/flat-serialisation.tex
+++ b/doc/plutus-core-spec/flat-serialisation.tex
@@ -497,6 +497,7 @@ \subsection{Built-in types}
 $$
 
 \subsection{Constants}
+\label{sec:flat-constants}
 Values of built-in types can mostly be encoded quite simply by using encoders
 already defined.  Note that the unit value \texttt{(con unit ())} does not have
 an explicit encoding: the type has only one possible value, so there is no need
@@ -508,16 +509,16 @@ \subsection{Constants}
 bytestring is obtained using $\D_{\B^*}$ and this is then decoded from CBOR
 using $\dData$ to obtain a $\ty{data}$ object.
 
-The $\ty{bls12\_381\_G1\_element}$ and $\ty{bls12\_381\_G2\_element}$ types are
-serialised by converting to byestrings using the $\compress_{G_1}$ and
-$\compress_{G_2}$ functions defined in
-Section~\ref{sec:bls-builtins-4}, and decoding is similarly performed
-via bytestrings using the $\uncompress$ functions.  \textbf{We do not provide
-serialisation and deserialisation for constants of the
-$\ty{bls12\_381\_mlresult}$ type}; we have specified a tag for this type, but if
-that tag is encountered during deserialisation then deserialisation fails and
-any subsequent input is ignored.
-
+We do not provide serialisation and deserialisation for constants of type
+$\ty{bls12\_381\_G1\_element}$, $\ty{bls12\_381\_G2\_element}$, or
+$\ty{bls12\_381\_mlresult}$.  We have specified tags for these types, but if one
+of these tags is encountered during deserialisation then deserialisation fails
+and any subsequent input is ignored.  Note however that constants of the first
+two types can be serialised by using the compression functions defined in
+Section~\ref{sec:bls-builtins-4} and serialising the resulting bytestrings.
+Decoding can similarly be performed indirectly by using
+\texttt{bls12\_381\_G1\_uncompress} and \texttt{bls12\_381\_G2\_uncompress} on
+bytestring constants during program execution.
 
 \begin{alignat*}{2}
   & \Econstant{\ty{integer}}(s,n)                  &&= \E_{\Z}(s, n) \\
@@ -528,9 +529,7 @@ \subsection{Constants}
   & \Econstant{\ty{bool}}(s, \texttt{True})        &&= s \cdot \bits{1}\\
   & \Econstant{\listOf{\tn}}(s,l)                  &&= \Elist^{\tn}_{\mathsf{constant}}(s, l) \\
   & \Econstant{\pairOf{\tn_1}{\tn_2}}(s,(c_1,c_2))  &&= \Econstant{\tn_2}(\Econstant{\tn_1}(s, c_1), c_2)\\
-  & \Econstant{\ty{data}}(s,d)                     &&= \E_{\B^*}(s, \eData(d))\\
-  & \Econstant{\ty{bls12\_381\_G1\_element}}(s,e)  &&= \E_{\B^*}(s, \compress_{G_1}(e))\\
-  & \Econstant{\ty{bls12\_381\_G2\_element}}(s,e)  &&= \E_{\B^*}(s, \compress_{G_2}(e)).
+  & \Econstant{\ty{data}}(s,d)                     &&= \E_{\B^*}(s, \eData(d))
 \end{alignat*}
 
 \begin{alignat*}{3}
@@ -548,9 +547,7 @@ \subsection{Constants}
      \end{cases}\\
   &\dConstant{\ty{data}}(s)                  &&= (s', d) &&
                                            \text{if $\D_{\B*}(s) = (s', t)$
-                                            and $\dData(t) = (t', d)$ for some $t'$}\\
-  & \dConstant{\ty{bls12\_381\_G1\_element}}(s)  &&= (s',e) && \text{if $\D_{\B^*}(s) = (s',b)$ and $\uncompress_{G_1}(b)=e$}\\
-  & \dConstant{\ty{bls12\_381\_G2\_element}}(s)  &&= (s',e) && \text{if $\D_{\B^*}(s) = (s',b)$ and $\uncompress_{G_2}(b)=e$}.
+                                            and $\dData(t) = (t', d)$ for some $t'$}
 \end{alignat*}
 
 \subsection{Built-in functions}
diff --git a/doc/plutus-core-spec/header.tex b/doc/plutus-core-spec/header.tex
index 58848a4a293..43ca4507ae4 100644
--- a/doc/plutus-core-spec/header.tex
+++ b/doc/plutus-core-spec/header.tex
@@ -340,6 +340,7 @@
 \DeclareMathOperator{\proj}{proj}
 \DeclareMathOperator{\is}{is}
 \DeclareMathOperator{\dom}{dom}
+\newcommand{\units}[1]{#1^{\times}}  % Group of units of a field
 
 %%% Spacing in tables
 
diff --git a/doc/plutus-core-spec/notation.tex b/doc/plutus-core-spec/notation.tex
index 495c3196aa0..3cc51f47bbf 100644
--- a/doc/plutus-core-spec/notation.tex
+++ b/doc/plutus-core-spec/notation.tex
@@ -17,7 +17,7 @@ \subsection{Sets}
   \item $\B^*$ is the set of all bytestrings.
   \item $\Z = \{\ldots, -2, -1, 0, 1, 2, \ldots\}$.
   \item $\mathbb{F}_q$ denotes a finite field with $q$ elements ($q$ a prime power).
-  \item $\mathbb{F}_q^*$ denotes the multiplicative group of nonzero elements of $\mathbb{F}_q$.
+  \item $\units{\mathbb{F}} = \mathbb{F}\setminus\{0\}$ denotes the multiplicative group of nonzero elements of a field $\mathbb{F}$.
   \item $\U$ denotes the set of Unicode scalar values, as defined in~\cite[Definition D76]{Unicode-standard}.
   \item $\U^*$ is the set of all Unicode strings.
   \item We assume that there is a special symbol $\errorX$ which does not appear
@@ -34,7 +34,7 @@ \subsection{Sets}
 \nomenclature[An3]{$\Nab{a}{b}$}{$\{n \in \N: a \leq n \leq b\}$}%
 \nomenclature[Aw]{$\Z$}{$\{\ldots, -2, -1, 0, 1, 2, \ldots\}$}%
 \nomenclature[Af]{$\mathbb{F}_q$}{The finite field with $q$ elements}%
-\nomenclature[Af]{$\mathbb{F}_q^*$}{The multiplicative group of $\mathbb{F}_q$}%
+\nomenclature[Af]{$\units{\mathbb{F}}$}{The multiplicative group of a field $\mathbb{F}$, ie $\mathbb{F}\setminus\{0\}$}%
 \nomenclature[Ab]{$\B$}{$\{n \in \Z: 0 \leq n \leq 255\}$}%
 \nomenclature[Ab]{$\B^*$}{The set of all bytestrings}%
 \nomenclature[Au]{$\U$}{The set of Unicode scalar values}%
diff --git a/doc/plutus-core-spec/untyped-grammar.tex b/doc/plutus-core-spec/untyped-grammar.tex
index 38836f41ae5..f84ffc2e01a 100644
--- a/doc/plutus-core-spec/untyped-grammar.tex
+++ b/doc/plutus-core-spec/untyped-grammar.tex
@@ -162,7 +162,7 @@ \subsection{Notes}
 (\cite{deBruijn}, \cite[C.3]{Barendregt}), and for this we redefine names to be
 natural numbers.  In de Bruijn terms, $\lambda$-expressions do not need to bind
 a variable, but in order to re-use our existing syntax we arbitrarily use 0 for
-the bound variable, so that all $\lambda$-expresssions are of the form
+the bound variable, so that all $\lambda$-expressions are of the form
 \texttt{(lam 0 $M$)}; other variables (ie, those not appearing immediately after
 a \texttt{lam} binder) are represented by natural number greater than zero.