From 222bf26e8c45510ebddbd9411ddb8694c0f8f32d Mon Sep 17 00:00:00 2001 From: Jules Aguillon Date: Wed, 27 Nov 2024 11:20:43 +0100 Subject: [PATCH] Update vendored odoc-parser to version 2.4 (#2631) * Backport the table feature from Odoc 2.4's parser and implement tables. * Make sure elements in light table don't wrap Invalid syntax was generated when elements had to wrap. This is fixed by making sure all the elements in the table are inline elements and then by formatting them without any break. A smaller type is used to represent safely formattable light table that do not contain any block elements. The `~wrap` argument is added to `fmt_inline_elements` that controls whether elements can wrap. This is passed explicitly rather than through the config type to be sure that the information is not lost when recursing through elements. This fixed a potential bug in heading labels. * Odoc 2.4's code blocks support delimiters and a output section. * Add support for @hidden tag, quoted references and reduce diff with upstream --- CHANGES.md | 2 + lib/Docstring.ml | 44 +- lib/Fmt_odoc.ml | 238 ++++++++--- test/passing/gen/dune.inc | 15 + test/passing/refs.default/odoc.mli.err | 1 + test/passing/refs.default/odoc.mli.ref | 251 +++++++++++ test/passing/refs.janestreet/odoc.mli.err | 1 + test/passing/refs.janestreet/odoc.mli.ref | 262 ++++++++++++ test/passing/refs.ocamlformat/odoc.mli.err | 1 + test/passing/refs.ocamlformat/odoc.mli.ref | 251 +++++++++++ test/passing/tests/odoc.mli | 237 +++++++++++ test/passing/tests/odoc.mli.opts | 1 + vendor/odoc-parser/ast.ml | 30 +- vendor/odoc-parser/dune | 4 +- vendor/odoc-parser/lexer.mll | 202 +++++++-- vendor/odoc-parser/loc.ml | 8 + vendor/odoc-parser/loc.mli | 4 + vendor/odoc-parser/parse_error.ml | 10 + vendor/odoc-parser/syntax.ml | 459 +++++++++++++++++++-- vendor/odoc-parser/token.ml | 27 +- 20 files changed, 1879 insertions(+), 169 deletions(-) create mode 100644 test/passing/refs.default/odoc.mli.err create mode 100644 test/passing/refs.default/odoc.mli.ref create mode 100644 test/passing/refs.janestreet/odoc.mli.err create mode 100644 test/passing/refs.janestreet/odoc.mli.ref create mode 100644 test/passing/refs.ocamlformat/odoc.mli.err create mode 100644 test/passing/refs.ocamlformat/odoc.mli.ref create mode 100644 test/passing/tests/odoc.mli create mode 100644 test/passing/tests/odoc.mli.opts diff --git a/CHANGES.md b/CHANGES.md index 4b0887f300..02087a21e5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -56,6 +56,8 @@ profile. This started with version 0.26.0. - Added back the flag `--disable-outside-detected-project` (#2439, @gpetiot) It was removed in version 0.22. +- Support newer Odoc syntax (#2631, @Julow) + ### Changed - \* Consistent formatting of comments (#2371, #2550, @Julow) diff --git a/lib/Docstring.ml b/lib/Docstring.ml index 5b070302b2..04c890bd37 100644 --- a/lib/Docstring.ml +++ b/lib/Docstring.ml @@ -9,6 +9,7 @@ (* *) (**************************************************************************) +module Ast = Ocamlformat_odoc_parser.Ast module Odoc_parser = Ocamlformat_odoc_parser.Odoc_parser let parse ~loc text = @@ -56,8 +57,6 @@ let odoc_reference = ign_loc str let option f fmt = function Some v -> f fmt v | None -> () -let pair fmt_a fmt_b fmt (a, b) = fpf fmt "(%a,%a)" fmt_a a fmt_b b - let odoc_style fmt = function | `Bold -> fpf fmt "Bold" | `Italic -> fpf fmt "Italic" @@ -88,15 +87,31 @@ let rec odoc_inline_element fmt = function and odoc_inline_elements fmt elems = list (ign_loc odoc_inline_element) fmt elems +let light_heavy_to_string = function `Light -> "Light" | `Heavy -> "Heavy" + +let alignment_to_string = function + | `Left -> "Left" + | `Right -> "Right" + | `Center -> "Center" + +let header_data_to_string = function `Header -> "Header" | `Data -> "Data" + let rec odoc_nestable_block_element c fmt = function | `Paragraph elms -> fpf fmt "Paragraph(%a)" odoc_inline_elements elms - | `Code_block (metadata, txt) -> - let txt = Odoc_parser.Loc.value txt in - let txt = c.normalize_code txt in - let fmt_metadata = - option (pair (ign_loc str) (option (ign_loc str))) + | `Code_block (b : Ast.code_block) -> + let fmt_metadata fmt (m : Ast.code_block_meta) = + fpf fmt "(%a, %a)" (ign_loc str) m.language + (option (ign_loc str)) + m.tags in - fpf fmt "Code_block(%a, %a)" fmt_metadata metadata str txt + let fmt_content = + ign_loc (fun fmt s -> str fmt (c.normalize_code s)) + in + let fmt_output = + option (list (ign_loc (odoc_nestable_block_element c))) + in + fpf fmt "Code_block(%a, %a, %a, %a)" (option fmt_metadata) b.meta + (option str) b.delimiter fmt_content b.content fmt_output b.output | `Math_block txt -> fpf fmt "Math_block(%a)" str txt | `Verbatim txt -> fpf fmt "Verbatim(%a)" str txt | `Modules mods -> fpf fmt "Modules(%a)" (list odoc_reference) mods @@ -106,6 +121,18 @@ let rec odoc_nestable_block_element c fmt = function fpf fmt "Item(%a)" (odoc_nestable_block_elements c) elems in fpf fmt "List(%s,%a)" ord (list list_item) items + | `Table ((grid, alignment), syntax) -> + let pp_align fmt aln = fpf fmt "%s" (alignment_to_string aln) in + let pp_cell fmt (elems, header) = + fpf fmt "(%a,%s)" + (odoc_nestable_block_elements c) + elems + (header_data_to_string header) + in + let pp_grid = list (list pp_cell) in + let pp_alignment = option (list (option pp_align)) in + fpf fmt "Table((%a,%a),%s)" pp_grid grid pp_alignment alignment + (light_heavy_to_string syntax) and odoc_nestable_block_elements c fmt elems = list (ign_loc (odoc_nestable_block_element c)) fmt elems @@ -135,6 +162,7 @@ let odoc_tag c fmt = function | `Inline -> fpf fmt "Inline" | `Open -> fpf fmt "Open" | `Closed -> fpf fmt "Closed" + | `Hidden -> fpf fmt "Hidden" let odoc_block_element c fmt = function | `Heading (lvl, lbl, content) -> diff --git a/lib/Fmt_odoc.ml b/lib/Fmt_odoc.ml index 15ee9250f5..16eb451530 100644 --- a/lib/Fmt_odoc.ml +++ b/lib/Fmt_odoc.ml @@ -82,8 +82,8 @@ let split_on_whitespaces = String.split_on_chars ~on:['\t'; '\n'; '\011'; '\012'; '\r'; ' '] (** Escape special characters and normalize whitespaces *) -let str_normalized ?(escape = escape_all) c s = - if c.conf.fmt_opts.wrap_docstrings.v then +let str_normalized ?(escape = escape_all) ~wrap s = + if wrap then split_on_whitespaces s |> List.filter ~f:(Fn.non String.is_empty) |> fun s -> list s space_break (fun s -> escape s |> str) @@ -104,45 +104,6 @@ let fmt_verbatim_block ~loc s = in hvbox 0 (wrap (str "{v") (str "v}") content) -let fmt_metadata (lang, meta) = - let fmt_meta meta = str " " $ str meta in - str "@" $ ign_loc ~f:str lang $ opt meta (ign_loc ~f:fmt_meta) - -let fmt_code_block c s1 s2 = - let wrap_code x = - str "{" $ opt s1 fmt_metadata $ str "[" $ break 1000 2 $ x $ space_break - $ str "]}" - in - let fmt_line ~first ~last:_ l = - let l = String.rstrip l in - if first then str l - else if String.length l = 0 then str "\n" - else cut_break $ str l - in - let fmt_code s = - let lines = String.split_lines s in - let box = match lines with _ :: _ :: _ -> vbox 0 | _ -> hvbox 0 in - box (wrap_code (vbox 0 (list_fl lines fmt_line))) - in - let {Loc.location; value= original} = s2 in - match s1 with - | Some ({value= "ocaml"; _}, _) | None -> ( - (* [offset] doesn't take into account code blocks nested into lists. *) - match c.fmt_code c.conf ~offset:2 ~set_margin:true original with - | Ok formatted -> formatted |> Format_.asprintf "%a" Fmt.eval |> fmt_code - | Error (`Msg message) -> - ( match message with - | "" -> () - | _ when Option.is_none s1 -> () - | _ -> - if not c.conf.opr_opts.quiet.v then - Docstring.warn Stdlib.Format.err_formatter - { location - ; message= Format.sprintf "invalid code block: %s" message } - ) ; - fmt_code original ) - | Some _ -> fmt_code original - let fmt_code_span s = wrap (str "[") (str "]") (str (escape_balanced_brackets s)) @@ -208,35 +169,78 @@ let list_block_elem c elems f = in f elem $ break ) +module Light_table = struct + (** A table type that can safely be formatted using the light syntax. *) + + type cell = inline_element with_location list + + type row = cell list + + (** [header_rows, alignments, data_rows] *) + type t = row list * alignment option list option * row list + + module Ast = Ocamlformat_odoc_parser.Ast + + (** Returns [None] if the given table cannot be safely formatted using the + light syntax. This might return [None] for tables that were using the + light syntax in the original source. *) + let of_table : Ast.table -> t option = + let exception Table_not_safe in + let extract_cell ((elems, _header) : _ Ast.cell) = + match elems with + | [] -> [] + | [{value= `Paragraph inline_elems; _}] -> inline_elems + | _ -> raise Table_not_safe + in + let extract_row (row : _ Ast.row) = List.map ~f:extract_cell row in + let is_header_cell (_, h) = + match h with `Header -> true | `Data -> false + in + let rec extract header : _ Ast.grid -> _ = function + | hd :: tl when List.exists hd ~f:is_header_cell -> + extract (extract_row hd :: header) tl + | data_rows -> (List.rev header, List.map ~f:extract_row data_rows) + in + function + | _, `Heavy -> None + | (grid, alignments), `Light -> ( + try + let header, data = extract [] grid in + Some (header, alignments, data) + with Table_not_safe -> None ) +end + let non_wrap_space sp = if String.contains sp '\n' then force_newline else str sp -let rec fmt_inline_elements c elements = +let fmt_block_markup ?force_break:(fb = false) tag content = + let initial_break = if fb then force_break else space_break in + hvbox 2 + (str "{" $ str tag $ initial_break $ content $ break 1 ~-2 $ str "}") + +let rec fmt_inline_elements c ~wrap elements = let rec aux = function | [] -> noop | `Space sp :: `Word (("-" | "+") as w) :: t -> (* Escape lines starting with '+' or '-'. *) - fmt_or c.conf.fmt_opts.wrap_docstrings.v + fmt_or wrap (cbreak ~fits:("", 1, "") ~breaks:("", 0, "\\")) (non_wrap_space sp) $ str w $ aux t - | `Space sp :: t -> - fmt_or c.conf.fmt_opts.wrap_docstrings.v space_break - (non_wrap_space sp) - $ aux t + | `Space sp :: t -> fmt_or wrap space_break (non_wrap_space sp) $ aux t | `Word w :: t -> fmt_if (String.is_prefix ~prefix:"@" w) (str "\\") - $ str_normalized c w $ aux t + $ str_normalized ~wrap w $ aux t | `Code_span s :: t -> fmt_code_span s $ aux t | `Math_span s :: t -> fmt_math_span s $ aux t | `Raw_markup (lang, s) :: t -> let lang = match lang with - | Some l -> str_normalized c l $ str ":" + | Some l -> str_normalized ~wrap l $ str ":" | None -> noop in (* todo check this was an escape sequence *) - wrap (str "{%") (str "%}") (lang $ str s) $ aux t + str "{%" $ (lang $ str s) $ str "%}" $ aux t | `Styled (style, elems) :: t -> let s = match style with @@ -246,41 +250,43 @@ let rec fmt_inline_elements c elements = | `Superscript -> "^" | `Subscript -> "_" in - hovbox_if c.conf.fmt_opts.wrap_docstrings.v + hovbox_if wrap (1 + String.length s + 1) - (fmt_markup_with_inline_elements c ~force_space:true - (str_normalized c s) elems ) + (fmt_markup_with_inline_elements c ~wrap ~force_space:true + (str_normalized ~wrap s) elems ) $ aux t | `Reference (_kind, rf, txt) :: t -> let rf = str "{!" $ fmt_reference rf $ str "}" in - fmt_link_or_reference c rf txt $ aux t + fmt_link_or_reference c ~wrap rf txt $ aux t | `Link (url, txt) :: t -> - let url = str "{:" $ str_normalized c url $ str "}" in - fmt_link_or_reference c url txt $ aux t + let url = str "{:" $ str_normalized ~wrap url $ str "}" in + fmt_link_or_reference c ~wrap url txt $ aux t in aux (List.map elements ~f:(ign_loc ~f:Fn.id)) -and fmt_link_or_reference c tag txt = +and fmt_link_or_reference c ~wrap tag txt = match txt with | [] -> tag | _ :: _ -> - hovbox_if c.conf.fmt_opts.wrap_docstrings.v 1 - (fmt_markup_with_inline_elements c tag txt) + hovbox_if wrap 1 (fmt_markup_with_inline_elements c ~wrap tag txt) (** Format a markup of the form [{tag elems}]. If [force_space] is [true], a space will be added after the tag, even if it's not present in the source. *) -and fmt_markup_with_inline_elements c ?(force_space = false) tag elems = +and fmt_markup_with_inline_elements c ~wrap ?(force_space = false) tag elems + = let leading_space, elems = if force_space then (str " ", drop_leading_spaces elems) else (noop, elems) in - str "{" $ tag $ leading_space $ fmt_inline_elements c elems $ str "}" + str "{" $ tag $ leading_space $ fmt_inline_elements c ~wrap elems $ str "}" and fmt_nestable_block_element c elm = match elm.Loc.value with - | `Paragraph elems -> hovbox 0 (fmt_inline_elements c elems) - | `Code_block (s1, s2) -> fmt_code_block c s1 s2 + | `Paragraph elems -> + hovbox 0 + (fmt_inline_elements c ~wrap:c.conf.fmt_opts.wrap_docstrings.v elems) + | `Code_block code_block -> fmt_code_block c code_block | `Math_block s -> fmt_math_block s | `Verbatim s -> fmt_verbatim_block ~loc:elm.location s | `Modules mods -> @@ -292,6 +298,7 @@ and fmt_nestable_block_element c elm = | `List (k, _syntax, items) when list_should_use_heavy_syntax items -> fmt_list_heavy c k items | `List (k, _syntax, items) -> fmt_list_light c k items + | `Table table -> fmt_table c table and fmt_list_heavy c kind items = let fmt_item elems = @@ -315,7 +322,103 @@ and fmt_list_light c kind items = let fmt_item elems = line_start $ hovbox 0 (fmt_nestable_block_elements c elems) in - vbox 0 (list items cut_break fmt_item) + vbox 0 (list items force_break fmt_item) + +and fmt_table_heavy c (((grid, alignments), _) : table) = + let fmt_cell (elems, header) = + let cell_tag = match header with `Header -> "th" | `Data -> "td" in + fmt_block_markup cell_tag (fmt_nestable_block_elements c elems) + in + let fmt_row row = fmt_block_markup "tr" (list row space_break fmt_cell) in + ignore alignments ; + fmt_block_markup "table" (list grid force_break fmt_row) + +and fmt_table_light c (header, alignments, data) = + let fmt_align = function + | Some `Left -> str ":--" + | Some `Center -> str ":-:" + | Some `Right -> str "--:" + | None -> str "---" + in + let has_header = not (List.is_empty header) + and has_data = not (List.is_empty data) in + let fmt_alignment_row = + opt alignments (fun aligns -> + str "|" + $ list aligns (str "|") fmt_align + $ str "|" + $ fmt_if has_data force_break ) + in + (* Don't allow inline elements to wrap, meaning the line won't break if the + row breaks the margin. *) + let fmt_cell elems = fmt_inline_elements c ~wrap:false elems in + let fmt_row row = str "| " $ list row (str " | ") fmt_cell $ str " |" in + let fmt_rows rows = list rows force_break fmt_row in + let fmt_grid = + fmt_rows header + $ fmt_if has_header force_break + $ fmt_alignment_row $ fmt_rows data + in + fmt_block_markup ~force_break:true "t" (vbox 0 fmt_grid) + +and fmt_table c table = + match Light_table.of_table table with + | Some light -> fmt_table_light c light + | None -> fmt_table_heavy c table + +and fmt_code_block c (b : code_block) = + let content = + let content = b.content.value in + match b.meta with + | Some {language= {value= "ocaml"; _}; _} | None -> ( + (* [offset] doesn't take into account code blocks nested into lists. *) + match c.fmt_code c.conf ~offset:2 ~set_margin:true content with + | Ok formatted -> formatted |> Format_.asprintf "%a" Fmt.eval + | Error (`Msg message) -> + if + (not (String.is_empty message)) + && Option.is_some b.meta + && not c.conf.opr_opts.quiet.v + then + Docstring.warn Stdlib.Format.err_formatter + { location= b.content.location + ; message= Format.sprintf "invalid code block: %s" message } ; + content ) + | Some _ -> content + in + let fmt_line ~first ~last:_ l = + let l = String.rstrip l in + if first then str l + else if String.length l = 0 then str_as 0 "\n" + else force_break $ str l + in + let fmt_code s = + let lines = String.split_lines s in + vbox 0 (list_fl lines fmt_line) + in + let delim = opt b.delimiter str in + let opening = + let meta = + opt b.meta (fun meta -> + str "@" + $ ign_loc ~f:str meta.language + $ opt meta.tags (fun tags -> str " " $ ign_loc ~f:str tags) ) + in + str "{" $ delim $ meta $ str "[" + in + let output_or_closing = + match b.output with + | Some elems -> + hvbox 2 + ( str "]" $ delim $ str "[" $ force_break + $ fmt_nestable_block_elements c elems + $ fmt_if (not (List.is_empty elems)) (break 1000 ~-2) + $ str "]}" ) + | None -> str "]" $ delim $ str "}" + in + hvbox 2 + ( opening $ force_break $ fmt_code content $ break 1 ~-2 + $ output_or_closing ) and fmt_nestable_block_elements c elems = list_block_elem c elems (fmt_nestable_block_element c) @@ -347,6 +450,7 @@ let fmt_tag c = function | `Inline -> fmt_tag_args c "inline" | `Open -> fmt_tag_args c "open" | `Closed -> fmt_tag_args c "closed" + | `Hidden -> fmt_tag_args c "hidden" | `Canonical ref -> fmt_tag_args c "canonical" ~arg:(fmt_reference ref) let fmt_block_element c elm = @@ -356,12 +460,14 @@ let fmt_block_element c elm = let lvl = Int.to_string lvl in let lbl = match lbl with - | Some lbl -> str ":" $ str_normalized c lbl + | Some lbl -> str ":" $ str_normalized ~wrap:false lbl | None -> noop in let tag = str lvl $ lbl in hovbox 0 - (fmt_markup_with_inline_elements c ~force_space:true tag elems) + (fmt_markup_with_inline_elements c + ~wrap:c.conf.fmt_opts.wrap_docstrings.v ~force_space:true tag + elems ) | #nestable_block_element as value -> hovbox 0 (fmt_nestable_block_element c {elm with value}) diff --git a/test/passing/gen/dune.inc b/test/passing/gen/dune.inc index 2643a7c96d..61de59eb87 100644 --- a/test/passing/gen/dune.inc +++ b/test/passing/gen/dune.inc @@ -3637,6 +3637,21 @@ (alias runtest) (action (diff ocp_indent_options.ml.err ocp_indent_options.ml.stderr))) +(rule + (deps .ocamlformat dune-project) + (action + (with-stdout-to odoc.mli.stdout + (with-stderr-to odoc.mli.stderr + (run %{bin:ocamlformat} --name odoc.mli --margin-check --parse-docstrings %{dep:../tests/odoc.mli}))))) + +(rule + (alias runtest) + (action (diff odoc.mli.ref odoc.mli.stdout))) + +(rule + (alias runtest) + (action (diff odoc.mli.err odoc.mli.stderr))) + (rule (deps .ocamlformat dune-project) (action diff --git a/test/passing/refs.default/odoc.mli.err b/test/passing/refs.default/odoc.mli.err new file mode 100644 index 0000000000..13703041aa --- /dev/null +++ b/test/passing/refs.default/odoc.mli.err @@ -0,0 +1 @@ +Warning: odoc.mli:130 exceeds the margin diff --git a/test/passing/refs.default/odoc.mli.ref b/test/passing/refs.default/odoc.mli.ref new file mode 100644 index 0000000000..da4ce50a09 --- /dev/null +++ b/test/passing/refs.default/odoc.mli.ref @@ -0,0 +1,251 @@ +(** Test cases taken from Odoc's testsuite *) + +(** {table } + {table {tr } } + {table {tr {td } } } + {table {tr {th } } } + {table + {tr {th } } + {tr {th } } + {tr {td } } + } + + {table + {tr {th xxx } {th yyy } } + {tr {td aaaa bbb ccc {i ddd} } {td {table {tr {td } } } } } + {tr + {td + - aaa + - bbb + - ccc + } + {td + {t + | x | y | z | + |---|---|---| + | 1 | 2 | 3 | + } + } + } + } + + {t + + } + + {t + | a | + } + + {t + | a | *b* | + | *c | d* | + } + + {t + | `a | ` | + } + + {t + |---|---| + | x | y | + } + + {t + | x | y | + | x | y | + } + + {t + |---|---| + } + + {t + | x | y | + |---|---| + } + + {t + | a | b | c | d | + |---|:--|--:|:-:| + } + + {t + | a | b | c | d | + |---|:--|--:|:-:| + | a | b | c | d | + } + + {t + | a | b | c | d | + |---|---|---|---| + | a | b | c | d | + } + + {t + | {i a} {:google.com} \t | | {m b} {e c} {% xyz %} | {b d} [foo] | + |---|---|---|---| + } + + {t + | a | b | c | d | + |---|--:|:--|:-:| + } + + {t + | | a | b | + |:--|--:| + | c | d | + | cc | dd | + | -: | :-: | + | e | f | + | g | h | | + } + + {t + | x | y | + |---|---| + | x | y | z | + } + + {t + | x | y | + |---|---| + | x | + } + + {t + | Header and other word | + |---| + | cell and other words | + } + + {t + | Header other word | + |---| + | Header other word | + } + + {t + | foo | bar | + | {i foooooooooooooooooooooooooooo} foooooooooooooooooooooooo fooooooooooooooooooooooo | bar | + } *) + +(** {[ + foo + ]} + {[ + foo + ]} + + {[ + foo bar + ]} + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + {[ + ]} + {[ + foo + ]} + {[ + ] + ]} + {[ + foo]]bar + ]} + + {[ + (** foo *) + let bar = () + ]} + + {@ocaml env=f1 version>=4.06[ + code goes here + ]} + {delim@ocaml[ + foo + ]delim[ + output {b foo} + ]} + + {delim@ocaml[ + foo + ]delim[ + foo + {[ + bar + ]} + baz + ]} + + {[ + foo][output {b foo} + ]} + {@ocaml[ + foo][output {b foo} + ]} + {@ocaml[ + foo]unexpected[output {b foo} + ]} + {delim@ocaml[ + foo]wrong[output {b foo} + ]delim} + + {@ocaml[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + + {@ocaml kind=toplevel + env=e1[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + {delim@ocaml[ + foo + ]delim[ + ]} *) + +(** @author foo bar + @version foo bar + @see {i bar} + @since foo bar + @before foo {i bar} + @deprecated foo {i bar} + @param foo {i bar} + @raise foo {i bar} + @return foo {i bar} + @inline + @open + @closed + @hidden + @canonical ref *) + +(** {!foo} bar{!foo} {!foo}bar {!val:foo} {!} {!( * )} {!:foo} {!val:} + {!"my-name"} {!"}"} {!( } )} {{!foo} bar} {{!foo} {b bar}} *) diff --git a/test/passing/refs.janestreet/odoc.mli.err b/test/passing/refs.janestreet/odoc.mli.err new file mode 100644 index 0000000000..13703041aa --- /dev/null +++ b/test/passing/refs.janestreet/odoc.mli.err @@ -0,0 +1 @@ +Warning: odoc.mli:130 exceeds the margin diff --git a/test/passing/refs.janestreet/odoc.mli.ref b/test/passing/refs.janestreet/odoc.mli.ref new file mode 100644 index 0000000000..94a0d862a7 --- /dev/null +++ b/test/passing/refs.janestreet/odoc.mli.ref @@ -0,0 +1,262 @@ +(** Test cases taken from Odoc's testsuite *) + +(** {table } + {table {tr } } + {table {tr {td } } } + {table {tr {th } } } + {table + {tr {th } } + {tr {th } } + {tr {td } } + } + + {table + {tr {th xxx } {th yyy } } + {tr {td aaaa bbb ccc {i ddd} } {td {table {tr {td } } } } } + {tr + {td + - aaa + - bbb + - ccc + } + {td + {t + | x | y | z | + |---|---|---| + | 1 | 2 | 3 | + } + } + } + } + + {t + + } + + {t + | a | + } + + {t + | a | *b* | + | *c | d* | + } + + {t + | `a | ` | + } + + {t + |---|---| + | x | y | + } + + {t + | x | y | + | x | y | + } + + {t + |---|---| + } + + {t + | x | y | + |---|---| + } + + {t + | a | b | c | d | + |---|:--|--:|:-:| + } + + {t + | a | b | c | d | + |---|:--|--:|:-:| + | a | b | c | d | + } + + {t + | a | b | c | d | + |---|---|---|---| + | a | b | c | d | + } + + {t + | {i a} {:google.com} \t | | {m b} {e c} {% xyz %} | {b d} [foo] | + |---|---|---|---| + } + + {t + | a | b | c | d | + |---|--:|:--|:-:| + } + + {t + | | a | b | + |:--|--:| + | c | d | + | cc | dd | + | -: | :-: | + | e | f | + | g | h | | + } + + {t + | x | y | + |---|---| + | x | y | z | + } + + {t + | x | y | + |---|---| + | x | + } + + {t + | Header and other word | + |---| + | cell and other words | + } + + {t + | Header other word | + |---| + | Header other word | + } + + {t + | foo | bar | + | {i foooooooooooooooooooooooooooo} foooooooooooooooooooooooo fooooooooooooooooooooooo | bar | + } *) + +(** {[ + foo + ]} + {[ + foo + ]} + + {[ + foo bar + ]} + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + {[ + ]} + {[ + foo + ]} + {[ + ] + ]} + {[ + foo]]bar + ]} + + {[ + (** foo *) + let bar = () + ]} + + {@ocaml env=f1 version>=4.06[ + code goes here + ]} + {delim@ocaml[ + foo + ]delim[ + output {b foo} + ]} + + {delim@ocaml[ + foo + ]delim[ + foo + {[ + bar + ]} + baz + ]} + + {[ + foo][output {b foo} + ]} + {@ocaml[ + foo][output {b foo} + ]} + {@ocaml[ + foo]unexpected[output {b foo} + ]} + {delim@ocaml[ + foo]wrong[output {b foo} + ]delim} + + {@ocaml[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + + {@ocaml kind=toplevel + env=e1[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + {delim@ocaml[ + foo + ]delim[ + ]} *) + +(** @author foo bar + @version foo bar + @see {i bar} + @since foo bar + @before foo {i bar} + @deprecated foo {i bar} + @param foo {i bar} + @raise foo {i bar} + @return foo {i bar} + @inline + @open + @closed + @hidden + @canonical ref *) + +(** {!foo} + bar{!foo} + {!foo}bar + {!val:foo} + {!} + {!( * )} + {!:foo} + {!val:} + {!"my-name"} + {!"}"} + {!( } )} + {{!foo} bar} + {{!foo} {b bar}} *) diff --git a/test/passing/refs.ocamlformat/odoc.mli.err b/test/passing/refs.ocamlformat/odoc.mli.err new file mode 100644 index 0000000000..13703041aa --- /dev/null +++ b/test/passing/refs.ocamlformat/odoc.mli.err @@ -0,0 +1 @@ +Warning: odoc.mli:130 exceeds the margin diff --git a/test/passing/refs.ocamlformat/odoc.mli.ref b/test/passing/refs.ocamlformat/odoc.mli.ref new file mode 100644 index 0000000000..da4ce50a09 --- /dev/null +++ b/test/passing/refs.ocamlformat/odoc.mli.ref @@ -0,0 +1,251 @@ +(** Test cases taken from Odoc's testsuite *) + +(** {table } + {table {tr } } + {table {tr {td } } } + {table {tr {th } } } + {table + {tr {th } } + {tr {th } } + {tr {td } } + } + + {table + {tr {th xxx } {th yyy } } + {tr {td aaaa bbb ccc {i ddd} } {td {table {tr {td } } } } } + {tr + {td + - aaa + - bbb + - ccc + } + {td + {t + | x | y | z | + |---|---|---| + | 1 | 2 | 3 | + } + } + } + } + + {t + + } + + {t + | a | + } + + {t + | a | *b* | + | *c | d* | + } + + {t + | `a | ` | + } + + {t + |---|---| + | x | y | + } + + {t + | x | y | + | x | y | + } + + {t + |---|---| + } + + {t + | x | y | + |---|---| + } + + {t + | a | b | c | d | + |---|:--|--:|:-:| + } + + {t + | a | b | c | d | + |---|:--|--:|:-:| + | a | b | c | d | + } + + {t + | a | b | c | d | + |---|---|---|---| + | a | b | c | d | + } + + {t + | {i a} {:google.com} \t | | {m b} {e c} {% xyz %} | {b d} [foo] | + |---|---|---|---| + } + + {t + | a | b | c | d | + |---|--:|:--|:-:| + } + + {t + | | a | b | + |:--|--:| + | c | d | + | cc | dd | + | -: | :-: | + | e | f | + | g | h | | + } + + {t + | x | y | + |---|---| + | x | y | z | + } + + {t + | x | y | + |---|---| + | x | + } + + {t + | Header and other word | + |---| + | cell and other words | + } + + {t + | Header other word | + |---| + | Header other word | + } + + {t + | foo | bar | + | {i foooooooooooooooooooooooooooo} foooooooooooooooooooooooo fooooooooooooooooooooooo | bar | + } *) + +(** {[ + foo + ]} + {[ + foo + ]} + + {[ + foo bar + ]} + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + foo bar + ]} + + {[ + {[ + ]} + {[ + foo + ]} + {[ + ] + ]} + {[ + foo]]bar + ]} + + {[ + (** foo *) + let bar = () + ]} + + {@ocaml env=f1 version>=4.06[ + code goes here + ]} + {delim@ocaml[ + foo + ]delim[ + output {b foo} + ]} + + {delim@ocaml[ + foo + ]delim[ + foo + {[ + bar + ]} + baz + ]} + + {[ + foo][output {b foo} + ]} + {@ocaml[ + foo][output {b foo} + ]} + {@ocaml[ + foo]unexpected[output {b foo} + ]} + {delim@ocaml[ + foo]wrong[output {b foo} + ]delim} + + {@ocaml[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + + {@ocaml kind=toplevel + env=e1[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + + {@ocaml kind=toplevel[ + code + ]} + {delim@ocaml[ + foo + ]delim[ + ]} *) + +(** @author foo bar + @version foo bar + @see {i bar} + @since foo bar + @before foo {i bar} + @deprecated foo {i bar} + @param foo {i bar} + @raise foo {i bar} + @return foo {i bar} + @inline + @open + @closed + @hidden + @canonical ref *) + +(** {!foo} bar{!foo} {!foo}bar {!val:foo} {!} {!( * )} {!:foo} {!val:} + {!"my-name"} {!"}"} {!( } )} {{!foo} bar} {{!foo} {b bar}} *) diff --git a/test/passing/tests/odoc.mli b/test/passing/tests/odoc.mli new file mode 100644 index 0000000000..63c55fc434 --- /dev/null +++ b/test/passing/tests/odoc.mli @@ -0,0 +1,237 @@ +(** Test cases taken from Odoc's testsuite *) + +(** + {table } + {table {tr } } + {table {tr {td}}} + {table {tr {th}}} + {table {tr {th}} {tr {th}} {tr {td}}} + + {table + {tr + {th xxx} + {th yyy} + } + {tr + {td aaaa bbb ccc {i ddd} + } + {td + {table {tr {td}}} + } + } + {tr + {td + - aaa + - bbb + - ccc + } + {td + {t + x | y | z + --|---|-- + 1 | 2 | 3 + } + } + } + } + + {t } + + {t + | a | + } + + {t + |a| *b*| + |*c| d* | + } + + {t + | `a |` + } + + {t + |---|---| + | x | y | + } + + {t + | x | y | + | x | y | + } + + {t + |--|--| + } + + {t + | x | y | + |---|---| + } + + {t + | a | b | c | d | + |---|:--|--:|:-:| + } + + {t + a | b | c | d + ---|:--|--:|:-: + a | b | c | d + } + + {t + + | a | b | c | d | + + |---|---|---|---| + + | a | b | c | d | + + } + + {t + | {i a} {:google.com} \t | | {m b} {e c} {% xyz %} | {b d} [foo] | + |---|---|---|---| + } + + {t + | a | b |c| d | + |---|--:|:--|:-:| + } + + {t + ||a|b| + |:-|---:| + |c|d| + |cc|dd| + |-:|:-:| + |e|f| + |g|h|| + } + + {t + | x | y | + |---|---| + | x | y | z | + } + + {t + | x | y | + |---|---| + x + } + + {t + | Header and other word | + |-----------------------| + | cell and other words | + } + + {t + | Header other word | + |-------------------| + | Header other word | + } + + {t + | foo | bar | + | {i foooooooooooooooooooooooooooo} foooooooooooooooooooooooo fooooooooooooooooooooooo | bar | + } +*) + +(** + {[foo]} + {[ foo]} + + {[foo bar]} + {[foo + bar]} + + {[foo + + bar]} + + {[ foo + + bar]} + + {[ + foo + bar + ]} + + {[{[]} + {[foo]}]} + {[]]} + {[foo]]bar]} + + {[ + (** foo *) + let bar = () + ]} + + {@ocaml env=f1 version>=4.06 [code goes here]} + {delim@ocaml[foo]delim[output {b foo}]} + + {delim@ocaml[ + foo + ]delim[ + foo + {[ bar ]} + baz + ]} + + {[foo][output {b foo}]} + {@ocaml[foo][output {b foo}]} + {@ocaml[foo]unexpected[output {b foo}]} + {delim@ocaml[foo]wrong[output {b foo}]delim} + + {@ocaml + [ code ]} + + {@ocaml kind=toplevel + [ code ]} + + {@ocaml kind=toplevel + env=e1[ code ]} + + {@ocaml + kind=toplevel[ code ]} + + {@ocaml kind=toplevel [ code ]} + {delim@ocaml[ foo ]delim[ ]} +*) + +(** + @author foo bar + @version foo bar + @see {i bar} + @since foo bar + @before foo {i bar} + @deprecated foo {i bar} + @param foo {i bar} + @raise foo {i bar} + @return foo {i bar} + @inline + @open + @closed + @hidden + @canonical ref +*) + +(** + {!foo} + bar{!foo} + {!foo}bar + {!val:foo} + {!} + {!( * )} + {!:foo} + {!val:} + {!"my-name"} + {!"}"} + {!( } )} + {{!foo} bar} + {{!foo} {b bar}} +*) diff --git a/test/passing/tests/odoc.mli.opts b/test/passing/tests/odoc.mli.opts new file mode 100644 index 0000000000..1e99f88e48 --- /dev/null +++ b/test/passing/tests/odoc.mli.opts @@ -0,0 +1 @@ +--parse-docstrings diff --git a/vendor/odoc-parser/ast.ml b/vendor/odoc-parser/ast.ml index 863f60c4a5..85c38931f1 100644 --- a/vendor/odoc-parser/ast.ml +++ b/vendor/odoc-parser/ast.ml @@ -9,6 +9,7 @@ type 'a with_location = 'a Loc.with_location type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ] +type alignment = [ `Left | `Center | `Right ] type reference_kind = [ `Simple | `With_text ] (** References in doc comments can be of two kinds: [{!simple}] or [{{!ref}With text}]. *) @@ -29,18 +30,33 @@ type inline_element = text. Similarly the [`Link] constructor has the link itself as first parameter and the second is the replacement text. *) -type nestable_block_element = +type 'a cell = 'a with_location list * [ `Header | `Data ] +type 'a row = 'a cell list +type 'a grid = 'a row list +type 'a abstract_table = 'a grid * alignment option list option + +type code_block_meta = { + language : string with_location; + tags : string with_location option; +} + +type code_block = { + meta : code_block_meta option; + delimiter : string option; + content : string with_location; + output : nestable_block_element with_location list option; +} + +and nestable_block_element = [ `Paragraph of inline_element with_location list - | `Code_block of - (string with_location * string with_location option) option - * string with_location - (* [(language tag * metadata option) option * content] *) + | `Code_block of code_block | `Verbatim of string | `Modules of string with_location list | `List of [ `Unordered | `Ordered ] * [ `Light | `Heavy ] * nestable_block_element with_location list list + | `Table of table | `Math_block of string (** @since 2.0.0 *) ] (** Some block elements may be nested within lists or tags, but not all. The [`List] constructor has a parameter of type [\[`Light | `Heavy\]]. @@ -48,8 +64,10 @@ type nestable_block_element = {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html#sss:ocamldoc-list}manual}). *) +and table = nestable_block_element abstract_table * [ `Light | `Heavy ] + type internal_tag = - [ `Canonical of string with_location | `Inline | `Open | `Closed ] + [ `Canonical of string with_location | `Inline | `Open | `Closed | `Hidden ] (** Internal tags are used to exercise fine control over the output of odoc. They are never rendered in the output *) diff --git a/vendor/odoc-parser/dune b/vendor/odoc-parser/dune index 812e56fcef..fafcd25f7e 100644 --- a/vendor/odoc-parser/dune +++ b/vendor/odoc-parser/dune @@ -6,5 +6,5 @@ (instrumentation (backend bisect_ppx)) (flags - (:standard -w -50)) - (libraries astring camlp-streams)) + (:standard -w -50 -open Ocamlformat_parser_shims)) + (libraries astring camlp-streams ocamlformat_parser_shims)) diff --git a/vendor/odoc-parser/lexer.mll b/vendor/odoc-parser/lexer.mll index d8eac89cf5..b86b972f4f 100644 --- a/vendor/odoc-parser/lexer.mll +++ b/vendor/odoc-parser/lexer.mll @@ -31,7 +31,7 @@ type math_kind = Inline | Block let math_constr kind x = - match kind with + match kind with | Inline -> `Math_span x | Block -> `Math_block x @@ -189,8 +189,6 @@ let reference_token start target = | "{{:" -> `Begin_link_with_replacement_text target | _ -> assert false - - let trim_leading_space_or_accept_whitespace input start_offset text = match text.[0] with | ' ' -> String.sub text 1 (String.length text - 1) @@ -219,18 +217,25 @@ let emit_verbatim input start_offset buffer = let t = trim_trailing_blank_lines t in emit input (`Verbatim t) ~start_offset -let emit_code_block ~start_offset input metadata c = - let c = trim_trailing_blank_lines c in +(* The locations have to be treated carefully in this function. We need to ensure that + the []`Code_block] location matches the entirety of the block including the terminator, + and the content location is precicely the location of the text of the code itself. + Note that the location reflects the content _without_ stripping of whitespace, whereas + the value of the content in the tree has whitespace stripped from the beginning, + and trailing empty lines removed. *) +let emit_code_block ~start_offset content_offset input metadata delim terminator c has_results = + let c = Buffer.contents c |> trim_trailing_blank_lines in + let content_location = input.offset_to_location content_offset in let c = with_location_adjustments - (fun _ location c -> - let first_line_offset = location.start.column in + (fun _ _location c -> + let first_line_offset = content_location.column in trim_leading_whitespace ~first_line_offset c) input c in let c = trim_leading_blank_lines c in - let c = with_location_adjustments ~adjust_end_by:"]}" (fun _ -> Loc.at) input c in - emit ~start_offset input (`Code_block (metadata, c)) + let c = with_location_adjustments ~adjust_end_by:terminator ~start_offset:content_offset (fun _ -> Loc.at) input c in + emit ~start_offset input (`Code_block (metadata, delim, c, has_results)) let heading_level input level = if String.length level >= 2 && level.[0] = '0' then begin @@ -239,12 +244,13 @@ let heading_level input level = end; int_of_string level -} - +let buffer_add_lexeme buffer lexbuf = + Buffer.add_string buffer (Lexing.lexeme lexbuf) +} let markup_char = - ['{' '}' '[' ']' '@'] + ['{' '}' '[' ']' '@' '|'] let space_char = [' ' '\t' '\n' '\r'] let bullet_char = @@ -261,18 +267,74 @@ let newline = let reference_start = "{!" | "{{!" | "{:" | "{{:" -let code_block_text = - ([^ ']'] | ']'+ [^ ']' '}'])* ']'* let raw_markup = ([^ '%'] | '%'+ [^ '%' '}'])* '%'* + let raw_markup_target = ([^ ':' '%'] | '%'+ [^ ':' '%' '}'])* '%'* let language_tag_char = ['a'-'z' 'A'-'Z' '0'-'9' '_' '-' ] +let delim_char = + ['a'-'z' 'A'-'Z' '0'-'9' '_' ] + +rule reference_paren_content input start ref_offset start_offset depth_paren + buffer = + parse + | '(' + { + buffer_add_lexeme buffer lexbuf ; + reference_paren_content input start ref_offset start_offset + (depth_paren + 1) buffer lexbuf } + | ')' + { + buffer_add_lexeme buffer lexbuf ; + if depth_paren = 0 then + reference_content input start ref_offset buffer lexbuf + else + reference_paren_content input start ref_offset start_offset + (depth_paren - 1) buffer lexbuf } + | eof + { warning + input + ~start_offset + (Parse_error.unclosed_bracket ~bracket:"(") ; + Buffer.contents buffer } + | _ + { + buffer_add_lexeme buffer lexbuf ; + reference_paren_content input start ref_offset start_offset depth_paren + buffer lexbuf } -rule token input = parse +and reference_content input start start_offset buffer = parse + | '}' + { + Buffer.contents buffer + } + | '(' + { + buffer_add_lexeme buffer lexbuf ; + reference_paren_content input start start_offset + (Lexing.lexeme_start lexbuf) 0 buffer lexbuf + } + | '"' [^ '"']* '"' + { + buffer_add_lexeme buffer lexbuf ; + reference_content input start start_offset buffer lexbuf + } + | eof + { warning + input + ~start_offset + (Parse_error.unclosed_bracket ~bracket:start) ; + Buffer.contents buffer } + | _ + { + buffer_add_lexeme buffer lexbuf ; + reference_content input start start_offset buffer lexbuf } + +and token input = parse | horizontal_space* eof { emit input `End } @@ -289,6 +351,9 @@ rule token input = parse | (horizontal_space* (newline horizontal_space*)? as p) '}' { emit input `Right_brace ~adjust_start_by:p } + | '|' + { emit input `Bar } + | word_char (word_char | bullet_char | '@')* | bullet_char (word_char | bullet_char | '@')+ as w { emit input (`Word (unescape_word w)) } @@ -311,13 +376,13 @@ rule token input = parse | "{e" { emit input (`Begin_style `Emphasis) } - + | "{L" { emit input (`Begin_paragraph_style `Left) } - + | "{C" { emit input (`Begin_paragraph_style `Center) } - + | "{R" { emit input (`Begin_paragraph_style `Right) } @@ -326,24 +391,30 @@ rule token input = parse | "{_" { emit input (`Begin_style `Subscript) } - + | "{math" space_char { math Block (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf } - + | "{m" horizontal_space { math Inline (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf } - + | "{!modules:" ([^ '}']* as modules) '}' { emit input (`Modules modules) } - | (reference_start as start) ([^ '}']* as target) '}' - { emit input (reference_token start target) } + | (reference_start as start) + { + let start_offset = Lexing.lexeme_start lexbuf in + let target = + reference_content input start start_offset (Buffer.create 16) lexbuf + in + let token = (reference_token start target) in + emit ~start_offset input token } | "{[" - { code_block (Lexing.lexeme_start lexbuf) None input lexbuf } + { code_block false (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf } - | (("{@" horizontal_space*) as prefix) (language_tag_char+ as lang_tag_) + | (("{" (delim_char* as delim) "@" horizontal_space*) as prefix) (language_tag_char+ as lang_tag_) { let start_offset = Lexing.lexeme_start lexbuf in let lang_tag = @@ -351,23 +422,33 @@ rule token input = parse in let emit_truncated_code_block () = let empty_content = with_location_adjustments (fun _ -> Loc.at) input "" in - emit ~start_offset input (`Code_block (Some (lang_tag, None), empty_content)) + emit ~start_offset input (`Code_block (Some (lang_tag, None), delim, empty_content, false)) + in + (* Disallow result block sections for code blocks without a delimiter. + This avoids the surprising parsing of '][' ending the code block. *) + let allow_result_block = delim <> "" in + let code_block_with_metadata metadata = + let content_offset = Lexing.lexeme_end lexbuf in + let metadata = Some (lang_tag, metadata) in + let prefix = Buffer.create 256 in + code_block allow_result_block start_offset content_offset metadata + prefix delim input lexbuf in match code_block_metadata_tail input lexbuf with - | `Ok metadata -> code_block start_offset (Some (lang_tag, metadata)) input lexbuf + | `Ok metadata -> code_block_with_metadata metadata | `Eof -> warning input ~start_offset Parse_error.truncated_code_block_meta; emit_truncated_code_block () | `Invalid_char c -> warning input ~start_offset (Parse_error.language_tag_invalid_char lang_tag_ c); - code_block start_offset (Some (lang_tag, None)) input lexbuf + code_block_with_metadata None } | "{@" horizontal_space* '[' { warning input Parse_error.no_language_tag_in_meta; - code_block (Lexing.lexeme_start lexbuf) None input lexbuf + code_block false (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf } | "{v" @@ -398,6 +479,21 @@ rule token input = parse | "{-" { emit input (`Begin_list_item `Dash) } + | "{table" + { emit input (`Begin_table_heavy) } + + | "{t" + { emit input (`Begin_table_light) } + + | "{tr" + { emit input `Begin_table_row } + + | "{th" + { emit input (`Begin_table_cell `Header) } + + | "{td" + { emit input (`Begin_table_cell `Data) } + | '{' (['0'-'9']+ as level) ':' (([^ '}'] # space_char)* as label) { emit input (`Begin_section_heading (heading_level input level, Some label)) } @@ -450,7 +546,11 @@ rule token input = parse | "@closed" { emit input (`Tag `Closed) } + | "@hidden" + { emit input (`Tag `Hidden) } + | "]}" + { emit input `Right_code_delimiter} | '{' { try bad_markup_recovery (Lexing.lexeme_start lexbuf) input lexbuf @@ -502,17 +602,6 @@ rule token input = parse ~in_what:(Token.describe (`Modules ""))); emit input (`Modules modules) } - | (reference_start as start) ([^ '}']* as target) eof - { warning - input - ~start_offset:(Lexing.lexeme_end lexbuf) - (Parse_error.not_allowed - ~what:(Token.describe `End) - ~in_what:(Token.describe (reference_token start ""))); - emit input (reference_token start target) } - - - and code_span buffer nesting_level start_offset input = parse | ']' { if nesting_level = 0 then @@ -655,11 +744,34 @@ and code_block_metadata_tail input = parse | eof { `Eof } -and code_block start_offset metadata input = parse - | (code_block_text as c) "]}" - { emit_code_block ~start_offset input metadata c } - | (code_block_text as c) eof +and code_block allow_result_block start_offset content_offset metadata prefix delim input = parse + | ("]" (delim_char* as delim') "[") as terminator + { if delim = delim' && allow_result_block + then emit_code_block ~start_offset content_offset input metadata delim terminator prefix true + else ( + Buffer.add_string prefix terminator; + code_block allow_result_block start_offset content_offset metadata + prefix delim input lexbuf + ) + } + | ("]" (delim_char* as delim') "}") as terminator + { + if delim = delim' + then emit_code_block ~start_offset content_offset input metadata delim terminator prefix false + else ( + Buffer.add_string prefix terminator; + code_block allow_result_block start_offset content_offset metadata + prefix delim input lexbuf + ) + } + | eof { warning input ~start_offset Parse_error.truncated_code_block; - emit_code_block ~start_offset input metadata c + emit_code_block ~start_offset content_offset input metadata delim "" prefix false + } + | (_ as c) + { + Buffer.add_char prefix c; + code_block allow_result_block start_offset content_offset metadata + prefix delim input lexbuf } diff --git a/vendor/odoc-parser/loc.ml b/vendor/odoc-parser/loc.ml index e3f5a07c79..0316fa270c 100644 --- a/vendor/odoc-parser/loc.ml +++ b/vendor/odoc-parser/loc.ml @@ -22,3 +22,11 @@ let span spans = let nudge_start offset span = { span with start = { span.start with column = span.start.column + offset } } + +let spans_multiple_lines = function + | { + location = + { start = { line = start_line; _ }; end_ = { line = end_line; _ }; _ }; + _; + } -> + end_line > start_line diff --git a/vendor/odoc-parser/loc.mli b/vendor/odoc-parser/loc.mli index 0afe735544..135ba0358e 100644 --- a/vendor/odoc-parser/loc.mli +++ b/vendor/odoc-parser/loc.mli @@ -39,3 +39,7 @@ val map : ('a -> 'b) -> 'a with_location -> 'b with_location val same : _ with_location -> 'b -> 'b with_location (** [same x y] retuns the value y wrapped in a {!with_location} whose location is that of [x] *) + +val spans_multiple_lines : _ with_location -> bool +(** [spans_multiple_lines x] checks to see whether [x] is located + on a single line or whether it covers more than one. *) diff --git a/vendor/odoc-parser/parse_error.ml b/vendor/odoc-parser/parse_error.ml index 095061ff49..a07a8a24bb 100644 --- a/vendor/odoc-parser/parse_error.ml +++ b/vendor/odoc-parser/parse_error.ml @@ -30,6 +30,11 @@ let not_allowed : Warning.make ?suggestion "%s is not allowed in %s." (capitalize_ascii what) in_what +let unclosed_bracket : + ?suggestion:string -> bracket:string -> Loc.span -> Warning.t = + fun ?suggestion ~bracket -> + Warning.make ?suggestion "Open bracket '%s' is never closed." bracket + let no_leading_whitespace_in_verbatim : Loc.span -> Warning.t = Warning.make "'{v' should be followed by whitespace." @@ -76,3 +81,8 @@ let truncated_code_block_meta : Loc.span -> Warning.t = let truncated_code_block : Loc.span -> Warning.t = Warning.make ~suggestion:"add ']}'." "Missing end of code block." + +let end_not_allowed : in_what:string -> Loc.span -> Warning.t = + fun ~in_what -> + Warning.make ~suggestion:"add '}'." "End of text is not allowed in %s." + in_what diff --git a/vendor/odoc-parser/syntax.ml b/vendor/odoc-parser/syntax.ml index 8210a41632..4d5921f79a 100644 --- a/vendor/odoc-parser/syntax.ml +++ b/vendor/odoc-parser/syntax.ml @@ -36,6 +36,115 @@ let peek input = | Some token -> token | None -> assert false +module Table = struct + module Light_syntax = struct + let valid_align = function + | [ { Loc.value = `Word w; _ } ] -> ( + match String.length w with + | 0 -> `Valid None + | 1 -> ( + match w with + | "-" -> `Valid None + | ":" -> `Valid (Some `Center) + | _ -> `Invalid) + | len -> + if String.for_all (Char.equal '-') (String.sub w 1 (len - 2)) then + match (String.get w 0, String.get w (len - 1)) with + | ':', ':' -> `Valid (Some `Center) + | ':', '-' -> `Valid (Some `Left) + | '-', ':' -> `Valid (Some `Right) + | '-', '-' -> `Valid None + | _ -> `Invalid + else `Invalid) + | _ -> `Invalid + + let valid_align_row lx = + let rec loop acc = function + | [] -> Some (List.rev acc) + | x :: q -> ( + match valid_align x with + | `Invalid -> None + | `Valid alignment -> loop (alignment :: acc) q) + in + loop [] lx + + let create ~grid ~align : Ast.table = + let cell_to_block (x, k) = + let whole_loc = Loc.span (List.map (fun x -> x.Loc.location) x) in + match x with + | [] -> ([], k) + | _ -> ([ Loc.at whole_loc (`Paragraph x) ], k) + in + let row_to_block = List.map cell_to_block in + let grid_to_block = List.map row_to_block in + ((grid_to_block grid, align), `Light) + + let with_kind kind : 'a with_location list list -> 'a Ast.row = + List.map (fun c -> (c, kind)) + + let from_raw_data grid : Ast.table = + match grid with + | [] -> create ~grid:[] ~align:None + | row1 :: rows2_N -> ( + match valid_align_row row1 with + (* If the first line is the align row, everything else is data. *) + | Some _ as align -> + create ~grid:(List.map (with_kind `Data) rows2_N) ~align + | None -> ( + match rows2_N with + (* Only 1 line, if this is not the align row this is data. *) + | [] -> create ~grid:[ with_kind `Data row1 ] ~align:None + | row2 :: rows3_N -> ( + match valid_align_row row2 with + (* If the second line is the align row, the first one is the + header and the rest is data. *) + | Some _ as align -> + let header = with_kind `Header row1 in + let data = List.map (with_kind `Data) rows3_N in + create ~grid:(header :: data) ~align + (* No align row in the first 2 lines, everything is considered + data. *) + | None -> + create ~grid:(List.map (with_kind `Data) grid) ~align:None + ))) + end + + module Heavy_syntax = struct + let create ~grid : Ast.table = ((grid, None), `Heavy) + let from_grid grid : Ast.table = create ~grid + end +end + +module Reader = struct + let until_rbrace_or_eof input acc = + let rec consume () = + let next_token = peek input in + match next_token.value with + | `Right_brace -> + junk input; + `End (acc, next_token.location) + | `End -> + Parse_error.end_not_allowed next_token.location ~in_what:"table" + |> add_warning input; + junk input; + `End (acc, next_token.location) + | `Space _ | `Single_newline _ | `Blank_line _ -> + junk input; + consume () + | _ -> `Token next_token + in + consume () + + module Infix = struct + let ( >>> ) consume if_token = + match consume with + | `End (ret, loc) -> (ret, loc) + | `Token t -> if_token t + end +end + +open Reader.Infix + (* The last token in the stream is always [`End], and it is never consumed by the parser, so the [None] case is impossible. *) @@ -99,6 +208,9 @@ let rec inline_element : | `Plus -> junk input; Loc.at location (`Word "+") + | `Bar -> + junk input; + Loc.at location (`Word "|") | (`Code_span _ | `Math_span _ | `Raw_markup _) as token -> junk input; Loc.at location token @@ -249,16 +361,19 @@ and delimited_inline_element_list : junk input; let element = Loc.same next_token (`Space ws) in consume_elements ~at_start_of_line:true (element :: acc) + | `Bar as token -> + let acc = inline_element input next_token.location token :: acc in + consume_elements ~at_start_of_line:false acc | (`Minus | `Plus) as bullet -> (if at_start_of_line then - let suggestion = - Printf.sprintf "move %s so it isn't the first thing on the line." - (Token.print bullet) - in - Parse_error.not_allowed ~what:(Token.describe bullet) - ~in_what:(Token.describe parent_markup) - ~suggestion next_token.location - |> add_warning input); + let suggestion = + Printf.sprintf "move %s so it isn't the first thing on the line." + (Token.print bullet) + in + Parse_error.not_allowed ~what:(Token.describe bullet) + ~in_what:(Token.describe parent_markup) + ~suggestion next_token.location + |> add_warning input); let acc = inline_element input next_token.location bullet :: acc in consume_elements ~at_start_of_line:false acc @@ -342,8 +457,8 @@ let paragraph : input -> Ast.nestable_block_element with_location = fun acc -> let next_token = peek input in match next_token.value with - | (`Space _ | `Minus | `Plus | #token_that_always_begins_an_inline_element) - as token -> + | ( `Space _ | `Minus | `Plus | `Bar + | #token_that_always_begins_an_inline_element ) as token -> let element = inline_element input next_token.location token in paragraph_line (element :: acc) | _ -> acc @@ -356,7 +471,7 @@ let paragraph : input -> Ast.nestable_block_element with_location = fun acc -> match npeek 2 input with | { value = `Single_newline ws; location } - :: { value = #token_that_always_begins_an_inline_element; _ } + :: { value = #token_that_always_begins_an_inline_element | `Bar; _ } :: _ -> junk input; let acc = Loc.at location (`Space ws) :: acc in @@ -373,7 +488,7 @@ let paragraph : input -> Ast.nestable_block_element with_location = (* {3 Helper types} *) (* The interpretation of tokens in the block parser depends on where on a line - each token appears. The five possible "locations" are: + each token appears. The six possible "locations" are: - [`At_start_of_line], when only whitespace has been read on the current line. @@ -383,6 +498,7 @@ let paragraph : input -> Ast.nestable_block_element with_location = [-], has been read, and only whitespace has been read since. - [`After_explicit_list_bullet], when a valid explicit bullet, such as [{li], has been read, and only whitespace has been read since. + - [`After_table_cell], when a table cell opening markup ('{th' or '{td') has been read. - [`After_text], when any other valid non-whitespace token has already been read on the current line. @@ -406,6 +522,7 @@ type where_in_line = | `After_tag | `After_shorthand_bullet | `After_explicit_list_bullet + | `After_table_cell | `After_text ] (* The block parsing loop, function [block_element_list], stops when it @@ -428,6 +545,7 @@ type where_in_line = cases for exactly the tokens that might be at the front of the stream after the block parser returns. *) type stops_at_delimiters = [ `End | `Right_brace ] +type code_stop = [ `End | `Right_code_delimiter ] type stopped_implicitly = [ `End @@ -459,6 +577,8 @@ type ('block, 'stops_at_which_tokens) context = | Top_level : (Ast.block_element, stops_at_delimiters) context | In_shorthand_list : (Ast.nestable_block_element, stopped_implicitly) context | In_explicit_list : (Ast.nestable_block_element, stops_at_delimiters) context + | In_table_cell : (Ast.nestable_block_element, stops_at_delimiters) context + | In_code_results : (Ast.nestable_block_element, code_stop) context | In_tag : (Ast.nestable_block_element, Token.t) context (* This is a no-op. It is needed to prove to the type system that nestable block @@ -473,6 +593,8 @@ let accepted_in_all_contexts : | Top_level -> (block :> Ast.block_element) | In_shorthand_list -> block | In_explicit_list -> block + | In_table_cell -> block + | In_code_results -> block | In_tag -> block (* Converts a tag to a series of words. This is used in error recovery, when a @@ -485,6 +607,7 @@ let tag_to_words = function | `Inline -> [ `Word "@inline" ] | `Open -> [ `Word "@open" ] | `Closed -> [ `Word "@closed" ] + | `Hidden -> [ `Word "@hidden" ] | `Param s -> [ `Word "@param"; `Space " "; `Word s ] | `Raise s -> [ `Word "@raise"; `Space " "; `Word s ] | `Return -> [ `Word "@return" ] @@ -501,6 +624,7 @@ let tag_to_words = function - paragraphs, - code blocks, - verbatim text blocks, + - tables, - lists, and - section headings. *) let rec block_element_list : @@ -555,7 +679,15 @@ let rec block_element_list : match peek input with (* Terminators: the two tokens that terminate anything. *) - | ({ value = `End; _ } | { value = `Right_brace; _ }) as next_token -> ( + | { value = `End; _ } as next_token -> ( + match context with + | Top_level -> (List.rev acc, next_token, where_in_line) + | In_shorthand_list -> (List.rev acc, next_token, where_in_line) + | In_explicit_list -> (List.rev acc, next_token, where_in_line) + | In_tag -> (List.rev acc, next_token, where_in_line) + | In_table_cell -> (List.rev acc, next_token, where_in_line) + | In_code_results -> (List.rev acc, next_token, where_in_line)) + | { value = `Right_brace; _ } as next_token -> ( (* This little absurdity is needed to satisfy the type system. Without it, OCaml is unable to prove that [stream_head] has the right type for all possible values of [context]. *) @@ -563,7 +695,17 @@ let rec block_element_list : | Top_level -> (List.rev acc, next_token, where_in_line) | In_shorthand_list -> (List.rev acc, next_token, where_in_line) | In_explicit_list -> (List.rev acc, next_token, where_in_line) - | In_tag -> (List.rev acc, next_token, where_in_line)) + | In_table_cell -> (List.rev acc, next_token, where_in_line) + | In_tag -> (List.rev acc, next_token, where_in_line) + | In_code_results -> + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc) + | { value = `Right_code_delimiter; _ } as next_token -> ( + match context with + | In_code_results -> (List.rev acc, next_token, where_in_line) + | _ -> + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc) (* Whitespace. This can terminate some kinds of block elements. It is also necessary to track it to interpret [`Minus] and [`Plus] correctly, as well as to ensure that all block elements begin on their own line. *) @@ -596,6 +738,32 @@ let rec block_element_list : ~suggestion location |> add_warning input; + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc + (* Table rows ([{tr ...}]) can never appear directly + in block content. They can only appear inside [{table ...}]. *) + | { value = `Begin_table_row as token; location } -> + let suggestion = + Printf.sprintf "move %s into %s." (Token.print token) + (Token.describe `Begin_table_heavy) + in + Parse_error.not_allowed ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion location + |> add_warning input; + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc + (* Table cells ([{th ...}] and [{td ...}]) can never appear directly + in block content. They can only appear inside [{tr ...}]. *) + | { value = `Begin_table_cell _ as token; location } -> + let suggestion = + Printf.sprintf "move %s into %s." (Token.print token) + (Token.describe `Begin_table_row) + in + Parse_error.not_allowed ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion location + |> add_warning input; junk input; consume_block_elements ~parsed_a_tag where_in_line acc (* Tags. These can appear at the top level only. Also, once one tag is seen, @@ -624,10 +792,12 @@ let rec block_element_list : if where_in_line = `At_start_of_line then (List.rev acc, next_token, where_in_line) else recover_when_not_at_top_level context + | In_table_cell -> recover_when_not_at_top_level context | In_tag -> if where_in_line = `At_start_of_line then (List.rev acc, next_token, where_in_line) else recover_when_not_at_top_level context + | In_code_results -> recover_when_not_at_top_level context (* If this is the top-level call to [block_element_list], parse the tag. *) | Top_level -> ( @@ -704,12 +874,12 @@ let rec block_element_list : let tag = Loc.at location tag in consume_block_elements ~parsed_a_tag:true where_in_line (tag :: acc) - | (`Inline | `Open | `Closed) as tag -> + | (`Inline | `Open | `Closed | `Hidden) as tag -> let tag = Loc.at location (`Tag tag) in consume_block_elements ~parsed_a_tag:true `After_text (tag :: acc))) - | { value = #token_that_always_begins_an_inline_element; _ } as next_token - -> + | ( { value = #token_that_always_begins_an_inline_element; _ } + | { value = `Bar; _ } ) as next_token -> warn_if_after_tags next_token; warn_if_after_text next_token; @@ -729,8 +899,7 @@ let rec block_element_list : let block = Loc.at location block in let acc = block :: acc in consume_block_elements ~parsed_a_tag `After_text acc - | ( { value = `Code_block (_, { value = s; _ }) as token; location } - | { value = `Math_block s as token; location } ) as next_token -> + | { value = `Math_block s as token; location } as next_token -> warn_if_after_tags next_token; warn_if_after_text next_token; if s = "" then @@ -742,6 +911,53 @@ let rec block_element_list : let block = Loc.at location block in let acc = block :: acc in consume_block_elements ~parsed_a_tag `After_text acc + | { + value = + `Code_block (meta, delim, { value = s; location = v_loc }, has_outputs) + as token; + location; + } as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + junk input; + let delimiter = if delim = "" then None else Some delim in + let output, location = + if not has_outputs then (None, location) + else + let content, next_token, _where_in_line = + block_element_list In_code_results ~parent_markup:token input + in + junk input; + let locations = + location :: List.map (fun content -> content.Loc.location) content + in + let location = Loc.span locations in + let location = { location with end_ = next_token.location.end_ } in + (Some content, location) + in + + if s = "" then + Parse_error.should_not_be_empty ~what:(Token.describe token) location + |> add_warning input; + + let meta = + match meta with + | None -> None + | Some (language, tags) -> Some { Ast.language; tags } + in + let block = + accepted_in_all_contexts context + (`Code_block + { + Ast.meta; + delimiter; + content = { value = s; location = v_loc }; + output; + }) + in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc | { value = `Modules s as token; location } as next_token -> warn_if_after_tags next_token; warn_if_after_text next_token; @@ -802,6 +1018,25 @@ let rec block_element_list : let block = Loc.at location block in let acc = block :: acc in consume_block_elements ~parsed_a_tag `After_text acc + | { value = (`Begin_table_light | `Begin_table_heavy) as token; location } + as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + junk input; + let block, brace_location = + let parent_markup = token in + let parent_markup_location = location in + match token with + | `Begin_table_light -> + light_table input ~parent_markup ~parent_markup_location + | `Begin_table_heavy -> + heavy_table input ~parent_markup ~parent_markup_location + in + let location = Loc.span [ location; brace_location ] in + let block = accepted_in_all_contexts context (`Table block) in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc | { value = (`Minus | `Plus) as token; location } as next_token -> ( (match where_in_line with | `After_text | `After_shorthand_bullet -> @@ -857,7 +1092,9 @@ let rec block_element_list : (List.rev acc, next_token, where_in_line) else recover_when_not_at_top_level context | In_explicit_list -> recover_when_not_at_top_level context + | In_table_cell -> recover_when_not_at_top_level context | In_tag -> recover_when_not_at_top_level context + | In_code_results -> recover_when_not_at_top_level context | Top_level -> if where_in_line <> `At_start_of_line then Parse_error.should_begin_on_its_own_line @@ -916,6 +1153,8 @@ let rec block_element_list : | Top_level -> `At_start_of_line | In_shorthand_list -> `After_shorthand_bullet | In_explicit_list -> `After_explicit_list_bullet + | In_table_cell -> `After_table_cell + | In_code_results -> `After_tag | In_tag -> `After_tag in @@ -993,7 +1232,7 @@ and explicit_list_items : let next_token = peek input in match next_token.value with | `End -> - Parse_error.not_allowed next_token.location ~what:(Token.describe `End) + Parse_error.end_not_allowed next_token.location ~in_what:(Token.describe parent_markup) |> add_warning input; (List.rev acc, next_token.location) @@ -1009,25 +1248,25 @@ and explicit_list_items : (* '{li', represented by [`Begin_list_item `Li], must be followed by whitespace. *) (if kind = `Li then - match (peek input).value with - | `Space _ | `Single_newline _ | `Blank_line _ | `Right_brace -> - () - (* The presence of [`Right_brace] above requires some explanation: - - - It is better to be silent about missing whitespace if the next - token is [`Right_brace], because the error about an empty list - item will be generated below, and that error is more important to - the user. - - The [`Right_brace] token also happens to include all whitespace - before it, as a convenience for the rest of the parser. As a - result, not ignoring it could be wrong: there could in fact be - whitespace in the concrete syntax immediately after '{li', just - it is not represented as [`Space], [`Single_newline], or - [`Blank_line]. *) - | _ -> - Parse_error.should_be_followed_by_whitespace next_token.location - ~what:(Token.print token) - |> add_warning input); + match (peek input).value with + | `Space _ | `Single_newline _ | `Blank_line _ | `Right_brace -> + () + (* The presence of [`Right_brace] above requires some explanation: + + - It is better to be silent about missing whitespace if the next + token is [`Right_brace], because the error about an empty list + item will be generated below, and that error is more important to + the user. + - The [`Right_brace] token also happens to include all whitespace + before it, as a convenience for the rest of the parser. As a + result, not ignoring it could be wrong: there could in fact be + whitespace in the concrete syntax immediately after '{li', just + it is not represented as [`Space], [`Single_newline], or + [`Blank_line]. *) + | _ -> + Parse_error.should_be_followed_by_whitespace next_token.location + ~what:(Token.print token) + |> add_warning input); let content, token_after_list_item, _where_in_line = block_element_list In_explicit_list ~parent_markup:token input @@ -1041,8 +1280,8 @@ and explicit_list_items : (match token_after_list_item.value with | `Right_brace -> junk input | `End -> - Parse_error.not_allowed token_after_list_item.location - ~what:(Token.describe `End) ~in_what:(Token.describe token) + Parse_error.end_not_allowed token_after_list_item.location + ~in_what:(Token.describe token) |> add_warning input); let acc = content :: acc in @@ -1069,6 +1308,144 @@ and explicit_list_items : consume_list_items [] +(* Consumes a sequence of table rows that might start with [`Bar]. + + This function is called immediately after '{t' ([`Begin_table `Light]) is + read. The only "valid" way to exit is by reading a [`Right_brace] token, + which is consumed. *) +and light_table ~parent_markup ~parent_markup_location input = + let rec consume_rows acc ~last_loc = + Reader.until_rbrace_or_eof input acc >>> fun next_token -> + match next_token.Loc.value with + | `Bar | #token_that_always_begins_an_inline_element -> ( + let next, row, last_loc = + light_table_row ~parent_markup ~last_loc input + in + match next with + | `Continue -> consume_rows (row :: acc) ~last_loc + | `Stop -> (row :: acc, last_loc)) + | other_token -> + Parse_error.not_allowed next_token.location + ~what:(Token.describe other_token) + ~in_what:(Token.describe parent_markup) + |> add_warning input; + junk input; + consume_rows acc ~last_loc + in + let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in + let grid = List.rev rows in + (Table.Light_syntax.from_raw_data grid, brace_location) + +(* Consumes a table row that might start with [`Bar]. *) +and light_table_row ~parent_markup ~last_loc input = + let rec consume_row acc_row acc_cell acc_space ~new_line ~last_loc = + let push_cells row cell = + match cell with [] -> row | _ -> List.rev cell :: row + in + let return row cell = List.rev (push_cells row cell) in + let next_token = peek input in + match next_token.value with + | `End -> + Parse_error.end_not_allowed next_token.location ~in_what:"table" + |> add_warning input; + junk input; + (`Stop, return acc_row acc_cell, next_token.location) + | `Right_brace -> + junk input; + (`Stop, return acc_row acc_cell, next_token.location) + | `Space _ as token -> + junk input; + let i = Loc.at next_token.location token in + consume_row acc_row acc_cell (i :: acc_space) ~new_line ~last_loc + | `Single_newline _ | `Blank_line _ -> + junk input; + (`Continue, return acc_row acc_cell, last_loc) + | `Bar -> + junk input; + let acc_row = if new_line then [] else List.rev acc_cell :: acc_row in + consume_row acc_row [] [] ~new_line:false ~last_loc + | #token_that_always_begins_an_inline_element as token -> + let i = inline_element input next_token.location token in + if Loc.spans_multiple_lines i then + Parse_error.not_allowed + ~what:(Token.describe (`Single_newline "")) + ~in_what:(Token.describe `Begin_table_light) + i.location + |> add_warning input; + let acc_cell = + if acc_cell = [] then [ i ] else (i :: acc_space) @ acc_cell + in + consume_row acc_row acc_cell [] ~new_line:false + ~last_loc:next_token.location + | other_token -> + Parse_error.not_allowed next_token.location + ~what:(Token.describe other_token) + ~in_what:(Token.describe parent_markup) + |> add_warning input; + junk input; + consume_row acc_row acc_cell acc_space ~new_line ~last_loc + in + consume_row [] [] [] ~new_line:true ~last_loc + +(* Consumes a sequence of table rows (starting with '{tr ...}', which are + represented by [`Begin_table_row] tokens). + + This function is called immediately after '{table' ([`Begin_table `Heavy]) is + read. The only "valid" way to exit is by reading a [`Right_brace] token, + which is consumed. *) +and heavy_table ~parent_markup ~parent_markup_location input = + let rec consume_rows acc ~last_loc = + Reader.until_rbrace_or_eof input acc >>> fun next_token -> + match next_token.Loc.value with + | `Begin_table_row as token -> + junk input; + let items, last_loc = heavy_table_row ~parent_markup:token input in + consume_rows (List.rev items :: acc) ~last_loc + | token -> + Parse_error.not_allowed next_token.location ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion:"Move outside of {table ...}, or inside {tr ...}" + |> add_warning input; + junk input; + consume_rows acc ~last_loc + in + let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in + let grid = List.rev rows in + (Table.Heavy_syntax.from_grid grid, brace_location) + +(* Consumes a sequence of table cells (starting with '{th ...}' or '{td ... }', + which are represented by [`Begin_table_cell] tokens). + + This function is called immediately after '{tr' ([`Begin_table_row]) is + read. The only "valid" way to exit is by reading a [`Right_brace] token, + which is consumed. *) +and heavy_table_row ~parent_markup input = + let rec consume_cell_items acc = + Reader.until_rbrace_or_eof input acc >>> fun next_token -> + match next_token.Loc.value with + | `Begin_table_cell kind as token -> + junk input; + let content, token_after_list_item, _where_in_line = + block_element_list In_table_cell ~parent_markup:token input + in + (match token_after_list_item.value with + | `Right_brace -> junk input + | `End -> + Parse_error.not_allowed token_after_list_item.location + ~what:(Token.describe `End) ~in_what:(Token.describe token) + |> add_warning input); + consume_cell_items ((content, kind) :: acc) + | token -> + Parse_error.not_allowed next_token.location ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion: + "Move outside of {table ...}, or inside {td ...} or {th ...}" + |> add_warning input; + junk input; + consume_cell_items acc + in + consume_cell_items [] + (* {2 Entry point} *) let parse warnings tokens = diff --git a/vendor/odoc-parser/token.ml b/vendor/odoc-parser/token.ml index 222820f084..83181fe455 100644 --- a/vendor/odoc-parser/token.ml +++ b/vendor/odoc-parser/token.ml @@ -20,7 +20,8 @@ type tag = | `Canonical of string | `Inline | `Open - | `Closed ] ] + | `Closed + | `Hidden ] ] type t = [ (* End of input. *) @@ -40,6 +41,7 @@ type t = | `Blank_line of string | (* A right curly brace ([}]), i.e. end of markup. *) `Right_brace + | `Right_code_delimiter | (* Words are anything that is not whitespace or markup. Markup symbols can be be part of words if escaped. @@ -63,14 +65,22 @@ type t = | (* Leaf block element markup. *) `Code_block of (string Loc.with_location * string Loc.with_location option) option + * string * string Loc.with_location + * bool | `Verbatim of string | `Modules of string | (* List markup. *) `Begin_list of [ `Unordered | `Ordered ] | `Begin_list_item of [ `Li | `Dash ] + | (* Table markup. *) + `Begin_table_light + | `Begin_table_heavy + | `Begin_table_row + | `Begin_table_cell of [ `Header | `Data ] | `Minus | `Plus + | `Bar | section_heading | tag ] @@ -87,8 +97,14 @@ let print : [< t ] -> string = function | `Begin_link_with_replacement_text _ -> "'{{:'" | `Begin_list_item `Li -> "'{li ...}'" | `Begin_list_item `Dash -> "'{- ...}'" + | `Begin_table_light -> "{t" + | `Begin_table_heavy -> "{table" + | `Begin_table_row -> "'{tr'" + | `Begin_table_cell `Header -> "'{th'" + | `Begin_table_cell `Data -> "'{td'" | `Minus -> "'-'" | `Plus -> "'+'" + | `Bar -> "'|'" | `Begin_section_heading (level, label) -> let label = match label with None -> "" | Some label -> ":" ^ label in Printf.sprintf "'{%i%s'" level label @@ -105,6 +121,7 @@ let print : [< t ] -> string = function | `Tag `Inline -> "'@inline'" | `Tag `Open -> "'@open'" | `Tag `Closed -> "'@closed'" + | `Tag `Hidden -> "'@hidden" | `Raw_markup (None, _) -> "'{%...%}'" | `Raw_markup (Some target, _) -> "'{%" ^ target ^ ":...%}'" @@ -135,6 +152,7 @@ let describe : [< t | `Comment ] -> string = function | `Single_newline _ -> "line break" | `Blank_line _ -> "blank line" | `Right_brace -> "'}'" + | `Right_code_delimiter -> "']}'" | `Code_block _ -> "'{[...]}' (code block)" | `Verbatim _ -> "'{v ... v}' (verbatim text)" | `Modules _ -> "'{!modules ...}'" @@ -142,8 +160,14 @@ let describe : [< t | `Comment ] -> string = function | `Begin_list `Ordered -> "'{ol ...}' (numbered list)" | `Begin_list_item `Li -> "'{li ...}' (list item)" | `Begin_list_item `Dash -> "'{- ...}' (list item)" + | `Begin_table_light -> "'{t ...}' (table)" + | `Begin_table_heavy -> "'{table ...}' (table)" + | `Begin_table_row -> "'{tr ...}' (table row)" + | `Begin_table_cell `Header -> "'{th ... }' (table header cell)" + | `Begin_table_cell `Data -> "'{td ... }' (table data cell)" | `Minus -> "'-' (bulleted list item)" | `Plus -> "'+' (numbered list item)" + | `Bar -> "'|'" | `Begin_section_heading (level, _) -> Printf.sprintf "'{%i ...}' (section heading)" level | `Tag (`Author _) -> "'@author'" @@ -159,6 +183,7 @@ let describe : [< t | `Comment ] -> string = function | `Tag `Inline -> "'@inline'" | `Tag `Open -> "'@open'" | `Tag `Closed -> "'@closed'" + | `Tag `Hidden -> "'@hidden" | `Comment -> "top-level text" let describe_element = function