Skip to content

Commit

Permalink
Use a separate symbol flags record for carrying sticky flags
Browse files Browse the repository at this point in the history
to keep the external API simple and backward-compatible

Correct rendering of rule dumps is achieved by introducing a virtual
"sticky parent" flag that is used for passing the sticky marker down
during symbol reduction
  • Loading branch information
dmbaturin committed Feb 19, 2025
1 parent 560dd64 commit ad6dc1a
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 40 deletions.
5 changes: 2 additions & 3 deletions src/bnfgen_cli.ml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(*
* Copyright (c) 2021 Daniil Baturin
* Copyright (c) 2025 Daniil Baturin
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -72,11 +72,10 @@ let () =
begin match !action with
| Dump -> Printf.printf "%s\n" @@ Bnfgen.grammar_to_string g
| Reduce ->
let start_symbol = (!start_symbol, false) in
for production = 1 to !productions do
if !settings.debug then Printf.ksprintf !settings.debug_fun "Outputting Production %d of %d%!" production !productions;
let out_fun = if !buffering then print_string else (Printf.printf "%s%!") in
let res = Bnfgen.generate ~settings:!settings out_fun g start_symbol in
let res = Bnfgen.generate ~settings:!settings out_fun g !start_symbol in
begin match res with
| Ok _ -> print_string "\n"
| Error msg -> Printf.eprintf "%s%!\n" msg
Expand Down
16 changes: 8 additions & 8 deletions src/lib/bnf_parser.mly
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(*
* Copyright (c) 2024 Daniil Baturin
* Copyright (c) 2025 Daniil Baturin
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -53,6 +53,9 @@
As in:
<start> ::= 10 <nonterminal> "terminal" | "terminal"{1,3} ;
<nonterminal> ::= "nonterminal";

Symbols can also be marked as "sticky" with a tilde to prevent BNFGen
from emitting a separator after reducing them: <foo>~
*/

nonterminal:
Expand All @@ -68,14 +71,11 @@ repeat_range:
| LBRACE; l = NUMBER; COMMA; r = NUMBER; RBRACE { (l, r) }
;

sticky_flag:
| { false }
| TILDE { true }
;

symbol:
| str = terminal; sticky = sticky_flag { Terminal (str, sticky) }
| name = nonterminal; sticky = sticky_flag { Nonterminal (name, sticky) }
| content = terminal { Terminal (content, default_flags) }
| content = terminal; TILDE { Terminal (content, {default_flags with sticky=true}) }
| name = nonterminal { Nonterminal (name, default_flags) }
| name = nonterminal; TILDE { Nonterminal (name, {default_flags with sticky=true}) }
| s = symbol; r = repeat_range { Repeat (s, r) }
;

Expand Down
2 changes: 1 addition & 1 deletion src/lib/bnfgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ let generate ?(settings=default_settings) callback grammar start_symbol =
if sym_stack = [] then Ok ()
else aux settings callback grammar (reductions + 1) 0 sym_stack
in
try aux settings callback grammar 0 0 [Grammar.Nonterminal start_symbol]
try aux settings callback grammar 0 0 [Grammar.Nonterminal (start_symbol, Grammar.default_flags)]
with Grammar.Grammar_error e -> Error e

let generate_string ?(settings=default_settings) grammar start_symbol =
Expand Down
33 changes: 28 additions & 5 deletions src/lib/bnfgen.mli
Original file line number Diff line number Diff line change
@@ -1,11 +1,34 @@
module Grammar : sig
exception Grammar_error of string

(** Flags attached to every terminal and nonterminal symbol. *)
type symbol_flags = {
sticky: bool;
(** The symbol itself is marked sticky in the grammar. *)

sticky_parent: bool;
(** The symbol must be treated as sticky
because it was produced during reduction of a sticky parent.

A separate flag is used instead of the normal [sticky] flag
so that rules can be rendered for debugging purposes
exactly as they were written in the source grammar.

The flag has to be propagated at all because symbol reduction
is tail-recursive (to support infinite depth and output size),
so there is no other way to determine whether a separator
must be emitted or not.
A naively recursive implementation could simply check
for stickiness before returning; a tail-recursive one
has to pass that state around. *)
}

type symbol =
| Terminal of (string * bool)
| Nonterminal of (string * bool)
| Terminal of (string * symbol_flags)
| Nonterminal of (string * symbol_flags)
| Repeat of symbol * (int * int)
| SymbolMarker of (string * bool)

type rule_alternative = { weight: int; symbols: symbol list }
type rule = string * (rule_alternative list)
Expand Down Expand Up @@ -36,6 +59,6 @@ val grammar_to_string : Grammar.grammar -> string
val check_grammar : Grammar.grammar -> (unit, string) result
val check_grammar_exn : Grammar.grammar -> unit

val generate : ?settings:settings -> (string -> unit) -> Grammar.grammar -> (string * bool) -> (unit, string) result
val generate : ?settings:settings -> (string -> unit) -> Grammar.grammar -> string -> (unit, string) result

val generate_string : ?settings:settings -> Grammar.grammar -> (string * bool) -> (string, string) result
val generate_string : ?settings:settings -> Grammar.grammar -> string -> (string, string) result
81 changes: 58 additions & 23 deletions src/lib/grammar.ml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(*
* Copyright (c) 2024 Daniil Baturin
* Copyright (c) 2025 Daniil Baturin
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -22,29 +22,55 @@

exception Grammar_error of string

(* Flags attached to every terminal and nonterminal symbol. *)
type symbol_flags = {
(* The symbol itself is marked sticky in the grammar. *)
sticky: bool;

(* The symbol must be treated as sticky
because it was produced during reduction of a sticky parent.

A separate flag is used instead of the normal [sticky] flag
so that rules can be rendered for debugging purposes
exactly as they were written in the source grammar.

The flag has to be propagated at all because symbol reduction
is tail-recursive (to support infinite depth and output size),
so there is no other way to determine whether a separator
must be emitted or not.
A naively recursive implementation could simply check
for stickiness before returning; a tail-recursive one
has to pass that state around.
*)
sticky_parent: bool;
}

type symbol =
| Terminal of (string * bool)
| Nonterminal of (string * bool)
| Terminal of (string * symbol_flags)
| Nonterminal of (string * symbol_flags)
| Repeat of symbol * (int * int)
| SymbolMarker of (string * bool)

(* One alternative of a rule's right-hand side:
   a list of symbols together with an integer weight. *)
type rule_alternative = { weight: int; symbols: symbol list }
(* A rule: the name of the nonterminal it defines and its alternatives. *)
type rule = string * (rule_alternative list)
(* A grammar is a list of rules. *)
type grammar = rule list

(* Flags of a symbol that is sticky neither on its own
   nor through a sticky parent. *)
let default_flags = { sticky_parent = false; sticky = false }

(* Abort by raising [Grammar_error] with the given message. *)
let grammar_error msg = raise (Grammar_error msg)

(* Anything to string, mainly for parser debug *)

let sticky_flag s =
if s then "~" else ""
let sticky_flag sym_data =
(* Only render the sticky flag if it was originally there
and was not added during reduction of a sticky parent. *)
if sym_data.sticky then "~"
else ""

let rec string_of_symbol s =
match s with
| SymbolMarker (name, sticky) ->
Printf.sprintf "(SymbolMarker <%s>%s)" name (sticky_flag sticky)
| Terminal (s, sticky) -> Printf.sprintf "\"%s\"%s" s (sticky_flag sticky)
| Nonterminal (s, sticky) -> Printf.sprintf "<%s>%s" s (sticky_flag sticky)
| Terminal (content, sym_data) -> Printf.sprintf "\"%s\"%s" content (sticky_flag sym_data)
| Nonterminal (name, sym_data) -> Printf.sprintf "<%s>%s" name (sticky_flag sym_data)
| Repeat (s, (min, max)) ->
if (min = max) then Printf.sprintf "%s{%d}" (string_of_symbol s) min
else Printf.sprintf "%s{%d-%d}" (string_of_symbol s) min max
Expand All @@ -64,7 +90,7 @@ let rec string_of_rule_rhs r =

let string_of_rule r =
let (name, alts) = r in
Printf.sprintf "%s ::= %s;" (string_of_symbol (Nonterminal (name, false))) (string_of_rule_rhs alts)
Printf.sprintf "%s ::= %s;" (string_of_symbol (Nonterminal (name, default_flags))) (string_of_rule_rhs alts)

let to_string r =
let rule_str_list = List.map string_of_rule r in
Expand Down Expand Up @@ -160,24 +186,26 @@ let sort_rule_parts l =

let rec make_sticky sym =
match sym with
| Terminal (name, _) -> Terminal (name, true)
| Nonterminal (name, _) -> Nonterminal (name, true)
| Terminal (content, sym_flags) -> Terminal (content, {sym_flags with sticky_parent=true})
| Nonterminal (name, sym_flags) -> Nonterminal (name, {sym_flags with sticky_parent=true})
| Repeat (sym, num) -> Repeat ((make_sticky sym), num)
| _ -> assert false

(* A symbol counts as sticky when it is marked sticky itself
   or when it carries the [sticky_parent] flag. *)
let is_sticky { sticky; sticky_parent } = sticky || sticky_parent

let reduce_symbol ?(debug=false) ?(debug_fun=print_endline) ?(separator="") sym_stack grammar =
match sym_stack with
| [] -> (None, [])
| sym :: syms ->
match sym with
| SymbolMarker (name, sticky) ->
let () = if debug then Printf.ksprintf debug_fun "Finished resolving symbol %s" (string_of_symbol (Nonterminal (name, sticky))) in
(None, syms)
| Terminal (t, sticky) ->
let () = if debug then Printf.ksprintf debug_fun "Emitting terminal \"%s\"" t in
let output = if (sticky || (separator = "")) then t else t ^ separator in
| Terminal (content, sym_data) ->
let () = if debug then Printf.ksprintf debug_fun {|Emitting terminal "%s"|} content in
let output =
if ((is_sticky sym_data) || (separator = "")) then content
else content ^ separator
in
(Some output, syms)
| Nonterminal (name, sticky) ->
| Nonterminal (name, sym_data) ->
let () = if debug then Printf.ksprintf debug_fun "Reducing symbol <%s>" name in
let rhs = find_production name grammar in
let rhs =
Expand All @@ -189,15 +217,22 @@ let reduce_symbol ?(debug=false) ?(debug_fun=print_endline) ?(separator="") sym_
if debug then Printf.ksprintf debug_fun "Alternative taken: %s" (string_of_rule_rhs_part {weight=1; symbols=new_syms})
in
let new_syms =
if not sticky then new_syms
(* If the current symbol is not sticky,
we just push everything it produced on the stack and proceed.
But if it's sticky, we need to make sure not to emit the separator
when we emit the last terminal that was produced from that sticky parent.
So we mark the last symbol in the list with [sticky_parent] flag
to allow that marker to eventually propagate down to the last terminal.
*)
if not (is_sticky sym_data) then new_syms
else begin
let rev_new_syms = List.rev new_syms in
let last_sym = List.hd rev_new_syms in
let last_sym = make_sticky last_sym in
last_sym :: List.tl rev_new_syms |> List.rev
end
in
let new_syms = List.append new_syms [(SymbolMarker (name, sticky))] in
let syms = List.append new_syms syms in
(None, syms)
| Repeat (s, (min, max)) ->
Expand Down

0 comments on commit ad6dc1a

Please sign in to comment.