Skip to content

Commit

Permalink
Add support for sticky symbols
Browse files Browse the repository at this point in the history
  • Loading branch information
dmbaturin committed Dec 14, 2024
1 parent db06ebd commit 560dd64
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 107 deletions.
3 changes: 2 additions & 1 deletion src/bnfgen_cli.ml
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,11 @@ let () =
begin match !action with
| Dump -> Printf.printf "%s\n" @@ Bnfgen.grammar_to_string g
| Reduce ->
let start_symbol = (!start_symbol, false) in
for production = 1 to !productions do
if !settings.debug then Printf.ksprintf !settings.debug_fun "Outputting Production %d of %d%!" production !productions;
let out_fun = if !buffering then print_string else (Printf.printf "%s%!") in
let res = Bnfgen.generate ~settings:!settings out_fun g !start_symbol in
let res = Bnfgen.generate ~settings:!settings out_fun g start_symbol in
begin match res with
| Ok _ -> print_string "\n"
| Error msg -> Printf.eprintf "%s%!\n" msg
Expand Down
3 changes: 2 additions & 1 deletion src/lib/bnf_lexer.mll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(*
* Copyright (c) 2014, 2019 Daniil Baturin
* Copyright (c) 2024 Daniil Baturin
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -47,6 +47,7 @@ rule token = parse
| '{' { LBRACE }
| '}' { RBRACE }
| ',' { COMMA }
| '~' { TILDE }
| ['0' - '9']+ as i
{ NUMBER (int_of_string i) }
| eof
Expand Down
174 changes: 99 additions & 75 deletions src/lib/bnf_parser.messages
Original file line number Diff line number Diff line change
@@ -1,17 +1,44 @@
grammar: IDENTIFIER DEF NUMBER SEMI
grammar: TILDE
##
## Ends in an error in state: 8.
## Ends in an error in state: 0.
##
## rule_rhs_part -> NUMBER . rule_rhs_symbols [ SEMI OR EOF ]
## grammar' -> . grammar [ # ]
##
## The known suffix of the stack is as follows:
## NUMBER
##
##

Sticky flag (~) must be attached to a symbol.

grammar: SEMI TILDE
##
## Ends in an error in state: 3.
##
## option(SEMI) -> SEMI . [ EOF ]
## rules -> rules SEMI . rule [ SEMI EOF ]
##
## The known suffix of the stack is as follows:
## rules SEMI
##

Sticky flag (~) must be attached to a symbol.

grammar: IDENTIFIER TILDE
##
## Ends in an error in state: 5.
##
## rule -> nonterminal . DEF rule_rhs [ SEMI EOF ]
##
## The known suffix of the stack is as follows:
## nonterminal
##

Left-hand side of a rule must be a single symbol identifier followed by "::=".
In your rule, a symbol identifier is followed by a string or the right-hand side is empty.
Example of a valid rule: <foo> ::= "foo" ;

Weight number must be followed by a symbol identifier or a string.
Example of a valid rule: <start> ::= 10 <foo> | <bar> ;

grammar: IDENTIFIER DEF SEMI
grammar: IDENTIFIER DEF TILDE
##
## Ends in an error in state: 6.
##
Expand All @@ -21,59 +48,55 @@ grammar: IDENTIFIER DEF SEMI
## nonterminal DEF
##

The right-hand side of a rule is empty or starts with an empty alternative.
Empty rules (<foo> ::= ;) and empty alternatives (<foo> ::= | "foo") are not allowed.
Example of a valid rule: <start> ::= <foo> | <bar> ;
Invalid right-hand side of a rule.
Example of a valid rule: <foo> ::= "foo" ;

grammar: IDENTIFIER DEF STRING LBRACE NUMBER COMMA NUMBER STRING
grammar: IDENTIFIER DEF NUMBER TILDE
##
## Ends in an error in state: 15.
## Ends in an error in state: 8.
##
## repeat_range -> LBRACE NUMBER COMMA NUMBER . RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## rule_rhs_part -> NUMBER . rule_rhs_symbols [ SEMI OR EOF ]
##
## The known suffix of the stack is as follows:
## LBRACE NUMBER COMMA NUMBER
## NUMBER
##

Invalid repetition syntax. Did you forget to close a brace?
Examples of valid rules:
<foo> ::= <bar>{1,4}
<baz> ::= <foo>{5}
Invalid right-hand side of a rule.
Example of a valid rule: <foo> ::= "foo" ;

grammar: IDENTIFIER DEF STRING LBRACE NUMBER COMMA STRING
grammar: IDENTIFIER DEF STRING RBRACE
##
## Ends in an error in state: 14.
## Ends in an error in state: 9.
##
## repeat_range -> LBRACE NUMBER COMMA . NUMBER RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## symbol -> terminal . [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## symbol -> terminal . TILDE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
##
## The known suffix of the stack is as follows:
## LBRACE NUMBER COMMA
## terminal
##

Invalid repetition syntax. Should be "{num}" or "{min, max}".
Invalid repetition syntax. Did you forget an opening brace?
Examples of valid rules:
<foo> ::= <bar>{1,4}
<baz> ::= <foo>{5}

grammar: IDENTIFIER DEF STRING LBRACE NUMBER STRING
grammar: IDENTIFIER DEF IDENTIFIER TILDE TILDE
##
## Ends in an error in state: 12.
## Ends in an error in state: 11.
##
## repeat_range -> LBRACE NUMBER . RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## repeat_range -> LBRACE NUMBER . COMMA NUMBER RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## rule_rhs_symbols -> symbol . [ STRING SEMI OR IDENTIFIER EOF ]
## symbol -> symbol . repeat_range [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
##
## The known suffix of the stack is as follows:
## LBRACE NUMBER
## symbol
##

Invalid repetition syntax. Did you forget to close a brace?
Examples of valid rules:
<foo> ::= <bar>{1,4}
<baz> ::= <foo>{5}
Invalid right-hand side of a rule.
Example of a valid rule: <foo> ::= "foo" ;

grammar: IDENTIFIER DEF STRING LBRACE STRING
grammar: IDENTIFIER DEF IDENTIFIER LBRACE TILDE
##
## Ends in an error in state: 11.
## Ends in an error in state: 12.
##
## repeat_range -> LBRACE . NUMBER RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## repeat_range -> LBRACE . NUMBER COMMA NUMBER RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
Expand All @@ -87,91 +110,92 @@ Examples of valid rules:
<foo> ::= <bar>{1,4}
<baz> ::= <foo>{5}

grammar: IDENTIFIER DEF STRING OR SEMI
grammar: IDENTIFIER DEF IDENTIFIER LBRACE NUMBER TILDE
##
## Ends in an error in state: 24.
## Ends in an error in state: 13.
##
## rule_rhs -> rule_rhs OR . rule_rhs_part [ SEMI OR EOF ]
## repeat_range -> LBRACE NUMBER . RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## repeat_range -> LBRACE NUMBER . COMMA NUMBER RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
##
## The known suffix of the stack is as follows:
## rule_rhs OR
## LBRACE NUMBER
##

Empty alternative in a right-hand side.
Empty alternatives (such as "<foo> | | <bar>") are not allowed.
Example of a valid rule: <start> ::= <foo> | <bar> ;
Invalid repetition syntax. Should be "{num}" or "{min, max}".
Examples of valid rules:
<foo> ::= <bar>{1,4}
<baz> ::= <foo>{5}

grammar: IDENTIFIER DEF STRING RBRACE
grammar: IDENTIFIER DEF IDENTIFIER LBRACE NUMBER COMMA TILDE
##
## Ends in an error in state: 10.
## Ends in an error in state: 15.
##
## rule_rhs_symbols -> symbol . [ STRING SEMI OR IDENTIFIER EOF ]
## symbol -> symbol . repeat_range [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## repeat_range -> LBRACE NUMBER COMMA . NUMBER RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
##
## The known suffix of the stack is as follows:
## symbol
## LBRACE NUMBER COMMA
##

Invalid repetition syntax. Did you forget an opening brace?
Invalid repetition syntax. Should be "{num}" or "{min, max}".
Examples of valid rules:
<foo> ::= <bar>{1,4}
<baz> ::= <foo>{5}

grammar: IDENTIFIER DEF STRING STRING RBRACE
grammar: IDENTIFIER DEF IDENTIFIER LBRACE NUMBER COMMA NUMBER TILDE
##
## Ends in an error in state: 19.
## Ends in an error in state: 16.
##
## rule_rhs_symbols -> rule_rhs_symbols symbol . [ STRING SEMI OR IDENTIFIER EOF ]
## symbol -> symbol . repeat_range [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## repeat_range -> LBRACE NUMBER COMMA NUMBER . RBRACE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
##
## The known suffix of the stack is as follows:
## rule_rhs_symbols symbol
## LBRACE NUMBER COMMA NUMBER
##

Invalid repetition syntax. Did you forget an opening brace?
Invalid repetition syntax. Should be "{num}" or "{min, max}".
Examples of valid rules:
<foo> ::= <bar>{1,4}
<baz> ::= <foo>{5}

grammar: IDENTIFIER STRING
grammar: IDENTIFIER DEF IDENTIFIER IDENTIFIER TILDE TILDE
##
## Ends in an error in state: 5.
## Ends in an error in state: 20.
##
## rule -> nonterminal . DEF rule_rhs [ SEMI EOF ]
## rule_rhs_symbols -> rule_rhs_symbols symbol . [ STRING SEMI OR IDENTIFIER EOF ]
## symbol -> symbol . repeat_range [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
##
## The known suffix of the stack is as follows:
## nonterminal
## rule_rhs_symbols symbol
##

Left-hand side of a rule must be a single symbol identifier followed by "::=".
In your rule, a symbol identifier is followed by a string or the right-hand side is empty.
Example of a valid rule: <foo> ::= "foo" ;
Invalid right-hand side of a rule.
Example of a valid rule: <start> ::= <foo> | <bar> ;

grammar: SEMI STRING
grammar: IDENTIFIER DEF IDENTIFIER RBRACE
##
## Ends in an error in state: 3.
## Ends in an error in state: 21.
##
## option(SEMI) -> SEMI . [ EOF ]
## rules -> rules SEMI . rule [ SEMI EOF ]
## symbol -> nonterminal . [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
## symbol -> nonterminal . TILDE [ STRING SEMI OR LBRACE IDENTIFIER EOF ]
##
## The known suffix of the stack is as follows:
## rules SEMI
## nonterminal
##

Left-hand side of a rule must be a single non-terminal symbol.
In your rule, a symbol identifier is preceded by a string or the left-hand side is empty.
The most likely reason is a misplaced semicolon in a previous rule.
Example of a valid rule: <start> ::= "foo" ;
Invalid repetition syntax. Did you forget an opening brace?
Examples of valid rules:
<foo> ::= <bar>{1,4}
<baz> ::= <foo>{5}

grammar: STRING
grammar: IDENTIFIER DEF IDENTIFIER OR TILDE
##
## Ends in an error in state: 0.
## Ends in an error in state: 26.
##
## grammar' -> . grammar [ # ]
## rule_rhs -> rule_rhs OR . rule_rhs_part [ SEMI OR EOF ]
##
## The known suffix of the stack is as follows:
## rule_rhs OR
##
##

# I'm still not quite certain how to end up in this state to be fair.
Invalid syntax.
Invalid right-hand side of a rule.
Example of a valid rule: <start> ::= <foo> | <bar> ;

12 changes: 9 additions & 3 deletions src/lib/bnf_parser.mly
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(*
* Copyright (c) 2014, 2021 Daniil Baturin
* Copyright (c) 2024 Daniil Baturin
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -34,6 +34,7 @@
%token LBRACE
%token RBRACE
%token COMMA
%token TILDE

%start <Grammar.grammar> grammar
%%
Expand Down Expand Up @@ -67,9 +68,14 @@ repeat_range:
| LBRACE; l = NUMBER; COMMA; r = NUMBER; RBRACE { (l, r) }
;

(* Optional sticky marker for a symbol.
   Produces [true] when a TILDE token is present and [false] when it is absent
   (the empty production).
   NOTE(review): what "sticky" changes during generation is decided outside
   this grammar; presumably it affects separator output -- confirm against
   Grammar.reduce_symbol. *)
sticky_flag:
| { false }
| TILDE { true }
;

symbol:
| s = terminal { Terminal s }
| s = nonterminal { Nonterminal s }
| str = terminal; sticky = sticky_flag { Terminal (str, sticky) }
| name = nonterminal; sticky = sticky_flag { Nonterminal (name, sticky) }
| s = symbol; r = repeat_range { Repeat (s, r) }
;

Expand Down
6 changes: 3 additions & 3 deletions src/lib/bnfgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ let generate ?(settings=default_settings) callback grammar start_symbol =
end
in
if depth_exceeded settings.max_reductions reductions then Error ("Maximum total number of reductions exceeded") else
let output, sym_stack = Grammar.reduce_symbol ~debug:settings.debug ~debug_fun:settings.debug_fun sym_stack grammar in
let output, sym_stack =
Grammar.reduce_symbol ~debug:settings.debug ~debug_fun:settings.debug_fun
~separator:settings.symbol_separator sym_stack grammar in
match output with
| None ->
if sym_stack = [] then Ok () else
Expand All @@ -81,8 +83,6 @@ let generate ?(settings=default_settings) callback grammar start_symbol =
else aux settings callback grammar (reductions + 1) (nonprod_reductions + 1) sym_stack
| Some str ->
let () = callback str in
(* Emit a symbol separator, unless it's set to an empty string. *)
let () = if settings.symbol_separator <> "" then callback settings.symbol_separator in
if sym_stack = [] then Ok ()
else aux settings callback grammar (reductions + 1) 0 sym_stack
in
Expand Down
12 changes: 6 additions & 6 deletions src/lib/bnfgen.mli
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ module Grammar : sig
exception Grammar_error of string

type symbol =
| Terminal of string
| Nonterminal of string
| Terminal of (string * bool)
| Nonterminal of (string * bool)
| Repeat of symbol * (int * int)
| SymbolMarker of string
| SymbolMarker of (string * bool)

type rule_alternative = { weight: int; symbols: symbol list }
type rule = string * (rule_alternative list)
type grammar = rule list

val reduce_symbol :
?debug:bool -> ?debug_fun:(string -> unit) ->
?debug:bool -> ?debug_fun:(string -> unit) -> ?separator:string ->
symbol list -> grammar -> (string option * symbol list)
end

Expand All @@ -36,6 +36,6 @@ val grammar_to_string : Grammar.grammar -> string
val check_grammar : Grammar.grammar -> (unit, string) result
val check_grammar_exn : Grammar.grammar -> unit

val generate : ?settings:settings -> (string -> unit) -> Grammar.grammar -> string -> (unit, string) result
val generate : ?settings:settings -> (string -> unit) -> Grammar.grammar -> (string * bool) -> (unit, string) result

val generate_string : ?settings:settings -> Grammar.grammar -> string -> (string, string) result
val generate_string : ?settings:settings -> Grammar.grammar -> (string * bool) -> (string, string) result
Loading

0 comments on commit 560dd64

Please sign in to comment.