-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRegex.v
219 lines (198 loc) · 6.52 KB
/
Regex.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
Require Import List Bool Ascii String Arith Lia Nat PeanoNat.
Export ListNotations BoolNotations.
From stdpp Require Import gmap sets fin_sets.
Open Scope list_scope.
(** Code given by Jules *)
Ltac simp tac :=
repeat match goal with
| |- ?x = ?x => reflexivity
| H : ?x = ?x |- _ => clear H
| |- forall _, _ => intro
| |- _ -> _ => intro
| H : False |- _ => destruct H
| H : True |- _ => destruct H
| H : exists _, _ |- _ => destruct H
| H : _ /\ _ |- _ => destruct H
| H : _ <-> _ |- _ => destruct H
| x : _ * _ |- _ => destruct x
| H1 : ?P -> ?Q, H2 : ?P |- _ => specialize (H1 H2)
| H : True -> ?Q |- _ => specialize (H I)
| |- _ => progress (simplify_eq; auto)
| |- _ => progress set_unfold
| |- _ => progress unfold orb, andb in *
(* Do tactics that create multiple subgoals last *)
| |- _ /\ _ => split
| |- _ <-> _ => split
| H : _ \/ _ |- _ => destruct H
| |- _ => case_decide
| [ H: ?x ++ ?y = [] |- _ ] => destruct x, y
| [ H: [] = ?x ++ ?y |- _ ] => destruct x, y
| [ H: context[match ?X with _ => _ end] |- _ ] => destruct X eqn:?
| [ |- context[match ?X with _ => _ end] ] => destruct X eqn:?
| [ H: context[if ?b then _ else _] |- _ ] => destruct b eqn:?
| [ |- context[if ?b then _ else _] ] => destruct b eqn:?
| |- _ \/ _ => solve [left; simp tac | right; simp tac]
| |- _ => lia
| |- _ => congruence
| |- _ => progress tac
end.
Definition char := ascii.
Definition string := list ascii.
Definition char_dec := ascii_dec.
(** Regular expressions *)
Inductive re :=
| Void : re
| Epsilon : re
| Atom : char -> re
| Union : re -> re -> re
| Concat : re -> re -> re
| Star : re -> re.
(** Matching relation *)
Inductive matches : re -> string -> Prop :=
| matches_epsilon : matches Epsilon []
| matches_atom a : matches (Atom a) [a]
| matches_union_l r1 r2 s :
matches r1 s ->
matches (Union r1 r2) s
| matches_union_r r1 r2 s :
matches r2 s ->
matches (Union r1 r2) s
| matches_concat r1 r2 s1 s2 s3 :
matches r1 s1 ->
matches r2 s2 ->
s3 = s1 ++ s2 ->
matches (Concat r1 r2) s3
| matches_star_empty r :
matches (Star r) []
| matches_star_step r s1 s2 s3 :
matches r s1 ->
matches (Star r) s2 ->
s3 = s1 ++ s2 ->
matches (Star r) s3.
Hint Constructors matches : core.
(** A lemma relating cons and [++] *)
Lemma cons_eq_app {A} (a : A) x y z :
a :: x = y ++ z -> (y = [] /\ z = a :: x) \/
(exists y', y = a :: y' /\ x = y' ++ z).
Proof.
intros H. replace (a :: x) with ([a] ++ x) in H; last done.
apply app_eq_app in H. simp eauto. destruct y; simp eauto.
Qed.
(** Inversion lemma for [Star] *)
Lemma star_inv (r : re) (s : string) :
s ≠ [] -> matches (Star r) s ->
exists (s1 s2 : string), s1 ≠ [] /\ s = s1 ++ s2
/\ matches r s1 /\ matches (Star r) s2.
Proof.
intros Hs Hr. remember (Star r).
revert r Heqr0. induction Hr; simp eauto.
destruct s1; simp eauto.
exists (a :: s1). simp eauto.
Qed.
Ltac inv' H := inversion H; clear H; simplify_eq.
Ltac auto_inv :=
try match goal with
| [ H : matches Void _ |- _ ] => inv' H
| [ H : matches (Epsilon) _ |- _ ] => inv' H
| [ H : matches (Atom _) _ |- _ ] => inv' H
| [ H : matches (Union _ _) _ |- _ ] => inv' H
| [ H : matches (Concat _ _) _ |- _ ] => inv' H
| [ H : matches (Star _) (_ :: _) |- _ ] => apply star_inv in H
| [ H : _ :: _ = ?x ++ ?y |- _ ] => apply cons_eq_app in H
end; eauto.
Ltac X := simp auto_inv.
(** True if the regex matches the empty string *)
Fixpoint isEmpty (r : re) : bool :=
match r with
| Void => false
| Epsilon => true
| Atom _ => false
| Union r1 r2 => isEmpty r1 || isEmpty r2
| Concat r1 r2 => isEmpty r1 && isEmpty r2
| Star _ => true
end.
Lemma isEmpty_matches_1 (r : re) : isEmpty r = true -> matches r [].
Proof. induction r; X. Qed.
Lemma isEmpty_matches_2 (r : re) : matches r [] -> isEmpty r = true.
Proof. remember []. induction 1; X. Qed.
Hint Resolve isEmpty_matches_1 isEmpty_matches_2 : core.
(******************************************************************************)
(* Our code below *)
(** True if the regex never matches any string *)
Fixpoint isVoid (r : re) : bool :=
match r with
| Void => true
| Concat r1 r2 => isVoid r1 || isVoid r2
| Union r1 r2 => isVoid r1 && isVoid r2
| Star _ => false (* Star can always match the empty string *)
| Atom _ => false
| Epsilon => false
end.
(** Regexes have decidable equality *)
Lemma re_dec : forall r1 r2 : re, {r1 = r2} + {r1 <> r2}.
Proof. decide equality. apply char_dec. Qed.
Instance ReDecidable : EqDecision re := re_dec.
(** Injection from regexes into gen_trees, which are countable *)
Fixpoint encode_regex (r : re) : gen_tree nat :=
match r with
| Void => GenLeaf 1
| Epsilon => GenLeaf 2
| Atom a => GenNode 3 [GenLeaf (nat_of_ascii a)]
| Union r1 r2 => GenNode 4 [encode_regex r1; encode_regex r2]
| Concat r1 r2 => GenNode 5 [encode_regex r1; encode_regex r2]
| Star r' => GenNode 6 [encode_regex r']
end.
Fixpoint decode_regex (t : gen_tree nat) : option re :=
match t with
| GenLeaf 1 => Some Void
| GenLeaf 2 => Some Epsilon
| GenNode 3 [GenLeaf n] => Some (Atom (ascii_of_nat n))
| GenNode 4 [r1; r2] =>
match decode_regex r1, decode_regex r2 with
| Some r1', Some r2' => Some (Union r1' r2')
| _, _ => None
end
| GenNode 5 [r1; r2] =>
match decode_regex r1, decode_regex r2 with
| Some r1', Some r2' => Some (Concat r1' r2')
| _, _ => None
end
| GenNode 6 [r] =>
match decode_regex r with
| Some r' => Some (Star r')
| _ => None
end
| _ => None
end.
Theorem decode_encode_regex (r : re) : decode_regex (encode_regex r) = Some r.
Proof.
induction r; unfold decode_regex; unfold encode_regex;
eauto; fold decode_regex; fold encode_regex.
- rewrite ascii_nat_embedding. reflexivity.
- rewrite IHr1, IHr2. reflexivity.
- rewrite IHr1, IHr2. reflexivity.
- rewrite IHr. reflexivity.
Qed.
(** There are countably many regexes *)
Instance ReCountable : Countable re.
Proof.
apply inj_countable with (f := encode_regex) (g := decode_regex).
intros. apply decode_encode_regex.
Qed.
(** Concatenating a regex with itself n times *)
Fixpoint Concat_n (n : nat) (r : re) :=
match n with
| 0 => Epsilon
| S n' => Concat r (Concat_n n' r)
end.
(** Strong induction on lists *)
Lemma strong_induction {A} (P : list A -> Prop) :
(forall n, (forall m, length m < length n -> P m) -> P n) ->
forall n, P n.
Proof.
intros. apply H. induction n.
- intros. inversion H0.
- intros. apply H. intros.
apply IHn. eapply Nat.lt_le_trans.
apply H1. simpl in H0. lia.
Qed.