我们是少数学习Coq的人,我们正在尝试定义一个用于表示正则表达式的归纳谓词,该谓词表示一组序列。这似乎遇到了strictly positive
限制,因为我们允许not
作为运算符。not
通常不包含在正则表达式中,但包含在Brzozowski的正则表达式中,这是我们正在查看的正则表达式。当我们尝试使用Fixpoint重新定义正则表达式时,我们遇到了ill-formed
运算符的zero or more
限制。我们可以通过将正则表达式定义为归纳谓词和Fixpoint的组合来克服这些问题,但这感觉不对。
还有其他方法可以完全将我们的正则表达式定义为归纳谓词吗?
我们如何将Fixpoint和归纳谓词混合使用,是否存在问题,或者我们是否过于单纯?
这里是示例代码,注释中有解释和预期的错误:
Require Import List.
Import ListNotations.
(* We are defining our input alphabet for regular expressions as only two possible symbols *)
Inductive alphabet := a1 | a0.
Inductive regex :=
(* emptyset matches absolutely no strings *)
| emptyset : regex
(* lambda matches only the empty string *)
| lambda : regex
(* symbol matches only strings of length 1 containing the exact alphabet symbol *)
| symbol : alphabet -> regex
(* concat is used to build of regular expressions that can match longer strings *)
| concat : regex -> regex -> regex
(* zero or more, as you are familiar with from regular expressions *)
| star : regex -> regex
(* `nor` is a boolean operator, here is the truth table
P | Q | P `nor` Q
-----------------
T | T | F
T | F | F
F | T | F
F | F | T
*)
| nor : regex -> regex -> regex
.
(* We chose to include `nor`, since it can represent any possible boolean expression,
which is one of the selling points of Brzozowski's derivatives for regular expressions.
*)
Definition complement (r: regex) : regex :=
nor r r.
Definition and (r s: regex) : regex :=
nor (nor r r) (nor s s).
Definition or (r s: regex) : regex :=
nor (nor r s) (nor r s).
Definition xor (r s: regex) : regex :=
or (and r (complement s)) (and (complement r) s).
(* I matches all strings *)
Definition I: regex :=
complement (emptyset).
(* A regular expression denotes a set of sequences. *)
Definition seq := (list alphabet).
Definition seqs := seq -> Prop.
Definition in_set_of_sequences (ss: seqs) (s: seq): Prop := ss s.
Notation "p \in P" := (in_set_of_sequences P p) (at level 80).
(* Concatenation*. $(P.Q) = \{ s | s = p.q; p \in P, q \in Q \}$. *)
Inductive concat_seqs (P Q: seqs): seqs :=
| mk_concat: forall (s: seq),
(exists p q, p ++ q = s ->
p \in P /\
q \in Q
) ->
concat_seqs P Q s
.
(*
*Star*. $P^{*} = \cup_{0}^{\infty} P^n$ , where $P^2 = P.P$, etc.
and $P^0 = \lambda$, the set consisting of the sequence of zero length.
*)
Inductive star_seqs (R: seqs): seqs :=
| mk_star_zero : forall (s: seq),
s = [] -> star_seqs R s
| mk_star_more : forall (s: seq),
s \in (concat_seqs R (star_seqs R)) ->
star_seqs R s
.
(*
*Boolean function*. We shall denote any Boolean function of $P$ and $Q$ by $f(P, Q)$.
Of course, all the laws of Boolean algebra apply.
`nor` is used to emulate `f`, since nor can be used to emulate all boolean functions.
*)
Inductive nor_seqs (P Q: seqs): seqs :=
| mk_nor : forall s,
~(s \in P) /\ ~(s \in Q) ->
nor_seqs P Q s
.
(* Here we use a mix of Fixpoint and Inductive predicates to define the denotation of regular expressions.
This works, but it would be nicer to define it purely as an Inductive predicate.
*)
Fixpoint denote_regex (r: regex): seqs :=
match r with
| emptyset => fun _ => False
| lambda => fun xs => xs = []
| symbol y => fun xs => xs = [y]
| concat r1 r2 => concat_seqs (denote_regex r1) (denote_regex r2)
| star r1 => star_seqs (denote_regex r1)
| nor r1 r2 => nor_seqs (denote_regex r1) (denote_regex r2)
end.
(* Here we try to rewrite the denotation of a regex using a pure inductive predicate, but we get an error:
Non strictly positive occurrence of "ind_regex" in
"forall (s : seq) (P Q : regex),
s \in nor_seqs (ind_regex P) (ind_regex Q) -> ind_regex (nor P Q) s".
*)
Inductive ind_regex: regex -> seqs :=
| ind_emptyset (s: seq):
False ->
ind_regex emptyset s
| ind_lambda (s: seq):
s = [] ->
ind_regex lambda s
| ind_symbol (s: seq) (a: alphabet):
s = [a] ->
ind_regex (symbol a) s
| ind_concat (s: seq) (P Q: regex):
s \in (concat_seqs (ind_regex P) (ind_regex Q)) ->
ind_regex (concat P Q) s
| ind_star (s: seq) (R: regex):
s \in (star_seqs (ind_regex R)) ->
ind_regex (star R) s
| ind_nor (s: seq) (P Q: regex):
s \in (nor_seqs (ind_regex P) (ind_regex Q)) ->
ind_regex (nor P Q) s
.
(*
Here we try to define the denotation of a regex purely as a fixpoint, but we get an error:
Recursive definition of fix_regex is ill-formed.
In environment
fix_regex : regex -> seqs
r : regex
s : regex
xs : seq
x : alphabet
xs' : list alphabet
ys : list alphabet
zs : list alphabet
Recursive call to fix_regex has principal argument equal to "star s" instead of "s".
Recursive definition is:
"fun r : regex =>
match r with
| emptyset => fun _ : seq => False
| lambda => fun xs : seq => xs = []
| symbol y => fun xs : seq => xs = [y]
| concat s t => fun xs : seq => exists ys zs : list alphabet, xs = ys ++ zs /\ fix_regex s ys /\ fix_regex t zs
| star s =>
fun xs : seq =>
match xs with
| [] => True
| x :: xs' => exists ys zs : list alphabet, xs' = ys ++ zs /\ fix_regex s (x :: ys) /\ fix_regex (star s) zs
end
| nor _ _ => fun _ : seq => True
end".
*)
Fixpoint fix_regex (r: regex): seqs :=
match r with
| emptyset => fun _ => False
| lambda => fun xs => xs = []
| symbol y => fun xs => xs = [y]
| concat s t => fun xs => exists ys zs, xs = ys ++ zs /\ fix_regex s ys /\ fix_regex t zs
| star s => fun xs =>
match xs with
| [] => True
| (x::xs') => exists ys zs, xs' = ys ++ zs /\ fix_regex s (x::ys) /\ fix_regex (star s) zs
end
| _ => fun _ => True
end.
有几种可能的方法。
您可以定义相互依赖的多个归纳类型。
这里是一个(不完整的)示例。
Inductive match_regex : regex -> seq -> Prop :=
| match_lambda : match_regex lambda []
| match_symbol : forall a, match_regex (symbol a) [a]
| match_nor : forall r1 r2 s,
unmatch_regex r1 s -> unmatch_regex r2 s -> match_regex (nor r1 r2) s
with unmatch_regex : regex -> seq -> Prop :=
| unmatch_lambda : forall x xs, unmatch_regex lambda (x :: xs)
| unmatch_symbol : forall a b, a <> b -> unmatch_regex (symbol a) [b]
| unmatch_nor_l : forall r1 r2 s,
match_regex r1 s -> unmatch_regex (nor r1 r2) s
| unmatch_nor_r : forall r1 r2 s,
match_regex r2 s -> unmatch_regex (nor r1 r2) s
.
Definition alpha_eq_dec : forall (x y : alphabet), {x = y} + {x <> y}.
decide equality.
Defined.
Definition seq_eq_dec : forall (xs ys : seq), {xs = ys} + {xs <> ys} := list_eq_dec alpha_eq_dec.
Definition seq_eqb (xs ys : seq) : bool :=
if seq_eq_dec xs ys then true else false.
Inductive bool_regex : regex -> seq -> bool -> Prop :=
| bool_lambda : forall xs, bool_regex lambda xs (seq_eqb xs [])
| bool_symbol : forall a xs, bool_regex (symbol a) xs (seq_eqb xs [a])
| bool_nor : forall r1 r2 s b1 b2,
bool_regex r1 s b1 -> bool_regex r2 s b2 -> bool_regex (nor r1 r2) s (negb (b1 || b2)).