尾递归删除列表中重复的连续条目

Question

compress

删除列表中连续的重复整体：

assert (
  compress
    [ "a"; "a"; "a"; "a"; "b"; "c"; "c"; "a"; "a"; "d"; "e"; "e"; "e"; "e" ]
  = [ "a"; "b"; "c"; "a"; "d"; "e" ])

我得出以下解决方案：

let head = function x :: _ -> Some x | [] -> None

let compress list =
  let rec fn old_list new_list =
    match (old_list, new_list) with
    | h :: t, _ ->
        fn t (if Some h = head new_list then new_list else h :: new_list)
    | _ -> new_list
  in
  List.rev (fn list [])
;;

网站提供的示例解决方案如下：

let rec compress = function
    | a :: (b :: _ as t) -> if a = b then compress t else a :: compress t
    | smaller -> smaller;;

起初，我认为我的解决方案更有效，因为它是尾递归的，而提供的解决方案显然不是（要求我们将

中的

a :: compress t

保留在堆栈上）。但是，当我测试我的代码是否是尾递归时：

assert (
  (compress [@tailcall])
    [ "a"; "a"; "a"; "a"; "b"; "c"; "c"; "a"; "a"; "d"; "e"; "e"; "e"; "e" ]
  = [ "a"; "b"; "c"; "a"; "d"; "e" ])

它给了我一个警告，说它不是尾递归。为什么？

根据我的理解，我的解决方案不需要在堆栈上保留任何状态，这应该使其成为尾递归。

编辑还尝试直接通过

[@tailcall]

将

fn

应用于

List.rev ((fn [@tailcall]) list [])

，得到相同的警告。

Answer 1

当你做出断言时，

compress

不在尾部位置，

(=)

是。这对您的

compress

函数实现没有影响。

# assert (
  ((=) [@tailcall]) 
    (compress ["a"; "a"; "b"]) 
    ["a"; "b"]
);;
- : unit = ()

同样，在表达式

List.rev ((fn [@tailcall]) list [])

中，

fn

的调用也不在尾部调用位置。

您可以通过尝试进行测试：

let compress list =
  let rec fn old_list new_list =
    match (old_list, new_list) with
    | h :: t, _ ->
        (fn[@tailcall]) t (if Some h = head new_list then new_list else h :: new_list)
    | _ -> new_list
  in
  List.rev (fn list [])

另请注意，尾递归并不总是意味着“更高效”。尾递归函数通常效率较低，但它们可以用于大数据而不会出现堆栈溢出。如果您正在处理可能导致此问题的数据，则表明您可能需要重新评估您正在使用的数据结构。从 OCaml 4.14 开始，我们还可以使用 compress 使

tail_mod_cons

尾递归。

let[@tail_mod_cons] rec compress = 
  function
  | a :: (b :: _ as t) -> 
      if a = b then compress t 
      else a :: compress t
  | smaller -> smaller;;

或者，您可以通过连续传递来实现这一点。

let compress lst =
  let rec aux k = function
    | ([] | [_]) as lst -> k lst
    | a::(b::_ as t) when a = b -> aux k t
    | a::t -> aux (fun i -> k (a :: i)) t
  in
  aux Fun.id lst

作为尾递归的另一个有趣的替代方案，您可以编写一个

compress

 函数来生成

sequence

。为了做到这一点，我们的

aux

函数需要接受一个参数来跟踪最后看到的值。因为一开始不会看到最后一个值，所以 option 类型是有意义的。

# let compress lst =
    let rec aux lst last_seen () =
      match lst, last_seen with
      | [], _ -> Seq.Nil
      | x::xs, Some x' when x = x' -> aux xs last_seen ()
      | x::xs, _ -> Seq.Cons (x, aux xs (Some x)) 
    in
    aux lst None;;
val compress : 'a list -> 'a Seq.t = <fun>
# compress [1;1;1;3;3;4;6;6;7;4] 
  |> Seq.take 3 
  |> List.of_seq;;
- : int list = [1; 3; 4]

当然，直接在序列上工作作为输入也可能会有所帮助，允许它在不首先转换为列表的情况下处理其他数据类型。翻译很简单。

# let compress_seq seq =
    let rec aux seq last_seen () =
      match seq (), last_seen with
      | Seq.Nil, _ -> Seq.Nil
      | Seq.Cons (x, xs), Some x' when x = x' -> aux xs last_seen ()
      | Seq.Cons (x, xs), _ -> Seq.Cons (x, aux xs (Some x)) 
    in
    aux seq None;;
val compress_seq : 'a Seq.t -> 'a Seq.t = <fun>
# [1;1;1;3;3;4;6;6;7;4] 
  |> List.to_seq
  |> compress_seq
  |> Seq.take 3
  |> List.of_seq;;
- : int list = [1; 3; 4]

好吧，我明白了。

Answer 2

为了测试函数是否是尾递归的，我决定尝试通过引发堆栈溢出来破坏它们：

`let _ = compress (List.init 10_000_000 (fun x -> Some x))`

对于我的实现来说，这工作得很好。另一方面，提供的解决方案会导致 segvault，我假设它来自堆栈溢出：

[1]    70387 segmentation fault  ./a.out

所以我们可以得出结论，我的实现确实是尾递归，而另一个则不是。

2）哪一个更快？

我使用以下方法来测试这两个功能的速度：

`let _ = let l = List.init 250000 (fun x -> x) in let t = Sys.time () in let f = compress l in Printf.printf "Execution time: %fs\n" (Sys.time () -. t); f`

注意，

250000

是我的机器在堆栈溢出之前的限制。

对于尾递归实现，大约是

0.018s

。

对于非尾递归实现，大约是

0.013s

。

看来函数调用的开销不足以使非尾递归实现比尾递归实现慢，尾递归需要遍历列表两次。

我还应该指出，这也是最坏的情况，我们的输入列表

List.init 250000 (fun x -> x)

是一个包含所有唯一元素的列表。非尾递归实现所需的堆栈空间量与

唯一元素

的数量成正比，而不是列表中元素的数量，因为在

if a = b then compress t else a :: compress t

的左分支中，我们不使用任何堆栈空间。我通过将列表更改为仅保存常量并使列表变得更大List.init 100000000 (fun x -> 0)来测试这一点，并且非尾递归实现不再出现段错误。看来 OCaml 编译器足够聪明，可以知道

if a = b then compress t else a :: compress t

的左分支是否是尾递归，并且仅在命中右分支时才分配堆栈。

尾递归删除列表中重复的连续条目

问题描述投票：0回答：2

2个回答

`let _ = compress (List.init 10_000_000 (fun x -> Some x))`

`let _ = let l = List.init 250000 (fun x -> x) in let t = Sys.time () in let f = compress l in Printf.printf "Execution time: %fs\n" (Sys.time () -. t); f`

最新问题

尾递归删除列表中重复的连续条目

问题描述 投票：0回答：2

2个回答

let _ = compress (List.init 10_000_000 (fun x -> Some x))

let _ = let l = List.init 250000 (fun x -> x) in let t = Sys.time () in let f = compress l in Printf.printf "Execution time: %fs\n" (Sys.time () -. t); f

最新问题

问题描述投票：0回答：2

`let _ = compress (List.init 10_000_000 (fun x -> Some x))`

`let _ = let l = List.init 250000 (fun x -> x) in let t = Sys.time () in let f = compress l in Printf.printf "Execution time: %fs\n" (Sys.time () -. t); f`