如何从 OCaml 中的文本文件中逐行读取?

问题描述 投票:0回答:12

这就是我到目前为止所写的代码。这难道不就是所需要的全部吗?我不断收到错误“Error: Unbound module Std”

(* [r file] opens [file] and hands the channel to [Std.input_list].
   [Std] is not a standard-library module (an answer further down attributes
   it to ExtLib), which is why the compiler reports it as unbound.
   NOTE(review): presumably [Std.input_list] returns the channel's lines as a
   list -- confirm against ExtLib. The channel is never closed here. *)
let r file =
    let chan = open_in file in
    Std.input_list (chan)
ocaml
12个回答
35
投票

仅使用标准库的命令式解决方案:

(** [read_file filename] returns the lines of [filename] in file order,
    without their line terminators.

    The channel is closed on every exit path, including when reading fails
    with an exception other than [End_of_file].

    @raise Sys_error if the file cannot be opened or read. *)
let read_file filename =
  let chan = open_in filename in
  let lines = ref [] in
  Fun.protect
    ~finally:(fun () -> close_in_noerr chan)
    (fun () ->
      try
        (* [input_line] raises [End_of_file] once the channel is exhausted;
           that exception is the only way out of this loop. *)
        while true do
          lines := input_line chan :: !lines
        done;
        assert false
      with End_of_file ->
        (* Lines were consed in reverse order; restore file order. *)
        List.rev !lines)
;;

如果您有 Batteries-included 库,您可以将文件读入 Enum.t 并按如下方式迭代它:

(* Fragment relying on the Batteries library: [File.lines_of] and [Enum.iter]
   are not standard-library functions. [filename] must already be bound, and
   the body of the callback is left as a placeholder comment. *)
let filelines = File.lines_of filename in
Enum.iter ( fun line -> (*Do something with line here*) ) filelines

19
投票

如果您安装了 OCaml Core 库,那么就很简单:

open Core.Std
(* [r file] returns the result of [In_channel.read_lines file]. Because
   [Core.Std] is opened above, [In_channel] here is the one Core provides.
   NOTE(review): recent Core releases no longer ship the [Core.Std] submodule
   ([open Core] is used instead) -- confirm against the installed version. *)
let r file = In_channel.read_lines file

如果您安装了

corebuild
,那么您可以用它来编译您的代码:

corebuild filename.byte

如果您的代码位于名为

filename.ml
的文件中。

如果您没有 OCaml Core,或者不想安装它或其他标准库实现,那么您当然可以使用普通 OCaml 的标准库来实现。在 Pervasives 模块中定义了一个函数 input_line;该模块会在所有 OCaml 程序中自动打开(即,无需通过模块名称限定即可访问其所有定义)。该函数接受 in_channel 类型的值,并返回从通道读取的一行。使用这个函数,您可以实现所需的功能:

(** [read_lines name] opens [name], reads it one line at a time with
    [input_line] and returns the lines in file order. The channel is closed
    when [End_of_file] is reached. *)
let read_lines name : string list =
  let ic = open_in name in
  (* [collected] holds the lines read so far, most recent first. *)
  let rec collect collected =
    match input_line ic with
    | line -> collect (line :: collected)
    | exception End_of_file ->
        close_in ic;
        List.rev collected
  in
  collect []

此实现使用递归,对于 OCaml 编程来说更加自然。


6
投票

这是一个简单的递归解决方案,它不会累积行或使用外部库,而是让您读取一行,使用函数处理它,递归读取下一行直到完成,然后干净地退出。 exit 函数关闭打开的文件句柄并向调用程序发出成功信号。

(* [read_lines file process] opens [file] and applies [process] to each line,
   in order, without accumulating them.

   At end of file it calls [exit 0], which terminates the whole program with
   a success status; the function therefore never returns to its caller. *)
let read_lines file process =
  let in_ch = open_in file in
  let rec consume () =
    (* Only [input_line] is guarded: an [End_of_file] raised by [process]
       itself is not intercepted here. *)
    match input_line in_ch with
    | line ->
        process line;
        consume ()
    | exception End_of_file -> exit 0
  in
  consume ();;

read_lines some_file print_endline;;

3
投票

这将读取文件的行并打印每一行:

open Core.Std

(* Prints one line, prefixed with a fixed label. *)
let handle_line line = printf "Your line: %s \n" line

(* Entry point: loads every line of the input file, then prints them in
   order through [handle_line]. *)
let () =
  "./file_to_read.txt"
  |> In_channel.read_lines
  |> List.iter ~f:handle_line

2
投票

这是使用 Scanf 的递归解决方案:

(** [read_all_lines file_name] returns every line of [file_name] in file
    order, without line terminators (LF, CRLF or a lone CR).

    A last line that is not followed by a newline is returned as well.

    @raise Sys_error if the file cannot be opened or read. *)
let read_all_lines file_name =
  let in_channel = open_in file_name in
  (* [Scanf.fscanf] on a raw channel is deprecated and absent from OCaml 5.
     Wrap the channel in one scanning buffer and reuse it for every read so
     that buffered input is never lost between calls. *)
  let ib = Scanf.Scanning.from_channel in_channel in
  let rec read_recursive lines =
    if Scanf.Scanning.end_of_input ib then List.rev lines
    else
      (* Read the (possibly empty) line body, then skip at most one CR and at
         most one LF. Both skips may match nothing, so reaching end of file
         right after the body does not discard the line. *)
      let line = Scanf.bscanf ib "%[^\r\n]%_1[\r]%_1[\n]" (fun x -> x) in
      read_recursive (line :: lines)
  in
  Fun.protect
    ~finally:(fun () -> close_in_noerr in_channel)
    (fun () -> read_recursive [])
;;

用途:

let all_lines = read_all_lines "input.txt";;

但是,我更喜欢逐行流式传输:

(** [make_reader file_name] opens [file_name] and returns a function that
    yields the next line on each call: [Some line] while input remains, then
    [None] forever once the end of the file has been reached.

    The file is closed the first time the end of input is observed. A last
    line without a trailing newline is still returned.

    @raise Sys_error if the file cannot be opened. *)
let make_reader file_name =
  let in_channel = open_in file_name in
  (* [Scanf.fscanf] on a raw channel is deprecated and absent from OCaml 5.
     A single scanning buffer is shared by all calls so buffered input
     survives from one call to the next. *)
  let ib = Scanf.Scanning.from_channel in_channel in
  let closed = ref false in
  let read_next_line = fun () ->
    if !closed then
      None
    else if Scanf.Scanning.end_of_input ib then begin
      close_in_noerr in_channel;
      closed := true;
      None
    end
    else
      (* Line body, then at most one CR and at most one LF; both skips may
         match nothing, which keeps an unterminated final line. *)
      Some (Scanf.bscanf ib "%[^\r\n]%_1[\r]%_1[\n]" (fun x -> x)) in
  read_next_line;;

用途:

let read_next = make_reader "input.txt";;
let next_line = read_next ();;

并且可能有点锦上添花:

(** A line reader: [read_next ()] yields the next line, or [None] once the
    underlying file is exhausted. *)
type reader = {read_next : unit -> string option};;

(** [make_reader file_name] opens [file_name] and wraps it in a {!reader}.

    [read_next] returns [Some line] while input remains and [None] on every
    call after the end of the file. The file is closed the first time the end
    of input is observed. A last line without a trailing newline is still
    returned.

    @raise Sys_error if the file cannot be opened. *)
let make_reader file_name =
  let in_channel = open_in file_name in
  (* [Scanf.fscanf] on a raw channel is deprecated and absent from OCaml 5.
     A single scanning buffer is shared by all calls so buffered input
     survives from one call to the next. *)
  let ib = Scanf.Scanning.from_channel in_channel in
  let closed = ref false in
  let read_next_line = fun () ->
    if !closed then
      None
    else if Scanf.Scanning.end_of_input ib then begin
      close_in_noerr in_channel;
      closed := true;
      None
    end
    else
      (* Line body, then at most one CR and at most one LF; both skips may
         match nothing, which keeps an unterminated final line. *)
      Some (Scanf.bscanf ib "%[^\r\n]%_1[\r]%_1[\n]" (fun x -> x)) in
  {read_next = read_next_line};;

用途:

let r = make_reader "input.txt";;
let next_line = r.read_next ();;

希望这有帮助!


2
投票

使用 Scanf“字符串指示”和零宽度字符从文件中读取行的另一种样式。这就像传统的命令式风格。

open Scanf 
open Printf

(* little helper functions *)
let id x = x
let const x = fun _ -> x

(* [read_line file] reads characters from the scanning channel [file] up to
   the next newline, consumes that newline, and returns the text before it. *)
let read_line file = bscanf file "%s@\n" id

(* [is_eof file] peeks at the next character without consuming it (null
   field width) and reports [true] when the scanner raises [End_of_file]. *)
let is_eof file = try bscanf file "%0c" (const false) with End_of_file -> true

(* Prints every line of the file, one per output line. *)
let () =
  (* [fscanf] on a plain [in_channel] is deprecated and absent from OCaml 5,
     so the file is opened directly as a scanning channel and read with
     [bscanf]. *)
  let file = Scanning.open_in "/path/to/file" in
  Fun.protect
    ~finally:(fun () -> Scanning.close_in file)
    (fun () ->
      while not (is_eof file) do
        let s = read_line file in
        (* do something with s *)
        printf "%s\n" s
      done)

注意:

  1. read_line 会忽略一个尾随的 \n,所以如果文件的最后一个字符是 \n,您可能会错过最后一个空行。
  2. 使用Scanf时,由于缓冲作用,请勿在同一通道上混合其他低级别读取,否则会导致奇怪的行为。

2
投票

这只是将整个文件加载到一个大字符串中,但稍后您可以随时将其拆分为一个列表。

(** [read_file path] returns the whole content of [path] as one string.

    The channel is closed before returning, whether reading succeeds or
    raises.

    @raise Sys_error if the file cannot be opened or read. *)
let read_file path =
  let channel = open_in path in
  Fun.protect
    ~finally:(fun () -> close_in_noerr channel)
    (fun () ->
      let buffer = Buffer.create 1024 in
      (* [Buffer.add_channel] raises [End_of_file] when fewer than the
         requested number of characters remain, after storing whatever it
         could read; that exception therefore marks the end of the copy.
         Matching on the exception keeps the recursive call a tail call. *)
      let rec go () =
        match Buffer.add_channel buffer channel 1024 with
        | () -> go ()
        | exception End_of_file -> Buffer.contents buffer
      in
      go ())
;;

2
投票

我知道这是一个老问题,但是还有另一个有趣的递归变体(至少在我看来),没有库或逆转:

(* [read_lines filename] returns the lines of [filename] in file order,
   building the list directly: no accumulator and no [List.rev].

   The recursive call is not in tail position (the cons happens after it
   returns, inside the [try]), so one pending [loop] call is kept per line
   until end of file. *)
let read_lines filename =
  let f = open_in filename in
  let rec loop () =
    try
      (* Bind the line before recursing: written as a single cons expression,
         the order in which the two operands are evaluated is not specified
         by the language. *)
      let next = input_line f in
      next :: loop ()
    with End_of_file ->
      (* The channel is closed only on this end-of-file path. *)
      close_in f;
      []
  in loop ()

请注意,let next = ... 这一行很重要,因为它确保在进行递归调用之前先读取下一行。


2
投票

2022年3月28日发布的OCaml版本4.14.0的标准库中有新模块

In_channel
Out_channel
参考

现在,我们可以轻松地从文件中读取内容,如下所示:

# let conx = In_channel.with_open_text "example.dat" In_channel.input_all;;
val conx : string = "hey!\nworld"

说,

example.dat
的内容:

hey!
world

1
投票

在 @alwaysday1 的解决方案基础上,您可以创建一个简单的函数,使用 OCaml 4.14.0 中的 In_channel 以及 Str 来读取文件的所有行,如下所示:

(** [read_lines file] returns the lines of [file] in file order, without
    their terminating newline. Empty lines are kept, including one at the
    very start of the file.

    Needs the [str] library to be linked.

    @raise Sys_error if the file cannot be opened or read. *)
let read_lines file =
  let pieces =
    In_channel.with_open_text file In_channel.input_all
    (* [Str.split] ignores a delimiter at the start of the text, which would
       drop a leading empty line; [Str.split_delim] reports it. *)
    |> Str.split_delim (Str.regexp "\n")
  in
  (* A final newline ends the last line rather than starting a new one, so
     discard the empty piece [split_delim] reports after it. *)
  match List.rev pieces with
  | "" :: rest -> List.rev rest
  | _ -> pieces

这将生成文件中所有行的列表。


0
投票

Std.input_list
显然需要 Extlib,您应该将其安装在您的系统上(Debian 系统上的
libextlib-ocaml
libextlib-ocaml-dev
)。


0
投票

回复晚了,不过好在OCaml标准库正在不断完善。从 OCaml 5.1 开始,人们可以执行以下操作:

(** [read_lines file_name] opens [file_name] in text mode, reads all of its
    lines with [In_channel.input_lines], and closes the channel before
    returning them. *)
let read_lines (file_name : string) : string list =
  In_channel.with_open_text file_name (fun ic -> In_channel.input_lines ic)

In_channel
模块包含几个对文件内容读取有用的函数。

© www.soinside.com 2019 - 2024. All rights reserved.