我有一个文本文件,想编写一个函数来读取该文件并返回一个元组列表,其中每个元组包含:单词(string)、单词所在的行号(int),以及该单词最后一个字符的位置(int)。输入示例:
example of the first line
followed by the second line
输出示例:
[
("example",1,8);
("of",1,11);
("the",1,15);
("first",1,21);
("line",1,26);
("followed",2,13);
("by",2,16);
("the",2,20);
("second",2,27);
("line",2,32)
]
您要找的函数大致如下:
(** [read filename] reads the text file [filename] and returns one
    [(word, line, last)] triple per space-separated word, in reading order.
    [line] is the 1-based line number and [last] is the 0-based index of the
    word's last character within its line. *)
let read filename =
  In_channel.read_lines filename
  |> List.mapi ~f:(fun line data ->
    String.split data ~on:' '
    |> List.fold_map ~init:0 ~f:(fun start piece ->
      let stop = start + String.length piece in
      (* [stop + 1] skips the single space that follows every piece. *)
      stop + 1, (piece, line + 1, stop - 1))
    |> snd
    (* Leading, trailing or consecutive spaces make [String.split] return
       empty pieces. They must advance the column (done above) but they are
       not words, so they are dropped here. *)
    |> List.filter ~f:(fun (piece, _, _) -> not (String.is_empty piece)))
  |> List.concat
以下是如何使用它。首先安装依赖项,
opam install dune stdio merlin
接下来,设置您的项目,
dune init exe readlines --libs=base,stdio
然后在您最喜欢的编辑器中打开
readlines.ml
并将其内容替换为以下内容,
open Base
open Stdio
(** [read filename] reads the text file [filename] and returns one
    [(word, line, last)] triple per space-separated word, in reading order.
    [line] is the 1-based line number and [last] is the 0-based index of the
    word's last character within its line. *)
let read filename =
  In_channel.read_lines filename
  |> List.mapi ~f:(fun line data ->
    String.split data ~on:' '
    |> List.fold_map ~init:0 ~f:(fun start piece ->
      let stop = start + String.length piece in
      (* [stop + 1] skips the single space that follows every piece. *)
      stop + 1, (piece, line + 1, stop - 1))
    |> snd
    (* Leading, trailing or consecutive spaces make [String.split] return
       empty pieces. They must advance the column (done above) but they are
       not words, so they are dropped here. *)
    |> List.filter ~f:(fun (piece, _, _) -> not (String.is_empty piece)))
  |> List.concat
(** [print triples] writes each [(word, line, position)] triple to standard
    output as [(word,line,position)], one per output line. *)
let print triples =
  let show (word, line_number, last) =
    printf "(%s,%d,%d)\n" word line_number last
  in
  List.iter triples ~f:show
(** [main filename] reads the words of [filename] with [read] and prints them
    with [print]. *)
let main filename =
  filename |> read |> print
(* Program entry point: exactly one command-line argument (the input file) is
   expected after the program name; anything else is rejected. *)
let () =
  match Sys.get_argv () with
  | [| _program; path |] -> main path
  | _ -> failwith "expects one argument: filename"
要运行和测试,请创建一个示例输入,例如名为
test.txt
的文件
example of the first line
followed by the second line
(确保最后一行后面有换行符)
现在你可以运行它了,
dune exec ./readlines.exe test.txt
结果应该如下,
(example,1,6)
(of,1,9)
(the,1,13)
(first,1,19)
(line,1,24)
(followed,2,7)
(by,2,10)
(the,2,14)
(second,2,21)
(line,2,26)
(注意,我是从 0 开始计算位置,而不是从 1 开始)。
您也可以在 utop 中交互运行此代码,但您需要安装
base
和 stdio
并将它们加载到解释器中,使用
#require "base";;
#require "stdio";;
如果您不使用
utop
而是使用默认的 OCaml 顶层,则还需要安装 ocamlfind (opam install ocamlfind
) 并执行
#use "topfind";;
#require "base";;
#require "stdio";;
如果您只想使用标准库来处理字符串,可以使用
String.split_on_char
以及应用于每行的其他一些内容来执行您想要的操作。
(* Reads the first line of a file and evaluates to the list of its words as
   [(line_number, word, last)] triples, where [last] is the 1-based column of
   the word's last character. Only the first line is processed, so the line
   number is always 1. *)
let ic = open_in (*your file name*) in
(* Close the channel whether or not [input_line] raises. *)
let first_line =
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () -> input_line ic)
in
let words = String.split_on_char ' ' first_line in
(* [consumed] is the number of characters of the line already accounted for,
   separators included. *)
let rec aux consumed = function
  | [] -> []
  (* An empty piece means two separators were adjacent (or the line starts
     or ends with a space): only the separator itself is consumed. *)
  | "" :: rest -> aux (consumed + 1) rest
  | word :: rest ->
    let last = consumed + String.length word in
    (* [last + 1] also counts the space that follows the word; without it
       every later position would be short by one per preceding word. *)
    (1, word, last) :: aux (last + 1) rest
in
aux 0 words;;
正如ivg所说,您可以用
List.fold_left
替换 aux 函数:
(* Left-fold variant: reads the first line of a file and evaluates to the
   list of its words as [(line_number, word, last)] triples, where [last] is
   the 1-based column of the word's last character. Only the first line is
   processed, so the line number is always 1. *)
let ic = open_in (*your file name*) in
(* Close the channel whether or not [input_line] raises. *)
let first_line =
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () -> input_line ic)
in
let words = String.split_on_char ' ' first_line in
let _, rev_words =
  List.fold_left
    (fun (consumed, acc) piece ->
      match piece with
      (* Empty piece: adjacent separators, only the separator is consumed. *)
      | "" -> (consumed + 1, acc)
      | word ->
        let last = consumed + String.length word in
        (* [last + 1] also counts the space that follows the word. *)
        (last + 1, (1, word, last) :: acc))
    (0, []) words
in
(* Words were accumulated newest-first; restore reading order. *)
List.rev rev_words;;
下面的版本不包含文件 I/O 部分,但能正确处理单词之间的多个空白字符(包括制表符)。使用
fold_left
实现,供 OCaml 新手学习消遣。
(** [words_with_last_index line] splits [line] on runs of spaces and tabs and
    returns each word paired with the 0-based index of its last character,
    in left-to-right order. Leading, trailing and repeated blanks produce no
    entries; an empty or all-blank line yields [[]]. *)
let words_with_last_index line =
  (* Pushes the word spanning [first, stop) onto [acc], if one is open. *)
  let flush word_start stop acc =
    match word_start with
    | None -> acc
    | Some first -> (String.sub line first (stop - first), stop - 1) :: acc
  in
  let word_start, rev_words =
    line
    |> String.to_seqi
    |> Seq.fold_left
         (fun (word_start, acc) (pos, ch) ->
           match ch, word_start with
           (* A blank closes the current word, if any. *)
           | (' ' | '\t'), _ -> (None, flush word_start pos acc)
           (* First character of a new word: remember where it starts. *)
           | _, None -> (Some pos, acc)
           | _, Some _ -> (word_start, acc))
         (None, [])
  in
  (* The last word may run to the end of the line; words were consed in
     reverse, so restore reading order. *)
  List.rev (flush word_start (String.length line) rev_words)
(** [parse_lines text] splits [text] on ['\n'] and returns, for every word of
    every line, a [(word, line_number, last)] triple. [line_number] is
    1-based and [last] is the index reported by [words_with_last_index]. *)
let parse_lines text =
  let tag_line index line =
    let line_number = index + 1 in
    List.map
      (fun (word, last) -> (word, line_number, last))
      (words_with_last_index line)
  in
  List.concat (List.mapi tag_line (String.split_on_char '\n' text))