我正在尝试学习函数式编程,但很难用函数式表达文件解析任务。 假设我有一个具有以下格式的文本文件:
val_0: <--- "header"
key_0_0 <--- these keys should be set to the "header" or val0
key_0_1
key_0_2
...
...
val_n:
...
key_n_m
我怎样才能得到一个所有键都设置为其关联值的哈希表?
编辑:我的解决方案。有谁可以改进吗?
open Core.Std
(** [contains s1 s2] is [true] when the literal string [s2] occurs somewhere
    in [s1], and [false] otherwise. [s2] is matched verbatim, not as a
    regular expression. *)
let contains s1 s2 =
  let needle = Str.regexp_string s2 in
  match Str.search_forward needle s1 0 with
  | (_ : int) -> true
  | exception Not_found -> false
(** [read_db f] reads every line of the file [f] and returns a hash table
    that maps each non-header line to the most recent header line above it.

    A line is treated as a header when it contains [":"] anywhere. The header
    is stored verbatim (including the colon). Lines that appear before the
    first header are bound to the empty string. *)
let read_db f =
  let tbl = Caml.Hashtbl.create 123456 in
  let lines = In_channel.read_lines f in
  (* Header currently in effect; "" until the first header is seen. *)
  let src = ref "" in
  List.iter
    ~f:(fun g ->
      if contains g ":" then src := g else Caml.Hashtbl.add tbl g !src)
    lines;
  tbl
这是我的解决方案,仅供比较。
(** [line_opt ic] reads the next line from the input channel [ic]. It returns
    [Some line] when a line was read and [None] once the end of the file has
    been reached. *)
let line_opt ic =
  match input_line ic with
  | line -> Some line
  | exception End_of_file -> None
(** [fold_lines_in f init fn] opens the text file [fn] and folds [f] over its
    lines, first to last, starting from the accumulator [init]. It returns
    the final accumulator.

    The channel is always closed before this function returns, including
    when [f] or the underlying read raises.

    @raise Sys_error if [fn] cannot be opened. *)
let fold_lines_in f init fn =
  let ic = open_in fn in
  let rec go accum =
    (* The value case of [match ... with exception] is in tail position, so
       the loop runs in constant stack space. *)
    match input_line ic with
    | line -> go (f accum line)
    | exception End_of_file -> accum
  in
  (* [close_in_noerr] so that a failure while closing never masks the
     exception (or result) produced by the fold itself. *)
  Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () -> go init)
(** [hashtable_of_file fn] reads the file [fn] line by line and returns a
    hash table that binds every key line to the header in effect when the
    key was read.

    A header is a line whose last character is [':']; the colon is stripped
    before the header is used as a value. Key lines that appear before the
    first header are bound to the empty string. Empty lines are skipped. *)
let hashtable_of_file fn =
  let ht = Hashtbl.create 16 in
  (* Fold step: [label] is the current header; the result is the header to
     use for the following line. *)
  let itab label line =
    let len = String.length line in
    if len = 0 then
      (* Guard: indexing [line.[len - 1]] on an empty line would raise
         [Invalid_argument]. *)
      label
    else if line.[len - 1] = ':' then String.sub line 0 (len - 1)
    else begin
      Hashtbl.add ht line label;
      label
    end
  in
  (* Only the table matters; the final header is discarded. *)
  let (_ : string) = fold_lines_in itab "" fn in
  ht
更新
(修复了非尾递归折叠实现,抱歉。)
向未来挥手,因为这需要 OCaml 库功能,而这些功能在提出问题时尚不可用。
String.ends_with 是在 OCaml 4.13.0 中添加的,序列(Seq)则是在 OCaml 4.07.0 中添加的。
抛开文件读取部分不谈,我们可以完全不使用命令式的 Hashtbl,仅用 Map 和列表折叠来完成此操作。
我们将使用 fold_left 构建一个关联列表:折叠过程中保留当前键以及(逆序累积的)关联值列表;折叠结束后,把关联列表和其中每个子列表都反转回原始顺序,最后转换为 Map。
module SM = Map.Make (String)

(** [group_by_header lines] groups [lines] under the header line that
    precedes them and returns the result as a map from header to the list of
    lines that followed it, in their original order.

    A header is any line ending in [":"]; it is used as the key verbatim
    (colon included). Lines that appear before the first header are dropped.
    If the same header occurs more than once, the last occurrence wins. *)
let group_by_header lines =
  (* The head of [groups] is always the group currently being filled, so no
     separate "current key" state is needed. Values are accumulated in
     reverse and restored at the end. *)
  let add_line groups line =
    if String.ends_with ~suffix:":" line then (line, []) :: groups
    else
      match groups with
      | [] -> []
      | (header, values) :: rest -> (header, line :: values) :: rest
  in
  lines
  |> List.fold_left add_line []
  |> List.rev_map (fun (header, values) -> (header, List.rev values))
  |> List.to_seq
  |> SM.of_seq

let data = "val_0:\nhello\nworld\nval_1:\nfoo\nval_2:\nbar"

let grouped = data |> String.split_on_char '\n' |> group_by_header
如果我们对这个值调用 SM.bindings,就可以看到它确实得到了预期的结果:
[("val_0:", ["hello"; "world"]); ("val_1:", ["foo"]); ("val_2:", ["bar"])]