在 OCaml 中将文件解析为哈希表

问题描述 投票:0回答:2

我正在尝试学习函数式编程,但很难用函数式表达文件解析任务。 假设我有一个具有以下格式的文本文件:

val_0:       <--- "header"
key_0_0      <--- these keys should be set to the "header" or val0
key_0_1
key_0_2
...
...
val_n:     
...
key_n_m

我怎样才能得到一个所有键都设置为其关联值的哈希表?

编辑:我的解决方案。有谁可以改进吗?

open Core.Std

(* [contains s1 s2] is [true] when [s2] occurs as a literal substring of
   [s1], and [false] otherwise. [s2] is matched verbatim (it is compiled with
   [Str.regexp_string], so regex metacharacters have no special meaning). *)
let contains s1 s2 =
  let needle = Str.regexp_string s2 in
  match Str.search_forward needle s1 0 with
  | _ -> true
  | exception Not_found -> false


(* [read_db f] reads the file [f] line by line and returns a hash table that
   maps every non-header line to the most recent header line seen before it.

   A line is treated as a header when it contains a ':' anywhere. Lines that
   appear before the first header are bound to the empty string.

   NOTE(review): the header is stored exactly as read, i.e. including the
   ':' -- strip it here if callers expect the bare name. *)
let read_db f =
  let tbl = Caml.Hashtbl.create 123456 in
  let lines = In_channel.read_lines f in
  (* Header currently in effect; updated each time a header line is met. *)
  let src = ref "" in
  List.iter lines ~f:(fun g ->
      if contains g ":" then src := g
      else Caml.Hashtbl.add tbl g !src);
  tbl
parsing ocaml
2个回答
2
投票

这是我的解决方案,仅供比较。

(* [line_opt ic] reads the next line from the input channel [ic].
   Returns [Some line] when a line is available and [None] once the end of
   the channel has been reached. *)
let line_opt ic =
  match input_line ic with
  | line -> Some line
  | exception End_of_file -> None

(* [fold_lines_in f init fn] opens the file named [fn] and folds [f] over its
   lines from first to last, starting from [init]:
   [f (... (f (f init l1) l2) ...) ln].

   The channel is closed before returning, including when [f] (or reading)
   raises; the exception is then propagated to the caller.

   @raise Sys_error if [fn] cannot be opened. *)
let fold_lines_in f init fn =
  let ic = open_in fn in
  (* Tail-recursive: the recursive call sits in a value branch of the match,
     so arbitrarily long files do not grow the stack. *)
  let rec go accum =
    match input_line ic with
    | line -> go (f accum line)
    | exception End_of_file -> accum
  in
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () -> go init)

(* [hashtable_of_file fn] reads the file named [fn] and returns a hash table
   mapping each key line to the header it appears under.

   A line whose last character is ':' is a header; the header's name is the
   line without that trailing ':'. Every other non-empty line is a key and is
   bound to the current header name. Keys that appear before any header are
   bound to the empty string. Empty lines are ignored. *)
let hashtable_of_file fn =
  let ht = Hashtbl.create 16 in
  (* Fold step: takes the current header name and a line, and returns the
     header name in effect for the next line. *)
  let itab label line =
    let len = String.length line in
    if len = 0 then
      (* Guard: indexing [line.[len - 1]] on an empty line would raise
         [Invalid_argument]. *)
      label
    else if line.[len - 1] = ':' then String.sub line 0 (len - 1)
    else begin
      Hashtbl.add ht line label;
      label
    end
  in
  (* The final header name is not needed; only the table is. *)
  let (_ : string) = fold_lines_in itab "" fn in
  ht

更新

(修复了非尾递归折叠实现,抱歉。)


0
投票

向未来挥手,因为这需要 OCaml 库功能,而这些功能在提出问题时尚不可用。

`String.ends_with` 是在 OCaml 4.13.0 中添加的,序列(`Seq`)则是在 OCaml 4.07.0 中添加的。

抛开文件读取部分不谈,我们可以不使用命令式的 `Hashtbl`,而只用 `Map` 和列表折叠来完成这项任务。

我们将使用 `fold_left` 构建一个关联列表:折叠过程中保留当前键以及按逆序累积的值列表;折叠结束后,把关联列表和其中每个值列表都反转回原始顺序,最后转换为 `Map`:

module SM = Map.Make (String)

(* Sample input: header lines end with ':' and are followed by their values. *)
let data = "val_0:\nhello\nworld\nval_1:\nfoo\nval_2:\nbar"

let data' = String.split_on_char '\n' data

(* Map from each header line (kept verbatim, including its trailing ':') to
   the list of lines that follow it, in their original order.

   The fold builds an association list with the most recent header at the
   front and each value list in reverse; the head of that list doubles as the
   "current header", so no separate state is carried. Lines that appear
   before the first header are dropped. *)
let result =
  data'
  |> List.fold_left
       (fun groups line ->
         if String.ends_with ~suffix:":" line then (line, []) :: groups
         else
           match groups with
           | [] -> groups
           | (key, values) :: rest -> (key, line :: values) :: rest)
       []
  |> List.map (fun (k, v) -> (k, List.rev v))
  |> List.rev
  |> List.to_seq
  |> SM.of_seq

如果对得到的这个值运行 `SM.bindings`,可以看到结果符合预期:

[("val_0:", ["hello"; "world"]); ("val_1:", ["foo"]); ("val_2:", ["bar"])]
© www.soinside.com 2019 - 2024. All rights reserved.