久々OCaml

引き続き練習問題。

# (* Input token: character data, an opening tag, or a closing tag; each
     constructor carries a string. *)
  type token = PCDATA of string | Open of string | Close of string;;
type token = PCDATA of string | Open of string | Close of string
# (* Tree with tags of type 'a and leaf data of type 'b: [XLf] is a leaf with
     optional data, [XBr] is a branch holding a tag and a list of children. *)
  type ('a, 'b) xml = XLf of 'b option | XBr of 'a * ('a, 'b) xml list;;
type ('a, 'b) xml = XLf of 'b option | XBr of 'a * ('a, 'b) xml list

token型のリストからxml型の値を得る関数を作る。とりあえずエラー処理は無視。

# let xml_of_tokens tokens =
  (* [parse_children ts] reads sibling nodes from [ts] until a [Close] token
     or the end of the input, and returns them together with the tokens that
     were not consumed.
     Error handling is deliberately omitted: the name carried by a [Close]
     token is not compared with the element being closed. *)
  let rec parse_children = function
    | [] -> ([], [])
    | PCDATA str :: rest ->
        let (siblings, rest') = parse_children rest in
        (XLf (Some str) :: siblings, rest')
    | Open tag :: rest ->
        let (children, rest') = parse_children rest in
        (* An element without any content is represented by one [XLf None]. *)
        let children =
          (match children with
           | [] -> [XLf None]
           | _ :: _ -> children)
        in
        let (siblings, rest'') = parse_children rest' in
        (XBr (tag, children) :: siblings, rest'')
    | Close _ :: rest -> ([], rest)
  in
  (* Only the first top-level node is returned; an input that yields no node
     gives [XLf None]. *)
  match parse_children tokens with
  | ([], _) -> XLf None
  | (xml :: _, _) -> xml;;
val xml_of_tokens : token list -> (string, string) xml = <fun>

こんな感じ。

# let list = [Open "a"; Open "b"; Close "b"; Open "c"; PCDATA "Hello"; Close "c"; Close "a"];;
val list : token list =
  [Open "a"; Open "b"; Close "b"; Open "c"; PCDATA "Hello"; Close "c";
   Close "a"]
# xml_of_tokens list;;
- : (string, string) xml =
XBr ("a", [XBr ("b", [XLf None]); XBr ("c", [XLf (Some "Hello")])])
# let list = [Open "a"; Open "a"; Open "a"; Close "a"; PCDATA "hage"; PCDATA "hoge"; Open "c"; PCDATA "hoge"; Close "c"; Close "a"; Open "b"; Close "b"; Close "a"];;
val list : token list =
  [Open "a"; Open "a"; Open "a"; Close "a"; PCDATA "hage"; PCDATA "hoge";
   Open "c"; PCDATA "hoge"; Close "c"; Close "a"; Open "b"; Close "b";
   Close "a"]
# xml_of_tokens list;;
- : (string, string) xml =
XBr ("a",
 [XBr ("a",
   [XBr ("a", [XLf None]); XLf (Some "hage"); XLf (Some "hoge");
    XBr ("c", [XLf (Some "hoge")])]);
  XBr ("b", [XLf None])])

あとはxmlからtokensへ変換出来ればOKなわけだ。属性に対応するのはそんなに難しく無さそうだし、Validな入力しか対応していないパーサなら結構簡単に作れるかも?

ただ、

<hoge></hoge>
<hoge/>

の区別をつけるために、

type ('a, 'b) xml = XLf of 'b | XBr of 'a * (('a, 'b) xml list) option;;

にした方がいいかも。
こうすると、両者はそれぞれ

XBr ("hoge", Some []);;
XBr ("hoge", None);;

と表せる。
属性も導入するとしたら

type ('a, 'b) xml_attr = XAttr of 'a * 'b;;
type ('a, 'b, 'c, 'd) xml = XLf of 'b | XBr of 'a * (('c, 'd) xml_attr list) option * (('a, 'b, 'c, 'd) xml list) option;;

こんな感じか。なんか煩雑だなー。