shithub: martian9

ref: efd8060bae1542dd17556dcdc182b18b5bc35d36
dir: /reader.ml/

View raw version
module T = Types.Types

exception Syntax_error of string

let token_re = Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\$\\|[^][  \n{}('\"`,;)]*"
let string_re = Str.regexp "\"\\(\\\\.\\|[^\\\\\"]\\)*\""
let macros = Env.make None

type reader =
  { form : Types.m9type
  ; tokens : string list
  }

type list_reader =
  { list_form : Types.m9type list
  ; tokens : string list
  }

let tokenize str =
  List.map
    (function
      | Str.Delim x -> String.trim x (* move trim to regex for speed? *)
      | Str.Text x -> "tokenize botch")
    (List.filter
       (function
         | Str.Delim x -> true
         | Str.Text x -> false)
       (Str.full_split token_re str))
;;

let unescape_string token =
  if Str.string_match string_re token 0
  then (
    let without_quotes = String.sub token 1 (String.length token - 2) in
    Utils.gsub
      (Str.regexp "\\\\.")
      (function
        | "\\n" -> "\n"
        | x -> String.sub x 1 1)
      without_quotes)
  else (
    output_string stderr "expected '\"', got EOF\n";
    flush stderr;
    raise End_of_file)
;;

let read_atom token =
  match token with
  | "null" -> T.Nil
  | "#t" | "#true" -> T.Bool true
  | "#f" | "#false" -> T.Bool false
  | _ ->
    (match token.[0] with
    | '0' .. '9' -> Types.number (float_of_string token)
    | '#' ->
      (match token.[1], token.[2] with
      | '\\', '0' .. '9' | '\\', 'a' .. 'z' | '\\', 'A' .. 'Z' -> T.Char token.[2]
      | _ -> Types.symbol token)
    | '-' ->
      (match String.length token with
      | 1 -> Types.symbol token
      | _ ->
        (match token.[1] with
        | '0' .. '9' -> Types.number (float_of_string token)
        | _ -> Types.symbol token))
    | '"' -> T.String (unescape_string token)
    | _ -> Types.symbol token)
;;

let rec read_list eol list_reader =
  match list_reader.tokens with
  | [] -> raise (Syntax_error ("unterminated '" ^ eol ^ "'"))
  | token :: tokens ->
    if Str.string_match (Str.regexp eol) token 0
    then { list_form = list_reader.list_form; tokens }
    else (
      let reader = read_form list_reader.tokens in
      read_list eol { list_form = list_reader.list_form @ [ reader.form ]; tokens = reader.tokens })

and read_quote sym tokens =
  let reader = read_form tokens in
  { form = Types.list [ Types.symbol sym; reader.form ]; tokens = reader.tokens }

and read_vector all_tokens =
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens ->
    (match token with
    | "(" ->
      let list_reader = read_list ")" { list_form = []; tokens } in
      { form = Types.vector list_reader.list_form; tokens = list_reader.tokens }
    | _ -> read_form tokens)

and read_macro tokens =
  let list_reader = read_list ")" { list_form = []; tokens } in
  print_endline ("MACRO: " ^ Printer.dump list_reader.list_form);
  (match list_reader.list_form with
     (* | sym :: T.List { T.value = [ T.Symbol { T.value = "syntax-rules" }; clause ] } -> *)
   | sym :: rest ->
      print_endline ("  sym: " ^ Printer.print sym true);
      print_endline ("    rest: " ^ Printer.dump rest);
      (match rest with
       | T.List { T.value = [ T.Symbol { T.value = "syntax-rules" }; foo ] } ->
          print_endline ("    foo: " ^ foo)
       | _ -> print_endline ("xxxxxxxx"))
   | _ as x -> print_endline ("  rest: " ^ Printer.dump x));
  (* | T.List { T.value = [ T.Symbol { T.value = "define-syntax" }; keyword; T.List { T.value = macro } ] } ->
   *    (match macro with
   *     | _ :: literals :: groups ->
   *        let macro_entry = Types.macro (Printer.print keyword true) literals (Types.list groups) in
   *        Env.set env keyword macro_entry;
   *        macro_entry) *)
  { form = Types.list list_reader.list_form; tokens = list_reader.tokens }

(*    (match ast with
 *    | [] -> raise End_of_file
 *    | hd :: tl -> print_endline ("   macro: " ^ String.concat " " tl));
 *    raise (Syntax_error ("macro botch"))
 * | _ -> raise (Syntax_error "bad macro read") *)
and read_form all_tokens =
  (* print_endline ("READ_FORM: " ^ String.concat " " all_tokens); *)
  match all_tokens with
  | [] -> raise End_of_file
  | token :: tokens ->
    (match token with
    | "'" -> read_quote "quote" tokens
    | "`" -> read_quote "quasiquote" tokens
    | "#" -> read_vector tokens
    | "#|" ->
      let list_reader = read_list "|#" { list_form = []; tokens } in
      print_endline ("block comment: " ^ Printer.dump list_reader.list_form);
      { form = T.Unspecified; tokens = list_reader.tokens }
    | "(" ->
      let list_reader = read_list ")" { list_form = []; tokens } in
      { form = Types.list list_reader.list_form; tokens = list_reader.tokens }
    | "" | "\t" | "\n" -> read_form tokens
    | "define-syntax" -> read_macro tokens
    | _ ->
      if token.[0] = ';'
      then (
        let list_reader = read_list "\\n" { list_form = []; tokens } in
        print_endline ("line comment: " ^ String.concat " " list_reader.tokens);
        { form = T.Unspecified; tokens = list_reader.tokens })
      else { form = read_atom token; tokens })
;;

let slurp filename =
  let chan = open_in filename in
  let b = Buffer.create 27 in
  Buffer.add_channel b chan (in_channel_length chan);
  close_in chan;
  Buffer.contents b
;;

let read str = (read_form (tokenize str)).form